--- /dev/null
+usr/sbin/dnsmasq
+#usr/share/man/man8/dnsmasq.8
#
# check if the backup file already exists
if [ -e /var/ipfire/backup/update_$OLDVERSION-$NEWVERSION.tar.bz2 ]; then
- echo Error! The backupfile of this update already exist!!!
- echo Have you already installed this update?
- exit 3
+ echo Moving backup to backup-old ...
+ mv -f /var/ipfire/backup/update_$OLDVERSION-$NEWVERSION.tar.bz2 \
+ /var/ipfire/backup/update_$OLDVERSION-$NEWVERSION-old.tar.bz2
fi
echo First we made a backup of all files that was inside of the
echo update archive. This may take a while ...
#
# Remove obsolete packages
#
-echo '#!/bin/sh' > /tmp/remove_obsolete_paks
+echo '#!/bin/bash' > /tmp/remove_obsolete_paks
echo 'while [ "$(ps -A | grep " update.sh")" != "" ]; do' >> /tmp/remove_obsolete_paks
echo ' sleep 2' >> /tmp/remove_obsolete_paks
echo 'done' >> /tmp/remove_obsolete_paks
echo 'while [ "$(ps -A | grep " pakfire")" != "" ]; do' >> /tmp/remove_obsolete_paks
echo ' sleep 2' >> /tmp/remove_obsolete_paks
echo 'done' >> /tmp/remove_obsolete_paks
-echo 'pakfire remove zaptel -y' >> /tmp/remove_obsolete_paks
+echo '/opt/pakfire/pakfire remove zaptel -y' >> /tmp/remove_obsolete_paks
echo 'echo' >> /tmp/remove_obsolete_paks
echo 'echo Update to IPFire $NEWVERSION finished. Please reboot... ' >> /tmp/remove_obsolete_paks
echo 'echo' >> /tmp/remove_obsolete_paks
* openmailadmin-1.0.0
* openssh-4.7p1
* openssl-0.9.8g
-* openswan-2.4.13
-* openswan-2.4.13-kmod
+* openswan-2.4.12
+* openswan-2.4.12-kmod
* openvpn-2.0.9
* pam_mysql-0.7RC1
* patch-2.5.4
+++ /dev/null
-###############################################################################
-# #
-# IPFire.org - A linux based firewall #
-# Copyright (C) 2007 Michael Tremer & Christian Schmidt #
-# #
-# This program is free software: you can redistribute it and/or modify #
-# it under the terms of the GNU General Public License as published by #
-# the Free Software Foundation, either version 3 of the License, or #
-# (at your option) any later version. #
-# #
-# This program is distributed in the hope that it will be useful, #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
-# GNU General Public License for more details. #
-# #
-# You should have received a copy of the GNU General Public License #
-# along with this program. If not, see <http://www.gnu.org/licenses/>. #
-# #
-###############################################################################
-
-###############################################################################
-# Definitions
-###############################################################################
-
-include Config
-
-VER = 1.2.40.2
-
-THISAPP = atl1-$(VER)
-DL_FILE = $(THISAPP).tar.bz2
-DL_FROM = $(URL_IPFIRE)
-DIR_APP = $(DIR_SRC)/$(THISAPP)
-ifeq "$(SMP)" "1"
- TARGET = $(DIR_INFO)/$(THISAPP)-smp
-else
- TARGET = $(DIR_INFO)/$(THISAPP)
-endif
-
-
-###############################################################################
-# Top-level Rules
-###############################################################################
-
-objects = $(DL_FILE)
-
-$(DL_FILE) = $(DL_FROM)/$(DL_FILE)
-
-$(DL_FILE)_MD5 = b9f30f9d3c9ab2e98309f8d229713b27
-
-install : $(TARGET)
-
-check : $(patsubst %,$(DIR_CHK)/%,$(objects))
-
-download :$(patsubst %,$(DIR_DL)/%,$(objects))
-
-md5 : $(subst %,%_MD5,$(objects))
-
-dist:
- $(PAK)
-
-###############################################################################
-# Downloading, checking, md5sum
-###############################################################################
-
-$(patsubst %,$(DIR_CHK)/%,$(objects)) :
- @$(CHECK)
-
-$(patsubst %,$(DIR_DL)/%,$(objects)) :
- @$(LOAD)
-
-$(subst %,%_MD5,$(objects)) :
- @$(MD5)
-
-###############################################################################
-# Installation Details
-###############################################################################
-
-$(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects))
- @$(PREBUILD)
- @rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar jxf $(DIR_DL)/$(DL_FILE)
-
-ifeq "$(SMP)" "1"
- cd $(DIR_APP)/src && make -C /lib/modules/$(KVER)-ipfire-smp/build/ SUBDIRS=$(DIR_APP)/src modules
- cd $(DIR_APP)/src && install -m 644 atl1.ko /lib/modules/$(KVER)-ipfire-smp/kernel/drivers/net
-else
- cd $(DIR_APP)/src && make -C /lib/modules/$(KVER)-ipfire/build/ SUBDIRS=$(DIR_APP)/src modules
- cd $(DIR_APP)/src && install -m 644 atl1.ko /lib/modules/$(KVER)-ipfire/kernel/drivers/net
-endif
- @rm -rf $(DIR_APP)
- @$(POSTBUILD)
# Security fix for CIFS & Netfilter SNMP
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-2.6.20.21-additional_check_on_BER_decoding.patch
- # Openswan nat-t
+ # Openswan
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/openswan-2.4.x.kernel-2.6.23-natt.patch
# Reiser4
include Config
-VER = 2.4.13
+VER = 2.4.12
THISAPP = openswan-$(VER)
DL_FILE = $(THISAPP).tar.gz
$(DL_FILE) = $(DL_FROM)/$(DL_FILE)
-$(DL_FILE)_MD5 = 0c2505cf2639a7de051e815f41e8e1f4
+$(DL_FILE)_MD5 = 0bca0cc205d2d83eff64a7cea825ce7a
install : $(TARGET)
+++ /dev/null
-packaging/utils/nattpatch 2.6
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ nat-t/include/net/xfrmudp.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,10 @@
-+/*
-+ * pointer to function for type that xfrm4_input wants, to permit
-+ * decoupling of XFRM from udp.c
-+ */
-+#define HAVE_XFRM4_UDP_REGISTER
-+
-+typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type);
-+extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
-+ , xfrm4_rcv_encap_t *oldfunc);
-+extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func);
---- /distros/kernel/linux-2.6.11.2/net/ipv4/Kconfig 2005-03-09 03:12:33.000000000 -0500
-+++ swan26/net/ipv4/Kconfig 2005-04-04 18:46:13.000000000 -0400
-@@ -351,2 +351,8 @@
-
-+config IPSEC_NAT_TRAVERSAL
-+ bool "IPSEC NAT-Traversal (KLIPS compatible)"
-+ depends on INET
-+ ---help---
-+ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP.
-+
- config IP_TCPDIAG
---- plain26/net/ipv4/udp.c.orig 2006-01-02 22:21:10.000000000 -0500
-+++ plain26/net/ipv4/udp.c 2006-01-12 20:18:57.000000000 -0500
-@@ -108,6 +108,7 @@
- */
-
- DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
-+#include <net/xfrmudp.h>
-
- struct hlist_head udp_hash[UDP_HTABLE_SIZE];
- DEFINE_RWLOCK(udp_hash_lock);
-@@ -914,6 +915,44 @@
- return 0;
- }
-
-+#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+
-+/* if XFRM isn't a module, then register it directly. */
-+#if !defined(CONFIG_XFRM_MODULE)
-+static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = xfrm4_rcv_encap;
-+#else
-+static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL;
-+#endif
-+
-+static xfrm4_rcv_encap_t xfrm4_rcv_encap_func;
-+
-+int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
-+ , xfrm4_rcv_encap_t *oldfunc)
-+{
-+ if(oldfunc != NULL) {
-+ *oldfunc = xfrm4_rcv_encap_func;
-+ }
-+
-+#if 0
-+ if(xfrm4_rcv_encap_func != NULL)
-+ return -1;
-+#endif
-+
-+ xfrm4_rcv_encap_func = func;
-+ return 0;
-+}
-+
-+int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func)
-+{
-+ if(xfrm4_rcv_encap_func != func)
-+ return -1;
-+
-+ xfrm4_rcv_encap_func = NULL;
-+ return 0;
-+}
-+#endif /* CONFIG_XFRM || CONFIG_IPSEC_NAT_TRAVERSAL */
-+
-+
- /* return:
- * 1 if the the UDP system should process it
- * 0 if we should drop this packet
-@@ -921,9 +960,9 @@
- */
- static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
- {
--#ifndef CONFIG_XFRM
-+#if !defined(CONFIG_XFRM) && !defined(CONFIG_IPSEC_NAT_TRAVERSAL)
- return 1;
--#else
-+#else /* either CONFIG_XFRM or CONFIG_IPSEC_NAT_TRAVERSAL */
- struct udp_sock *up = udp_sk(sk);
- struct udphdr *uh;
- struct iphdr *iph;
-@@ -1049,11 +1088,15 @@
- kfree_skb(skb);
- return 0;
- }
-- if (ret < 0) {
-- /* process the ESP packet */
-- ret = xfrm4_rcv_encap(skb, up->encap_type);
-- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
-- return -ret;
-+ if (ret < 0) {
-+ if(xfrm4_rcv_encap_func != NULL) {
-+ ret = (*xfrm4_rcv_encap_func)(skb, up->encap_type);
-+ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
-+ } else {
-+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
-+ ret = 1;
-+ }
-+ return ret;
- }
- /* FALLTHROUGH -- it's a UDP Packet */
- }
-@@ -1732,3 +1775,8 @@
- EXPORT_SYMBOL(udp_proc_register);
- EXPORT_SYMBOL(udp_proc_unregister);
- #endif
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+EXPORT_SYMBOL(udp4_register_esp_rcvencap);
-+EXPORT_SYMBOL(udp4_unregister_esp_rcvencap);
-+#endif
+++ /dev/null
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ nat-t/include/net/xfrmudp.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,10 @@
-+/*
-+ * pointer to function for type that xfrm4_input wants, to permit
-+ * decoupling of XFRM from udp.c
-+ */
-+#define HAVE_XFRM4_UDP_REGISTER
-+
-+typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type);
-+extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
-+ , xfrm4_rcv_encap_t *oldfunc);
-+extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func);
---- /distros/kernel/linux-2.6.11.2/net/ipv4/Kconfig 2005-03-09 03:12:33.000000000 -0500
-+++ swan26/net/ipv4/Kconfig 2005-04-04 18:46:13.000000000 -0400
-@@ -351,2 +351,8 @@
-
-+config IPSEC_NAT_TRAVERSAL
-+ bool "IPSEC NAT-Traversal (KLIPS compatible)"
-+ depends on INET
-+ ---help---
-+ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP.
-+
- config IP_TCPDIAG
---- plain26/net/ipv4/udp.c.orig 2006-12-28 20:53:17.000000000 -0500
-+++ plain26/net/ipv4/udp.c 2007-05-11 10:22:50.000000000 -0400
-@@ -108,6 +108,7 @@
- #include <net/inet_common.h>
- #include <net/checksum.h>
- #include <net/xfrm.h>
-+#include <net/xfrmudp.h>
-
- /*
- * Snmp MIB for the UDP layer
-@@ -881,6 +882,31 @@
- sk_common_release(sk);
- }
-
-+#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+
-+static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL;
-+int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
-+ , xfrm4_rcv_encap_t *oldfunc)
-+{
-+ if(oldfunc != NULL) {
-+ *oldfunc = xfrm4_rcv_encap_func;
-+ }
-+
-+ xfrm4_rcv_encap_func = func;
-+ return 0;
-+}
-+
-+int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func)
-+{
-+ if(xfrm4_rcv_encap_func != func)
-+ return -1;
-+
-+ xfrm4_rcv_encap_func = NULL;
-+ return 0;
-+}
-+#endif /* CONFIG_XFRM || CONFIG_IPSEC_NAT_TRAVERSAL */
-+
-+
- /* return:
- * 1 if the the UDP system should process it
- * 0 if we should drop this packet
-@@ -888,9 +914,9 @@
- */
- static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
- {
--#ifndef CONFIG_XFRM
-+#if !defined(CONFIG_XFRM) && !defined(CONFIG_IPSEC_NAT_TRAVERSAL)
- return 1;
--#else
-+#else /* either CONFIG_XFRM or CONFIG_IPSEC_NAT_TRAVERSAL */
- struct udp_sock *up = udp_sk(sk);
- struct udphdr *uh;
- struct iphdr *iph;
-@@ -1018,10 +1044,27 @@
- return 0;
- }
- if (ret < 0) {
-- /* process the ESP packet */
-- ret = xfrm4_rcv_encap(skb, up->encap_type);
-- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
-- return -ret;
-+ if(xfrm4_rcv_encap_func != NULL)
-+ ret = (*xfrm4_rcv_encap_func)(skb, up->encap_type);
-+
-+ switch(ret) {
-+ case 1:
-+ /* FALLTHROUGH to send-up */;
-+ break;
-+
-+ case 0:
-+ /* PROCESSED, free it */
-+ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
-+ return 0;
-+
-+ case -1:
-+ /* PACKET wasn't for _func, or no func, pass it
-+ * to stock function
-+ */
-+ ret = xfrm4_rcv_encap(skb, up->encap_type);
-+ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS);
-+ return -ret;
-+ }
- }
- /* FALLTHROUGH -- it's a UDP Packet */
- }
-@@ -1110,7 +1153,6 @@
- /*
- * All we need to do is get the socket, and then do a checksum.
- */
--
- int udp_rcv(struct sk_buff *skb)
- {
- struct sock *sk;
-@@ -1599,3 +1641,9 @@
- EXPORT_SYMBOL(udp_proc_register);
- EXPORT_SYMBOL(udp_proc_unregister);
- #endif
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+EXPORT_SYMBOL(udp4_register_esp_rcvencap);
-+EXPORT_SYMBOL(udp4_unregister_esp_rcvencap);
-+#endif
-+
-make[1]: Leaving directory `/usr/src/openswan-2.6.14'
+++ /dev/null
---- _startklips.orig 2008-07-11 01:55:19.000000000 +0200
-+++ _startklips 2008-07-12 09:11:56.000000000 +0200
-@@ -149,23 +149,35 @@
-
- # figure out ifconfig for interface
- addr=
-- eval `ifconfig $phys |
-- awk '$1 == "inet" && $2 ~ /^addr:/ && $NF ~ /^Mask:/ {
-- gsub(/:/, " ", $0)
-- print "addr=" $3
-- other = $5
-- if ($4 == "Bcast")
-- print "type=broadcast"
-- else if ($4 == "P-t-P")
-- print "type=pointopoint"
-- else if (NF == 5) {
-- print "type="
-- other = ""
-- } else
-- print "type=unknown"
-- print "otheraddr=" other
-- print "mask=" $NF
-- }'`
-+ eval `ip addr show $phys | awk '$3 ~ /BROADCAST|POINTOPOINT/ {
-+ if ($3 ~ /BROADCAST/)
-+ print "type=broadcast";
-+ else if ($3 ~ /POINTOPOINT/)
-+ print "type=pointopoint";
-+ else {
-+ print "type=";
-+ }
-+ }'`
-+
-+ if [ "$type" == "broadcast" ]; then
-+ eval `ip addr show $phys | awk '$1 == "inet" { gsub(/\//, " ");
-+ print "addr=" $2;
-+ print "mask=" $3;
-+ print "otheraddr=" $5;
-+ }'`
-+ elif [ "$type" == "pointopoint" ]; then
-+ eval `ip addr show $phys | awk '$1 == "inet" { gsub(/\//, " ");
-+ print "addr=" $2;
-+ print "mask=" $5;
-+ print "otheraddr=" $4;
-+ }'`
-+ else
-+ type="unknown"
-+ otheraddr=
-+ fi
-+
-+ eval `whatmask /$mask | awk -F': ' '$1 ~ /^Netmask =/ { print "mask=" $2 }'`
-+
- if test " $addr" = " "
- then
- echo "unable to determine address of \`$phys'"
+++ /dev/null
---- _updown.klips.orig 2008-07-11 01:55:19.000000000 +0200
-+++ _updown.klips 2008-07-12 09:20:26.000000000 +0200
-@@ -407,8 +407,8 @@
- # opportunistic encryption work around
- # need to provide route that eclipses default, without
- # replacing it.
-- it="ip route $1 0.0.0.0/1 $parms2 $parms3 &&
-- ip route $1 128.0.0.0/1 $parms2 $parms3"
-+ #it="ip route $1 0.0.0.0/1 $parms2 $parms3 &&
-+ # ip route $1 128.0.0.0/1 $parms2 $parms3"
- ;;
- *) it="ip route $1 $parms $parms2 $parms3"
- ;;
-@@ -432,13 +432,13 @@
- prepare-host:*|prepare-client:*)
- # delete possibly-existing route (preliminary to adding a route)
- case "$PLUTO_PEER_CLIENT" in
-- "0.0.0.0/0")
-+ "0.0.0.0/0")
- # need to provide route that eclipses default, without
- # replacing it.
- parms1="0.0.0.0/1"
- parms2="128.0.0.0/1"
-- it="ip route delete $parms1 $IPROUTEARGS 2>&1 ; ip route delete $parms2 $IPROUTEARGS 2>&1"
-- oops="`ip route delete $parms1 $IPROUTEARGS 2>&1 ; ip route delete $parms2 $IPROUTEARGS 2>&1`"
-+ # it="ip route delete $parms1 $IPROUTEARGS 2>&1 ; ip route delete $parms2 $IPROUTEARGS 2>&1"
-+ # oops="`ip route delete $parms1 $IPROUTEARGS 2>&1 ; ip route delete $parms2 $IPROUTEARGS 2>&1`"
- ;;
- *)
- parms="$PLUTO_PEER_CLIENT $IPROUTEARGS"
+++ /dev/null
-packaging/utils/kernelpatch 2.6
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/README.openswan-2 Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,112 @@
-+*
-+* RCSID $Id: README.openswan-2,v 1.1 2003/12/10 01:07:49 mcr Exp $
-+*
-+
-+ ****************************************
-+ * IPSEC for Linux, Release 2.xx series *
-+ ****************************************
-+
-+
-+
-+1. Files
-+
-+The contents of linux/net/ipsec/ (see below) join the linux kernel source tree.
-+as provided for higher up.
-+
-+The programs/ directory contains the user-level utilities which you need
-+to run IPSEC. See the top-level top/INSTALL to compile and install them.
-+
-+The testing/ directory contains test scripts.
-+
-+The doc/ directory contains -- what else -- documentation.
-+
-+1.1. Kernel files
-+
-+The following are found in net/ipsec/:
-+
-+Makefile The Makefile
-+Config.in The configuration script for make menuconfig
-+defconfig Configuration defaults for first time.
-+
-+radij.c General-purpose radix-tree operations
-+
-+ipsec_ipcomp.c IPCOMP encapsulate/decapsulate code.
-+ipsec_ah.c Authentication Header (AH) encapsulate/decapsulate code.
-+ipsec_esp.c Encapsulated Security Payload (ESP) encap/decap code.
-+
-+pfkey_v2.c PF_KEYv2 socket interface code.
-+pfkey_v2_parser.c PF_KEYv2 message parsing and processing code.
-+
-+ipsec_init.c Initialization code, /proc interface.
-+ipsec_radij.c Interface with the radix tree code.
-+ipsec_netlink.c Interface with the netlink code.
-+ipsec_xform.c Routines and structures common to transforms.
-+ipsec_tunnel.c The outgoing packet processing code.
-+ipsec_rcv.c The incoming packet processing code.
-+ipsec_md5c.c Somewhat modified RSADSI MD5 C code.
-+ipsec_sha1.c Somewhat modified Steve Reid SHA-1 C code.
-+
-+sysctl_net_ipsec.c /proc/sys/net/ipsec/* variable definitions.
-+
-+version.c symbolic link to project version.
-+
-+radij.h Headers for radij.c
-+
-+ipcomp.h Headers used by IPCOMP code.
-+
-+ipsec_radij.h Interface with the radix tree code.
-+ipsec_netlink.h Headers used by the netlink interface.
-+ipsec_encap.h Headers defining encapsulation structures.
-+ipsec_xform.h Transform headers.
-+ipsec_tunnel.h Headers used by tunneling code.
-+ipsec_ipe4.h Headers for the IP-in-IP code.
-+ipsec_ah.h Headers common to AH transforms.
-+ipsec_md5h.h RSADSI MD5 headers.
-+ipsec_sha1.h SHA-1 headers.
-+ipsec_esp.h Headers common to ESP transforms.
-+ipsec_rcv.h Headers for incoming packet processing code.
-+
-+1.2. User-level files.
-+
-+The following are found in utils/:
-+
-+eroute.c Create an "extended route" source code
-+spi.c Set up Security Associations source code
-+spigrp.c Link SPIs together source code.
-+tncfg.c Configure the tunneling features of the virtual interface
-+ source code
-+klipsdebug.c Set/reset klips debugging features source code.
-+version.c symbolic link to project version.
-+
-+eroute.8 Create an "extended route" manual page
-+spi.8 Set up Security Associations manual page
-+spigrp.8 Link SPIs together manual page
-+tncfg.8 Configure the tunneling features of the virtual interface
-+ manual page
-+klipsdebug.8 Set/reset klips debugging features manual page
-+
-+eroute.5 /proc/net/ipsec_eroute format manual page
-+spi.5 /proc/net/ipsec_spi format manual page
-+spigrp.5 /proc/net/ipsec_spigrp format manual page
-+tncfg.5 /proc/net/ipsec_tncfg format manual page
-+klipsdebug.5 /proc/net/ipsec_klipsdebug format manual page
-+version.5 /proc/net/ipsec_version format manual page
-+pf_key.5 /proc/net/pf_key format manual page
-+
-+Makefile Utilities makefile.
-+
-+*.8 Manpages for the respective utils.
-+
-+
-+1.3. Test files
-+
-+The test scripts are located in testing/ and documentation is found
-+at doc/src/umltesting.html. Automated testing via "make check" is available
-+provided that the User-Mode-Linux patches are available.
-+
-+*
-+* $Log: README.openswan-2,v $
-+* Revision 1.1 2003/12/10 01:07:49 mcr
-+* documentation for additions.
-+*
-+*
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/des/des_locl.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,511 @@
-+/* crypto/des/des_locl.org */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-+ *
-+ * Always modify des_locl.org since des_locl.h is automatically generated from
-+ * it during SSLeay configuration.
-+ *
-+ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-+ */
-+
-+#ifndef HEADER_DES_LOCL_H
-+#define HEADER_DES_LOCL_H
-+
-+#if defined(WIN32) || defined(WIN16)
-+#ifndef MSDOS
-+#define MSDOS
-+#endif
-+#endif
-+
-+#include "klips-crypto/des.h"
-+#ifdef OCF_ASSIST
-+#include "klips-crypto/ocf_assist.h"
-+#endif
-+
-+#ifndef DES_DEFAULT_OPTIONS
-+/* the following is tweaked from a config script, that is why it is a
-+ * protected undef/define */
-+#ifndef DES_PTR
-+#define DES_PTR
-+#endif
-+
-+/* This helps C compiler generate the correct code for multiple functional
-+ * units. It reduces register dependencies at the expense of 2 more
-+ * registers */
-+#ifndef DES_RISC1
-+#define DES_RISC1
-+#endif
-+
-+#ifndef DES_RISC2
-+#undef DES_RISC2
-+#endif
-+
-+#if defined(DES_RISC1) && defined(DES_RISC2)
-+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
-+#endif
-+
-+/* Unroll the inner loop, this sometimes helps, sometimes hinders.
-+ * Very much CPU dependent */
-+#ifndef DES_UNROLL
-+#define DES_UNROLL
-+#endif
-+
-+/* These default values were supplied by
-+ * Peter Gutman <pgut001@cs.auckland.ac.nz>
-+ * They are only used if nothing else has been defined */
-+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
-+/* Special defines which change the way the code is built depending on the
-+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
-+ even newer MIPS CPU's, but at the moment one size fits all for
-+ optimization options. Older Sparc's work better with only UNROLL, but
-+ there's no way to tell at compile time what it is you're running on */
-+
-+#if defined( sun ) /* Newer Sparc's */
-+ #define DES_PTR
-+ #define DES_RISC1
-+ #define DES_UNROLL
-+#elif defined( __ultrix ) /* Older MIPS */
-+ #define DES_PTR
-+ #define DES_RISC2
-+ #define DES_UNROLL
-+#elif defined( __osf1__ ) /* Alpha */
-+ #define DES_PTR
-+ #define DES_RISC2
-+#elif defined ( _AIX ) /* RS6000 */
-+ /* Unknown */
-+#elif defined( __hpux ) /* HP-PA */
-+ /* Unknown */
-+#elif defined( __aux ) /* 68K */
-+ /* Unknown */
-+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
-+ #define DES_UNROLL
-+#elif defined( __sgi ) /* Newer MIPS */
-+ #define DES_PTR
-+ #define DES_RISC2
-+ #define DES_UNROLL
-+#elif defined( i386 ) /* x86 boxes, should be gcc */
-+ #define DES_PTR
-+ #define DES_RISC1
-+ #define DES_UNROLL
-+#endif /* Systems-specific speed defines */
-+#endif
-+
-+#endif /* DES_DEFAULT_OPTIONS */
-+
-+#ifdef MSDOS /* Visual C++ 2.1 (Windows NT/95) */
-+#include <stdlib.h>
-+#include <errno.h>
-+#include <time.h>
-+#include <io.h>
-+#ifndef RAND
-+#define RAND
-+#endif
-+#undef NOPROTO
-+#endif
-+
-+#if defined(__STDC__) || defined(VMS) || defined(M_XENIX) || defined(MSDOS)
-+#ifndef __KERNEL__
-+#include <string.h>
-+#else
-+#include <linux/string.h>
-+#endif
-+#endif
-+
-+#ifndef RAND
-+#define RAND
-+#endif
-+
-+#ifdef linux
-+#undef RAND
-+#endif
-+
-+#ifdef MSDOS
-+#define getpid() 2
-+#define RAND
-+#undef NOPROTO
-+#endif
-+
-+#if defined(NOCONST)
-+#define const
-+#endif
-+
-+#ifdef __STDC__
-+#undef NOPROTO
-+#endif
-+
-+#define ITERATIONS 16
-+#define HALF_ITERATIONS 8
-+
-+/* used in des_read and des_write */
-+#define MAXWRITE (1024*16)
-+#define BSIZE (MAXWRITE+4)
-+
-+#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \
-+ l|=((DES_LONG)(*((c)++)))<< 8L, \
-+ l|=((DES_LONG)(*((c)++)))<<16L, \
-+ l|=((DES_LONG)(*((c)++)))<<24L)
-+
-+/* NOTE - c is not incremented as per c2l */
-+#define c2ln(c,l1,l2,n) { \
-+ c+=n; \
-+ l1=l2=0; \
-+ switch (n) { \
-+ case 8: l2 =((DES_LONG)(*(--(c))))<<24L; \
-+ case 7: l2|=((DES_LONG)(*(--(c))))<<16L; \
-+ case 6: l2|=((DES_LONG)(*(--(c))))<< 8L; \
-+ case 5: l2|=((DES_LONG)(*(--(c)))); \
-+ case 4: l1 =((DES_LONG)(*(--(c))))<<24L; \
-+ case 3: l1|=((DES_LONG)(*(--(c))))<<16L; \
-+ case 2: l1|=((DES_LONG)(*(--(c))))<< 8L; \
-+ case 1: l1|=((DES_LONG)(*(--(c)))); \
-+ } \
-+ }
-+
-+#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
-+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
-+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
-+ *((c)++)=(unsigned char)(((l)>>24L)&0xff))
-+
-+/* replacements for htonl and ntohl since I have no idea what to do
-+ * when faced with machines with 8 byte longs. */
-+#define HDRSIZE 4
-+
-+#define n2l(c,l) (l =((DES_LONG)(*((c)++)))<<24L, \
-+ l|=((DES_LONG)(*((c)++)))<<16L, \
-+ l|=((DES_LONG)(*((c)++)))<< 8L, \
-+ l|=((DES_LONG)(*((c)++))))
-+
-+#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \
-+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
-+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
-+ *((c)++)=(unsigned char)(((l) )&0xff))
-+
-+/* NOTE - c is not incremented as per l2c */
-+#define l2cn(l1,l2,c,n) { \
-+ c+=n; \
-+ switch (n) { \
-+ case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \
-+ case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \
-+ case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \
-+ case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
-+ case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \
-+ case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \
-+ case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \
-+ case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
-+ } \
-+ }
-+
-+#define ROTATE(a,n) (((a)>>(n))+((a)<<(32-(n))))
-+
-+/* Don't worry about the LOAD_DATA() stuff, that is used by
-+ * fcrypt() to add it's little bit to the front */
-+
-+#ifdef DES_FCRYPT
-+
-+#define LOAD_DATA_tmp(R,S,u,t,E0,E1) \
-+ { DES_LONG tmp; LOAD_DATA(R,S,u,t,E0,E1,tmp); }
-+
-+#define LOAD_DATA(R,S,u,t,E0,E1,tmp) \
-+ t=R^(R>>16L); \
-+ u=t&E0; t&=E1; \
-+ tmp=(u<<16); u^=R^s[S ]; u^=tmp; \
-+ tmp=(t<<16); t^=R^s[S+1]; t^=tmp
-+#else
-+#define LOAD_DATA_tmp(a,b,c,d,e,f) LOAD_DATA(a,b,c,d,e,f,g)
-+#define LOAD_DATA(R,S,u,t,E0,E1,tmp) \
-+ u=R^s[S ]; \
-+ t=R^s[S+1]
-+#endif
-+
-+/* The changes to this macro may help or hinder, depending on the
-+ * compiler and the architecture. gcc2 always seems to do well :-).
-+ * Inspired by Dana How <how@isl.stanford.edu>
-+ * DO NOT use the alternative version on machines with 8 byte longs.
-+ * It does not seem to work on the Alpha, even when DES_LONG is 4
-+ * bytes, probably an issue of accessing non-word aligned objects :-( */
-+#ifdef DES_PTR
-+
-+/* It recently occurred to me that 0^0^0^0^0^0^0 == 0, so there
-+ * is no reason to not xor all the sub items together. This potentially
-+ * saves a register since things can be xored directly into L */
-+
-+#if defined(DES_RISC1) || defined(DES_RISC2)
-+#ifdef DES_RISC1
-+#define D_ENCRYPT(LL,R,S) { \
-+ unsigned int u1,u2,u3; \
-+ LOAD_DATA(R,S,u,t,E0,E1,u1); \
-+ u2=(int)u>>8L; \
-+ u1=(int)u&0xfc; \
-+ u2&=0xfc; \
-+ t=ROTATE(t,4); \
-+ u>>=16L; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP +u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x200+u2); \
-+ u3=(int)(u>>8L); \
-+ u1=(int)u&0xfc; \
-+ u3&=0xfc; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x400+u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x600+u3); \
-+ u2=(int)t>>8L; \
-+ u1=(int)t&0xfc; \
-+ u2&=0xfc; \
-+ t>>=16L; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x100+u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x300+u2); \
-+ u3=(int)t>>8L; \
-+ u1=(int)t&0xfc; \
-+ u3&=0xfc; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x500+u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x700+u3); }
-+#endif
-+#ifdef DES_RISC2
-+#define D_ENCRYPT(LL,R,S) { \
-+ unsigned int u1,u2,s1,s2; \
-+ LOAD_DATA(R,S,u,t,E0,E1,u1); \
-+ u2=(int)u>>8L; \
-+ u1=(int)u&0xfc; \
-+ u2&=0xfc; \
-+ t=ROTATE(t,4); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP +u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x200+u2); \
-+ s1=(int)(u>>16L); \
-+ s2=(int)(u>>24L); \
-+ s1&=0xfc; \
-+ s2&=0xfc; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x400+s1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x600+s2); \
-+ u2=(int)t>>8L; \
-+ u1=(int)t&0xfc; \
-+ u2&=0xfc; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x100+u1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x300+u2); \
-+ s1=(int)(t>>16L); \
-+ s2=(int)(t>>24L); \
-+ s1&=0xfc; \
-+ s2&=0xfc; \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x500+s1); \
-+ LL^= *(DES_LONG *)((unsigned char *)des_SP+0x700+s2); }
-+#endif
-+#else
-+#define D_ENCRYPT(LL,R,S) { \
-+ LOAD_DATA_tmp(R,S,u,t,E0,E1); \
-+ t=ROTATE(t,4); \
-+ LL^= \
-+ *(DES_LONG *)((unsigned char *)des_SP +((u )&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x200+((u>> 8L)&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x400+((u>>16L)&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x600+((u>>24L)&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x100+((t )&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x300+((t>> 8L)&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x500+((t>>16L)&0xfc))^ \
-+ *(DES_LONG *)((unsigned char *)des_SP+0x700+((t>>24L)&0xfc)); }
-+#endif
-+
-+#else /* original version */
-+
-+#if defined(DES_RISC1) || defined(DES_RISC2)
-+#ifdef DES_RISC1
-+#define D_ENCRYPT(LL,R,S) {\
-+ unsigned int u1,u2,u3; \
-+ LOAD_DATA(R,S,u,t,E0,E1,u1); \
-+ u>>=2L; \
-+ t=ROTATE(t,6); \
-+ u2=(int)u>>8L; \
-+ u1=(int)u&0x3f; \
-+ u2&=0x3f; \
-+ u>>=16L; \
-+ LL^=des_SPtrans[0][u1]; \
-+ LL^=des_SPtrans[2][u2]; \
-+ u3=(int)u>>8L; \
-+ u1=(int)u&0x3f; \
-+ u3&=0x3f; \
-+ LL^=des_SPtrans[4][u1]; \
-+ LL^=des_SPtrans[6][u3]; \
-+ u2=(int)t>>8L; \
-+ u1=(int)t&0x3f; \
-+ u2&=0x3f; \
-+ t>>=16L; \
-+ LL^=des_SPtrans[1][u1]; \
-+ LL^=des_SPtrans[3][u2]; \
-+ u3=(int)t>>8L; \
-+ u1=(int)t&0x3f; \
-+ u3&=0x3f; \
-+ LL^=des_SPtrans[5][u1]; \
-+ LL^=des_SPtrans[7][u3]; }
-+#endif
-+#ifdef DES_RISC2
-+#define D_ENCRYPT(LL,R,S) {\
-+ unsigned int u1,u2,s1,s2; \
-+ LOAD_DATA(R,S,u,t,E0,E1,u1); \
-+ u>>=2L; \
-+ t=ROTATE(t,6); \
-+ u2=(int)u>>8L; \
-+ u1=(int)u&0x3f; \
-+ u2&=0x3f; \
-+ LL^=des_SPtrans[0][u1]; \
-+ LL^=des_SPtrans[2][u2]; \
-+ s1=(int)u>>16L; \
-+ s2=(int)u>>24L; \
-+ s1&=0x3f; \
-+ s2&=0x3f; \
-+ LL^=des_SPtrans[4][s1]; \
-+ LL^=des_SPtrans[6][s2]; \
-+ u2=(int)t>>8L; \
-+ u1=(int)t&0x3f; \
-+ u2&=0x3f; \
-+ LL^=des_SPtrans[1][u1]; \
-+ LL^=des_SPtrans[3][u2]; \
-+ s1=(int)t>>16; \
-+ s2=(int)t>>24L; \
-+ s1&=0x3f; \
-+ s2&=0x3f; \
-+ LL^=des_SPtrans[5][s1]; \
-+ LL^=des_SPtrans[7][s2]; }
-+#endif
-+
-+#else
-+
-+#define D_ENCRYPT(LL,R,S) {\
-+ LOAD_DATA_tmp(R,S,u,t,E0,E1); \
-+ t=ROTATE(t,4); \
-+ LL^=\
-+ des_SPtrans[0][(u>> 2L)&0x3f]^ \
-+ des_SPtrans[2][(u>>10L)&0x3f]^ \
-+ des_SPtrans[4][(u>>18L)&0x3f]^ \
-+ des_SPtrans[6][(u>>26L)&0x3f]^ \
-+ des_SPtrans[1][(t>> 2L)&0x3f]^ \
-+ des_SPtrans[3][(t>>10L)&0x3f]^ \
-+ des_SPtrans[5][(t>>18L)&0x3f]^ \
-+ des_SPtrans[7][(t>>26L)&0x3f]; }
-+#endif
-+#endif
-+
-+ /* IP and FP
-+ * The problem is more of a geometric problem that random bit fiddling.
-+ 0 1 2 3 4 5 6 7 62 54 46 38 30 22 14 6
-+ 8 9 10 11 12 13 14 15 60 52 44 36 28 20 12 4
-+ 16 17 18 19 20 21 22 23 58 50 42 34 26 18 10 2
-+ 24 25 26 27 28 29 30 31 to 56 48 40 32 24 16 8 0
-+
-+ 32 33 34 35 36 37 38 39 63 55 47 39 31 23 15 7
-+ 40 41 42 43 44 45 46 47 61 53 45 37 29 21 13 5
-+ 48 49 50 51 52 53 54 55 59 51 43 35 27 19 11 3
-+ 56 57 58 59 60 61 62 63 57 49 41 33 25 17 9 1
-+
-+ The output has been subject to swaps of the form
-+ 0 1 -> 3 1 but the odd and even bits have been put into
-+ 2 3 2 0
-+ different words. The main trick is to remember that
-+ t=((l>>size)^r)&(mask);
-+ r^=t;
-+ l^=(t<<size);
-+ can be used to swap and move bits between words.
-+
-+ So l = 0 1 2 3 r = 16 17 18 19
-+ 4 5 6 7 20 21 22 23
-+ 8 9 10 11 24 25 26 27
-+ 12 13 14 15 28 29 30 31
-+ becomes (for size == 2 and mask == 0x3333)
-+ t = 2^16 3^17 -- -- l = 0 1 16 17 r = 2 3 18 19
-+ 6^20 7^21 -- -- 4 5 20 21 6 7 22 23
-+ 10^24 11^25 -- -- 8 9 24 25 10 11 24 25
-+ 14^28 15^29 -- -- 12 13 28 29 14 15 28 29
-+
-+ Thanks for hints from Richard Outerbridge - he told me IP&FP
-+ could be done in 15 xor, 10 shifts and 5 ands.
-+ When I finally started to think of the problem in 2D
-+ I first got ~42 operations without xors. When I remembered
-+ how to use xors :-) I got it to its final state.
-+ */
-+#define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\
-+ (b)^=(t),\
-+ (a)^=((t)<<(n)))
-+
-+#define IP(l,r) \
-+ { \
-+ register DES_LONG tt; \
-+ PERM_OP(r,l,tt, 4,0x0f0f0f0fL); \
-+ PERM_OP(l,r,tt,16,0x0000ffffL); \
-+ PERM_OP(r,l,tt, 2,0x33333333L); \
-+ PERM_OP(l,r,tt, 8,0x00ff00ffL); \
-+ PERM_OP(r,l,tt, 1,0x55555555L); \
-+ }
-+
-+#define FP(l,r) \
-+ { \
-+ register DES_LONG tt; \
-+ PERM_OP(l,r,tt, 1,0x55555555L); \
-+ PERM_OP(r,l,tt, 8,0x00ff00ffL); \
-+ PERM_OP(l,r,tt, 2,0x33333333L); \
-+ PERM_OP(r,l,tt,16,0x0000ffffL); \
-+ PERM_OP(l,r,tt, 4,0x0f0f0f0fL); \
-+ }
-+
-+extern const DES_LONG des_SPtrans[8][64];
-+
-+#ifndef NO_FCRYPT
-+#ifndef NOPROTO
-+void fcrypt_body(DES_LONG *out,des_key_schedule ks,
-+ DES_LONG Eswap0, DES_LONG Eswap1);
-+#else
-+void fcrypt_body();
-+#endif
-+#endif /* NO_FCRYPT */
-+
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/des/des_ver.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,60 @@
-+/* crypto/des/des_ver.h */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+extern char *DES_version; /* SSLeay version string */
-+extern char *libdes_version; /* old libdes version string */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/des/podd.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,75 @@
-+/* crypto/des/podd.h */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+static const unsigned char odd_parity[256]={
-+ 1, 1, 2, 2, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 14, 14,
-+ 16, 16, 19, 19, 21, 21, 22, 22, 25, 25, 26, 26, 28, 28, 31, 31,
-+ 32, 32, 35, 35, 37, 37, 38, 38, 41, 41, 42, 42, 44, 44, 47, 47,
-+ 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 59, 59, 61, 61, 62, 62,
-+ 64, 64, 67, 67, 69, 69, 70, 70, 73, 73, 74, 74, 76, 76, 79, 79,
-+ 81, 81, 82, 82, 84, 84, 87, 87, 88, 88, 91, 91, 93, 93, 94, 94,
-+ 97, 97, 98, 98,100,100,103,103,104,104,107,107,109,109,110,110,
-+112,112,115,115,117,117,118,118,121,121,122,122,124,124,127,127,
-+128,128,131,131,133,133,134,134,137,137,138,138,140,140,143,143,
-+145,145,146,146,148,148,151,151,152,152,155,155,157,157,158,158,
-+161,161,162,162,164,164,167,167,168,168,171,171,173,173,174,174,
-+176,176,179,179,181,181,182,182,185,185,186,186,188,188,191,191,
-+193,193,194,194,196,196,199,199,200,200,203,203,205,205,206,206,
-+208,208,211,211,213,213,214,214,217,217,218,218,220,220,223,223,
-+224,224,227,227,229,229,230,230,233,233,234,234,236,236,239,239,
-+241,241,242,242,244,244,247,247,248,248,251,251,253,253,254,254};
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/des/sk.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,204 @@
-+/* crypto/des/sk.h */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+static const DES_LONG des_skb[8][64]={
-+{
-+/* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
-+0x00000000L,0x00000010L,0x20000000L,0x20000010L,
-+0x00010000L,0x00010010L,0x20010000L,0x20010010L,
-+0x00000800L,0x00000810L,0x20000800L,0x20000810L,
-+0x00010800L,0x00010810L,0x20010800L,0x20010810L,
-+0x00000020L,0x00000030L,0x20000020L,0x20000030L,
-+0x00010020L,0x00010030L,0x20010020L,0x20010030L,
-+0x00000820L,0x00000830L,0x20000820L,0x20000830L,
-+0x00010820L,0x00010830L,0x20010820L,0x20010830L,
-+0x00080000L,0x00080010L,0x20080000L,0x20080010L,
-+0x00090000L,0x00090010L,0x20090000L,0x20090010L,
-+0x00080800L,0x00080810L,0x20080800L,0x20080810L,
-+0x00090800L,0x00090810L,0x20090800L,0x20090810L,
-+0x00080020L,0x00080030L,0x20080020L,0x20080030L,
-+0x00090020L,0x00090030L,0x20090020L,0x20090030L,
-+0x00080820L,0x00080830L,0x20080820L,0x20080830L,
-+0x00090820L,0x00090830L,0x20090820L,0x20090830L,
-+},{
-+/* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */
-+0x00000000L,0x02000000L,0x00002000L,0x02002000L,
-+0x00200000L,0x02200000L,0x00202000L,0x02202000L,
-+0x00000004L,0x02000004L,0x00002004L,0x02002004L,
-+0x00200004L,0x02200004L,0x00202004L,0x02202004L,
-+0x00000400L,0x02000400L,0x00002400L,0x02002400L,
-+0x00200400L,0x02200400L,0x00202400L,0x02202400L,
-+0x00000404L,0x02000404L,0x00002404L,0x02002404L,
-+0x00200404L,0x02200404L,0x00202404L,0x02202404L,
-+0x10000000L,0x12000000L,0x10002000L,0x12002000L,
-+0x10200000L,0x12200000L,0x10202000L,0x12202000L,
-+0x10000004L,0x12000004L,0x10002004L,0x12002004L,
-+0x10200004L,0x12200004L,0x10202004L,0x12202004L,
-+0x10000400L,0x12000400L,0x10002400L,0x12002400L,
-+0x10200400L,0x12200400L,0x10202400L,0x12202400L,
-+0x10000404L,0x12000404L,0x10002404L,0x12002404L,
-+0x10200404L,0x12200404L,0x10202404L,0x12202404L,
-+},{
-+/* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */
-+0x00000000L,0x00000001L,0x00040000L,0x00040001L,
-+0x01000000L,0x01000001L,0x01040000L,0x01040001L,
-+0x00000002L,0x00000003L,0x00040002L,0x00040003L,
-+0x01000002L,0x01000003L,0x01040002L,0x01040003L,
-+0x00000200L,0x00000201L,0x00040200L,0x00040201L,
-+0x01000200L,0x01000201L,0x01040200L,0x01040201L,
-+0x00000202L,0x00000203L,0x00040202L,0x00040203L,
-+0x01000202L,0x01000203L,0x01040202L,0x01040203L,
-+0x08000000L,0x08000001L,0x08040000L,0x08040001L,
-+0x09000000L,0x09000001L,0x09040000L,0x09040001L,
-+0x08000002L,0x08000003L,0x08040002L,0x08040003L,
-+0x09000002L,0x09000003L,0x09040002L,0x09040003L,
-+0x08000200L,0x08000201L,0x08040200L,0x08040201L,
-+0x09000200L,0x09000201L,0x09040200L,0x09040201L,
-+0x08000202L,0x08000203L,0x08040202L,0x08040203L,
-+0x09000202L,0x09000203L,0x09040202L,0x09040203L,
-+},{
-+/* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */
-+0x00000000L,0x00100000L,0x00000100L,0x00100100L,
-+0x00000008L,0x00100008L,0x00000108L,0x00100108L,
-+0x00001000L,0x00101000L,0x00001100L,0x00101100L,
-+0x00001008L,0x00101008L,0x00001108L,0x00101108L,
-+0x04000000L,0x04100000L,0x04000100L,0x04100100L,
-+0x04000008L,0x04100008L,0x04000108L,0x04100108L,
-+0x04001000L,0x04101000L,0x04001100L,0x04101100L,
-+0x04001008L,0x04101008L,0x04001108L,0x04101108L,
-+0x00020000L,0x00120000L,0x00020100L,0x00120100L,
-+0x00020008L,0x00120008L,0x00020108L,0x00120108L,
-+0x00021000L,0x00121000L,0x00021100L,0x00121100L,
-+0x00021008L,0x00121008L,0x00021108L,0x00121108L,
-+0x04020000L,0x04120000L,0x04020100L,0x04120100L,
-+0x04020008L,0x04120008L,0x04020108L,0x04120108L,
-+0x04021000L,0x04121000L,0x04021100L,0x04121100L,
-+0x04021008L,0x04121008L,0x04021108L,0x04121108L,
-+},{
-+/* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */
-+0x00000000L,0x10000000L,0x00010000L,0x10010000L,
-+0x00000004L,0x10000004L,0x00010004L,0x10010004L,
-+0x20000000L,0x30000000L,0x20010000L,0x30010000L,
-+0x20000004L,0x30000004L,0x20010004L,0x30010004L,
-+0x00100000L,0x10100000L,0x00110000L,0x10110000L,
-+0x00100004L,0x10100004L,0x00110004L,0x10110004L,
-+0x20100000L,0x30100000L,0x20110000L,0x30110000L,
-+0x20100004L,0x30100004L,0x20110004L,0x30110004L,
-+0x00001000L,0x10001000L,0x00011000L,0x10011000L,
-+0x00001004L,0x10001004L,0x00011004L,0x10011004L,
-+0x20001000L,0x30001000L,0x20011000L,0x30011000L,
-+0x20001004L,0x30001004L,0x20011004L,0x30011004L,
-+0x00101000L,0x10101000L,0x00111000L,0x10111000L,
-+0x00101004L,0x10101004L,0x00111004L,0x10111004L,
-+0x20101000L,0x30101000L,0x20111000L,0x30111000L,
-+0x20101004L,0x30101004L,0x20111004L,0x30111004L,
-+},{
-+/* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */
-+0x00000000L,0x08000000L,0x00000008L,0x08000008L,
-+0x00000400L,0x08000400L,0x00000408L,0x08000408L,
-+0x00020000L,0x08020000L,0x00020008L,0x08020008L,
-+0x00020400L,0x08020400L,0x00020408L,0x08020408L,
-+0x00000001L,0x08000001L,0x00000009L,0x08000009L,
-+0x00000401L,0x08000401L,0x00000409L,0x08000409L,
-+0x00020001L,0x08020001L,0x00020009L,0x08020009L,
-+0x00020401L,0x08020401L,0x00020409L,0x08020409L,
-+0x02000000L,0x0A000000L,0x02000008L,0x0A000008L,
-+0x02000400L,0x0A000400L,0x02000408L,0x0A000408L,
-+0x02020000L,0x0A020000L,0x02020008L,0x0A020008L,
-+0x02020400L,0x0A020400L,0x02020408L,0x0A020408L,
-+0x02000001L,0x0A000001L,0x02000009L,0x0A000009L,
-+0x02000401L,0x0A000401L,0x02000409L,0x0A000409L,
-+0x02020001L,0x0A020001L,0x02020009L,0x0A020009L,
-+0x02020401L,0x0A020401L,0x02020409L,0x0A020409L,
-+},{
-+/* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */
-+0x00000000L,0x00000100L,0x00080000L,0x00080100L,
-+0x01000000L,0x01000100L,0x01080000L,0x01080100L,
-+0x00000010L,0x00000110L,0x00080010L,0x00080110L,
-+0x01000010L,0x01000110L,0x01080010L,0x01080110L,
-+0x00200000L,0x00200100L,0x00280000L,0x00280100L,
-+0x01200000L,0x01200100L,0x01280000L,0x01280100L,
-+0x00200010L,0x00200110L,0x00280010L,0x00280110L,
-+0x01200010L,0x01200110L,0x01280010L,0x01280110L,
-+0x00000200L,0x00000300L,0x00080200L,0x00080300L,
-+0x01000200L,0x01000300L,0x01080200L,0x01080300L,
-+0x00000210L,0x00000310L,0x00080210L,0x00080310L,
-+0x01000210L,0x01000310L,0x01080210L,0x01080310L,
-+0x00200200L,0x00200300L,0x00280200L,0x00280300L,
-+0x01200200L,0x01200300L,0x01280200L,0x01280300L,
-+0x00200210L,0x00200310L,0x00280210L,0x00280310L,
-+0x01200210L,0x01200310L,0x01280210L,0x01280310L,
-+},{
-+/* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */
-+0x00000000L,0x04000000L,0x00040000L,0x04040000L,
-+0x00000002L,0x04000002L,0x00040002L,0x04040002L,
-+0x00002000L,0x04002000L,0x00042000L,0x04042000L,
-+0x00002002L,0x04002002L,0x00042002L,0x04042002L,
-+0x00000020L,0x04000020L,0x00040020L,0x04040020L,
-+0x00000022L,0x04000022L,0x00040022L,0x04040022L,
-+0x00002020L,0x04002020L,0x00042020L,0x04042020L,
-+0x00002022L,0x04002022L,0x00042022L,0x04042022L,
-+0x00000800L,0x04000800L,0x00040800L,0x04040800L,
-+0x00000802L,0x04000802L,0x00040802L,0x04040802L,
-+0x00002800L,0x04002800L,0x00042800L,0x04042800L,
-+0x00002802L,0x04002802L,0x00042802L,0x04042802L,
-+0x00000820L,0x04000820L,0x00040820L,0x04040820L,
-+0x00000822L,0x04000822L,0x00040822L,0x04040822L,
-+0x00002820L,0x04002820L,0x00042820L,0x04042820L,
-+0x00002822L,0x04002822L,0x00042822L,0x04042822L,
-+}};
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/des/spr.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,204 @@
-+/* crypto/des/spr.h */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+const DES_LONG des_SPtrans[8][64]={
-+{
-+/* nibble 0 */
-+0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
-+0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
-+0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
-+0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
-+0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
-+0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
-+0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
-+0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
-+0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
-+0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
-+0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
-+0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
-+0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
-+0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
-+0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
-+0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L,
-+},{
-+/* nibble 1 */
-+0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
-+0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
-+0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
-+0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
-+0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
-+0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
-+0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
-+0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
-+0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
-+0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
-+0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
-+0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
-+0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
-+0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
-+0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
-+0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L,
-+},{
-+/* nibble 2 */
-+0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
-+0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
-+0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
-+0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
-+0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
-+0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
-+0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
-+0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
-+0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
-+0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
-+0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
-+0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
-+0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
-+0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
-+0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
-+0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L,
-+},{
-+/* nibble 3 */
-+0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
-+0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
-+0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
-+0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
-+0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
-+0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
-+0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
-+0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
-+0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
-+0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
-+0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
-+0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
-+0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
-+0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
-+0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
-+0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L,
-+},{
-+/* nibble 4 */
-+0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
-+0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
-+0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
-+0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
-+0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
-+0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
-+0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
-+0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
-+0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
-+0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
-+0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
-+0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
-+0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
-+0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
-+0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
-+0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L,
-+},{
-+/* nibble 5 */
-+0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
-+0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
-+0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
-+0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
-+0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
-+0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
-+0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
-+0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
-+0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
-+0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
-+0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
-+0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
-+0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
-+0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
-+0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
-+0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
-+},{
-+/* nibble 6 */
-+0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
-+0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
-+0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
-+0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
-+0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
-+0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
-+0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
-+0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
-+0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
-+0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
-+0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
-+0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
-+0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
-+0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
-+0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
-+0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
-+},{
-+/* nibble 7 */
-+0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
-+0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
-+0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
-+0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
-+0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
-+0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
-+0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
-+0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
-+0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
-+0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
-+0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
-+0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
-+0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
-+0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
-+0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
-+0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
-+}};
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/aes.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,97 @@
-+// I retain copyright in this code but I encourage its free use provided
-+// that I don't carry any responsibility for the results. I am especially
-+// happy to see it used in free and open source software. If you do use
-+// it I would appreciate an acknowledgement of its origin in the code or
-+// the product that results and I would also appreciate knowing a little
-+// about the use to which it is being put. I am grateful to Frank Yellin
-+// for some ideas that are used in this implementation.
-+//
-+// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
-+//
-+// This is an implementation of the AES encryption algorithm (Rijndael)
-+// designed by Joan Daemen and Vincent Rijmen. This version is designed
-+// to provide both fixed and dynamic block and key lengths and can also
-+// run with either big or little endian internal byte order (see aes.h).
-+// It inputs block and key lengths in bytes with the legal values being
-+// 16, 24 and 32.
-+
-+/*
-+ * Modified by Jari Ruusu, May 1 2001
-+ * - Fixed some compile warnings, code was ok but gcc warned anyway.
-+ * - Changed basic types: byte -> unsigned char, word -> u_int32_t
-+ * - Major name space cleanup: Names visible to outside now begin
-+ * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
-+ * - Removed C++ and DLL support as part of name space cleanup.
-+ * - Eliminated unnecessary recomputation of tables. (actual bug fix)
-+ * - Merged precomputed constant tables to aes.c file.
-+ * - Removed data alignment restrictions for portability reasons.
-+ * - Made block and key lengths accept bit count (128/192/256)
-+ * as well byte count (16/24/32).
-+ * - Removed all error checks. This change also eliminated the need
-+ * to preinitialize the context struct to zero.
-+ * - Removed some totally unused constants.
-+ */
-+
-+#ifndef _AES_H
-+#define _AES_H
-+
-+#if defined(__linux__) && defined(__KERNEL__)
-+# include <linux/types.h>
-+#else
-+# include <sys/types.h>
-+#endif
-+
-+// CONFIGURATION OPTIONS (see also aes.c)
-+//
-+// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or
-+// leave this undefined for dynamically variable block size (this will
-+// result in much slower code).
-+// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). If
-+// left undefined a slower version providing variable block length is compiled
-+
-+#define AES_BLOCK_SIZE 16
-+
-+// The number of key schedule words for different block and key lengths
-+// allowing for method of computation which requires the length to be a
-+// multiple of the key length
-+//
-+// Nk = 4 6 8
-+// -------------
-+// Nb = 4 | 60 60 64
-+// 6 | 96 90 96
-+// 8 | 120 120 120
-+
-+#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32)
-+#define AES_KS_LENGTH 120
-+#define AES_RC_LENGTH 29
-+#else
-+#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE
-+#define AES_RC_LENGTH (9 * AES_BLOCK_SIZE) / 8 - 8
-+#endif
-+
-+typedef struct
-+{
-+ u_int32_t aes_Nkey; // the number of words in the key input block
-+ u_int32_t aes_Nrnd; // the number of cipher rounds
-+ u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule
-+ u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule
-+#if !defined(AES_BLOCK_SIZE)
-+ u_int32_t aes_Ncol; // the number of columns in the cipher state
-+#endif
-+} aes_context;
-+
-+// THE CIPHER INTERFACE
-+
-+#if !defined(AES_BLOCK_SIZE)
-+extern void aes_set_blk(aes_context *, const int);
-+#endif
-+extern void aes_set_key(aes_context *, const unsigned char [], const int, const int);
-+extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []);
-+extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []);
-+
-+// The block length inputs to aes_set_block and aes_set_key are in numbers
-+// of bytes or bits. The calls to subroutines must be made in the above
-+// order but multiple calls can be made without repeating earlier calls
-+// if their parameters have not changed.
-+
-+#endif // _AES_H
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/aes_cbc.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,4 @@
-+/* Glue header */
-+#include "aes.h"
-+int AES_set_key(aes_context *aes_ctx, const u_int8_t * key, int keysize);
-+int AES_cbc_encrypt(aes_context *ctx, const u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt);
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/aes_xcbc_mac.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,12 @@
-+#ifndef _AES_XCBC_MAC_H
-+#define _AES_XCBC_MAC_H
-+
-+typedef u_int32_t aes_block[4];
-+typedef struct {
-+ aes_context ctx_k1;
-+ aes_block k2;
-+ aes_block k3;
-+} aes_context_mac;
-+int AES_xcbc_mac_set_key(aes_context_mac *ctxm, const u_int8_t *key, int keylen);
-+int AES_xcbc_mac_hash(const aes_context_mac *ctxm, const u_int8_t * in, int ilen, u_int8_t hash[16]);
-+#endif /* _AES_XCBC_MAC_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/cbc_generic.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,110 @@
-+#ifndef _CBC_GENERIC_H
-+#define _CBC_GENERIC_H
-+/*
-+ * CBC macro helpers
-+ *
-+ * Author: JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+/*
-+ * Heavily inspired in loop_AES
-+ */
-+#define CBC_IMPL_BLK16(name, ctx_type, addr_type, enc_func, dec_func) \
-+int name(ctx_type *ctx, const u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt) { \
-+ int ret=ilen, pos; \
-+ const u_int32_t *iv_i; \
-+ if ((ilen) % 16) return 0; \
-+ if (encrypt) { \
-+ pos=0; \
-+ while(pos<ilen) { \
-+ if (pos==0) \
-+ iv_i=(const u_int32_t*) iv; \
-+ else \
-+ iv_i=(const u_int32_t*) (out-16); \
-+ *((u_int32_t *)(&out[ 0])) = iv_i[0]^*((const u_int32_t *)(&in[ 0])); \
-+ *((u_int32_t *)(&out[ 4])) = iv_i[1]^*((const u_int32_t *)(&in[ 4])); \
-+ *((u_int32_t *)(&out[ 8])) = iv_i[2]^*((const u_int32_t *)(&in[ 8])); \
-+ *((u_int32_t *)(&out[12])) = iv_i[3]^*((const u_int32_t *)(&in[12])); \
-+ enc_func(ctx, (addr_type) out, (addr_type) out); \
-+ in+=16; \
-+ out+=16; \
-+ pos+=16; \
-+ } \
-+ } else { \
-+ pos=ilen-16; \
-+ in+=pos; \
-+ out+=pos; \
-+ while(pos>=0) { \
-+ dec_func(ctx, (const addr_type) in, (addr_type) out); \
-+ if (pos==0) \
-+ iv_i=(const u_int32_t*) (iv); \
-+ else \
-+ iv_i=(const u_int32_t*) (in-16); \
-+ *((u_int32_t *)(&out[ 0])) ^= iv_i[0]; \
-+ *((u_int32_t *)(&out[ 4])) ^= iv_i[1]; \
-+ *((u_int32_t *)(&out[ 8])) ^= iv_i[2]; \
-+ *((u_int32_t *)(&out[12])) ^= iv_i[3]; \
-+ in-=16; \
-+ out-=16; \
-+ pos-=16; \
-+ } \
-+ } \
-+ return ret; \
-+}
-+#define CBC_IMPL_BLK8(name, ctx_type, addr_type, enc_func, dec_func) \
-+int name(ctx_type *ctx, u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt) { \
-+ int ret=ilen, pos; \
-+ const u_int32_t *iv_i; \
-+ if ((ilen) % 8) return 0; \
-+ if (encrypt) { \
-+ pos=0; \
-+ while(pos<ilen) { \
-+ if (pos==0) \
-+ iv_i=(const u_int32_t*) iv; \
-+ else \
-+ iv_i=(const u_int32_t*) (out-8); \
-+ *((u_int32_t *)(&out[ 0])) = iv_i[0]^*((const u_int32_t *)(&in[ 0])); \
-+ *((u_int32_t *)(&out[ 4])) = iv_i[1]^*((const u_int32_t *)(&in[ 4])); \
-+ enc_func(ctx, (addr_type)out, (addr_type)out); \
-+ in+=8; \
-+ out+=8; \
-+ pos+=8; \
-+ } \
-+ } else { \
-+ pos=ilen-8; \
-+ in+=pos; \
-+ out+=pos; \
-+ while(pos>=0) { \
-+ dec_func(ctx, (const addr_type)in, (addr_type)out); \
-+ if (pos==0) \
-+ iv_i=(const u_int32_t*) (iv); \
-+ else \
-+ iv_i=(const u_int32_t*) (in-8); \
-+ *((u_int32_t *)(&out[ 0])) ^= iv_i[0]; \
-+ *((u_int32_t *)(&out[ 4])) ^= iv_i[1]; \
-+ in-=8; \
-+ out-=8; \
-+ pos-=8; \
-+ } \
-+ } \
-+ return ret; \
-+}
-+#define CBC_DECL(name, ctx_type) \
-+int name(ctx_type *ctx, u_int8_t * in, u_int8_t * out, int ilen, const u_int8_t * iv, int encrypt)
-+/*
-+Eg.:
-+CBC_IMPL_BLK16(AES_cbc_encrypt, aes_context, u_int8_t *, aes_encrypt, aes_decrypt);
-+CBC_DECL(AES_cbc_encrypt, aes_context);
-+*/
-+#endif /* _CBC_GENERIC_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/des.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,286 @@
-+/* crypto/des/des.org */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-+ *
-+ * Always modify des.org since des.h is automatically generated from
-+ * it during SSLeay configuration.
-+ *
-+ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-+ */
-+
-+#ifndef HEADER_DES_H
-+#define HEADER_DES_H
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+
-+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
-+ * %20 speed up (longs are 8 bytes, int's are 4). */
-+/* Must be unsigned int on ia64/Itanium or DES breaks badly */
-+
-+#ifdef __KERNEL__
-+#include <linux/types.h>
-+#else
-+#include <sys/types.h>
-+#endif
-+
-+#ifndef DES_LONG
-+#define DES_LONG u_int32_t
-+#endif
-+
-+typedef unsigned char des_cblock[8];
-+typedef struct { des_cblock ks; } des_key_schedule[16];
-+
-+#define DES_KEY_SZ (sizeof(des_cblock))
-+#define DES_SCHEDULE_SZ (sizeof(des_key_schedule))
-+
-+#define DES_ENCRYPT 1
-+#define DES_DECRYPT 0
-+
-+#define DES_CBC_MODE 0
-+#define DES_PCBC_MODE 1
-+
-+#define des_ecb2_encrypt(i,o,k1,k2,e) \
-+ des_ecb3_encrypt((i),(o),(k1),(k2),(k1),(e))
-+
-+#define des_ede2_cbc_encrypt(i,o,l,k1,k2,iv,e) \
-+ des_ede3_cbc_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(e))
-+
-+#define des_ede2_cfb64_encrypt(i,o,l,k1,k2,iv,n,e) \
-+ des_ede3_cfb64_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(n),(e))
-+
-+#define des_ede2_ofb64_encrypt(i,o,l,k1,k2,iv,n) \
-+ des_ede3_ofb64_encrypt((i),(o),(l),(k1),(k2),(k1),(iv),(n))
-+
-+#define C_Block des_cblock
-+#define Key_schedule des_key_schedule
-+#ifdef KERBEROS
-+#define ENCRYPT DES_ENCRYPT
-+#define DECRYPT DES_DECRYPT
-+#endif
-+#define KEY_SZ DES_KEY_SZ
-+#define string_to_key des_string_to_key
-+#define read_pw_string des_read_pw_string
-+#define random_key des_random_key
-+#define pcbc_encrypt des_pcbc_encrypt
-+#define set_key des_set_key
-+#define key_sched des_key_sched
-+#define ecb_encrypt des_ecb_encrypt
-+#define cbc_encrypt des_cbc_encrypt
-+#define ncbc_encrypt des_ncbc_encrypt
-+#define xcbc_encrypt des_xcbc_encrypt
-+#define cbc_cksum des_cbc_cksum
-+#define quad_cksum des_quad_cksum
-+
-+/* For compatibility with the MIT lib - eay 20/05/92 */
-+typedef des_key_schedule bit_64;
-+#define des_fixup_key_parity des_set_odd_parity
-+#define des_check_key_parity check_parity
-+
-+extern int des_check_key; /* defaults to false */
-+extern int des_rw_mode; /* defaults to DES_PCBC_MODE */
-+
-+/* The next line is used to disable full ANSI prototypes, if your
-+ * compiler has problems with the prototypes, make sure this line always
-+ * evaluates to true :-) */
-+#if defined(MSDOS) || defined(__STDC__)
-+#undef NOPROTO
-+#endif
-+#ifndef NOPROTO
-+char *des_options(void);
-+void des_ecb3_encrypt(des_cblock *input,des_cblock *output,
-+ des_key_schedule ks1,des_key_schedule ks2,
-+ des_key_schedule ks3, int enc);
-+DES_LONG des_cbc_cksum(des_cblock *input,des_cblock *output,
-+ long length,des_key_schedule schedule,des_cblock *ivec);
-+void des_cbc_encrypt(des_cblock *input,des_cblock *output,long length,
-+ des_key_schedule schedule,des_cblock *ivec,int enc);
-+void des_ncbc_encrypt(des_cblock *input,des_cblock *output,long length,
-+ des_key_schedule schedule,des_cblock *ivec,int enc);
-+void des_xcbc_encrypt(des_cblock *input,des_cblock *output,long length,
-+ des_key_schedule schedule,des_cblock *ivec,
-+ des_cblock *inw,des_cblock *outw,int enc);
-+void des_cfb_encrypt(unsigned char *in,unsigned char *out,int numbits,
-+ long length,des_key_schedule schedule,des_cblock *ivec,int enc);
-+void des_ecb_encrypt(des_cblock *input,des_cblock *output,
-+ des_key_schedule ks,int enc);
-+void des_encrypt(DES_LONG *data,des_key_schedule ks, int enc);
-+void des_encrypt2(DES_LONG *data,des_key_schedule ks, int enc);
-+void des_encrypt3(DES_LONG *data, des_key_schedule ks1,
-+ des_key_schedule ks2, des_key_schedule ks3);
-+void des_decrypt3(DES_LONG *data, des_key_schedule ks1,
-+ des_key_schedule ks2, des_key_schedule ks3);
-+void des_ede3_cbc_encrypt(des_cblock *input, des_cblock *output,
-+ long length, des_key_schedule ks1, des_key_schedule ks2,
-+ des_key_schedule ks3, des_cblock *ivec, int enc);
-+void des_ede3_cfb64_encrypt(unsigned char *in, unsigned char *out,
-+ long length, des_key_schedule ks1, des_key_schedule ks2,
-+ des_key_schedule ks3, des_cblock *ivec, int *num, int enc);
-+void des_ede3_ofb64_encrypt(unsigned char *in, unsigned char *out,
-+ long length, des_key_schedule ks1, des_key_schedule ks2,
-+ des_key_schedule ks3, des_cblock *ivec, int *num);
-+
-+void des_xwhite_in2out(des_cblock (*des_key), des_cblock (*in_white),
-+ des_cblock (*out_white));
-+
-+int des_enc_read(int fd,char *buf,int len,des_key_schedule sched,
-+ des_cblock *iv);
-+int des_enc_write(int fd,char *buf,int len,des_key_schedule sched,
-+ des_cblock *iv);
-+char *des_fcrypt(const char *buf,const char *salt, char *ret);
-+
-+void des_ofb_encrypt(unsigned char *in,unsigned char *out,
-+ int numbits,long length,des_key_schedule schedule,des_cblock *ivec);
-+void des_pcbc_encrypt(des_cblock *input,des_cblock *output,long length,
-+ des_key_schedule schedule,des_cblock *ivec,int enc);
-+DES_LONG des_quad_cksum(des_cblock *input,des_cblock *output,
-+ long length,int out_count,des_cblock *seed);
-+void des_random_seed(des_cblock key);
-+void des_random_key(des_cblock ret);
-+int des_read_password(des_cblock *key,char *prompt,int verify);
-+int des_read_2passwords(des_cblock *key1,des_cblock *key2,
-+ char *prompt,int verify);
-+int des_read_pw_string(char *buf,int length,char *prompt,int verify);
-+void des_set_odd_parity(des_cblock *key);
-+int des_is_weak_key(des_cblock *key);
-+int des_set_key(des_cblock *key,des_key_schedule schedule);
-+int des_key_sched(des_cblock *key,des_key_schedule schedule);
-+void des_string_to_key(char *str,des_cblock *key);
-+void des_string_to_2keys(char *str,des_cblock *key1,des_cblock *key2);
-+void des_cfb64_encrypt(unsigned char *in, unsigned char *out, long length,
-+ des_key_schedule schedule, des_cblock *ivec, int *num, int enc);
-+void des_ofb64_encrypt(unsigned char *in, unsigned char *out, long length,
-+ des_key_schedule schedule, des_cblock *ivec, int *num);
-+int des_read_pw(char *buf, char *buff, int size, char *prompt, int verify);
-+
-+/* Extra functions from Mark Murray <mark@grondar.za> */
-+/* The following functions are not in the normal unix build or the
-+ * SSLeay build. When using the SSLeay build, use RAND_seed()
-+ * and RAND_bytes() instead. */
-+int des_new_random_key(des_cblock *key);
-+void des_init_random_number_generator(des_cblock *key);
-+void des_set_random_generator_seed(des_cblock *key);
-+void des_set_sequence_number(des_cblock new_sequence_number);
-+void des_generate_random_block(des_cblock *block);
-+
-+#else
-+
-+char *des_options();
-+void des_ecb3_encrypt();
-+DES_LONG des_cbc_cksum();
-+void des_cbc_encrypt();
-+void des_ncbc_encrypt();
-+void des_xcbc_encrypt();
-+void des_cfb_encrypt();
-+void des_ede3_cfb64_encrypt();
-+void des_ede3_ofb64_encrypt();
-+void des_ecb_encrypt();
-+void des_encrypt();
-+void des_encrypt2();
-+void des_encrypt3();
-+void des_decrypt3();
-+void des_ede3_cbc_encrypt();
-+int des_enc_read();
-+int des_enc_write();
-+char *des_fcrypt();
-+#ifdef PERL5
-+char *des_crypt();
-+#else
-+char *crypt();
-+#endif
-+void des_ofb_encrypt();
-+void des_pcbc_encrypt();
-+DES_LONG des_quad_cksum();
-+void des_random_seed();
-+void des_random_key();
-+int des_read_password();
-+int des_read_2passwords();
-+int des_read_pw_string();
-+void des_set_odd_parity();
-+int des_is_weak_key();
-+int des_set_key();
-+int des_key_sched();
-+void des_string_to_key();
-+void des_string_to_2keys();
-+void des_cfb64_encrypt();
-+void des_ofb64_encrypt();
-+int des_read_pw();
-+void des_xwhite_in2out();
-+
-+/* Extra functions from Mark Murray <mark@grondar.za> */
-+/* The following functions are not in the normal unix build or the
-+ * SSLeay build. When using the SSLeay build, use RAND_seed()
-+ * and RAND_bytes() instead. */
-+#ifdef FreeBSD
-+int des_new_random_key();
-+void des_init_random_number_generator();
-+void des_set_random_generator_seed();
-+void des_set_sequence_number();
-+void des_generate_random_block();
-+#endif
-+
-+#endif
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/klips-crypto/ocf_assist.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,63 @@
-+#ifndef _OCF_ASSIST_H
-+#define _OCF_ASSIST_H 1
-+/****************************************************************************/
-+/* The various hw_assist functions return these bits */
-+
-+#define OCF_PROVIDES_AES 0x0001
-+#define OCF_PROVIDES_DES_3DES 0x0002
-+
-+/****************************************************************************/
-+#if !defined(OCF_ASSIST)
-+/****************************************************************************/
-+/*
-+ * stub it all out just in case
-+ */
-+
-+#define ocf_aes_assist() (0)
-+#define ocf_aes_set_key(a1,a2,a3,a4)
-+#define ocf_aes_cbc_encrypt(a1,a2,a3,a4,a5,a6)
-+
-+#define ocf_des_assist() (0)
-+#define ocf_des_set_key(a, b)
-+#define ocf_des_cbc_encrypt(a1,a2,a3,a4,a5,a6)
-+#define ocf_des_encrypt(a1,a2,a3)
-+#define ocf_des_ede3_cbc_encrypt(a1,a2,a3,a4,a5,a6,a7,a8)
-+#define ocf_des_ncbc_encrypt(a1,a2,a3,a4,a5,a6)
-+#define ocf_des_ecb_encrypt(a1,a2,a3,a4)
-+
-+/****************************************************************************/
-+#else
-+/****************************************************************************/
-+
-+#include <sys/types.h>
-+#include "aes.h"
-+#include "des.h"
-+
-+extern int ocf_aes_assist(void);
-+extern void ocf_aes_set_key(aes_context *cx, const unsigned char in_key[],
-+ int n_bytes, const int f);
-+extern int ocf_aes_cbc_encrypt(aes_context *ctx, const u_int8_t *input,
-+ u_int8_t *output,
-+ long length,
-+ const u_int8_t *ivec, int enc);
-+
-+extern int ocf_des_assist(void);
-+extern int ocf_des_set_key(des_cblock *key, des_key_schedule schedule);
-+extern void ocf_des_cbc_encrypt(des_cblock *input, des_cblock *output,
-+ long length, des_key_schedule schedule,
-+ des_cblock *ivec, int enc);
-+extern void ocf_des_encrypt(DES_LONG *data, des_key_schedule ks, int enc);
-+extern void ocf_des_ede3_cbc_encrypt(des_cblock *input, des_cblock *output,
-+ long length, des_key_schedule ks1,
-+ des_key_schedule ks2, des_key_schedule ks3,
-+ des_cblock *ivec, int enc);
-+extern void ocf_des_ncbc_encrypt(des_cblock *input, des_cblock *output,
-+ long length, des_key_schedule schedule,
-+ des_cblock *ivec, int enc);
-+extern void ocf_des_ecb_encrypt(des_cblock *input, des_cblock *output,
-+ des_key_schedule ks, int enc);
-+
-+/****************************************************************************/
-+#endif /* !defined(OCF_ASSIST) */
-+/****************************************************************************/
-+#endif /* _OCF_ASSIST_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,569 @@
-+#ifndef _OPENSWAN_H
-+/*
-+ * header file for FreeS/WAN library functions
-+ * Copyright (C) 1998, 1999, 2000 Henry Spencer.
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: openswan.h,v 1.95 2005/08/25 01:24:40 paul Exp $
-+ */
-+#define _OPENSWAN_H /* seen it, no need to see it again */
-+
-+/* you'd think this should be builtin to compiler... */
-+#ifndef TRUE
-+#define TRUE 1
-+#endif
-+
-+#ifndef FALSE
-+#define FALSE 0
-+#endif
-+
-+/*
-+ * When using uclibc, malloc(0) returns NULL instead of success. This is
-+ * to make it use the inbuilt work-around.
-+ * See: http://osdir.com/ml/network.freeswan.devel/2003-11/msg00009.html
-+ */
-+#ifdef __UCLIBC__
-+# if !defined(__MALLOC_GLIBC_COMPAT__) && !defined(MALLOC_GLIBC_COMPAT)
-+# warning Please compile uclibc with GLIBC_COMPATIBILITY defined
-+# endif
-+#endif
-+
-+
-+/*
-+ * We've just got to have some datatypes defined... And annoyingly, just
-+ * where we get them depends on whether we're in userland or not.
-+ */
-+/* things that need to come from one place or the other, depending */
-+#if defined(linux)
-+#if defined(__KERNEL__)
-+#include <linux/types.h>
-+#include <linux/socket.h>
-+#include <linux/in.h>
-+#include <linux/in6.h>
-+#include <linux/string.h>
-+#include <linux/ctype.h>
-+#include <openswan/ipsec_kversion.h>
-+#include <openswan/ipsec_param.h>
-+#define user_assert(foo) /*nothing*/
-+
-+#else /* NOT in kernel */
-+#include <sys/types.h>
-+#include <netinet/in.h>
-+#include <string.h>
-+#include <ctype.h>
-+#include <assert.h>
-+#define user_assert(foo) assert(foo)
-+#include <stdio.h>
-+
-+# define uint8_t u_int8_t
-+# define uint16_t u_int16_t
-+# define uint32_t u_int32_t
-+# define uint64_t u_int64_t
-+
-+
-+
-+#endif /* __KERNEL__ */
-+
-+#endif /* linux */
-+
-+#define DEBUG_NO_STATIC static
-+
-+/*
-+ * Yes Virginia, we have started a windows port.
-+ */
-+#if defined(__CYGWIN32__)
-+#if !defined(WIN32_KERNEL)
-+/* get windows equivalents */
-+#include <stdio.h>
-+#include <string.h>
-+#include <win32/types.h>
-+#include <netinet/in.h>
-+#include <cygwin/socket.h>
-+#include <assert.h>
-+#define user_assert(foo) assert(foo)
-+#endif /* _KERNEL */
-+#endif /* WIN32 */
-+
-+/*
-+ * Kovacs? A macosx port?
-+ */
-+#if defined(macintosh) || (defined(__MACH__) && defined(__APPLE__))
-+#include <TargetConditionals.h>
-+#include <AvailabilityMacros.h>
-+#include <machine/types.h>
-+#include <machine/endian.h>
-+#include <stdint.h>
-+#include <stddef.h>
-+#include <stdio.h>
-+#include <time.h>
-+#include <sys/time.h>
-+#include <string.h>
-+#include <netinet/in.h>
-+#include <arpa/inet.h>
-+#include <tcpd.h>
-+#include <assert.h>
-+#define user_assert(foo) assert(foo)
-+#define __u32 unsigned int
-+#define __u8 unsigned char
-+#define s6_addr16 __u6_addr.__u6_addr16
-+#define DEBUG_NO_STATIC static
-+#endif
-+
-+/*
-+ * FreeBSD
-+ */
-+#if defined(__FreeBSD__)
-+# define DEBUG_NO_STATIC static
-+#include <sys/types.h>
-+#include <netinet/in.h>
-+#include <sys/socket.h>
-+#include <arpa/inet.h>
-+#include <string.h>
-+#include <assert.h>
-+#define user_assert(foo) assert(foo)
-+/* apparently this way to deal with an IPv6 address is not standard. */
-+#define s6_addr16 __u6_addr.__u6_addr16
-+#endif
-+
-+
-+#ifndef IPPROTO_COMP
-+# define IPPROTO_COMP 108
-+#endif /* !IPPROTO_COMP */
-+
-+#ifndef IPPROTO_INT
-+# define IPPROTO_INT 61
-+#endif /* !IPPROTO_INT */
-+
-+#if !defined(ESPINUDP_WITH_NON_IKE)
-+#define ESPINUDP_WITH_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
-+#define ESPINUDP_WITH_NON_ESP 2 /* draft-ietf-ipsec-nat-t-ike-02 */
-+#endif
-+
-+/*
-+ * Basic data types for the address-handling functions.
-+ * ip_address and ip_subnet are supposed to be opaque types; do not
-+ * use their definitions directly, they are subject to change!
-+ */
-+
-+/* first, some quick fakes in case we're on an old system with no IPv6 */
-+#if !defined(s6_addr16) && defined(__CYGWIN32__)
-+struct in6_addr {
-+ union
-+ {
-+ u_int8_t u6_addr8[16];
-+ u_int16_t u6_addr16[8];
-+ u_int32_t u6_addr32[4];
-+ } in6_u;
-+#define s6_addr in6_u.u6_addr8
-+#define s6_addr16 in6_u.u6_addr16
-+#define s6_addr32 in6_u.u6_addr32
-+};
-+struct sockaddr_in6 {
-+ unsigned short int sin6_family; /* AF_INET6 */
-+ __u16 sin6_port; /* Transport layer port # */
-+ __u32 sin6_flowinfo; /* IPv6 flow information */
-+ struct in6_addr sin6_addr; /* IPv6 address */
-+ __u32 sin6_scope_id; /* scope id (new in RFC2553) */
-+};
-+#endif /* !s6_addr16 */
-+
-+/* then the main types */
-+typedef struct {
-+ union {
-+ struct sockaddr_in v4;
-+ struct sockaddr_in6 v6;
-+ } u;
-+} ip_address;
-+typedef struct {
-+ ip_address addr;
-+ int maskbits;
-+} ip_subnet;
-+
-+/* and the SA ID stuff */
-+#ifdef __KERNEL__
-+typedef __u32 ipsec_spi_t;
-+#else
-+typedef u_int32_t ipsec_spi_t;
-+#endif
-+typedef struct { /* to identify an SA, we need: */
-+ ip_address dst; /* A. destination host */
-+ ipsec_spi_t spi; /* B. 32-bit SPI, assigned by dest. host */
-+# define SPI_PASS 256 /* magic values... */
-+# define SPI_DROP 257 /* ...for use... */
-+# define SPI_REJECT 258 /* ...with SA_INT */
-+# define SPI_HOLD 259
-+# define SPI_TRAP 260
-+# define SPI_TRAPSUBNET 261
-+ int proto; /* C. protocol */
-+# define SA_ESP 50 /* IPPROTO_ESP */
-+# define SA_AH 51 /* IPPROTO_AH */
-+# define SA_IPIP 4 /* IPPROTO_IPIP */
-+# define SA_COMP 108 /* IPPROTO_COMP */
-+# define SA_INT 61 /* IANA reserved for internal use */
-+} ip_said;
-+
-+/* misc */
-+typedef const char *err_t; /* error message, or NULL for success */
-+struct prng { /* pseudo-random-number-generator guts */
-+ unsigned char sbox[256];
-+ int i, j;
-+ unsigned long count;
-+};
-+
-+
-+/*
-+ * definitions for user space, taken from freeswan/ipsec_sa.h
-+ */
-+typedef uint32_t IPsecSAref_t;
-+
-+/* Translation to/from nfmark.
-+ *
-+ * use bits 16-31. Leave bit 32 as a indicate that IPsec processing
-+ * has already been done.
-+ */
-+#define IPSEC_SA_REF_TABLE_IDX_WIDTH 15
-+#define IPSEC_SA_REF_TABLE_OFFSET 16
-+#define IPSEC_SA_REF_MAASK ((1<<IPSEC_SA_REF_TABLE_IDX_WIDTH)-1)
-+
-+#define IPsecSAref2NFmark(x) (((x)&IPSEC_SA_REF_MASK) << IPSEC_SA_REF_TABLE_OFFSET)
-+#define NFmark2IPsecSAref(x) (((x) >> IPSEC_SA_REF_TABLE_OFFSET)&IPSEC_SA_REF_MASK)
-+
-+#define IPSEC_SAREF_NULL ((IPsecSAref_t)0)
-+#define IPSEC_SAREF_NA ((IPsecSAref_t)0xffff0001)
-+
-+/* GCC magic for use in function definitions! */
-+#ifdef GCC_LINT
-+# define PRINTF_LIKE(n) __attribute__ ((format(printf, n, n+1)))
-+# define NEVER_RETURNS __attribute__ ((noreturn))
-+# define UNUSED __attribute__ ((unused))
-+# define BLANK_FORMAT " " /* GCC_LINT whines about empty formats */
-+#else
-+# define PRINTF_LIKE(n) /* ignore */
-+# define NEVER_RETURNS /* ignore */
-+# define UNUSED /* ignore */
-+# define BLANK_FORMAT ""
-+#endif
-+
-+
-+/*
-+ * function to log stuff from libraries that may be used in multiple
-+ * places.
-+ */
-+typedef int (*openswan_keying_debug_func_t)(const char *message, ...);
-+
-+
-+
-+/*
-+ * new IPv6-compatible functions
-+ */
-+
-+/* text conversions */
-+err_t ttoul(const char *src, size_t srclen, int format, unsigned long *dst);
-+size_t ultot(unsigned long src, int format, char *buf, size_t buflen);
-+#define ULTOT_BUF (22+1) /* holds 64 bits in octal */
-+
-+/* looks up names in DNS */
-+err_t ttoaddr(const char *src, size_t srclen, int af, ip_address *dst);
-+
-+/* does not look up names in DNS */
-+err_t ttoaddr_num(const char *src, size_t srclen, int af, ip_address *dst);
-+
-+err_t tnatoaddr(const char *src, size_t srclen, int af, ip_address *dst);
-+size_t addrtot(const ip_address *src, int format, char *buf, size_t buflen);
-+/* RFC 1886 old IPv6 reverse-lookup format is the bulkiest */
-+#define ADDRTOT_BUF (32*2 + 3 + 1 + 3 + 1 + 1)
-+err_t ttosubnet(const char *src, size_t srclen, int af, ip_subnet *dst);
-+size_t subnettot(const ip_subnet *src, int format, char *buf, size_t buflen);
-+#define SUBNETTOT_BUF (ADDRTOT_BUF + 1 + 3)
-+size_t subnetporttot(const ip_subnet *src, int format, char *buf, size_t buflen);
-+#define SUBNETPROTOTOT_BUF (SUBNETTOTO_BUF + ULTOT_BUF)
-+err_t ttosa(const char *src, size_t srclen, ip_said *dst);
-+size_t satot(const ip_said *src, int format, char *bufptr, size_t buflen);
-+#define SATOT_BUF (5 + ULTOA_BUF + 1 + ADDRTOT_BUF)
-+err_t ttodata(const char *src, size_t srclen, int base, char *buf,
-+ size_t buflen, size_t *needed);
-+err_t ttodatav(const char *src, size_t srclen, int base,
-+ char *buf, size_t buflen, size_t *needed,
-+ char *errp, size_t errlen, unsigned int flags);
-+#define TTODATAV_BUF 40 /* ttodatav's largest non-literal message */
-+#define TTODATAV_IGNORESPACE (1<<1) /* ignore spaces in base64 encodings*/
-+#define TTODATAV_SPACECOUNTS 0 /* do not ignore spaces in base64 */
-+
-+size_t datatot(const unsigned char *src, size_t srclen, int format
-+ , char *buf, size_t buflen);
-+size_t keyblobtoid(const unsigned char *src, size_t srclen, char *dst,
-+ size_t dstlen);
-+size_t splitkeytoid(const unsigned char *e, size_t elen, const unsigned char *m,
-+ size_t mlen, char *dst, size_t dstlen);
-+#define KEYID_BUF 10 /* up to 9 text digits plus NUL */
-+err_t ttoprotoport(char *src, size_t src_len, u_int8_t *proto, u_int16_t *port,
-+ int *has_port_wildcard);
-+
-+/* initializations */
-+void initsaid(const ip_address *addr, ipsec_spi_t spi, int proto, ip_said *dst);
-+err_t loopbackaddr(int af, ip_address *dst);
-+err_t unspecaddr(int af, ip_address *dst);
-+err_t anyaddr(int af, ip_address *dst);
-+err_t initaddr(const unsigned char *src, size_t srclen, int af, ip_address *dst);
-+err_t add_port(int af, ip_address *addr, unsigned short port);
-+err_t initsubnet(const ip_address *addr, int maskbits, int clash, ip_subnet *dst);
-+err_t addrtosubnet(const ip_address *addr, ip_subnet *dst);
-+
-+/* misc. conversions and related */
-+err_t rangetosubnet(const ip_address *from, const ip_address *to, ip_subnet *dst);
-+int addrtypeof(const ip_address *src);
-+int subnettypeof(const ip_subnet *src);
-+size_t addrlenof(const ip_address *src);
-+size_t addrbytesptr(const ip_address *src, const unsigned char **dst);
-+size_t addrbytesptr_write(ip_address *src, unsigned char **dst);
-+size_t addrbytesof(const ip_address *src, unsigned char *dst, size_t dstlen);
-+int masktocount(const ip_address *src);
-+void networkof(const ip_subnet *src, ip_address *dst);
-+void maskof(const ip_subnet *src, ip_address *dst);
-+
-+/* tests */
-+int sameaddr(const ip_address *a, const ip_address *b);
-+int addrcmp(const ip_address *a, const ip_address *b);
-+int samesubnet(const ip_subnet *a, const ip_subnet *b);
-+int addrinsubnet(const ip_address *a, const ip_subnet *s);
-+int subnetinsubnet(const ip_subnet *a, const ip_subnet *b);
-+int subnetishost(const ip_subnet *s);
-+int samesaid(const ip_said *a, const ip_said *b);
-+int sameaddrtype(const ip_address *a, const ip_address *b);
-+int samesubnettype(const ip_subnet *a, const ip_subnet *b);
-+int isvalidsubnet(const ip_subnet *a);
-+int isanyaddr(const ip_address *src);
-+int isunspecaddr(const ip_address *src);
-+int isloopbackaddr(const ip_address *src);
-+
-+/* low-level grot */
-+int portof(const ip_address *src);
-+void setportof(int port, ip_address *dst);
-+struct sockaddr *sockaddrof(ip_address *src);
-+size_t sockaddrlenof(const ip_address *src);
-+
-+/* PRNG */
-+void prng_init(struct prng *prng, const unsigned char *key, size_t keylen);
-+void prng_bytes(struct prng *prng, unsigned char *dst, size_t dstlen);
-+unsigned long prng_count(struct prng *prng);
-+void prng_final(struct prng *prng);
-+
-+/* odds and ends */
-+const char *ipsec_version_code(void);
-+const char *ipsec_version_string(void);
-+const char **ipsec_copyright_notice(void);
-+
-+const char *dns_string_rr(int rr, char *buf, int bufsize);
-+const char *dns_string_datetime(time_t seconds,
-+ char *buf,
-+ int bufsize);
-+
-+
-+/*
-+ * old functions, to be deleted eventually
-+ */
-+
-+/* unsigned long */
-+const char * /* NULL for success, else string literal */
-+atoul(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ int base, /* 0 means figure it out */
-+ unsigned long *resultp
-+);
-+size_t /* space needed for full conversion */
-+ultoa(
-+ unsigned long n,
-+ int base,
-+ char *dst,
-+ size_t dstlen
-+);
-+#define ULTOA_BUF 21 /* just large enough for largest result, */
-+ /* assuming 64-bit unsigned long! */
-+
-+/* Internet addresses */
-+const char * /* NULL for success, else string literal */
-+atoaddr(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ struct in_addr *addr
-+);
-+size_t /* space needed for full conversion */
-+addrtoa(
-+ struct in_addr addr,
-+ int format, /* character; 0 means default */
-+ char *dst,
-+ size_t dstlen
-+);
-+#define ADDRTOA_BUF 16 /* just large enough for largest result */
-+
-+/* subnets */
-+const char * /* NULL for success, else string literal */
-+atosubnet(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ struct in_addr *addr,
-+ struct in_addr *mask
-+);
-+size_t /* space needed for full conversion */
-+subnettoa(
-+ struct in_addr addr,
-+ struct in_addr mask,
-+ int format, /* character; 0 means default */
-+ char *dst,
-+ size_t dstlen
-+);
-+#define SUBNETTOA_BUF 32 /* large enough for worst case result */
-+
-+/* ranges */
-+const char * /* NULL for success, else string literal */
-+atoasr(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ char *type, /* 'a', 's', 'r' */
-+ struct in_addr *addrs /* two-element array */
-+);
-+size_t /* space needed for full conversion */
-+rangetoa(
-+ struct in_addr *addrs, /* two-element array */
-+ int format, /* character; 0 means default */
-+ char *dst,
-+ size_t dstlen
-+);
-+#define RANGETOA_BUF 34 /* large enough for worst case result */
-+
-+/* data types for SA conversion functions */
-+
-+/* generic data, e.g. keys */
-+const char * /* NULL for success, else string literal */
-+atobytes(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ char *dst,
-+ size_t dstlen,
-+ size_t *lenp /* NULL means don't bother telling me */
-+);
-+size_t /* 0 failure, else true size */
-+bytestoa(
-+ const unsigned char *src,
-+ size_t srclen,
-+ int format, /* character; 0 means default */
-+ char *dst,
-+ size_t dstlen
-+);
-+
-+/* old versions of generic-data functions; deprecated */
-+size_t /* 0 failure, else true size */
-+atodata(
-+ const char *src,
-+ size_t srclen, /* 0 means strlen(src) */
-+ char *dst,
-+ size_t dstlen
-+);
-+size_t /* 0 failure, else true size */
-+datatoa(
-+ const unsigned char *src,
-+ size_t srclen,
-+ int format, /* character; 0 means default */
-+ char *dst,
-+ size_t dstlen
-+);
-+
-+/* part extraction and special addresses */
-+struct in_addr
-+subnetof(
-+ struct in_addr addr,
-+ struct in_addr mask
-+);
-+struct in_addr
-+hostof(
-+ struct in_addr addr,
-+ struct in_addr mask
-+);
-+struct in_addr
-+broadcastof(
-+ struct in_addr addr,
-+ struct in_addr mask
-+);
-+
-+/* mask handling */
-+int
-+goodmask(
-+ struct in_addr mask
-+);
-+extern int masktobits(struct in_addr mask);
-+extern struct in_addr bitstomask(int n);
-+extern struct in6_addr bitstomask6(int n);
-+
-+
-+
-+/*
-+ * ENUM of klips debugging values. Not currently used in klips.
-+ * debug flag is actually 32 -bits, but only one bit is ever used,
-+ * so we can actually pack it all into a single 32-bit word.
-+ */
-+enum klips_debug_flags {
-+ KDF_VERBOSE = 0,
-+ KDF_XMIT = 1,
-+ KDF_NETLINK = 2, /* obsolete */
-+ KDF_XFORM = 3,
-+ KDF_EROUTE = 4,
-+ KDF_SPI = 5,
-+ KDF_RADIJ = 6,
-+ KDF_ESP = 7,
-+ KDF_AH = 8, /* obsolete */
-+ KDF_RCV = 9,
-+ KDF_TUNNEL = 10,
-+ KDF_PFKEY = 11,
-+ KDF_COMP = 12,
-+ KDF_NATT = 13,
-+};
-+
-+
-+/*
-+ * Debugging levels for pfkey_lib_debug
-+ */
-+#define PF_KEY_DEBUG_PARSE_NONE 0
-+#define PF_KEY_DEBUG_PARSE_PROBLEM 1
-+#define PF_KEY_DEBUG_PARSE_STRUCT 2
-+#define PF_KEY_DEBUG_PARSE_FLOW 4
-+#define PF_KEY_DEBUG_BUILD 8
-+#define PF_KEY_DEBUG_PARSE_MAX 15
-+
-+extern unsigned int pfkey_lib_debug; /* bits selecting what to report */
-+
-+/*
-+ * pluto and lwdnsq need to know the maximum size of the commands to,
-+ * and replies from lwdnsq.
-+ */
-+
-+#define LWDNSQ_CMDBUF_LEN 1024
-+#define LWDNSQ_RESULT_LEN_MAX 4096
-+
-+
-+/* syntax for passthrough SA */
-+#ifndef PASSTHROUGHNAME
-+#define PASSTHROUGHNAME "%passthrough"
-+#define PASSTHROUGH4NAME "%passthrough4"
-+#define PASSTHROUGH6NAME "%passthrough6"
-+#define PASSTHROUGHIS "tun0@0.0.0.0"
-+#define PASSTHROUGH4IS "tun0@0.0.0.0"
-+#define PASSTHROUGH6IS "tun0@::"
-+#define PASSTHROUGHTYPE "tun"
-+#define PASSTHROUGHSPI 0
-+#define PASSTHROUGHDST 0
-+#endif
-+
-+
-+
-+#endif /* _OPENSWAN_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipcomp.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,58 @@
-+/*
-+ * IPCOMP zlib interface code.
-+ * Copyright (C) 2000 Svenning Soerensen <svenning@post5.tele.dk>
-+ * Copyright (C) 2000, 2001 Richard Guy Briggs <rgb@conscoop.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+
-+ RCSID $Id: ipcomp.h,v 1.14 2004/07/10 19:08:41 mcr Exp $
-+
-+ */
-+
-+/* SSS */
-+
-+#ifndef _IPCOMP_H
-+#define _IPCOMP_H
-+
-+/* Prefix all global deflate symbols with "ipcomp_" to avoid collisions with ppp_deflate & ext2comp */
-+#ifndef IPCOMP_PREFIX
-+#define IPCOMP_PREFIX
-+#endif /* IPCOMP_PREFIX */
-+
-+#ifndef IPPROTO_COMP
-+#define IPPROTO_COMP 108
-+#endif /* IPPROTO_COMP */
-+
-+#include "openswan/ipsec_sysctl.h"
-+
-+struct ipcomphdr { /* IPCOMP header */
-+ __u8 ipcomp_nh; /* Next header (protocol) */
-+ __u8 ipcomp_flags; /* Reserved, must be 0 */
-+ __u16 ipcomp_cpi; /* Compression Parameter Index */
-+};
-+
-+extern struct inet_protocol comp_protocol;
-+
-+#define IPCOMP_UNCOMPRESSABLE 0x000000001
-+#define IPCOMP_COMPRESSIONERROR 0x000000002
-+#define IPCOMP_PARMERROR 0x000000004
-+#define IPCOMP_DECOMPRESSIONERROR 0x000000008
-+
-+#define IPCOMP_ADAPT_INITIAL_TRIES 8
-+#define IPCOMP_ADAPT_INITIAL_SKIP 4
-+#define IPCOMP_ADAPT_SUBSEQ_TRIES 2
-+#define IPCOMP_ADAPT_SUBSEQ_SKIP 8
-+
-+/* Function prototypes */
-+struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags);
-+struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags);
-+
-+#endif /* _IPCOMP_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_ah.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,201 @@
-+/*
-+ * Authentication Header declarations
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_ah.h,v 1.26 2004/09/13 02:22:10 mcr Exp $
-+ */
-+
-+#include "ipsec_md5h.h"
-+#include "ipsec_sha1.h"
-+
-+#ifndef IPPROTO_AH
-+#define IPPROTO_AH 51
-+#endif /* IPPROTO_AH */
-+
-+#include "ipsec_auth.h"
-+
-+#ifdef __KERNEL__
-+
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+extern struct inet_protocol ah_protocol;
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+
-+struct options;
-+
-+struct ahhdr /* Generic AH header */
-+{
-+ __u8 ah_nh; /* Next header (protocol) */
-+ __u8 ah_hl; /* AH length, in 32-bit words */
-+ __u16 ah_rv; /* reserved, must be 0 */
-+ __u32 ah_spi; /* Security Parameters Index */
-+ __u32 ah_rpl; /* Replay prevention */
-+ __u8 ah_data[AHHMAC_HASHLEN];/* Authentication hash */
-+};
-+#define AH_BASIC_LEN 8 /* basic AH header is 8 bytes, nh,hl,rv,spi
-+ * and the ah_hl, says how many bytes after that
-+ * to cover. */
-+
-+extern struct xform_functions ah_xform_funcs[];
-+
-+#include "openswan/ipsec_sysctl.h"
-+
-+#endif /* __KERNEL__ */
-+
-+/*
-+ * $Log: ipsec_ah.h,v $
-+ * Revision 1.26 2004/09/13 02:22:10 mcr
-+ * #define inet_protocol if necessary.
-+ *
-+ * Revision 1.25 2004/09/06 18:35:41 mcr
-+ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility,
-+ * so adjust for that.
-+ *
-+ * Revision 1.24 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.23 2004/04/05 19:55:04 mcr
-+ * Moved from linux/include/freeswan/ipsec_ah.h,v
-+ *
-+ * Revision 1.22 2004/04/05 19:41:05 mcr
-+ * merged alg-branch code.
-+ *
-+ * Revision 1.21 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.22 2003/12/11 20:14:58 mcr
-+ * refactored the xmit code, to move all encapsulation
-+ * code into protocol functions. Note that all functions
-+ * are essentially done by a single function, which is probably
-+ * wrong.
-+ * the rcv_functions structures are renamed xform_functions.
-+ *
-+ * Revision 1.21 2003/12/06 21:21:19 mcr
-+ * split up receive path into per-transform files, for
-+ * easier later removal.
-+ *
-+ * Revision 1.20.8.1 2003/12/22 15:25:52 jjo
-+ * Merged algo-0.8.1-rc11-test1 into alg-branch
-+ *
-+ * Revision 1.20 2003/02/06 02:21:34 rgb
-+ *
-+ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h .
-+ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr".
-+ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code.
-+ *
-+ * Revision 1.19 2002/09/16 21:19:13 mcr
-+ * fixes for west-ah-icmp-01 - length of AH header must be
-+ * calculated properly, and next_header field properly copied.
-+ *
-+ * Revision 1.18 2002/05/14 02:37:02 rgb
-+ * Change reference from _TDB to _IPSA.
-+ *
-+ * Revision 1.17 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_ah.h,v
-+ *
-+ * Revision 1.16 2002/02/20 01:27:06 rgb
-+ * Ditched a pile of structs only used by the old Netlink interface.
-+ *
-+ * Revision 1.15 2001/12/11 02:35:57 rgb
-+ * Change "struct net_device" to "struct device" for 2.2 compatibility.
-+ *
-+ * Revision 1.14 2001/11/26 09:23:47 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.13.2.1 2001/09/25 02:18:24 mcr
-+ * replace "struct device" with "struct netdevice"
-+ *
-+ * Revision 1.13 2001/06/14 19:35:08 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.12 2000/09/12 03:21:20 rgb
-+ * Cleared out unused htonq.
-+ *
-+ * Revision 1.11 2000/09/08 19:12:55 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.10 2000/01/21 06:13:10 rgb
-+ * Tidied up spacing.
-+ * Added macros for HMAC padding magic numbers.(kravietz)
-+ *
-+ * Revision 1.9 1999/12/07 18:16:23 rgb
-+ * Fixed comments at end of #endif lines.
-+ *
-+ * Revision 1.8 1999/04/11 00:28:56 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.7 1999/04/06 04:54:25 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.6 1999/01/26 02:06:01 rgb
-+ * Removed CONFIG_IPSEC_ALGO_SWITCH macro.
-+ *
-+ * Revision 1.5 1999/01/22 06:17:49 rgb
-+ * Updated macro comments.
-+ * Added context types to support algorithm switch code.
-+ * 64-bit clean-up -- converting 'u long long' to __u64.
-+ *
-+ * Revision 1.4 1998/07/14 15:54:56 rgb
-+ * Add #ifdef __KERNEL__ to protect kernel-only structures.
-+ *
-+ * Revision 1.3 1998/06/30 18:05:16 rgb
-+ * Comment out references to htonq.
-+ *
-+ * Revision 1.2 1998/06/25 19:33:46 rgb
-+ * Add prototype for protocol receive function.
-+ * Rearrange for more logical layout.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:43 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.4 1998/05/18 22:28:43 rgb
-+ * Disable key printing facilities from /proc/net/ipsec_*.
-+ *
-+ * Revision 1.3 1998/04/21 21:29:07 rgb
-+ * Rearrange debug switches to change on the fly debug output from user
-+ * space. Only kernel changes checked in at this time. radij.c was also
-+ * changed to temporarily remove buggy debugging code in rj_delete causing
-+ * an OOPS and hence, netlink device open errors.
-+ *
-+ * Revision 1.2 1998/04/12 22:03:17 rgb
-+ * Updated ESP-3DES-HMAC-MD5-96,
-+ * ESP-DES-HMAC-MD5-96,
-+ * AH-HMAC-MD5-96,
-+ * AH-HMAC-SHA1-96 since Henry started freeswan cvs repository
-+ * from old standards (RFC182[5-9] to new (as of March 1998) drafts.
-+ *
-+ * Fixed eroute references in /proc/net/ipsec*.
-+ *
-+ * Started to patch module unloading memory leaks in ipsec_netlink and
-+ * radij tree unloading.
-+ *
-+ * Revision 1.1 1998/04/09 03:05:55 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:02 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * Added definitions for new AH transforms.
-+ *
-+ * Revision 0.3 1996/11/20 14:35:48 ji
-+ * Minor Cleanup.
-+ * Rationalized debugging code.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_alg.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,248 @@
-+/*
-+ * Modular extensions service and registration functions interface
-+ *
-+ * Author: JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ *
-+ * ipsec_alg.h,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ */
-+/*
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+#ifndef IPSEC_ALG_H
-+#define IPSEC_ALG_H
-+
-+/*
-+ * gcc >= 3.2 has removed __FUNCTION__, replaced by C99 __func__
-+ * *BUT* its a compiler variable.
-+ */
-+#if (__GNUC__ >= 3)
-+#ifndef __FUNCTION__
-+#define __FUNCTION__ __func__
-+#endif
-+#endif
-+
-+/* Version 0.8.1-0 */
-+#define IPSEC_ALG_VERSION 0x00080100
-+
-+#include <linux/types.h>
-+#include <linux/list.h>
-+#include <asm/atomic.h>
-+#include <openswan/pfkey.h>
-+
-+/*
-+ * The following structs are used via pointers in ipsec_alg object to
-+ * avoid ipsec_alg.h coupling with freeswan headers, thus simplifying
-+ * module development
-+ */
-+struct ipsec_sa;
-+struct esp;
-+
-+/**************************************
-+ *
-+ * Main registration object
-+ *
-+ *************************************/
-+#define IPSEC_ALG_VERSION_QUAD(v) \
-+ (v>>24),((v>>16)&0xff),((v>>8)&0xff),(v&0xff)
-+/*
-+ * Main ipsec_alg objects: "OOPrograming wannabe"
-+ * Hierachy (carefully handled with _minimal_ cast'ing):
-+ *
-+ * ipsec_alg+
-+ * +->ipsec_alg_enc (ixt_alg_type=SADB_EXT_SUPPORTED_ENCRYPT)
-+ * +->ipsec_alg_auth (ixt_alg_type=SADB_EXT_SUPPORTED_AUTH)
-+ */
-+
-+/***************************************************************
-+ *
-+ * INTERFACE object: struct ipsec_alg
-+ *
-+ ***************************************************************/
-+
-+#define ixt_alg_type ixt_support.ias_exttype
-+#define ixt_alg_id ixt_support.ias_id
-+
-+#define IPSEC_ALG_ST_SUPP 0x01
-+#define IPSEC_ALG_ST_REGISTERED 0x02
-+#define IPSEC_ALG_ST_EXCL 0x04
-+struct ipsec_alg {
-+ unsigned ixt_version; /* only allow this version (or 'near')*/ \
-+ struct list_head ixt_list; /* dlinked list */ \
-+ struct module *ixt_module; /* THIS_MODULE */ \
-+ unsigned ixt_state; /* state flags */ \
-+ atomic_t ixt_refcnt; /* ref. count when pointed from ipsec_sa */ \
-+ char ixt_name[16]; /* descriptive short name, eg. "3des" */ \
-+ void *ixt_data; /* private for algo implementation */ \
-+ uint8_t ixt_blocksize; /* blocksize in bytes */ \
-+
-+ struct ipsec_alg_supported ixt_support;
-+};
-+/*
-+ * Note the const in cbc_encrypt IV arg:
-+ * some ciphers like to toast passed IV (eg. 3DES): make a local IV copy
-+ */
-+struct ipsec_alg_enc {
-+ struct ipsec_alg ixt_common;
-+ unsigned ixt_e_keylen; /* raw key length in bytes */
-+ unsigned ixt_e_ctx_size; /* sa_p->key_e_size */
-+ int (*ixt_e_set_key)(struct ipsec_alg_enc *alg, __u8 *key_e, const __u8 *key, size_t keysize);
-+ __u8 *(*ixt_e_new_key)(struct ipsec_alg_enc *alg, const __u8 *key, size_t keysize);
-+ void (*ixt_e_destroy_key)(struct ipsec_alg_enc *alg, __u8 *key_e);
-+ int (*ixt_e_cbc_encrypt)(struct ipsec_alg_enc *alg, __u8 *key_e, __u8 *in, int ilen, __u8 *iv, int encrypt);
-+};
-+struct ipsec_alg_auth {
-+ struct ipsec_alg ixt_common;
-+ unsigned ixt_a_keylen; /* raw key length in bytes */
-+ unsigned ixt_a_ctx_size; /* sa_p->key_a_size */
-+ unsigned ixt_a_authlen; /* 'natural' auth. hash len (bytes) */
-+ int (*ixt_a_hmac_set_key)(struct ipsec_alg_auth *alg, __u8 *key_a, const __u8 *key, int keylen);
-+ int (*ixt_a_hmac_hash)(struct ipsec_alg_auth *alg, __u8 *key_a, const __u8 *dat, int len, __u8 *hash, int hashlen);
-+};
-+/*
-+ * These are _copies_ of SADB_EXT_SUPPORTED_{AUTH,ENCRYPT},
-+ * to avoid header coupling for true constants
-+ * about headers ... "cp is your friend" --Linus
-+ */
-+#define IPSEC_ALG_TYPE_AUTH 14
-+#define IPSEC_ALG_TYPE_ENCRYPT 15
-+
-+/***************************************************************
-+ *
-+ * INTERFACE for module loading,testing, and unloading
-+ *
-+ ***************************************************************/
-+/* - registration calls */
-+int register_ipsec_alg(struct ipsec_alg *);
-+int unregister_ipsec_alg(struct ipsec_alg *);
-+/* - optional (simple test) for algos */
-+int ipsec_alg_test(unsigned alg_type, unsigned alg_id, int testparm);
-+/* inline wrappers (usefull for type validation */
-+static inline int register_ipsec_alg_enc(struct ipsec_alg_enc *ixt) {
-+ return register_ipsec_alg((struct ipsec_alg*)ixt);
-+}
-+static inline int unregister_ipsec_alg_enc(struct ipsec_alg_enc *ixt) {
-+ return unregister_ipsec_alg((struct ipsec_alg*)ixt);
-+}
-+static inline int register_ipsec_alg_auth(struct ipsec_alg_auth *ixt) {
-+ return register_ipsec_alg((struct ipsec_alg*)ixt);
-+}
-+static inline int unregister_ipsec_alg_auth(struct ipsec_alg_auth *ixt) {
-+ return unregister_ipsec_alg((struct ipsec_alg*)ixt);
-+}
-+
-+/*****************************************************************
-+ *
-+ * INTERFACE for ENC services: key creation, encrypt function
-+ *
-+ *****************************************************************/
-+
-+#define IPSEC_ALG_ENCRYPT 1
-+#define IPSEC_ALG_DECRYPT 0
-+
-+/* encryption key context creation function */
-+int ipsec_alg_enc_key_create(struct ipsec_sa *sa_p);
-+/*
-+ * ipsec_alg_esp_encrypt(): encrypt ilen bytes in idat returns
-+ * 0 or ERR<0
-+ */
-+int ipsec_alg_esp_encrypt(struct ipsec_sa *sa_p, __u8 *idat, int ilen, __u8 *iv, int action);
-+
-+/***************************************************************
-+ *
-+ * INTERFACE for AUTH services: key creation, hash functions
-+ *
-+ ***************************************************************/
-+int ipsec_alg_auth_key_create(struct ipsec_sa *sa_p);
-+int ipsec_alg_sa_esp_hash(const struct ipsec_sa *sa_p, const __u8 *espp, int len, __u8 *hash, int hashlen) ;
-+#define ipsec_alg_sa_esp_update(c,k,l) ipsec_alg_sa_esp_hash(c,k,l,NULL,0)
-+
-+/* only called from ipsec_init.c */
-+int ipsec_alg_init(void);
-+
-+/* algo module glue for static algos */
-+void ipsec_alg_static_init(void);
-+typedef int (*ipsec_alg_init_func_t) (void);
-+
-+/**********************************************
-+ *
-+ * INTERFACE for ipsec_sa init and wipe
-+ *
-+ **********************************************/
-+
-+/* returns true if ipsec_sa has ipsec_alg obj attached */
-+/*
-+ * Initializes ipsec_sa's ipsec_alg object, using already loaded
-+ * proto, authalg, encalg.; links ipsec_alg objects (enc, auth)
-+ */
-+int ipsec_alg_sa_init(struct ipsec_sa *sa_p);
-+/*
-+ * Destroys ipsec_sa's ipsec_alg object
-+ * unlinking ipsec_alg objects
-+ */
-+int ipsec_alg_sa_wipe(struct ipsec_sa *sa_p);
-+
-+#define IPSEC_ALG_MODULE_INIT_MOD( func_name ) \
-+ static int func_name(void); \
-+ module_init(func_name); \
-+ static int __init func_name(void)
-+#define IPSEC_ALG_MODULE_EXIT_MOD( func_name ) \
-+ static void func_name(void); \
-+ module_exit(func_name); \
-+ static void __exit func_name(void)
-+
-+#define IPSEC_ALG_MODULE_INIT_STATIC( func_name ) \
-+ extern int func_name(void); \
-+ int func_name(void)
-+#define IPSEC_ALG_MODULE_EXIT_STATIC( func_name ) \
-+ extern void func_name(void); \
-+ void func_name(void)
-+
-+/**********************************************
-+ *
-+ * 2.2 backport for some 2.4 useful module stuff
-+ *
-+ **********************************************/
-+#ifdef MODULE
-+#ifndef THIS_MODULE
-+#define THIS_MODULE (&__this_module)
-+#endif
-+#ifndef module_init
-+typedef int (*__init_module_func_t)(void);
-+typedef void (*__cleanup_module_func_t)(void);
-+
-+#define module_init(x) \
-+ int init_module(void) __attribute__((alias(#x))); \
-+ static inline __init_module_func_t __init_module_inline(void) \
-+ { return x; }
-+#define module_exit(x) \
-+ void cleanup_module(void) __attribute__((alias(#x))); \
-+ static inline __cleanup_module_func_t __cleanup_module_inline(void) \
-+ { return x; }
-+#endif
-+#define IPSEC_ALG_MODULE_INIT( func_name ) IPSEC_ALG_MODULE_INIT_MOD( func_name )
-+#define IPSEC_ALG_MODULE_EXIT( func_name ) IPSEC_ALG_MODULE_EXIT_MOD( func_name )
-+
-+#else /* not MODULE */
-+#ifndef THIS_MODULE
-+#define THIS_MODULE NULL
-+#endif
-+/*
-+ * I only want module_init() magic
-+ * when algo.c file *is THE MODULE*, in all other
-+ * cases, initialization is called explicitely from ipsec_alg_init()
-+ */
-+#define IPSEC_ALG_MODULE_INIT( func_name ) IPSEC_ALG_MODULE_INIT_STATIC(func_name)
-+#define IPSEC_ALG_MODULE_EXIT( func_name ) IPSEC_ALG_MODULE_EXIT_STATIC(func_name)
-+#endif
-+
-+#endif /* IPSEC_ALG_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_alg_3des.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,12 @@
-+struct TripleDES_context {
-+ des_key_schedule s1;
-+ des_key_schedule s2;
-+ des_key_schedule s3;
-+};
-+typedef struct TripleDES_context TripleDES_context;
-+
-+#define ESP_3DES_KEY_SZ 3*(sizeof(des_cblock))
-+#define ESP_3DES_CBC_BLK_LEN 8
-+
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_auth.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,100 @@
-+/*
-+ * Authentication Header declarations
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_auth.h,v 1.3 2004/04/06 02:49:08 mcr Exp $
-+ */
-+
-+#include "ipsec_md5h.h"
-+#include "ipsec_sha1.h"
-+
-+#ifndef IPSEC_AUTH_H
-+#define IPSEC_AUTH_H
-+
-+#define AH_FLENGTH 12 /* size of fixed part */
-+#define AHMD5_KMAX 64 /* MD5 max 512 bits key */
-+#define AHMD5_AMAX 12 /* MD5 96 bits of authenticator */
-+
-+#define AHMD596_KLEN 16 /* MD5 128 bits key */
-+#define AHSHA196_KLEN 20 /* SHA1 160 bits key */
-+
-+#define AHMD596_ALEN 16 /* MD5 128 bits authentication length */
-+#define AHSHA196_ALEN 20 /* SHA1 160 bits authentication length */
-+
-+#define AHMD596_BLKLEN 64 /* MD5 block length */
-+#define AHSHA196_BLKLEN 64 /* SHA1 block length */
-+#define AHSHA2_256_BLKLEN 64 /* SHA2-256 block length */
-+#define AHSHA2_384_BLKLEN 128 /* SHA2-384 block length (?) */
-+#define AHSHA2_512_BLKLEN 128 /* SHA2-512 block length */
-+
-+#define AH_BLKLEN_MAX 128 /* keep up to date! */
-+
-+
-+#define AH_AMAX AHSHA196_ALEN /* keep up to date! */
-+#define AHHMAC_HASHLEN 12 /* authenticator length of 96bits */
-+#define AHHMAC_RPLLEN 4 /* 32 bit replay counter */
-+
-+#define DB_AH_PKTRX 0x0001
-+#define DB_AH_PKTRX2 0x0002
-+#define DB_AH_DMP 0x0004
-+#define DB_AH_IPSA 0x0010
-+#define DB_AH_XF 0x0020
-+#define DB_AH_INAU 0x0040
-+#define DB_AH_REPLAY 0x0100
-+
-+#ifdef __KERNEL__
-+
-+/* General HMAC algorithm is described in RFC 2104 */
-+
-+#define HMAC_IPAD 0x36
-+#define HMAC_OPAD 0x5C
-+
-+struct md5_ctx {
-+ MD5_CTX ictx; /* context after H(K XOR ipad) */
-+ MD5_CTX octx; /* context after H(K XOR opad) */
-+};
-+
-+struct sha1_ctx {
-+ SHA1_CTX ictx; /* context after H(K XOR ipad) */
-+ SHA1_CTX octx; /* context after H(K XOR opad) */
-+};
-+
-+struct auth_alg {
-+ void (*init)(void *ctx);
-+ void (*update)(void *ctx, unsigned char *bytes, __u32 len);
-+ void (*final)(unsigned char *hash, void *ctx);
-+ int hashlen;
-+};
-+
-+struct options;
-+
-+#endif /* __KERNEL__ */
-+#endif /* IPSEC_AUTH_H */
-+
-+/*
-+ * $Log: ipsec_auth.h,v $
-+ * Revision 1.3 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.2 2004/04/05 19:55:04 mcr
-+ * Moved from linux/include/freeswan/ipsec_auth.h,v
-+ *
-+ * Revision 1.1 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.1 2003/12/06 21:21:19 mcr
-+ * split up receive path into per-transform files, for
-+ * easier later removal.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_encap.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,149 @@
-+/*
-+ * declarations relevant to encapsulation-like operations
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_encap.h,v 1.19 2004/04/05 19:55:04 mcr Exp $
-+ */
-+
-+#ifndef _IPSEC_ENCAP_H_
-+
-+#define SENT_IP4 16 /* data is two struct in_addr + proto + ports*/
-+ /* (2 * sizeof(struct in_addr)) */
-+ /* sizeof(struct sockaddr_encap)
-+ - offsetof(struct sockaddr_encap, Sen.Sip4.Src) */
-+
-+struct sockaddr_encap
-+{
-+ __u8 sen_len; /* length */
-+ __u8 sen_family; /* AF_ENCAP */
-+ __u16 sen_type; /* see SENT_* */
-+ union
-+ {
-+ struct /* SENT_IP4 */
-+ {
-+ struct in_addr Src;
-+ struct in_addr Dst;
-+ __u8 Proto;
-+ __u16 Sport;
-+ __u16 Dport;
-+ } Sip4;
-+ } Sen;
-+};
-+
-+#define sen_ip_src Sen.Sip4.Src
-+#define sen_ip_dst Sen.Sip4.Dst
-+#define sen_proto Sen.Sip4.Proto
-+#define sen_sport Sen.Sip4.Sport
-+#define sen_dport Sen.Sip4.Dport
-+
-+#ifndef AF_ENCAP
-+#define AF_ENCAP 26
-+#endif /* AF_ENCAP */
-+
-+#define _IPSEC_ENCAP_H_
-+#endif /* _IPSEC_ENCAP_H_ */
-+
-+/*
-+ * $Log: ipsec_encap.h,v $
-+ * Revision 1.19 2004/04/05 19:55:04 mcr
-+ * Moved from linux/include/freeswan/ipsec_encap.h,v
-+ *
-+ * Revision 1.18 2003/10/31 02:27:05 mcr
-+ * pulled up port-selector patches and sa_id elimination.
-+ *
-+ * Revision 1.17.30.1 2003/09/21 13:59:38 mcr
-+ * pre-liminary X.509 patch - does not yet pass tests.
-+ *
-+ * Revision 1.17 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_encap.h,v
-+ *
-+ * Revision 1.16 2001/11/26 09:23:47 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.15.2.1 2001/09/25 02:18:54 mcr
-+ * struct eroute moved to ipsec_eroute.h
-+ *
-+ * Revision 1.15 2001/09/14 16:58:36 rgb
-+ * Added support for storing the first and last packets through a HOLD.
-+ *
-+ * Revision 1.14 2001/09/08 21:13:31 rgb
-+ * Added pfkey ident extension support for ISAKMPd. (NetCelo)
-+ *
-+ * Revision 1.13 2001/06/14 19:35:08 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.12 2001/05/27 06:12:10 rgb
-+ * Added structures for pid, packet count and last access time to eroute.
-+ * Added packet count to beginning of /proc/net/ipsec_eroute.
-+ *
-+ * Revision 1.11 2000/09/08 19:12:56 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.10 2000/03/22 16:15:36 rgb
-+ * Fixed renaming of dev_get (MB).
-+ *
-+ * Revision 1.9 2000/01/21 06:13:26 rgb
-+ * Added a macro for AF_ENCAP
-+ *
-+ * Revision 1.8 1999/12/31 14:56:55 rgb
-+ * MB fix for 2.3 dev-use-count.
-+ *
-+ * Revision 1.7 1999/11/18 04:09:18 rgb
-+ * Replaced all kernel version macros to shorter, readable form.
-+ *
-+ * Revision 1.6 1999/09/24 00:34:13 rgb
-+ * Add Marc Boucher's support for 2.3.xx+.
-+ *
-+ * Revision 1.5 1999/04/11 00:28:57 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.4 1999/04/06 04:54:25 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.3 1998/10/19 14:44:28 rgb
-+ * Added inclusion of freeswan.h.
-+ * sa_id structure implemented and used: now includes protocol.
-+ *
-+ * Revision 1.2 1998/07/14 18:19:33 rgb
-+ * Added #ifdef __KERNEL__ directives to restrict scope of header.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:44 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.2 1998/04/21 21:29:10 rgb
-+ * Rearrange debug switches to change on the fly debug output from user
-+ * space. Only kernel changes checked in at this time. radij.c was also
-+ * changed to temporarily remove buggy debugging code in rj_delete causing
-+ * an OOPS and hence, netlink device open errors.
-+ *
-+ * Revision 1.1 1998/04/09 03:05:58 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:02 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * Minor cosmetic changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:35:48 ji
-+ * Minor Cleanup.
-+ * Rationalized debugging code.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_eroute.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,112 @@
-+/*
-+ * @(#) declarations of eroute structures
-+ *
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * Copyright (C) 2001 Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_eroute.h,v 1.5 2004/04/05 19:55:05 mcr Exp $
-+ *
-+ * derived from ipsec_encap.h 1.15 on 2001/9/18 by mcr.
-+ *
-+ */
-+
-+#ifndef _IPSEC_EROUTE_H_
-+
-+#include "radij.h"
-+#include "ipsec_encap.h"
-+#include "ipsec_radij.h"
-+
-+/*
-+ * The "type" is really part of the address as far as the routing
-+ * system is concerned. By using only one bit in the type field
-+ * for each type, we sort-of make sure that different types of
-+ * encapsulation addresses won't be matched against the wrong type.
-+ */
-+
-+/*
-+ * An entry in the radix tree
-+ */
-+
-+struct rjtentry
-+{
-+ struct radij_node rd_nodes[2]; /* tree glue, and other values */
-+#define rd_key(r) ((struct sockaddr_encap *)((r)->rd_nodes->rj_key))
-+#define rd_mask(r) ((struct sockaddr_encap *)((r)->rd_nodes->rj_mask))
-+ short rd_flags;
-+ short rd_count;
-+};
-+
-+struct ident
-+{
-+ __u16 type; /* identity type */
-+ __u64 id; /* identity id */
-+ __u8 len; /* identity len */
-+ caddr_t data; /* identity data */
-+};
-+
-+/*
-+ * An encapsulation route consists of a pointer to a
-+ * radix tree entry and a SAID (a destination_address/SPI/protocol triple).
-+ */
-+
-+struct eroute
-+{
-+ struct rjtentry er_rjt;
-+ ip_said er_said;
-+ uint32_t er_pid;
-+ uint32_t er_count;
-+ uint64_t er_lasttime;
-+ struct sockaddr_encap er_eaddr; /* MCR get rid of _encap, it is silly*/
-+ struct sockaddr_encap er_emask;
-+ struct ident er_ident_s;
-+ struct ident er_ident_d;
-+ struct sk_buff* er_first;
-+ struct sk_buff* er_last;
-+};
-+
-+#define er_dst er_said.dst
-+#define er_spi er_said.spi
-+
-+#define _IPSEC_EROUTE_H_
-+#endif /* _IPSEC_EROUTE_H_ */
-+
-+/*
-+ * $Log: ipsec_eroute.h,v $
-+ * Revision 1.5 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_eroute.h,v
-+ *
-+ * Revision 1.4 2003/10/31 02:27:05 mcr
-+ * pulled up port-selector patches and sa_id elimination.
-+ *
-+ * Revision 1.3.30.2 2003/10/29 01:10:19 mcr
-+ * elimited "struct sa_id"
-+ *
-+ * Revision 1.3.30.1 2003/09/21 13:59:38 mcr
-+ * pre-liminary X.509 patch - does not yet pass tests.
-+ *
-+ * Revision 1.3 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_eroute.h,v
-+ *
-+ * Revision 1.2 2001/11/26 09:16:13 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.1.2.1 2001/09/25 02:18:54 mcr
-+ * struct eroute moved to ipsec_eroute.h
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_errs.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,53 @@
-+/*
-+ * @(#) definition of ipsec_errs structure
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_errs.h,v 1.4 2004/04/05 19:55:05 mcr Exp $
-+ *
-+ */
-+
-+/*
-+ * This file describes the errors/statistics that FreeSWAN collects.
-+ *
-+ */
-+
-+struct ipsec_errs {
-+ __u32 ips_alg_errs; /* number of algorithm errors */
-+ __u32 ips_auth_errs; /* # of authentication errors */
-+ __u32 ips_encsize_errs; /* # of encryption size errors*/
-+ __u32 ips_encpad_errs; /* # of encryption pad errors*/
-+ __u32 ips_replaywin_errs; /* # of pkt sequence errors */
-+};
-+
-+/*
-+ * $Log: ipsec_errs.h,v $
-+ * Revision 1.4 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_errs.h,v
-+ *
-+ * Revision 1.3 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_errs.h,v
-+ *
-+ * Revision 1.2 2001/11/26 09:16:13 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.1.2.1 2001/09/25 02:25:57 mcr
-+ * lifetime structure created and common functions created.
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_esp.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,161 @@
-+/*
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_esp.h,v 1.28 2004/09/13 02:22:10 mcr Exp $
-+ */
-+
-+#include "openswan/ipsec_md5h.h"
-+#include "openswan/ipsec_sha1.h"
-+
-+#include "klips-crypto/des.h"
-+
-+#ifndef IPPROTO_ESP
-+#define IPPROTO_ESP 50
-+#endif /* IPPROTO_ESP */
-+
-+#define ESP_HEADER_LEN 8 /* 64 bits header (spi+rpl)*/
-+
-+#define EMT_ESPDESCBC_ULEN 20 /* coming from user mode */
-+#define EMT_ESPDES_KMAX 64 /* 512 bit secret key enough? */
-+#define EMT_ESPDES_KEY_SZ 8 /* 56 bit secret key with parity = 64 bits */
-+#define EMT_ESP3DES_KEY_SZ 24 /* 168 bit secret key with parity = 192 bits */
-+#define EMT_ESPDES_IV_SZ 8 /* IV size */
-+#define ESP_DESCBC_BLKLEN 8 /* DES-CBC block size */
-+
-+#define ESP_IV_MAXSZ 16 /* This is _critical_ */
-+#define ESP_IV_MAXSZ_INT (ESP_IV_MAXSZ/sizeof(int))
-+
-+#define DB_ES_PKTRX 0x0001
-+#define DB_ES_PKTRX2 0x0002
-+#define DB_ES_IPSA 0x0010
-+#define DB_ES_XF 0x0020
-+#define DB_ES_IPAD 0x0040
-+#define DB_ES_INAU 0x0080
-+#define DB_ES_OINFO 0x0100
-+#define DB_ES_OINFO2 0x0200
-+#define DB_ES_OH 0x0400
-+#define DB_ES_REPLAY 0x0800
-+
-+#ifdef __KERNEL__
-+struct des_eks {
-+ des_key_schedule ks;
-+};
-+
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+extern struct inet_protocol esp_protocol;
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+
-+struct options;
-+
-+struct esphdr
-+{
-+ __u32 esp_spi; /* Security Parameters Index */
-+ __u32 esp_rpl; /* Replay counter */
-+ __u8 esp_iv[8]; /* iv */
-+};
-+
-+extern struct xform_functions esp_xform_funcs[];
-+
-+extern enum ipsec_rcv_value ipsec_rcv_esp_post_decrypt(struct ipsec_rcv_state *irs);
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_esp;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+#endif /* __KERNEL__ */
-+
-+/*
-+ * $Log: ipsec_esp.h,v $
-+ * Revision 1.28 2004/09/13 02:22:10 mcr
-+ * #define inet_protocol if necessary.
-+ *
-+ * Revision 1.27 2004/09/06 18:35:41 mcr
-+ * 2.6.8.1 gets rid of inet_protocol->net_protocol compatibility,
-+ * so adjust for that.
-+ *
-+ * Revision 1.26 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.25 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.24 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_esp.h,v
-+ *
-+ * Revision 1.23 2004/04/05 19:41:05 mcr
-+ * merged alg-branch code.
-+ *
-+ * Revision 1.22 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.23 2003/12/11 20:14:58 mcr
-+ * refactored the xmit code, to move all encapsulation
-+ * code into protocol functions. Note that all functions
-+ * are essentially done by a single function, which is probably
-+ * wrong.
-+ * the rcv_functions structures are renamed xform_functions.
-+ *
-+ * Revision 1.22 2003/12/06 21:21:19 mcr
-+ * split up receive path into per-transform files, for
-+ * easier later removal.
-+ *
-+ * Revision 1.21.8.1 2003/12/22 15:25:52 jjo
-+ * Merged algo-0.8.1-rc11-test1 into alg-branch
-+ *
-+ * Revision 1.21 2003/02/06 02:21:34 rgb
-+ *
-+ * Moved "struct auth_alg" from ipsec_rcv.c to ipsec_ah.h .
-+ * Changed "struct ah" to "struct ahhdr" and "struct esp" to "struct esphdr".
-+ * Removed "#ifdef INBOUND_POLICY_CHECK_eroute" dead code.
-+ *
-+ * Revision 1.20 2002/05/14 02:37:02 rgb
-+ * Change reference from _TDB to _IPSA.
-+ *
-+ * Revision 1.19 2002/04/24 07:55:32 mcr
-+ * #include patches and Makefiles for post-reorg compilation.
-+ *
-+ * Revision 1.18 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_esp.h,v
-+ *
-+ * Revision 1.17 2002/02/20 01:27:07 rgb
-+ * Ditched a pile of structs only used by the old Netlink interface.
-+ *
-+ * Revision 1.16 2001/12/11 02:35:57 rgb
-+ * Change "struct net_device" to "struct device" for 2.2 compatibility.
-+ *
-+ * Revision 1.15 2001/11/26 09:23:48 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.14.2.3 2001/10/23 04:16:42 mcr
-+ * get definition of des_key_schedule from des.h
-+ *
-+ * Revision 1.14.2.2 2001/10/22 20:33:13 mcr
-+ * use "des_key_schedule" structure instead of cooking our own.
-+ *
-+ * Revision 1.14.2.1 2001/09/25 02:18:25 mcr
-+ * replace "struct device" with "struct netdevice"
-+ *
-+ * Revision 1.14 2001/06/14 19:35:08 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.13 2000/09/08 19:12:56 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.12 2000/08/01 14:51:50 rgb
-+ * Removed _all_ remaining traces of DES.
-+ *
-+ * Revision 1.11 2000/01/10 16:36:20 rgb
-+ * Ditch last of EME option flags, including initiator.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_ipcomp.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,97 @@
-+/*
-+ * IP compression header declations
-+ *
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_ipcomp.h,v 1.4 2004/07/10 19:08:41 mcr Exp $
-+ */
-+
-+#ifndef IPSEC_IPCOMP_H
-+#define IPSEC_IPCOMP_H
-+
-+#include "openswan/ipsec_auth.h"
-+
-+/* Prefix all global deflate symbols with "ipcomp_" to avoid collisions with ppp_deflate & ext2comp */
-+#ifndef IPCOMP_PREFIX
-+#define IPCOMP_PREFIX
-+#endif /* IPCOMP_PREFIX */
-+
-+#ifndef IPPROTO_COMP
-+#define IPPROTO_COMP 108
-+#endif /* IPPROTO_COMP */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int sysctl_ipsec_debug_ipcomp;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+struct ipcomphdr { /* IPCOMP header */
-+ __u8 ipcomp_nh; /* Next header (protocol) */
-+ __u8 ipcomp_flags; /* Reserved, must be 0 */
-+ __u16 ipcomp_cpi; /* Compression Parameter Index */
-+};
-+
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+extern struct inet_protocol comp_protocol;
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+
-+extern int sysctl_ipsec_debug_ipcomp;
-+
-+#define IPCOMP_UNCOMPRESSABLE 0x000000001
-+#define IPCOMP_COMPRESSIONERROR 0x000000002
-+#define IPCOMP_PARMERROR 0x000000004
-+#define IPCOMP_DECOMPRESSIONERROR 0x000000008
-+
-+#define IPCOMP_ADAPT_INITIAL_TRIES 8
-+#define IPCOMP_ADAPT_INITIAL_SKIP 4
-+#define IPCOMP_ADAPT_SUBSEQ_TRIES 2
-+#define IPCOMP_ADAPT_SUBSEQ_SKIP 8
-+
-+/* Function prototypes */
-+struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags);
-+struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags);
-+
-+extern struct xform_functions ipcomp_xform_funcs[];
-+
-+#endif /* IPSEC_IPCOMP_H */
-+
-+/*
-+ * $Log: ipsec_ipcomp.h,v $
-+ * Revision 1.4 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.3 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.2 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_ipcomp.h,v
-+ *
-+ * Revision 1.1 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.2 2003/12/11 20:14:58 mcr
-+ * refactored the xmit code, to move all encapsulation
-+ * code into protocol functions. Note that all functions
-+ * are essentially done by a single function, which is probably
-+ * wrong.
-+ * the rcv_functions structures are renamed xform_functions.
-+ *
-+ * Revision 1.1 2003/12/06 21:21:19 mcr
-+ * split up receive path into per-transform files, for
-+ * easier later removal.
-+ *
-+ *
-+ *
-+ */
-+
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_ipe4.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,68 @@
-+/*
-+ * IP-in-IP Header declarations
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_ipe4.h,v 1.6 2004/04/05 19:55:05 mcr Exp $
-+ */
-+
-+/* The packet header is an IP header! */
-+
-+struct ipe4_xdata /* transform table data */
-+{
-+ struct in_addr i4_src;
-+ struct in_addr i4_dst;
-+};
-+
-+#define EMT_IPE4_ULEN 8 /* coming from user mode */
-+
-+
-+/*
-+ * $Log: ipsec_ipe4.h,v $
-+ * Revision 1.6 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_ipe4.h,v
-+ *
-+ * Revision 1.5 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_ipe4.h,v
-+ *
-+ * Revision 1.4 2001/06/14 19:35:08 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.3 1999/04/11 00:28:57 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.2 1999/04/06 04:54:25 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:47 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.1 1998/04/09 03:06:07 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:03 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * No changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:48:53 ji
-+ * Release update only.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_ipip.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,45 @@
-+/*
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_ipip.h,v 1.2 2004/04/05 19:55:05 mcr Exp $
-+ */
-+
-+#ifndef _IPSEC_IPIP_H_
-+
-+#ifndef IPPROTO_IPIP
-+#define IPPROTO_IPIP 4
-+#endif /* IPPROTO_ESP */
-+
-+extern struct xform_functions ipip_xform_funcs[];
-+
-+#define _IPSEC_IPIP_H_
-+
-+#endif /* _IPSEC_IPIP_H_ */
-+
-+/*
-+ * $Log: ipsec_ipip.h,v $
-+ * Revision 1.2 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_ipip.h,v
-+ *
-+ * Revision 1.1 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.1 2003/12/11 20:14:58 mcr
-+ * refactored the xmit code, to move all encapsulation
-+ * code into protocol functions. Note that all functions
-+ * are essentially done by a single function, which is probably
-+ * wrong.
-+ * the rcv_functions structures are renamed xform_functions.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_kern24.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,92 @@
-+/*
-+ * @(#) routines to makes kernel 2.4 compatible with 2.6 usage.
-+ *
-+ * Copyright (C) 2004 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ * Copyright (C) 2005 - 2008 Paul Wouters <paul@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+#ifndef _IPSEC_KERN24_H
-+
-+
-+#ifdef NETDEV_23
-+#if 0
-+#ifndef NETDEV_25
-+#define device net_device
-+#endif
-+#endif
-+
-+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+# define __ipsec_dev_get(x) __dev_get_by_name(&init_net, x)
-+# define ipsec_dev_get(x) dev_get_by_name(&init_net, x)
-+# else
-+# define ipsec_dev_get(x) __dev_get_by_name(x)
-+# define __ipsec_dev_get(x) __dev_get_by_name(x)
-+# endif
-+
-+# define ipsec_dev_put(x) dev_put(x)
-+# define __ipsec_dev_put(x) __dev_put(x)
-+# define ipsec_dev_hold(x) dev_hold(x)
-+#else /* NETDEV_23 */
-+# define ipsec_dev_get dev_get
-+# define __ipsec_dev_put(x)
-+# define ipsec_dev_put(x)
-+# define ipsec_dev_hold(x)
-+#endif /* NETDEV_23 */
-+
-+#ifndef HAVE_NETDEV_PRINTK
-+#define netdev_printk(sevlevel, netdev, msglevel, format, arg...) \
-+ printk(sevlevel "%s: " format , netdev->name , ## arg)
-+#endif
-+
-+#ifndef NET_26
-+#define sk_receive_queue receive_queue
-+#define sk_destruct destruct
-+#define sk_reuse reuse
-+#define sk_zapped zapped
-+#define sk_family family
-+#define sk_protocol protocol
-+#define sk_protinfo protinfo
-+#define sk_sleep sleep
-+#define sk_state_change state_change
-+#define sk_shutdown shutdown
-+#define sk_err err
-+#define sk_stamp stamp
-+#define sk_socket socket
-+#define sk_sndbuf sndbuf
-+#define sock_flag(sk, flag) sk->dead
-+#define sk_for_each(sk, node, plist) for(sk=*plist; sk!=NULL; sk = sk->next)
-+#endif
-+
-+/* deal with 2.4 vs 2.6 issues with module counts */
-+
-+/* in 2.6, all refcounts are maintained *outside* of the
-+ * module to deal with race conditions.
-+ */
-+
-+#ifdef NET_26
-+#define KLIPS_INC_USE /* nothing */
-+#define KLIPS_DEC_USE /* nothing */
-+
-+#else
-+#define KLIPS_INC_USE MOD_INC_USE_COUNT
-+#define KLIPS_DEC_USE MOD_DEC_USE_COUNT
-+#endif
-+
-+extern int printk_ratelimit(void);
-+
-+
-+#define _IPSEC_KERN24_H 1
-+
-+#endif /* _IPSEC_KERN24_H */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_kversion.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,441 @@
-+#ifndef _OPENSWAN_KVERSIONS_H
-+/*
-+ * header file for Openswan library functions
-+ * Copyright (C) 1998, 1999, 2000 Henry Spencer.
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs
-+ * Copyright (C) 2003 - 2008 Paul Wouters <paul@xelerance.com>
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ */
-+#define _OPENSWAN_KVERSIONS_H /* seen it, no need to see it again */
-+
-+/*
-+ * this file contains a series of atomic defines that depend upon
-+ * kernel version numbers. The kernel versions are arranged
-+ * in version-order number (which is often not chronological)
-+ * and each clause enables or disables a feature.
-+ */
-+
-+/*
-+ * First, assorted kernel-version-dependent trickery.
-+ */
-+#include <linux/version.h>
-+#ifndef KERNEL_VERSION
-+# define KERNEL_VERSION(x,y,z) (((x)<<16)+((y)<<8)+(z))
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0)
-+# define HEADER_CACHE_BIND_21
-+# error "KLIPS is no longer supported on Linux 2.0. Sorry"
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,1,0)
-+# define SPINLOCK
-+# define PROC_FS_21
-+# define NETLINK_SOCK
-+# define NET_21
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,1,19)
-+# define net_device_stats enet_statistics
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
-+# define SPINLOCK_23
-+# define NETDEV_23
-+# ifndef CONFIG_IP_ALIAS
-+# define CONFIG_IP_ALIAS
-+# endif
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,25)
-+# define PROC_FS_2325
-+# undef PROC_FS_21
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,30)
-+# define PROC_NO_DUMMY
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,35)
-+# define SKB_COPY_EXPAND
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,37)
-+# define IP_SELECT_IDENT
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,2)
-+# define IP_SELECT_IDENT_NEW
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4)
-+# define IPH_is_SKB_PULLED
-+# define SKB_COW_NEW
-+# define PROTO_HANDLER_SINGLE_PARM
-+# define IP_FRAGMENT_LINEARIZE 1
-+#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4) */
-+# ifdef REDHAT_BOGOSITY
-+# define IP_SELECT_IDENT_NEW
-+# define IPH_is_SKB_PULLED
-+# define SKB_COW_NEW
-+# define PROTO_HANDLER_SINGLE_PARM
-+# endif /* REDHAT_BOGOSITY */
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,4) */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9)
-+# define MALLOC_SLAB
-+# define LINUX_KERNEL_HAS_SNPRINTF
-+#endif
-+
-+/* API changes are documented at: http://lwn.net/Articles/2.6-kernel-api/ */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-+# define HAVE_NETDEV_PRINTK 1
-+# define NET_26
-+# define NETDEV_25
-+# define NEED_SPINLOCK_TYPES
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8)
-+# define NEED_INET_PROTOCOL
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
-+# define HAVE_SOCK_ZAPPED
-+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+# define NET_26_24_SKALLOC
-+# else
-+# define NET_26_12_SKALLOC
-+# endif
-+#endif
-+#endif
-+
-+/* see <linux/security.h> */
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
-+# define HAVE_SOCK_SECURITY
-+/* skb->nf_debug disappared completely in 2.6.13 */
-+# define ipsec_nf_debug_reset(skb) ((skb)->nf_debug = 0)
-+#else
-+# define ipsec_nf_debug_reset(skb)
-+#endif
-+
-+/* how to reset an skb we are reusing after encrpytion/decryption etc */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
-+# define ipsec_nf_reset(skb) nf_reset((skb))
-+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,50) && defined(CONFIG_NETFILTER)
-+# define ipsec_nf_reset(skb) do { \
-+ nf_conntrack_put((skb)->nfct); \
-+ (skb)->nfct=NULL; \
-+ ipsec_nf_debug_reset(skb); \
-+ } while(0)
-+#else
-+# define ipsec_nf_reset(skb) /**/
-+#endif
-+
-+/* skb->stamp changed to skb->tstamp in 2.6.14 */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-+# define HAVE_TSTAMP
-+# define HAVE_INET_SK_SPORT
-+#else
-+# define HAVE_SKB_LIST
-+#endif
-+
-+/* it seems 2.6.14 accidentally removed sysctl_ip_default_ttl */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-+# define SYSCTL_IPSEC_DEFAULT_TTL IPSEC_DEFAULT_TTL
-+#else
-+# define SYSCTL_IPSEC_DEFAULT_TTL sysctl_ip_default_ttl
-+#endif
-+
-+/*
-+ The obsolete MODULE_PARM() macro is gone forevermore [in 2.6.17+]
-+ It was introduced in 2.6.0
-+ Zero-filled memory can now be allocated from slab caches with
-+ kmem_cache_zalloc(). There is also a new slab debugging option
-+ to produce a /proc/slab_allocators file with detailed allocation
-+ information.
-+ */
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-+# define module_param(a,b,c) MODULE_PARM(#a,"i")
-+/* note below is only true for our current calls to module_param_array */
-+# define module_param_array(a,b,c,d) MODULE_PARM(#a,"1-2i")
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18)
-+/*
-+ The skb_linearize() function has been reworked, and no longer has a
-+ GFP flags argument. There is also a new skb_linearize_cow() function
-+ which ensures that the resulting SKB is writable.
-+ Network drivers should no longer manipulate the xmit_lock spinlock
-+ in the net_device structure; instead, the following new functions
-+ should be used:
-+ int netif_tx_lock(struct net_device *dev);
-+ int netif_tx_lock_bh(struct net_device *dev);
-+ void netif_tx_unlock(struct net_device *dev);
-+ void netif_tx_unlock_bh(struct net_device *dev);
-+ int netif_tx_trylock(struct net_device *dev);
-+ A number of crypto API changes have been merged, the biggest being
-+ a change to most algorithm-specific functions to take a pointer to
-+ the crypto_tfm structure, rather than the old "context" pointer. This
-+ change was necessary to support parameterized algorithms.
-+*/
-+
-+# define HAVE_NEW_SKB_LINEARIZE
-+#endif
-+
-+/* this is the best we can do to detect XEN, which makes
-+ * patches to linux/skbuff.h, making it look like 2.6.18 version
-+ */
-+#ifdef CONFIG_XEN
-+# define HAVE_NEW_SKB_LINEARIZE
-+#endif
-+
-+/* And the same for SuSe kernels who have it before it got into the
-+ * linus kernel.
-+ */
-+#ifdef SLE_VERSION_CODE
-+# if SLE_VERSION_CODE >= 655616
-+# define HAVE_NEW_SKB_LINEARIZE
-+# else
-+# warning "A Suse kernel was detected, but we are unsure if it requires HAVE_NEW_SKB_LINEARIZE"
-+# endif
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
-+# define VOID_SOCK_UNREGISTER
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
-+/* skb->nfmark changed to skb->mark in 2.6.20 */
-+# define nfmark mark
-+#else
-+# define HAVE_KMEM_CACHE_T
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
-+/*
-+ Significant changes have been made to the crypto support interface.
-+ The sysctl code has been heavily reworked, leading to a number of
-+ internal API changes.
-+*/
-+# define ipsec_register_sysctl_table(a,b) register_sysctl_table(a)
-+# define CTL_TABLE_PARENT
-+#else
-+# define ipsec_register_sysctl_table(a,b) register_sysctl_table(a,b)
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
-+/*
-+ The eth_type_trans() function now sets the skb->dev field, consistent
-+ with how similar functions for other link types operate. As a result,
-+ many Ethernet drivers have been changed to remove the (now) redundant
-+ assignment.
-+ The header fields in the sk_buff structure have been renamed
-+ and are no longer unions. Networking code and drivers can
-+ now just use skb->transport_header, skb->network_header, and
-+ skb->skb_mac_header. There are new functions for finding specific
-+ headers within packets: tcp_hdr(), udp_hdr(), ipip_hdr(), and
-+ ipipv6_hdr().
-+ The crypto API has a new set of functions for use with asynchronous
-+ block ciphers. There is also a new cryptd kernel thread which can
-+ run any synchronous cipher in an asynchronous mode.
-+ A new macro has been added to make the creation of slab caches easier:
-+ struct kmem_cache KMEM_CACHE(struct-type, flags);
-+ The result is the creation of a cache holding objects of the given
-+ struct_type, named after that type, and with the additional slab
-+ flags (if any).
-+*/
-+
-+/* need to include ip.h early, no longer pick it up in skbuff.h */
-+# include <linux/ip.h>
-+# define HAVE_KERNEL_TSTAMP
-+/* type of sock.sk_stamp changed from timeval to ktime */
-+# define grab_socket_timeval(tv, sock) { (tv) = ktime_to_timeval((sock).sk_stamp); }
-+#else
-+# define grab_socket_timeval(tv, sock) { (tv) = (sock).sk_stamp; }
-+/* internals of struct skbuff changed */
-+# define HAVE_DEV_NEXT
-+# define ip_hdr(skb) ((skb)->nh.iph)
-+# define skb_tail_pointer(skb) ((skb)->tail)
-+# define skb_end_pointer(skb) ((skb)->end)
-+# define skb_network_header(skb) ((skb)->nh.raw)
-+# define skb_set_network_header(skb,off) ((skb)->nh.raw = (skb)->data + (off))
-+# define tcp_hdr(skb) ((skb)->h.th)
-+# define udp_hdr(skb) ((skb)->h.uh)
-+# define skb_transport_header(skb) ((skb)->h.raw)
-+# define skb_set_transport_header(skb,off) ((skb)->h.raw = (skb)->data + (off))
-+# define skb_mac_header(skb) ((skb)->mac.raw)
-+# define skb_set_mac_header(skb,off) ((skb)->mac.raw = (skb)->data + (off))
-+#endif
-+/* turn a pointer into an offset for above macros */
-+#define ipsec_skb_offset(skb, ptr) (((unsigned char *)(ptr)) - (skb)->data)
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
-+/*
-+ * The macro got introduced in 2,6,22 but it does not work properly, and
-+ * still uses the old number of arguments.
-+ */
-+ /*
-+ The destructor argument has been removed from kmem_cache_create(), as
-+ destructors are no longer supported. All in-kernel callers have been
-+ updated
-+ */
-+# define HAVE_KMEM_CACHE_MACRO
-+
-+/* Try using the new kernel encaps hook for nat-t, instead of udp.c */
-+# ifdef NOT_YET_FINISHED
-+# define HAVE_UDP_ENCAP_CONVERT
-+# endif
-+
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+/*
-+ * We can switch on earlier kernels, but from here on we have no choice
-+ * but to abandon the old style proc_net and use seq_file
-+ * The hard_header() method has been removed from struct net_device;
-+ it has been replaced by a per-protocol header_ops structure pointer.
-+
-+ The prototype for slab constructor callbacks has changed to:
-+ void (*ctor)(struct kmem_cache *cache, void *object);
-+ The unused flags argument has been removed and the order of the other
-+ two arguments has been reversed to match other slab functions.
-+ */
-+# define HAVE_PROC_DIR_ENTRY
-+# define PROC_NET init_net.proc_net
-+
-+# define __ipsec_dev_get(x) __dev_get_by_name(&init_net, x)
-+# define ipsec_dev_get(x) dev_get_by_name(&init_net, x)
-+#else
-+
-+# define PROC_NET proc_net
-+
-+# define ipsec_dev_get(x) __dev_get_by_name(x)
-+# define __ipsec_dev_get(x) __dev_get_by_name(x)
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
-+# define ip_chk_addr(a) inet_addr_type(&init_net, a)
-+
-+# define l_inet_addr_type(a) inet_addr_type(&init_net, a)
-+
-+#else
-+# define ip_chk_addr inet_addr_type
-+
-+#define l_inet_addr_type inet_addr_type
-+
-+#endif
-+
-+#ifndef NETDEV_TX_BUSY
-+# ifdef NETDEV_XMIT_CN
-+# define NETDEV_TX_BUSY NETDEV_XMIT_CN
-+# else
-+# define NETDEV_TX_BUSY 1
-+# endif
-+#endif
-+
-+
-+#ifdef NET_21
-+# define ipsec_kfree_skb(a) kfree_skb(a)
-+#else /* NET_21 */
-+# define ipsec_kfree_skb(a) kfree_skb(a, FREE_WRITE)
-+#endif /* NET_21 */
-+
-+#ifdef NETDEV_23
-+
-+#ifndef SPINLOCK
-+# include <linux/bios32.h>
-+ /* simulate spin locks and read/write locks */
-+ typedef struct {
-+ volatile char lock;
-+ } spinlock_t;
-+
-+ typedef struct {
-+ volatile unsigned int lock;
-+ } rwlock_t;
-+
-+# define spin_lock_init(x) { (x)->lock = 0;}
-+# define rw_lock_init(x) { (x)->lock = 0; }
-+
-+# define spin_lock(x) { while ((x)->lock) barrier(); (x)->lock=1;}
-+# define spin_lock_irq(x) { cli(); spin_lock(x);}
-+# define spin_lock_irqsave(x,flags) { save_flags(flags); spin_lock_irq(x);}
-+
-+# define spin_unlock(x) { (x)->lock=0;}
-+# define spin_unlock_irq(x) { spin_unlock(x); sti();}
-+# define spin_unlock_irqrestore(x,flags) { spin_unlock(x); restore_flags(flags);}
-+
-+# define read_lock(x) spin_lock(x)
-+# define read_lock_irq(x) spin_lock_irq(x)
-+# define read_lock_irqsave(x,flags) spin_lock_irqsave(x,flags)
-+
-+# define read_unlock(x) spin_unlock(x)
-+# define read_unlock_irq(x) spin_unlock_irq(x)
-+# define read_unlock_irqrestore(x,flags) spin_unlock_irqrestore(x,flags)
-+
-+# define write_lock(x) spin_lock(x)
-+# define write_lock_irq(x) spin_lock_irq(x)
-+# define write_lock_irqsave(x,flags) spin_lock_irqsave(x,flags)
-+
-+# define write_unlock(x) spin_unlock(x)
-+# define write_unlock_irq(x) spin_unlock_irq(x)
-+# define write_unlock_irqrestore(x,flags) spin_unlock_irqrestore(x,flags)
-+#endif /* !SPINLOCK */
-+
-+#ifndef SPINLOCK_23
-+# define spin_lock_bh(x) spin_lock_irq(x)
-+# define spin_unlock_bh(x) spin_unlock_irq(x)
-+
-+# define read_lock_bh(x) read_lock_irq(x)
-+# define read_unlock_bh(x) read_unlock_irq(x)
-+
-+# define write_lock_bh(x) write_lock_irq(x)
-+# define write_unlock_bh(x) write_unlock_irq(x)
-+#endif /* !SPINLOCK_23 */
-+
-+#ifndef HAVE_NETDEV_PRINTK
-+#define netdev_printk(sevlevel, netdev, msglevel, format, arg...) \
-+ printk(sevlevel "%s: " format , netdev->name , ## arg)
-+#endif
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+#define PROC_NET init_net.proc_net
-+#define PROC_EOF_DATA
-+#else
-+#define PROC_NET proc_net
-+#endif
-+
-+#ifdef NET_21
-+# include <linux/in6.h>
-+#else
-+ /* old kernel in.h has some IPv6 stuff, but not quite enough */
-+# define s6_addr16 s6_addr
-+# define AF_INET6 10
-+# define uint8_t __u8
-+# define uint16_t __u16
-+# define uint32_t __u32
-+# define uint64_t __u64
-+#endif
-+
-+#if __KERNEL__
-+# if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0)
-+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
-+# include "openswan/ipsec_kern24.h"
-+# else
-+# error "kernels before 2.4 are not supported at this time"
-+# endif
-+# endif
-+#endif
-+
-+#endif /* _OPENSWAN_KVERSIONS_H */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_life.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,112 @@
-+/*
-+ * Definitions relevant to IPSEC lifetimes
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_life.h,v 1.4 2004/04/05 19:55:05 mcr Exp $
-+ *
-+ * This file derived from ipsec_xform.h on 2001/9/18 by mcr.
-+ *
-+ */
-+
-+/*
-+ * This file describes the book keeping fields for the
-+ * IPsec Security Association Structure. ("ipsec_sa")
-+ *
-+ * This structure is never allocated directly by kernel code,
-+ * (it is always a static/auto or is part of a structure)
-+ * so it does not have a reference count.
-+ *
-+ */
-+
-+#ifndef _IPSEC_LIFE_H_
-+
-+/*
-+ * _count is total count.
-+ * _hard is hard limit (kill SA after this number)
-+ * _soft is soft limit (try to renew SA after this number)
-+ * _last is used in some special cases.
-+ *
-+ */
-+
-+struct ipsec_lifetime64
-+{
-+ __u64 ipl_count;
-+ __u64 ipl_soft;
-+ __u64 ipl_hard;
-+ __u64 ipl_last;
-+};
-+
-+struct ipsec_lifetimes
-+{
-+ /* number of bytes processed */
-+ struct ipsec_lifetime64 ipl_bytes;
-+
-+ /* number of packets processed */
-+ struct ipsec_lifetime64 ipl_packets;
-+
-+ /* time since SA was added */
-+ struct ipsec_lifetime64 ipl_addtime;
-+
-+ /* time since SA was first used */
-+ struct ipsec_lifetime64 ipl_usetime;
-+
-+ /* from rfc2367:
-+ * For CURRENT, the number of different connections,
-+ * endpoints, or flows that the association has been
-+ * allocated towards. For HARD and SOFT, the number of
-+ * these the association may be allocated towards
-+ * before it expires. The concept of a connection,
-+ * flow, or endpoint is system specific.
-+ *
-+ * mcr(2001-9-18) it is unclear what purpose these serve for FreeSWAN.
-+ * They are maintained for PF_KEY compatibility.
-+ */
-+ struct ipsec_lifetime64 ipl_allocations;
-+};
-+
-+enum ipsec_life_alive {
-+ ipsec_life_harddied = -1,
-+ ipsec_life_softdied = 0,
-+ ipsec_life_okay = 1
-+};
-+
-+enum ipsec_life_type {
-+ ipsec_life_timebased = 1,
-+ ipsec_life_countbased= 0
-+};
-+
-+#define _IPSEC_LIFE_H_
-+#endif /* _IPSEC_LIFE_H_ */
-+
-+
-+/*
-+ * $Log: ipsec_life.h,v $
-+ * Revision 1.4 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_life.h,v
-+ *
-+ * Revision 1.3 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_life.h,v
-+ *
-+ * Revision 1.2 2001/11/26 09:16:14 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.1.2.1 2001/09/25 02:25:58 mcr
-+ * lifetime structure created and common functions created.
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_mast.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,59 @@
-+#ifndef _IPSEC_MAST_H
-+#define _IPSEC_MAST_H
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+#define DB_MAST_INIT 0x0001
-+#define DB_MAST_PROCFS 0x0002
-+#define DB_MAST_XMIT 0x0010
-+#define DB_MAST_OHDR 0x0020
-+#define DB_MAST_CROUT 0x0040
-+#define DB_MAST_OXFS 0x0080
-+#define DB_MAST_REVEC 0x0100
-+#define DB_MAST_ENCAP 0x0200
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+struct ipsecmastconf {
-+ __u32 cf_cmd;
-+ union
-+ {
-+ char cfu_name[12];
-+ } cf_u;
-+#define cf_name cf_u.cfu_name
-+};
-+
-+struct mastpriv
-+{
-+ struct sk_buff_head sendq;
-+ struct wait_queue *wait_queue;
-+ int (*hard_header) (struct sk_buff *skb,
-+ struct net_device *dev,
-+ unsigned short type,
-+ void *daddr,
-+ void *saddr,
-+ unsigned len);
-+#if 0
-+ char locked;
-+ int (*hard_start_xmit) (struct sk_buff *skb,
-+ struct net_device *dev);
-+ int (*rebuild_header)(struct sk_buff *skb);
-+ int (*set_mac_address)(struct net_device *dev, void *addr);
-+ void (*header_cache_bind)(struct hh_cache **hhp, struct net_device *dev,
-+ unsigned short htype, __u32 daddr);
-+ void (*header_cache_update)(struct hh_cache *hh,
-+ struct net_device *dev,
-+ unsigned char * haddr);
-+ struct net_device_stats *(*get_stats)(struct net_device *dev);
-+#endif
-+ struct net_device_stats mystats;
-+ int mtu; /* What is the desired MTU? */
-+};
-+
-+extern int ipsec_mast_init_devices(void);
-+extern int ipsec_mast_deletenum(int vifnum);
-+extern int ipsec_mast_createnum(int vifnum);
-+extern struct net_device *ipsec_mast_get_device(int vifnum);
-+extern unsigned int ipsec_mast_is_transport(int vifnum);
-+
-+
-+
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_md5h.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,143 @@
-+/*
-+ * RCSID $Id: ipsec_md5h.h,v 1.10 2004/09/08 17:21:35 ken Exp $
-+ */
-+
-+/*
-+ * The rest of this file is Copyright RSA DSI. See the following comments
-+ * for the full Copyright notice.
-+ */
-+
-+#ifndef _IPSEC_MD5H_H_
-+#define _IPSEC_MD5H_H_
-+
-+/* GLOBAL.H - RSAREF types and constants
-+ */
-+
-+/* PROTOTYPES should be set to one if and only if the compiler supports
-+ function argument prototyping.
-+ The following makes PROTOTYPES default to 1 if it has not already
-+ been defined with C compiler flags.
-+ */
-+#ifndef PROTOTYPES
-+#define PROTOTYPES 1
-+#endif /* !PROTOTYPES */
-+
-+/* POINTER defines a generic pointer type */
-+typedef __u8 *POINTER;
-+
-+/* UINT2 defines a two byte word */
-+typedef __u16 UINT2;
-+
-+/* UINT4 defines a four byte word */
-+typedef __u32 UINT4;
-+
-+/* PROTO_LIST is defined depending on how PROTOTYPES is defined above.
-+ If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
-+ returns an empty list.
-+ */
-+
-+#if PROTOTYPES
-+#define PROTO_LIST(list) list
-+#else /* PROTOTYPES */
-+#define PROTO_LIST(list) ()
-+#endif /* PROTOTYPES */
-+
-+
-+/* MD5.H - header file for MD5C.C
-+ */
-+
-+/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-+rights reserved.
-+
-+License to copy and use this software is granted provided that it
-+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-+Algorithm" in all material mentioning or referencing this software
-+or this function.
-+
-+License is also granted to make and use derivative works provided
-+that such works are identified as "derived from the RSA Data
-+Security, Inc. MD5 Message-Digest Algorithm" in all material
-+mentioning or referencing the derived work.
-+
-+RSA Data Security, Inc. makes no representations concerning either
-+the merchantability of this software or the suitability of this
-+software for any particular purpose. It is provided "as is"
-+without express or implied warranty of any kind.
-+
-+These notices must be retained in any copies of any part of this
-+documentation and/or software.
-+ */
-+
-+/* MD5 context. */
-+typedef struct {
-+ UINT4 state[4]; /* state (ABCD) */
-+ UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
-+ unsigned char buffer[64]; /* input buffer */
-+} MD5_CTX;
-+
-+void osMD5Init PROTO_LIST ((void *));
-+void osMD5Update PROTO_LIST
-+ ((void *, unsigned char *, __u32));
-+void osMD5Final PROTO_LIST ((unsigned char [16], void *));
-+
-+#endif /* _IPSEC_MD5H_H_ */
-+
-+/*
-+ * $Log: ipsec_md5h.h,v $
-+ * Revision 1.10 2004/09/08 17:21:35 ken
-+ * Rename MD5* -> osMD5 functions to prevent clashes with other symbols exported by kernel modules (CIFS in 2.6 initiated this)
-+ *
-+ * Revision 1.9 2004/04/05 19:55:05 mcr
-+ * Moved from linux/include/freeswan/ipsec_md5h.h,v
-+ *
-+ * Revision 1.8 2002/09/10 01:45:09 mcr
-+ * changed type of MD5_CTX and SHA1_CTX to void * so that
-+ * the function prototypes would match, and could be placed
-+ * into a pointer to a function.
-+ *
-+ * Revision 1.7 2002/04/24 07:36:46 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_md5h.h,v
-+ *
-+ * Revision 1.6 1999/12/13 13:59:13 rgb
-+ * Quick fix to argument size to Update bugs.
-+ *
-+ * Revision 1.5 1999/12/07 18:16:23 rgb
-+ * Fixed comments at end of #endif lines.
-+ *
-+ * Revision 1.4 1999/04/06 04:54:26 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.3 1999/01/22 06:19:58 rgb
-+ * 64-bit clean-up.
-+ *
-+ * Revision 1.2 1998/11/30 13:22:54 rgb
-+ * Rationalised all the klips kernel file headers. They are much shorter
-+ * now and won't conflict under RH5.2.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:48 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.2 1998/04/23 20:54:03 rgb
-+ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when
-+ * verified.
-+ *
-+ * Revision 1.1 1998/04/09 03:04:21 henry
-+ * sources moved up from linux/net/ipsec
-+ * these two include files modified not to include others except in kernel
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:03 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * No changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:48:53 ji
-+ * Release update only.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_param.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,268 @@
-+/*
-+ * @(#) Openswan tunable parameters
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ * Copyright (C) 2004 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ *
-+ */
-+
-+/*
-+ * This file provides a set of #define's which may be tuned by various
-+ * people/configurations. It keeps all compile-time tunables in one place.
-+ *
-+ * This file should be included before all other IPsec kernel-only files.
-+ *
-+ */
-+
-+#ifndef _IPSEC_PARAM_H_
-+
-+#ifdef __KERNEL__
-+
-+#include "openswan/ipsec_kversion.h"
-+
-+/* Set number of ipsecX virtual devices here. */
-+/* This must be < exp(field width of IPSEC_DEV_FORMAT) */
-+/* It must also be reasonable so as not to overload the memory and CPU */
-+/* constraints of the host. */
-+#ifdef CONFIG_KLIPS_IF_MAX
-+#define IPSEC_NUM_IFMAX CONFIG_KLIPS_IF_MAX
-+#endif
-+#ifndef IPSEC_NUM_IFMAX
-+#define IPSEC_NUM_IFMAX 64
-+#endif
-+
-+/* default number of ipsecX devices to create */
-+#define IPSEC_NUM_IF 2
-+
-+/* The field width must be < IFNAMSIZ - strlen("ipsec") - 1. */
-+/* With "ipsec" being 5 characters, that means 10 is the max field width */
-+/* but machine memory and CPU constraints are not likely to tolerate */
-+/* more than 3 digits. The default is one digit. */
-+/* Update: userland scripts get upset if they can't find "ipsec0", so */
-+/* for now, no "0"-padding should be used (which would have been helpful */
-+/* to make text-searches work). */
-+#define IPSEC_DEV_FORMAT "ipsec%d"
-+#define MAST_DEV_FORMAT "mast%d"
-+
-+/* For, say, 500 virtual ipsec devices, I would recommend: */
-+/* #define IPSEC_NUM_IF 500 */
-+/* #define IPSEC_DEV_FORMAT "ipsec%03d" */
-+/* Note that the "interfaces=" line in /etc/ipsec.conf would be, um, challenging. */
-+
-+/* use dynamic ipsecX device allocation */
-+#ifndef CONFIG_KLIPS_DYNDEV
-+#define CONFIG_KLIPS_DYNDEV 1
-+#endif /* CONFIG_KLIPS_DYNDEV */
-+
-+
-+#ifdef CONFIG_KLIPS_BIGGATE
-+# define SADB_HASHMOD 8069
-+#else /* CONFIG_KLIPS_BIGGATE */
-+# define SADB_HASHMOD 257
-+#endif /* CONFIG_KLIPS_BIGGATE */
-+
-+#endif /* __KERNEL__ */
-+
-+/*
-+ * This is for the SA reference table. This number is related to the
-+ * maximum number of SAs that KLIPS can concurrently deal with, plus enough
-+ * space for keeping expired SAs around.
-+ *
-+ * TABLE_IDX_WIDTH is the number of bits that we will use.
-+ * MAIN_TABLE_WIDTH is the number of bits used for the primary index table.
-+ *
-+ */
-+#ifndef IPSEC_SA_REF_MAINTABLE_IDX_WIDTH
-+# define IPSEC_SA_REF_MAINTABLE_IDX_WIDTH 4
-+#endif
-+
-+#ifndef IPSEC_SA_REF_FREELIST_NUM_ENTRIES
-+# define IPSEC_SA_REF_FREELIST_NUM_ENTRIES 256
-+#endif
-+
-+#ifndef IPSEC_SA_REF_CODE
-+# define IPSEC_SA_REF_CODE 1
-+#endif
-+
-+#ifdef __KERNEL__
-+/* This is defined for 2.4, but not 2.2.... */
-+#ifndef ARPHRD_VOID
-+# define ARPHRD_VOID 0xFFFF
-+#endif
-+
-+/* always turn on IPIP mode */
-+#ifndef CONFIG_KLIPS_IPIP
-+#define CONFIG_KLIPS_IPIP 1
-+#endif
-+
-+/*
-+ * Worry about PROC_FS stuff
-+ */
-+#if defined(PROC_FS_2325)
-+/* kernel 2.4 */
-+# define IPSEC_PROC_LAST_ARG ,int *eof,void *data
-+# define IPSEC_PROCFS_DEBUG_NO_STATIC
-+# define IPSEC_PROC_SUBDIRS
-+#else
-+/* kernel <2.4 */
-+# define IPSEC_PROCFS_DEBUG_NO_STATIC DEBUG_NO_STATIC
-+
-+# ifndef PROC_NO_DUMMY
-+# define IPSEC_PROC_LAST_ARG , int dummy
-+# else
-+# define IPSEC_PROC_LAST_ARG
-+# endif /* !PROC_NO_DUMMY */
-+#endif /* PROC_FS_2325 */
-+
-+#if !defined(LINUX_KERNEL_HAS_SNPRINTF)
-+/* GNU CPP specific! */
-+# define snprintf(buf, len, fmt...) sprintf(buf, ##fmt)
-+#endif /* !LINUX_KERNEL_HAS_SNPRINTF */
-+
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#ifndef KLIPS_FIXES_DES_PARITY
-+# define KLIPS_FIXES_DES_PARITY 1
-+#endif /* !KLIPS_FIXES_DES_PARITY */
-+
-+/* we don't really want to print these unless there are really big problems */
-+#ifndef KLIPS_DIVULGE_CYPHER_KEY
-+# define KLIPS_DIVULGE_CYPHER_KEY 0
-+#endif /* !KLIPS_DIVULGE_CYPHER_KEY */
-+
-+#ifndef KLIPS_DIVULGE_HMAC_KEY
-+# define KLIPS_DIVULGE_HMAC_KEY 0
-+#endif /* !KLIPS_DIVULGE_HMAC_KEY */
-+
-+#ifndef IPSEC_DISALLOW_IPOPTIONS
-+# define IPSEC_DISALLOW_IPOPTIONS 1
-+#endif /* !IPSEC_DISALLOW_IPOPTIONS */
-+
-+/* extra toggles for regression testing */
-+#ifdef CONFIG_KLIPS_REGRESS
-+
-+/*
-+ * should pfkey_acquire() become 100% lossy?
-+ *
-+ */
-+extern int sysctl_ipsec_regress_pfkey_lossage;
-+#ifndef KLIPS_PFKEY_ACQUIRE_LOSSAGE
-+# ifdef CONFIG_KLIPS_PFKEY_ACQUIRE_LOSSAGE
-+# define KLIPS_PFKEY_ACQUIRE_LOSSAGE 100
-+# else /* CONFIG_KLIPS_PFKEY_ACQUIRE_LOSSAGE */
-+/* not by default! */
-+# define KLIPS_PFKEY_ACQUIRE_LOSSAGE 0
-+# endif /* CONFIG_KLIPS_PFKEY_ACQUIRE_LOSSAGE */
-+#endif /* KLIPS_PFKEY_ACQUIRE_LOSSAGE */
-+
-+#endif /* CONFIG_KLIPS_REGRESS */
-+
-+
-+/*
-+ * debugging routines. KLIPS_ERROR is a single do { } while(0) statement, so it is safe in an unbraced if/else.
-+ */
-+#ifdef CONFIG_KLIPS_DEBUG
-+ #define KLIPS_ERROR(flag, format, args...) do { if(printk_ratelimit() || (flag)) printk(KERN_ERR "KLIPS " format, ## args); } while(0)
-+ #define KLIPS_PRINT(flag, format, args...) \
-+ ((flag) ? printk(KERN_INFO format , ## args) : 0)
-+ #define KLIPS_PRINTMORE(flag, format, args...) \
-+ ((flag) ? printk(format , ## args) : 0)
-+ #define KLIPS_IP_PRINT(flag, ip) \
-+ ((flag) ? ipsec_print_ip(ip) : 0)
-+ #define KLIPS_SATOT(flag, sa, format, dst, dstlen) \
-+ ((flag) ? satot(sa, format, dst, dstlen) : 0)
-+#else /* CONFIG_KLIPS_DEBUG */
-+ #define KLIPS_ERROR(flag, format, args...) do { if(printk_ratelimit()) printk(KERN_ERR "KLIPS " format, ## args); } while(0)
-+ #define KLIPS_PRINT(flag, format, args...) do ; while(0)
-+ #define KLIPS_PRINTMORE(flag, format, args...) do ; while(0)
-+ #define KLIPS_IP_PRINT(flag, ip) do ; while(0)
-+ #define KLIPS_SATOT(flag, sa, format, dst, dstlen) (0)
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+
-+/*
-+ * Stupid kernel API differences. Not only do some
-+ * kernels not have ip_select_ident, but some have differing APIs,
-+ * and SuSE has one with one parameter, but no way of checking to
-+ * see what is really what.
-+ */
-+
-+#ifdef SUSE_LINUX_2_4_19_IS_STUPID
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph)
-+#else
-+
-+/* simplest case, nothing */
-+#if !defined(IP_SELECT_IDENT)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) do { iph->id = htons(ip_id_count++); } while(0)
-+#endif
-+
-+/* kernels > 2.3.37-ish */
-+#if defined(IP_SELECT_IDENT) && !defined(IP_SELECT_IDENT_NEW)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst)
-+#endif
-+
-+/* kernels > 2.4.2 */
-+#if defined(IP_SELECT_IDENT) && defined(IP_SELECT_IDENT_NEW)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst, NULL)
-+#endif
-+
-+#endif /* SUSE_LINUX_2_4_19_IS_STUPID */
-+
-+/*
-+ * make klips fail test:east-espiv-01.
-+ * exploit is at testing/attacks/espiv
-+ *
-+ */
-+#define KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK 0
-+
-+
-+/* IP_FRAGMENT_LINEARIZE is set in freeswan.h if Kernel > 2.4.4 */
-+#ifndef IP_FRAGMENT_LINEARIZE
-+# define IP_FRAGMENT_LINEARIZE 0
-+#endif /* IP_FRAGMENT_LINEARIZE */
-+#endif /* __KERNEL__ */
-+
-+#ifdef NEED_INET_PROTOCOL
-+#define inet_protocol net_protocol
-+#endif
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && CONFIG_IPSEC_NAT_TRAVERSAL
-+#define NAT_TRAVERSAL 1
-+#else
-+/* let people either #undef, or #define = 0 it */
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+#undef CONFIG_IPSEC_NAT_TRAVERSAL
-+#endif
-+#endif
-+
-+#ifndef IPSEC_DEFAULT_TTL
-+#define IPSEC_DEFAULT_TTL 64
-+#endif
-+
-+#define _IPSEC_PARAM_H_
-+#endif /* _IPSEC_PARAM_H_ */
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_policy.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,239 @@
-+#ifndef _IPSEC_POLICY_H
-+/*
-+ * policy interface file between pluto and applications
-+ * Copyright (C) 2003 Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: ipsec_policy.h,v 1.8 2005/07/26 01:12:38 mcr Exp $
-+ */
-+#define _IPSEC_POLICY_H /* seen it, no need to see it again */
-+
-+
-+/*
-+ * this file defines an interface between an application (or rather an
-+ * application library) and a key/policy daemon. It provides for inquiries
-+ * as to the current state of a connected socket, as well as for general
-+ * questions.
-+ *
-+ * In general, the interface is defined as a series of functional interfaces,
-+ * and the policy messages should be internal. However, because this is in
-+ * fact an ABI between pieces of the system that may get compiled and revised
-+ * separately, this ABI must be public and revision controlled.
-+ *
-+ * It is expected that the daemon will always support previous versions.
-+ */
-+
-+#define IPSEC_POLICY_MSG_REVISION (unsigned)200305061
-+
-+enum ipsec_policy_command {
-+ IPSEC_CMD_QUERY_FD = 1,
-+ IPSEC_CMD_QUERY_HOSTPAIR = 2,
-+ IPSEC_CMD_QUERY_DSTONLY = 3,
-+};
-+
-+struct ipsec_policy_msg_head {
-+ u_int32_t ipm_version;
-+ u_int32_t ipm_msg_len;
-+ u_int32_t ipm_msg_type;
-+ u_int32_t ipm_msg_seq;
-+};
-+
-+enum ipsec_privacy_quality {
-+ IPSEC_PRIVACY_NONE = 0,
-+ IPSEC_PRIVACY_INTEGRAL = 4, /* not private at all. AH-like */
-+ IPSEC_PRIVACY_UNKNOWN = 8, /* something is claimed, but details unavail */
-+ IPSEC_PRIVACY_ROT13 = 12, /* trivially breakable, i.e. 1DES */
-+ IPSEC_PRIVACY_GAK = 16, /* known eavesdroppers */
-+ IPSEC_PRIVACY_PRIVATE = 32, /* secure for at least a decade */
-+ IPSEC_PRIVACY_STRONG = 64, /* ridiculously secure */
-+ IPSEC_PRIVACY_TORTOISE = 192, /* even stronger, but very slow */
-+ IPSEC_PRIVACY_OTP = 224, /* some kind of *true* one time pad */
-+};
-+
-+enum ipsec_bandwidth_quality {
-+ IPSEC_QOS_UNKNOWN = 0, /* unknown bandwidth */
-+ IPSEC_QOS_INTERACTIVE = 16, /* reasonably moderate jitter, moderately fast.
-+ Good enough for telnet/ssh. */
-+ IPSEC_QOS_VOIP = 32, /* faster crypto, predictable jitter */
-+ IPSEC_QOS_FTP = 64, /* higher throughput crypto, perhaps hardware
-+ offloaded, but latency/jitter may be bad */
-+ IPSEC_QOS_WIRESPEED = 128, /* expect to be able to fill your pipe */
-+};
-+
-+/* moved from programs/pluto/constants.h */
-+/* IPsec AH transform values
-+ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.3
-+ * and in http://www.iana.org/assignments/isakmp-registry
-+ */
-+enum ipsec_authentication_algo {
-+ AH_NONE=0,
-+ AH_MD5=2,
-+ AH_SHA=3,
-+ AH_DES=4,
-+ AH_SHA2_256=5,
-+ AH_SHA2_384=6,
-+ AH_SHA2_512=7,
-+ AH_RIPEMD=8,
-+ AH__AES_XCBC_MAC=9,
-+ AH_RSA=10
-+};
-+
-+/* IPsec ESP transform values
-+ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.4
-+ * and from http://www.iana.org/assignments/isakmp-registry
-+ */
-+
-+enum ipsec_cipher_algo {
-+ ESP_reserved=0,
-+ ESP_DES_IV64=1,
-+ ESP_DES=2,
-+ ESP_3DES=3,
-+ ESP_RC5=4,
-+ ESP_IDEA=5,
-+ ESP_CAST=6,
-+ ESP_BLOWFISH=7,
-+ ESP_3IDEA=8,
-+ ESP_DES_IV32=9,
-+ ESP_RC4=10,
-+ ESP_NULL=11,
-+ ESP_AES=12, /* 128 bit AES */
-+ ESP_AES_CTR=13,
-+ ESP_AES_CCM_8=14,
-+ ESP_AES_CCM_12=15,
-+ ESP_AES_CCM_16=16,
-+ /* unassigned=17 */
-+ ESP_AES_GCM_8=18,
-+ ESP_AES_GCM_12=19,
-+ ESP_AES_GCM_16=20,
-+ ESP_SEED_CBC=21,
-+ ESP_CAMELLIA=22,
-+ /* 249-255 reserved for private use */
-+};
-+
-+/* IPCOMP transform values
-+ * RFC2407 The Internet IP security Domain of Interpretation for ISAKMP 4.4.5
-+ */
-+
-+enum ipsec_comp_algo {
-+ IPCOMP_OUI= 1,
-+ IPCOMP_DEFLATE= 2,
-+ IPCOMP_LZS= 3,
-+ IPCOMP_V42BIS= 4
-+};
-+
-+/* Identification type values
-+ * RFC 2407 The Internet IP security Domain of Interpretation for
-+ * ISAKMP 4.6.2.1
-+ *
-+ * Also for RFC4306.
-+ *
-+ * enum ident_names;
-+ */
-+
-+enum ipsec_id_type {
-+ ID_FROMCERT= (-3), /* taken from certificate */
-+ ID_IMPOSSIBLE= (-2), /* private to Pluto */
-+ ID_MYID= (-1), /* private to Pluto */
-+ ID_NONE= 0, /* private to Pluto */
-+ ID_IPV4_ADDR= 1,
-+ ID_FQDN= 2,
-+ ID_USER_FQDN= 3,
-+ ID_RFC822_ADDR = ID_USER_FQDN, /* RFC4306 */
-+ ID_IPV4_ADDR_SUBNET= 4,
-+ ID_IPV6_ADDR= 5,
-+ ID_IPV6_ADDR_SUBNET= 6,
-+ ID_IPV4_ADDR_RANGE= 7,
-+ ID_IPV6_ADDR_RANGE= 8,
-+ ID_DER_ASN1_DN= 9,
-+ ID_DER_ASN1_GN= 10,
-+ ID_KEY_ID= 11
-+};
-+
-+/* Certificate type values
-+ * RFC 2408 ISAKMP, chapter 3.9
-+ */
-+enum ipsec_cert_type {
-+ CERT_NONE= 0, /* none, or guess from file contents */
-+ CERT_PKCS7_WRAPPED_X509= 1, /* self-signed certificate from disk */
-+ CERT_PGP= 2,
-+ CERT_DNS_SIGNED_KEY= 3, /* KEY RR from DNS */
-+ CERT_X509_SIGNATURE= 4,
-+ CERT_X509_KEY_EXCHANGE= 5,
-+ CERT_KERBEROS_TOKENS= 6,
-+ CERT_CRL= 7,
-+ CERT_ARL= 8,
-+ CERT_SPKI= 9,
-+ CERT_X509_ATTRIBUTE= 10,
-+ CERT_RAW_RSA= 11, /* raw RSA from config file */
-+};
-+
-+/* a SIG record in ASCII */
-+struct ipsec_dns_sig {
-+ char fqdn[256];
-+ char dns_sig[768]; /* empty string if not signed */
-+};
-+
-+struct ipsec_raw_key {
-+ char id_name[256];
-+ char fs_keyid[8];
-+};
-+
-+struct ipsec_identity {
-+ enum ipsec_id_type ii_type;
-+ enum ipsec_cert_type ii_format;
-+ union {
-+ struct ipsec_dns_sig ipsec_dns_signed;
-+ /* some thing for PGP */
-+ /* some thing for PKIX */
-+ struct ipsec_raw_key ipsec_raw_key;
-+ } ii_credential;
-+};
-+
-+#define IPSEC_MAX_CREDENTIALS 32
-+
-+struct ipsec_policy_cmd_query {
-+ struct ipsec_policy_msg_head head;
-+
-+ /* Query section */
-+ ip_address query_local; /* us */
-+ ip_address query_remote; /* them */
-+ u_int8_t proto; /* TCP, ICMP, etc. */
-+ u_short src_port, dst_port;
-+
-+ /* Answer section */
-+ enum ipsec_privacy_quality strength;
-+ enum ipsec_bandwidth_quality bandwidth;
-+ enum ipsec_authentication_algo auth_detail;
-+ enum ipsec_cipher_algo esp_detail;
-+ enum ipsec_comp_algo comp_detail;
-+
-+ int credential_count;
-+
-+ struct ipsec_identity credentials[IPSEC_MAX_CREDENTIALS];
-+};
-+
-+#define IPSEC_POLICY_SOCKET "/var/run/pluto/pluto.info"
-+
-+/* prototypes */
-+extern err_t ipsec_policy_lookup(int fd, struct ipsec_policy_cmd_query *result);
-+extern err_t ipsec_policy_init(void);
-+extern err_t ipsec_policy_final(void);
-+extern err_t ipsec_policy_readmsg(int policysock,
-+ unsigned char *buf, size_t buflen);
-+extern err_t ipsec_policy_sendrecv(unsigned char *buf, size_t buflen);
-+extern err_t ipsec_policy_cgilookup(struct ipsec_policy_cmd_query *result);
-+
-+
-+extern const char *ipsec_policy_version_code(void);
-+extern const char *ipsec_policy_version_string(void);
-+
-+#endif /* _IPSEC_POLICY_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_proto.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,195 @@
-+/*
-+ * @(#) prototypes for FreeSWAN functions
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_proto.h,v 1.14 2005/04/29 04:50:03 mcr Exp $
-+ *
-+ */
-+
-+#ifndef _IPSEC_PROTO_H_
-+
-+#include "ipsec_param.h"
-+
-+/*
-+ * This file is a kernel only file that declares prototypes for
-+ * all intra-module function calls and global data structures.
-+ *
-+ * Include this file last.
-+ *
-+ */
-+
-+/* forward references */
-+enum ipsec_direction;
-+enum ipsec_life_type;
-+struct ipsec_lifetime64;
-+struct ident;
-+struct sockaddr_encap;
-+struct ipsec_sa;
-+
-+/* ipsec_init.c */
-+extern struct prng ipsec_prng;
-+
-+/* ipsec_sa.c */
-+extern struct ipsec_sa *ipsec_sadb_hash[SADB_HASHMOD];
-+extern spinlock_t tdb_lock;
-+extern int ipsec_sadb_init(void);
-+extern int ipsec_sadb_cleanup(__u8);
-+
-+extern struct ipsec_sa *ipsec_sa_alloc(int*error);
-+
-+
-+extern struct ipsec_sa *ipsec_sa_getbyid(ip_said *);
-+extern /* void */ int ipsec_sa_add(struct ipsec_sa *);
-+
-+extern int ipsec_sa_init(struct ipsec_sa *ipsp);
-+
-+/* debug declarations */
-+
-+/* ipsec_proc.c */
-+extern int ipsec_proc_init(void);
-+extern void ipsec_proc_cleanup(void);
-+
-+/* ipsec_rcv.c */
-+extern int ipsec_rcv(struct sk_buff *skb);
-+extern int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type);
-+
-+/* ipsec_xmit.c */
-+struct ipsec_xmit_state;
-+extern enum ipsec_xmit_value ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs);
-+extern enum ipsec_xmit_value ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs);
-+extern void ipsec_print_ip(struct iphdr *ip);
-+
-+
-+
-+/* ipsec_radij.c */
-+extern int ipsec_makeroute(struct sockaddr_encap *ea,
-+ struct sockaddr_encap *em,
-+ ip_said said,
-+ uint32_t pid,
-+ struct sk_buff *skb,
-+ struct ident *ident_s,
-+ struct ident *ident_d);
-+
-+extern int ipsec_breakroute(struct sockaddr_encap *ea,
-+ struct sockaddr_encap *em,
-+ struct sk_buff **first,
-+ struct sk_buff **last);
-+
-+int ipsec_radijinit(void);
-+int ipsec_cleareroutes(void);
-+int ipsec_radijcleanup(void);
-+
-+/* ipsec_life.c */
-+extern enum ipsec_life_alive ipsec_lifetime_check(struct ipsec_lifetime64 *il64,
-+ const char *lifename,
-+ const char *saname,
-+ enum ipsec_life_type ilt,
-+ enum ipsec_direction idir,
-+ struct ipsec_sa *ips);
-+
-+
-+extern int ipsec_lifetime_format(char *buffer,
-+ int buflen,
-+ char *lifename,
-+ enum ipsec_life_type timebaselife,
-+ struct ipsec_lifetime64 *lifetime);
-+
-+extern void ipsec_lifetime_update_hard(struct ipsec_lifetime64 *lifetime,
-+ __u64 newvalue);
-+
-+extern void ipsec_lifetime_update_soft(struct ipsec_lifetime64 *lifetime,
-+ __u64 newvalue);
-+
-+/* ipsec_snprintf.c */
-+extern int ipsec_snprintf(char * buf, ssize_t size, const char *fmt, ...);
-+extern void ipsec_dmp_block(char *s, caddr_t bb, int len);
-+
-+
-+/* ipsec_alg.c */
-+extern int ipsec_alg_init(void);
-+
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+
-+extern int debug_xform;
-+extern int debug_eroute;
-+extern int debug_spi;
-+extern int debug_netlink;
-+
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+
-+
-+
-+#define _IPSEC_PROTO_H
-+#endif /* _IPSEC_PROTO_H_ */
-+
-+/*
-+ * $Log: ipsec_proto.h,v $
-+ * Revision 1.14 2005/04/29 04:50:03 mcr
-+ * prototypes for xmit and alg code.
-+ *
-+ * Revision 1.13 2005/04/17 03:46:07 mcr
-+ * added prototypes for ipsec_rcv() routines.
-+ *
-+ * Revision 1.12 2005/04/14 20:28:37 mcr
-+ * added additional prototypes.
-+ *
-+ * Revision 1.11 2005/04/14 01:16:28 mcr
-+ * add prototypes for snprintf.
-+ *
-+ * Revision 1.10 2005/04/13 22:47:28 mcr
-+ * make sure that forward references are available.
-+ *
-+ * Revision 1.9 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.8 2004/04/05 19:55:06 mcr
-+ * Moved from linux/include/freeswan/ipsec_proto.h,v
-+ *
-+ * Revision 1.7 2003/10/31 02:27:05 mcr
-+ * pulled up port-selector patches and sa_id elimination.
-+ *
-+ * Revision 1.6.30.1 2003/10/29 01:10:19 mcr
-+ * elimited "struct sa_id"
-+ *
-+ * Revision 1.6 2002/05/23 07:13:48 rgb
-+ * Added ipsec_sa_put() for releasing an ipsec_sa refcount.
-+ *
-+ * Revision 1.5 2002/05/14 02:36:40 rgb
-+ * Converted reference from ipsec_sa_put to ipsec_sa_add to avoid confusion
-+ * with "put" usage in the kernel.
-+ *
-+ * Revision 1.4 2002/04/24 07:36:47 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_proto.h,v
-+ *
-+ * Revision 1.3 2002/04/20 00:12:25 rgb
-+ * Added esp IV CBC attack fix, disabled.
-+ *
-+ * Revision 1.2 2001/11/26 09:16:15 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.1.2.1 2001/09/25 02:21:01 mcr
-+ * ipsec_proto.h created to keep prototypes rather than deal with
-+ * cyclic dependancies of structures and prototypes in .h files.
-+ *
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_radij.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,179 @@
-+/*
-+ * @(#) Definitions relevant to the IPSEC <> radij tree interfacing
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_radij.h,v 1.22 2004/07/10 19:08:41 mcr Exp $
-+ */
-+
-+#ifndef _IPSEC_RADIJ_H
-+
-+#include <openswan.h>
-+
-+int ipsec_walk(char *);
-+
-+int ipsec_rj_walker_procprint(struct radij_node *, void *);
-+int ipsec_rj_walker_delete(struct radij_node *, void *);
-+
-+/* This structure is used to pass information between
-+ * ipsec_eroute_get_info and ipsec_rj_walker_procprint
-+ * (through rj_walktree) and between calls of ipsec_rj_walker_procprint.
-+ */
-+struct wsbuf
-+{
-+ /* from caller of ipsec_eroute_get_info: */
-+ char *const buffer; /* start of buffer provided */
-+ const int length; /* length of buffer provided */
-+ const off_t offset; /* file position of first character of interest */
-+ /* accumulated by ipsec_rj_walker_procprint: */
-+ int len; /* number of character filled into buffer */
-+ off_t begin; /* file position contained in buffer[0] (<=offset) */
-+};
-+
-+extern struct radij_node_head *rnh;
-+extern spinlock_t eroute_lock;
-+
-+struct eroute * ipsec_findroute(struct sockaddr_encap *);
-+
-+#define O1(x) (int)(((x)>>24)&0xff)
-+#define O2(x) (int)(((x)>>16)&0xff)
-+#define O3(x) (int)(((x)>>8)&0xff)
-+#define O4(x) (int)(((x))&0xff)
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_radij;
-+void rj_dumptrees(void);
-+
-+#define DB_RJ_DUMPTREES 0x0001
-+#define DB_RJ_FINDROUTE 0x0002
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+#define _IPSEC_RADIJ_H
-+#endif
-+
-+/*
-+ * $Log: ipsec_radij.h,v $
-+ * Revision 1.22 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.21 2004/04/29 11:06:42 ken
-+ * Last bits from 2.06 procfs updates
-+ *
-+ * Revision 1.20 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.19 2004/04/05 19:55:06 mcr
-+ * Moved from linux/include/freeswan/ipsec_radij.h,v
-+ *
-+ * Revision 1.18 2002/04/24 07:36:47 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_radij.h,v
-+ *
-+ * Revision 1.17 2001/11/26 09:23:49 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.16.2.1 2001/09/25 02:21:17 mcr
-+ * ipsec_proto.h created to keep prototypes rather than deal with
-+ * cyclic dependancies of structures and prototypes in .h files.
-+ *
-+ * Revision 1.16 2001/09/15 16:24:04 rgb
-+ * Re-inject first and last HOLD packet when an eroute REPLACE is done.
-+ *
-+ * Revision 1.15 2001/09/14 16:58:37 rgb
-+ * Added support for storing the first and last packets through a HOLD.
-+ *
-+ * Revision 1.14 2001/09/08 21:13:32 rgb
-+ * Added pfkey ident extension support for ISAKMPd. (NetCelo)
-+ *
-+ * Revision 1.13 2001/06/14 19:35:09 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.12 2001/05/27 06:12:11 rgb
-+ * Added structures for pid, packet count and last access time to eroute.
-+ * Added packet count to beginning of /proc/net/ipsec_eroute.
-+ *
-+ * Revision 1.11 2000/09/08 19:12:56 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.10 1999/11/17 15:53:39 rgb
-+ * Changed all occurrences of #include "../../../lib/freeswan.h"
-+ * to #include <freeswan.h> which works due to -Ilibfreeswan in the
-+ * klips/net/ipsec/Makefile.
-+ *
-+ * Revision 1.9 1999/10/01 00:01:23 rgb
-+ * Added eroute structure locking.
-+ *
-+ * Revision 1.8 1999/04/11 00:28:59 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.7 1999/04/06 04:54:26 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.6 1999/01/22 06:23:26 rgb
-+ * Cruft clean-out.
-+ *
-+ * Revision 1.5 1998/10/25 02:42:08 rgb
-+ * Change return type on ipsec_breakroute and ipsec_makeroute and add an
-+ * argument to be able to transmit more infomation about errors.
-+ *
-+ * Revision 1.4 1998/10/19 14:44:29 rgb
-+ * Added inclusion of freeswan.h.
-+ * sa_id structure implemented and used: now includes protocol.
-+ *
-+ * Revision 1.3 1998/07/28 00:03:31 rgb
-+ * Comment out temporary inet_nto4u() kluge.
-+ *
-+ * Revision 1.2 1998/07/14 18:22:00 rgb
-+ * Add function to clear the eroute table.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:49 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.5 1998/05/25 20:30:38 rgb
-+ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions.
-+ *
-+ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and
-+ * add ipsec_rj_walker_delete.
-+ *
-+ * Revision 1.4 1998/05/21 13:02:56 rgb
-+ * Imported definitions from ipsec_radij.c and radij.c to support /proc 3k
-+ * limit fix.
-+ *
-+ * Revision 1.3 1998/04/21 21:29:09 rgb
-+ * Rearrange debug switches to change on the fly debug output from user
-+ * space. Only kernel changes checked in at this time. radij.c was also
-+ * changed to temporarily remove buggy debugging code in rj_delete causing
-+ * an OOPS and hence, netlink device open errors.
-+ *
-+ * Revision 1.2 1998/04/14 17:30:39 rgb
-+ * Fix up compiling errors for radij tree memory reclamation.
-+ *
-+ * Revision 1.1 1998/04/09 03:06:10 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:04 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * No changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:39:04 ji
-+ * Minor cleanups.
-+ * Rationalized debugging code.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_rcv.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,261 @@
-+/*
-+ *
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_rcv.h,v 1.28.2.1 2006/07/10 15:52:20 paul Exp $
-+ */
-+
-+#ifndef IPSEC_RCV_H
-+#define IPSEC_RCV_H
-+
-+#include "openswan/ipsec_auth.h"
-+
-+#define DB_RX_PKTRX 0x0001
-+#define DB_RX_PKTRX2 0x0002
-+#define DB_RX_DMP 0x0004
-+#define DB_RX_IPSA 0x0010
-+#define DB_RX_XF 0x0020
-+#define DB_RX_IPAD 0x0040
-+#define DB_RX_INAU 0x0080
-+#define DB_RX_OINFO 0x0100
-+#define DB_RX_OINFO2 0x0200
-+#define DB_RX_OH 0x0400
-+#define DB_RX_REPLAY 0x0800
-+
-+#ifdef __KERNEL__
-+/* struct options; */
-+
-+#define __NO_VERSION__
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif /* for CONFIG_IP_FORWARD */
-+#ifdef CONFIG_MODULES
-+#include <linux/module.h>
-+#endif
-+#include <linux/version.h>
-+#include <openswan.h>
-+
-+#ifdef CONFIG_KLIPS_OCF
-+#include <cryptodev.h>
-+#endif
-+
-+#define IPSEC_BIRTH_TEMPLATE_MAXLEN 256
-+
-+struct ipsec_birth_reply {
-+ int packet_template_len;
-+ unsigned char packet_template[IPSEC_BIRTH_TEMPLATE_MAXLEN];
-+};
-+
-+extern struct ipsec_birth_reply ipsec_ipv4_birth_packet;
-+extern struct ipsec_birth_reply ipsec_ipv6_birth_packet;
-+
-+enum ipsec_rcv_value {
-+ IPSEC_RCV_PENDING=2,
-+ IPSEC_RCV_LASTPROTO=1,
-+ IPSEC_RCV_OK=0,
-+ IPSEC_RCV_BADPROTO=-1,
-+ IPSEC_RCV_BADLEN=-2,
-+ IPSEC_RCV_ESP_BADALG=-3,
-+ IPSEC_RCV_3DES_BADBLOCKING=-4,
-+ IPSEC_RCV_ESP_DECAPFAIL=-5,
-+ IPSEC_RCV_DECAPFAIL=-6,
-+ IPSEC_RCV_SAIDNOTFOUND=-7,
-+ IPSEC_RCV_IPCOMPALONE=-8,
-+ IPSEC_RCV_IPCOMPFAILED=-10,
-+ IPSEC_RCV_SAIDNOTLIVE=-11,
-+ IPSEC_RCV_FAILEDINBOUND=-12,
-+ IPSEC_RCV_LIFETIMEFAILED=-13,
-+ IPSEC_RCV_BADAUTH=-14,
-+ IPSEC_RCV_REPLAYFAILED=-15,
-+ IPSEC_RCV_AUTHFAILED=-16,
-+ IPSEC_RCV_REPLAYROLLED=-17,
-+ IPSEC_RCV_BAD_DECRYPT=-18,
-+ IPSEC_RCV_REALLYBAD=-19
-+};
-+
-+/*
-+ * state machine states
-+ */
-+
-+#define IPSEC_RSM_INIT 0 /* make it easy, starting state is 0 */
-+#define IPSEC_RSM_DECAP_INIT 1
-+#define IPSEC_RSM_DECAP_LOOKUP 2
-+#define IPSEC_RSM_AUTH_INIT 3
-+#define IPSEC_RSM_AUTH_DECAP 4
-+#define IPSEC_RSM_AUTH_CALC 5
-+#define IPSEC_RSM_AUTH_CHK 6
-+#define IPSEC_RSM_DECRYPT 7
-+#define IPSEC_RSM_DECAP_CONT 8 /* do we restart at IPSEC_RSM_DECAP_INIT */
-+#define IPSEC_RSM_CLEANUP 9
-+#define IPSEC_RSM_IPCOMP 10
-+#define IPSEC_RSM_COMPLETE 11
-+#define IPSEC_RSM_DONE 100
-+
-+struct ipsec_rcv_state {
-+ struct sk_buff *skb;
-+ struct net_device_stats *stats;
-+ struct iphdr *ipp; /* the IP header */
-+ struct ipsec_sa *ipsp; /* current SA being processed */
-+ struct ipsec_sa *lastipsp; /* last SA that was processed */
-+ int len; /* length of packet */
-+ int ilen; /* length of inner payload (-authlen) */
-+ int authlen; /* how big is the auth data at end */
-+ int hard_header_len; /* layer 2 size */
-+ int iphlen; /* how big is IP header */
-+ unsigned int transport_direct:1;
-+ struct auth_alg *authfuncs;
-+ ip_said said;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ __u8 next_header;
-+ __u8 hash[AH_AMAX];
-+ char ipsaddr_txt[ADDRTOA_BUF];
-+ char ipdaddr_txt[ADDRTOA_BUF];
-+ __u8 *octx;
-+ __u8 *ictx;
-+ int ictx_len;
-+ int octx_len;
-+ union {
-+ struct {
-+ struct esphdr *espp;
-+ } espstuff;
-+ struct {
-+ struct ahhdr *ahp;
-+ } ahstuff;
-+ struct {
-+ struct ipcomphdr *compp;
-+ } ipcompstuff;
-+ } protostuff;
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ __u8 natt_type;
-+ __u16 natt_sport;
-+ __u16 natt_dport;
-+ int natt_len;
-+#endif
-+
-+ /*
-+ * rcv state machine use
-+ */
-+ int state;
-+ int next_state;
-+ int auth_checked;
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ struct work_struct workq;
-+#ifdef DECLARE_TASKLET
-+ struct tasklet_struct tasklet;
-+#endif
-+#endif
-+#ifndef NET_21
-+ struct net_device *devp;
-+ struct inet_protocol *protop;
-+#endif
-+ struct xform_functions *proto_funcs;
-+ __u8 proto;
-+ int replay;
-+ unsigned char *authenticator;
-+ int esphlen;
-+#ifdef CONFIG_KLIPS_ALG
-+ struct ipsec_alg_auth *ixt_a;
-+#endif
-+ __u8 ttl, tos;
-+ __u16 frag_off, check;
-+};
-+
-+extern void ipsec_rsm(struct ipsec_rcv_state *irs);
-+#ifdef HAVE_KMEM_CACHE_T
-+extern kmem_cache_t *ipsec_irs_cache;
-+#else
-+extern struct kmem_cache *ipsec_irs_cache;
-+#endif
-+extern int ipsec_irs_max;
-+extern atomic_t ipsec_irs_cnt;
-+
-+extern int
-+#ifdef PROTO_HANDLER_SINGLE_PARM
-+ipsec_rcv(struct sk_buff *skb);
-+#else /* PROTO_HANDLER_SINGLE_PARM */
-+ipsec_rcv(struct sk_buff *skb,
-+ unsigned short xlen);
-+#endif /* PROTO_HANDLER_SINGLE_PARM */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_rcv;
-+#define ipsec_rcv_dmp(_x,_y, _z) if (debug_rcv && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z)
-+#else
-+#define ipsec_rcv_dmp(_x,_y, _z) do {} while(0)
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+extern int sysctl_ipsec_inbound_policy_check;
-+#endif /* __KERNEL__ */
-+
-+extern int klips26_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
-+extern int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type);
-+
-+// manage ipsec rcv state objects
-+extern int ipsec_rcv_state_cache_init (void);
-+extern void ipsec_rcv_state_cache_cleanup (void);
-+
-+#endif /* IPSEC_RCV_H */
-+
-+/*
-+ * $Log: ipsec_rcv.h,v $
-+ * Revision 1.28.2.1 2006/07/10 15:52:20 paul
-+ * Fix for bug #642 by Bart Trojanowski
-+ *
-+ * Revision 1.28 2005/05/11 00:59:45 mcr
-+ * do not call debug routines if !defined KLIPS_DEBUG.
-+ *
-+ * Revision 1.27 2005/04/29 04:59:46 mcr
-+ * use ipsec_dmp_block.
-+ *
-+ * Revision 1.26 2005/04/13 22:48:35 mcr
-+ * added comments, and removed some log.
-+ * removed Linux 2.0 support.
-+ *
-+ * Revision 1.25 2005/04/08 18:25:37 mcr
-+ * prototype klips26 encap receive function
-+ *
-+ * Revision 1.24 2004/08/20 21:45:37 mcr
-+ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to
-+ * be 26sec compatible. But, some defines where changed.
-+ *
-+ * Revision 1.23 2004/08/03 18:17:40 mcr
-+ * in 2.6, use "net_device" instead of #define device->net_device.
-+ * this probably breaks 2.0 compiles.
-+ *
-+ * Revision 1.22 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.21 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.20 2004/04/05 19:55:06 mcr
-+ * Moved from linux/include/freeswan/ipsec_rcv.h,v
-+ *
-+ * Revision 1.19 2003/12/15 18:13:09 mcr
-+ * when compiling with NAT traversal, don't assume that the
-+ * kernel has been patched, unless CONFIG_IPSEC_NAT_NON_ESP
-+ * is set.
-+ *
-+ * history elided 2005-04-12.
-+ *
-+ * Local Variables:
-+ * c-basic-offset:8
-+ * c-style:linux
-+ * End:
-+ *
-+ */
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_sa.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,280 @@
-+/*
-+ * @(#) Definitions of IPsec Security Association (ipsec_sa)
-+ *
-+ * Copyright (C) 2001, 2002, 2003
-+ * Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_sa.h,v 1.23.2.1 2007/09/05 02:31:15 paul Exp $
-+ *
-+ * This file derived from ipsec_xform.h on 2001/9/18 by mcr.
-+ *
-+ */
-+
-+/*
-+ * This file describes the IPsec Security Association Structure.
-+ *
-+ * This structure keeps track of a single transform that may be done
-+ * to a set of packets. It can describe applying the transform or
-+ * apply the reverse. (e.g. compression vs expansion). However, it
-+ * only describes one at a time. To describe both, two structures would
-+ * be used, but since the sides of the transform are performed
-+ * on different machines typically it is usual to have only one side
-+ * of each association.
-+ *
-+ */
-+
-+#ifndef _IPSEC_SA_H_
-+
-+#ifdef __KERNEL__
-+#include "openswan/ipsec_stats.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_eroute.h"
-+#endif /* __KERNEL__ */
-+#include "openswan/ipsec_param.h"
-+
-+#include "openswan/pfkeyv2.h"
-+
-+
-+/* SAs are held in a table.
-+ * Entries in this table are referenced by IPsecSAref_t values.
-+ * IPsecSAref_t values are conceptually subscripts. Because
-+ * we want to allocate the table piece-meal, the subscripting
-+ * is implemented with two levels, a bit like paged virtual memory.
-+ * This representation mechanism is known as an Iliffe Vector.
-+ *
-+ * The Main table (AKA the refTable) consists of 2^IPSEC_SA_REF_MAINTABLE_IDX_WIDTH
-+ * pointers to subtables.
-+ * Each subtable has 2^IPSEC_SA_REF_SUBTABLE_IDX_WIDTH entries, each of which
-+ * is a pointer to an SA.
-+ *
-+ * An IPsecSAref_t contains either an exceptional value (signified by the
-+ * high-order bit being on) or a reference to a table entry. A table entry
-+ * reference has the subtable subscript in the low-order
-+ * IPSEC_SA_REF_SUBTABLE_IDX_WIDTH bits and the Main table subscript
-+ * in the next lowest IPSEC_SA_REF_MAINTABLE_IDX_WIDTH bits.
-+ *
-+ * The Maintable entry for an IPsecSAref_t x, a pointer to its subtable, is
-+ * IPsecSAref2table(x). It is of type struct IPsecSArefSubTable *.
-+ *
-+ * The pointer to the SA for x is IPsecSAref2SA(x). It is of type
-+ * struct ipsec_sa*. The macro definition clearly shows the two-level
-+ * access needed to find the SA pointer.
-+ *
-+ * The Maintable is allocated when IPsec is initialized.
-+ * Each subtable is allocated when needed, but the first is allocated
-+ * when IPsec is initialized.
-+ *
-+ * IPsecSAref_t is designed to be smaller than an NFmark so that
-+ * they can be stored in NFmarks and still leave a few bits for other
-+ * purposes. The spare bits are in the low order of the NFmark
-+ * but in the high order of the IPsecSAref_t, so conversion is required.
-+ * We pick the upper bits of NFmark on the theory that they are less likely to
-+ * interfere with more pedestrian uses of nfmark.
-+ */
-+
-+
-+typedef unsigned short int IPsecRefTableUnusedCount;
-+
-+#define IPSEC_SA_REF_TABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH)
-+
-+#ifdef __KERNEL__
-+#if ((IPSEC_SA_REF_TABLE_IDX_WIDTH - (1 + IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)) < 0)
-+#error "IPSEC_SA_REF_TABLE_IDX_WIDTH("IPSEC_SA_REF_TABLE_IDX_WIDTH") MUST be < 1 + IPSEC_SA_REF_MAINTABLE_IDX_WIDTH("IPSEC_SA_REF_MAINTABLE_IDX_WIDTH")"
-+#endif
-+
-+#define IPSEC_SA_REF_SUBTABLE_IDX_WIDTH (IPSEC_SA_REF_TABLE_IDX_WIDTH - IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)
-+
-+#define IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)
-+#define IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES (1 << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH)
-+
-+#ifdef CONFIG_NETFILTER
-+#define IPSEC_SA_REF_HOST_FIELD(x) ((struct sk_buff*)(x))->nfmark
-+#define IPSEC_SA_REF_HOST_FIELD_TYPE typeof(IPSEC_SA_REF_HOST_FIELD(NULL))
-+#else /* CONFIG_NETFILTER */
-+/* just make it work for now, it doesn't matter, since there is no nfmark */
-+#define IPSEC_SA_REF_HOST_FIELD_TYPE unsigned long
-+#endif /* CONFIG_NETFILTER */
-+#define IPSEC_SA_REF_HOST_FIELD_WIDTH (8 * sizeof(IPSEC_SA_REF_HOST_FIELD_TYPE))
-+#define IPSEC_SA_REF_FIELD_WIDTH (8 * sizeof(IPsecSAref_t))
-+
-+#define IPSEC_SA_REF_MAX (~IPSEC_SAREF_NULL)
-+#define IPSEC_SAREF_FIRST 1
-+#define IPSEC_SA_REF_MASK (IPSEC_SA_REF_MAX >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_TABLE_IDX_WIDTH))
-+#define IPSEC_SA_REF_TABLE_MASK ((IPSEC_SA_REF_MAX >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_MAINTABLE_IDX_WIDTH)) << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH)
-+#define IPSEC_SA_REF_ENTRY_MASK (IPSEC_SA_REF_MAX >> (IPSEC_SA_REF_FIELD_WIDTH - IPSEC_SA_REF_SUBTABLE_IDX_WIDTH))
-+
-+#define IPsecSAref2table(x) (((x) & IPSEC_SA_REF_TABLE_MASK) >> IPSEC_SA_REF_SUBTABLE_IDX_WIDTH)
-+#define IPsecSAref2entry(x) ((x) & IPSEC_SA_REF_ENTRY_MASK)
-+#define IPsecSArefBuild(x,y) (((x) << IPSEC_SA_REF_SUBTABLE_IDX_WIDTH) + (y))
-+
-+#define IPsecSAref2SA(x) (ipsec_sadb.refTable[IPsecSAref2table(x)]->entry[IPsecSAref2entry(x)])
-+#define IPsecSA2SAref(x) ((x)->ips_ref)
-+
-+#define EMT_INBOUND 0x01 /* SA direction, 1=inbound */
-+
-+/* 'struct ipsec_sa' should be 64bit aligned when allocated. */
-+struct ipsec_sa
-+{
-+ atomic_t ips_refcount; /* reference count for this struct */
-+ int ips_marked_deleted; /* used with reference counting */
-+ IPsecSAref_t ips_ref; /* reference table entry number */
-+ IPsecSAref_t ips_refhim; /* ref of paired SA, if any */
-+ struct ipsec_sa *ips_next; /* pointer to next xform */
-+
-+ struct ipsec_sa *ips_hnext; /* next in hash chain */
-+
-+ struct ifnet *ips_rcvif; /* related rcv encap interface */
-+
-+ struct xform_functions *ips_xformfuncs; /* pointer to routines to process this SA */
-+
-+ struct net_device *ips_out; /* what interface to emerge on */
-+ __u8 ips_transport_direct; /* if true, punt directly to
-+ * the protocol layer */
-+ struct socket *ips_sock; /* cache of transport socket */
-+
-+ ip_said ips_said; /* SA ID */
-+
-+ __u32 ips_seq; /* seq num of msg that initiated this SA */
-+ __u32 ips_pid; /* PID of process that initiated this SA */
-+ __u8 ips_authalg; /* auth algorithm for this SA */
-+ __u8 ips_encalg; /* enc algorithm for this SA */
-+
-+ struct ipsec_stats ips_errs;
-+
-+ __u8 ips_replaywin; /* replay window size */
-+ enum sadb_sastate ips_state; /* state of SA */
-+ __u32 ips_replaywin_lastseq; /* last pkt sequence num */
-+ __u64 ips_replaywin_bitmap; /* bitmap of received pkts */
-+ __u32 ips_replaywin_maxdiff; /* max pkt sequence difference */
-+
-+ __u32 ips_flags; /* generic xform flags */
-+
-+
-+ struct ipsec_lifetimes ips_life; /* lifetime records */
-+
-+ /* selector information */
-+ __u8 ips_transport_protocol; /* protocol for this SA, if ports are involved */
-+ struct sockaddr*ips_addr_s; /* src sockaddr */
-+ struct sockaddr*ips_addr_d; /* dst sockaddr */
-+ struct sockaddr*ips_addr_p; /* proxy sockaddr */
-+ __u16 ips_addr_s_size;
-+ __u16 ips_addr_d_size;
-+ __u16 ips_addr_p_size;
-+ ip_address ips_flow_s;
-+ ip_address ips_flow_d;
-+ ip_address ips_mask_s;
-+ ip_address ips_mask_d;
-+
-+ __u16 ips_key_bits_a; /* size of authkey in bits */
-+ __u16 ips_auth_bits; /* size of authenticator in bits */
-+ __u16 ips_key_bits_e; /* size of enckey in bits */
-+ __u16 ips_iv_bits; /* size of IV in bits */
-+ __u8 ips_iv_size;
-+ __u16 ips_key_a_size;
-+ __u16 ips_key_e_size;
-+
-+ caddr_t ips_key_a; /* authentication key */
-+ caddr_t ips_key_e; /* encryption key */
-+ caddr_t ips_iv; /* Initialisation Vector */
-+
-+ struct ident ips_ident_s; /* identity src */
-+ struct ident ips_ident_d; /* identity dst */
-+
-+ /* these are included even if CONFIG_KLIPS_IPCOMP is off */
-+ __u16 ips_comp_adapt_tries; /* ipcomp self-adaption tries */
-+ __u16 ips_comp_adapt_skip; /* ipcomp self-adaption to-skip */
-+ __u64 ips_comp_ratio_cbytes; /* compressed bytes */
-+ __u64 ips_comp_ratio_dbytes; /* decompressed (or uncompressed) bytes */
-+
-+ /* these are included even if CONFIG_IPSEC_NAT_TRAVERSAL is off */
-+ __u8 ips_natt_type;
-+ __u8 ips_natt_reserved[3];
-+ __u16 ips_natt_sport;
-+ __u16 ips_natt_dport;
-+
-+ struct sockaddr *ips_natt_oa;
-+ __u16 ips_natt_oa_size;
-+ __u16 ips_natt_reserved2;
-+
-+#if 0
-+ __u32 ips_sens_dpd;
-+ __u8 ips_sens_sens_level;
-+ __u8 ips_sens_sens_len;
-+ __u64* ips_sens_sens_bitmap;
-+ __u8 ips_sens_integ_level;
-+ __u8 ips_sens_integ_len;
-+ __u64* ips_sens_integ_bitmap;
-+#endif
-+ struct ipsec_alg_enc *ips_alg_enc;
-+ struct ipsec_alg_auth *ips_alg_auth;
-+
-+ int ocf_in_use;
-+ int64_t ocf_cryptoid;
-+};
-+
-+struct IPsecSArefSubTable
-+{
-+ struct ipsec_sa* entry[IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES];
-+};
-+
-+struct ipsec_sadb {
-+ struct IPsecSArefSubTable* refTable[IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES];
-+ IPsecSAref_t refFreeList[IPSEC_SA_REF_FREELIST_NUM_ENTRIES];
-+ int refFreeListHead;
-+ int refFreeListTail;
-+ IPsecSAref_t refFreeListCont;
-+ IPsecSAref_t said_hash[SADB_HASHMOD];
-+ spinlock_t sadb_lock;
-+};
-+
-+extern struct ipsec_sadb ipsec_sadb;
-+
-+extern int ipsec_SAref_recycle(void);
-+extern int ipsec_SArefSubTable_alloc(unsigned table);
-+extern int ipsec_saref_freelist_init(void);
-+extern int ipsec_sadb_init(void);
-+extern struct ipsec_sa *ipsec_sa_alloc(int*error); /* pass in error var by pointer */
-+extern IPsecSAref_t ipsec_SAref_alloc(int*erorr); /* pass in error var by pointer */
-+extern int ipsec_sa_free(struct ipsec_sa* ips);
-+
-+#define ipsec_sa_get(ips) __ipsec_sa_get(ips, __FUNCTION__, __LINE__)
-+extern struct ipsec_sa * __ipsec_sa_get(struct ipsec_sa *ips, const char *func, int line);
-+
-+#define ipsec_sa_put(ips) __ipsec_sa_put(ips, __FUNCTION__, __LINE__)
-+extern void __ipsec_sa_put(struct ipsec_sa *ips, const char *func, int line);
-+extern int ipsec_sa_add(struct ipsec_sa *ips);
-+extern void ipsec_sa_rm(struct ipsec_sa *ips);
-+extern int ipsec_sadb_cleanup(__u8 proto);
-+extern int ipsec_sadb_free(void);
-+extern int ipsec_sa_wipe(struct ipsec_sa *ips);
-+extern int ipsec_sa_intern(struct ipsec_sa *ips);
-+extern struct ipsec_sa *ipsec_sa_getbyref(IPsecSAref_t ref);
-+
-+extern void ipsec_sa_untern(struct ipsec_sa *ips);
-+#endif /* __KERNEL__ */
-+
-+enum ipsec_direction {
-+ ipsec_incoming = 1,
-+ ipsec_outgoing = 2
-+};
-+
-+#define _IPSEC_SA_H_
-+#endif /* _IPSEC_SA_H_ */
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_sha1.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,79 @@
-+/*
-+ * RCSID $Id: ipsec_sha1.h,v 1.8 2004/04/05 19:55:07 mcr Exp $
-+ */
-+
-+/*
-+ * Here is the original comment from the distribution:
-+
-+SHA-1 in C
-+By Steve Reid <steve@edmweb.com>
-+100% Public Domain
-+
-+ * Adapted for use by the IPSEC code by John Ioannidis
-+ */
-+
-+
-+#ifndef _IPSEC_SHA1_H_
-+#define _IPSEC_SHA1_H_
-+
-+typedef struct
-+{
-+ __u32 state[5];
-+ __u32 count[2];
-+ __u8 buffer[64];
-+} SHA1_CTX;
-+
-+void SHA1Transform(__u32 state[5], __u8 buffer[64]);
-+void SHA1Init(void *context);
-+void SHA1Update(void *context, unsigned char *data, __u32 len);
-+void SHA1Final(unsigned char digest[20], void *context);
-+
-+
-+#endif /* _IPSEC_SHA1_H_ */
-+
-+/*
-+ * $Log: ipsec_sha1.h,v $
-+ * Revision 1.8 2004/04/05 19:55:07 mcr
-+ * Moved from linux/include/freeswan/ipsec_sha1.h,v
-+ *
-+ * Revision 1.7 2002/09/10 01:45:09 mcr
-+ * changed type of MD5_CTX and SHA1_CTX to void * so that
-+ * the function prototypes would match, and could be placed
-+ * into a pointer to a function.
-+ *
-+ * Revision 1.6 2002/04/24 07:36:47 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_sha1.h,v
-+ *
-+ * Revision 1.5 1999/12/13 13:59:13 rgb
-+ * Quick fix to argument size to Update bugs.
-+ *
-+ * Revision 1.4 1999/12/07 18:16:23 rgb
-+ * Fixed comments at end of #endif lines.
-+ *
-+ * Revision 1.3 1999/04/06 04:54:27 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.2 1998/11/30 13:22:54 rgb
-+ * Rationalised all the klips kernel file headers. They are much shorter
-+ * now and won't conflict under RH5.2.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:50 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.2 1998/04/23 20:54:05 rgb
-+ * Fixed md5 and sha1 include file nesting issues, to be cleaned up when
-+ * verified.
-+ *
-+ * Revision 1.1 1998/04/09 03:04:21 henry
-+ * sources moved up from linux/net/ipsec
-+ * these two include files modified not to include others except in kernel
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:04 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * New transform
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_stats.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,76 @@
-+/*
-+ * @(#) definition of ipsec_stats structure
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_stats.h,v 1.7 2005/04/14 01:17:45 mcr Exp $
-+ *
-+ */
-+
-+/*
-+ * This file describes the errors/statistics that FreeSWAN collects.
-+ */
-+
-+#ifndef _IPSEC_STATS_H_
-+
-+struct ipsec_stats {
-+ __u32 ips_alg_errs; /* number of algorithm errors */
-+ __u32 ips_auth_errs; /* # of authentication errors */
-+ __u32 ips_encsize_errs; /* # of encryption size errors*/
-+ __u32 ips_encpad_errs; /* # of encryption pad errors*/
-+ __u32 ips_replaywin_errs; /* # of pkt sequence errors */
-+};
-+
-+#define _IPSEC_STATS_H_
-+#endif /* _IPSEC_STATS_H_ */
-+
-+/*
-+ * $Log: ipsec_stats.h,v $
-+ * Revision 1.7 2005/04/14 01:17:45 mcr
-+ * add prototypes for snprintf.
-+ *
-+ * Revision 1.6 2004/04/05 19:55:07 mcr
-+ * Moved from linux/include/freeswan/ipsec_stats.h,v
-+ *
-+ * Revision 1.5 2004/04/05 19:41:05 mcr
-+ * merged alg-branch code.
-+ *
-+ * Revision 1.4 2004/03/28 20:27:19 paul
-+ * Included tested and confirmed fixes mcr made and dhr verified for
-+ * snprint statements. Changed one other snprintf to use ipsec_snprintf
-+ * so it wouldnt break compatibility with 2.0/2.2 kernels. Verified with
-+ * dhr. (thanks dhr!)
-+ *
-+ * Revision 1.4 2004/03/24 01:58:31 mcr
-+ * sprintf->snprintf for formatting into proc buffer.
-+ *
-+ * Revision 1.3.34.1 2004/04/05 04:30:46 mcr
-+ * patches for alg-branch to compile/work with 2.x openswan
-+ *
-+ * Revision 1.3 2002/04/24 07:36:47 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_stats.h,v
-+ *
-+ * Revision 1.2 2001/11/26 09:16:16 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.1.2.1 2001/09/25 02:27:00 mcr
-+ * statistics moved to seperate structure.
-+ *
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_sysctl.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,20 @@
-+#ifndef OPENSWAN_SYSCTL_H
-+#define OPENSWAN_SYSCTL_H
-+
-+extern int debug_ah;
-+extern int debug_esp;
-+extern int debug_xform;
-+extern int debug_eroute;
-+extern int debug_spi;
-+extern int debug_netlink;
-+extern int debug_radij;
-+extern int debug_rcv;
-+extern int debug_tunnel;
-+extern int debug_xmit;
-+extern int debug_mast;
-+
-+extern int sysctl_ip_default_ttl;
-+extern int sysctl_ipsec_inbound_policy_check;
-+extern int sysctl_ipsec_debug_ipcomp;
-+extern int sysctl_ipsec_debug_verbose;
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_tunnel.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,286 @@
-+/*
-+ * IPSEC tunneling code
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs.
-+ * Copyright (C) 2006 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+
-+# define DEV_QUEUE_XMIT(skb, device, pri) {\
-+ skb->dev = device; \
-+ neigh_compat_output(skb); \
-+ /* skb->dst->output(skb); */ \
-+ }
-+# define ICMP_SEND(skb_in, type, code, info, dev) \
-+ icmp_send(skb_in, type, code, htonl(info))
-+# define IP_SEND(skb, dev) \
-+ ip_send(skb);
-+
-+
-+#if defined(KLIPS)
-+/*
-+ * Heavily based on drivers/net/new_tunnel.c. Lots
-+ * of ideas also taken from the 2.1.x version of drivers/net/shaper.c
-+ */
-+
-+struct ipsectunnelconf
-+{
-+ uint32_t cf_cmd;
-+ union
-+ {
-+ char cfu_name[12];
-+ } cf_u;
-+#define cf_name cf_u.cfu_name
-+};
-+
-+#define IPSEC_SET_DEV (SIOCDEVPRIVATE)
-+#define IPSEC_DEL_DEV (SIOCDEVPRIVATE + 1)
-+#define IPSEC_CLR_DEV (SIOCDEVPRIVATE + 2)
-+#define IPSEC_UDP_ENCAP_CONVERT (SIOCDEVPRIVATE + 3)
-+#endif
-+
-+#ifdef __KERNEL__
-+#include <linux/version.h>
-+#ifndef KERNEL_VERSION
-+# define KERNEL_VERSION(x,y,z) (((x)<<16)+((y)<<8)+(z))
-+#endif
-+struct ipsecpriv
-+{
-+ struct sk_buff_head sendq;
-+ struct net_device *dev;
-+ struct wait_queue *wait_queue;
-+ int vifnum;
-+ char locked;
-+ int (*hard_start_xmit) (struct sk_buff *skb,
-+ struct net_device *dev);
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+ const struct header_ops *header_ops;
-+#else
-+
-+ int (*hard_header) (struct sk_buff *skb,
-+ struct net_device *dev,
-+ unsigned short type,
-+ void *daddr,
-+ void *saddr,
-+ unsigned len);
-+#ifdef NET_21
-+ int (*rebuild_header)(struct sk_buff *skb);
-+#else /* NET_21 */
-+ int (*rebuild_header)(void *buff, struct net_device *dev,
-+ unsigned long raddr, struct sk_buff *skb);
-+#endif /* NET_21 */
-+#ifndef NET_21
-+ void (*header_cache_bind)(struct hh_cache **hhp, struct net_device *dev,
-+ unsigned short htype, __u32 daddr);
-+#endif /* !NET_21 */
-+ void (*header_cache_update)(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr);
-+#endif
-+ int (*set_mac_address)(struct net_device *dev, void *addr);
-+ struct net_device_stats *(*get_stats)(struct net_device *dev);
-+ struct net_device_stats mystats;
-+ int mtu; /* What is the desired MTU? */
-+};
-+
-+extern char ipsec_tunnel_c_version[];
-+
-+extern struct net_device *ipsecdevices[IPSEC_NUM_IFMAX];
-+extern int ipsecdevices_max;
-+
-+int ipsec_tunnel_init_devices(void);
-+
-+/* void */ int ipsec_tunnel_cleanup_devices(void);
-+
-+extern /* void */ int ipsec_init(void);
-+
-+extern int ipsec_tunnel_start_xmit(struct sk_buff *skb, struct net_device *dev);
-+extern struct net_device *ipsec_get_device(int inst);
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_tunnel;
-+extern int sysctl_ipsec_debug_verbose;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+#endif /* __KERNEL__ */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+#define DB_TN_INIT 0x0001
-+#define DB_TN_PROCFS 0x0002
-+#define DB_TN_XMIT 0x0010
-+#define DB_TN_OHDR 0x0020
-+#define DB_TN_CROUT 0x0040
-+#define DB_TN_OXFS 0x0080
-+#define DB_TN_REVEC 0x0100
-+#define DB_TN_ENCAP 0x0200
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+extern int ipsec_tunnel_deletenum(int vifnum);
-+extern int ipsec_tunnel_createnum(int vifnum);
-+extern struct net_device *ipsec_tunnel_get_device(int vifnum);
-+
-+
-+/* manage ipsec xmit state objects */
-+extern int ipsec_xmit_state_cache_init (void);
-+extern void ipsec_xmit_state_cache_cleanup (void);
-+struct ipsec_xmit_state *ipsec_xmit_state_new (void);
-+void ipsec_xmit_state_delete (struct ipsec_xmit_state *ixs);
-+
-+/*
-+ * $Log: ipsec_tunnel.h,v $
-+ * Revision 1.33 2005/06/04 16:06:05 mcr
-+ * better patch for nat-t rcv-device code.
-+ *
-+ * Revision 1.32 2005/05/21 03:18:35 mcr
-+ * added additional debug flag tunnelling.
-+ *
-+ * Revision 1.31 2004/08/03 18:18:02 mcr
-+ * in 2.6, use "net_device" instead of #define device->net_device.
-+ * this probably breaks 2.0 compiles.
-+ *
-+ * Revision 1.30 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.29 2004/04/05 19:55:07 mcr
-+ * Moved from linux/include/freeswan/ipsec_tunnel.h,v
-+ *
-+ * Revision 1.28 2003/06/24 20:22:32 mcr
-+ * added new global: ipsecdevices[] so that we can keep track of
-+ * the ipsecX devices. They will be referenced with dev_hold(),
-+ * so 2.2 may need this as well.
-+ *
-+ * Revision 1.27 2003/04/03 17:38:09 rgb
-+ * Centralised ipsec_kfree_skb and ipsec_dev_{get,put}.
-+ *
-+ * Revision 1.26 2003/02/12 19:32:20 rgb
-+ * Updated copyright year.
-+ *
-+ * Revision 1.25 2002/05/27 18:56:07 rgb
-+ * Convert to dynamic ipsec device allocation.
-+ *
-+ * Revision 1.24 2002/04/24 07:36:48 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_tunnel.h,v
-+ *
-+ * Revision 1.23 2001/11/06 19:50:44 rgb
-+ * Moved IP_SEND, ICMP_SEND, DEV_QUEUE_XMIT macros to ipsec_tunnel.h for
-+ * use also by pfkey_v2_parser.c
-+ *
-+ * Revision 1.22 2001/09/15 16:24:05 rgb
-+ * Re-inject first and last HOLD packet when an eroute REPLACE is done.
-+ *
-+ * Revision 1.21 2001/06/14 19:35:10 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.20 2000/09/15 11:37:02 rgb
-+ * Merge in heavily modified Svenning Soerensen's <svenning@post5.tele.dk>
-+ * IPCOMP zlib deflate code.
-+ *
-+ * Revision 1.19 2000/09/08 19:12:56 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.18 2000/07/28 13:50:54 rgb
-+ * Changed enet_statistics to net_device_stats and added back compatibility
-+ * for pre-2.1.19.
-+ *
-+ * Revision 1.17 1999/11/19 01:12:15 rgb
-+ * Purge unneeded proc_info prototypes, now that static linking uses
-+ * dynamic proc_info registration.
-+ *
-+ * Revision 1.16 1999/11/18 18:51:00 rgb
-+ * Changed all device registrations for static linking to
-+ * dynamic to reduce the number and size of patches.
-+ *
-+ * Revision 1.15 1999/11/18 04:14:21 rgb
-+ * Replaced all kernel version macros to shorter, readable form.
-+ * Added CONFIG_PROC_FS compiler directives in case it is shut off.
-+ * Added Marc Boucher's 2.3.25 proc patches.
-+ *
-+ * Revision 1.14 1999/05/25 02:50:10 rgb
-+ * Fix kernel version macros for 2.0.x static linking.
-+ *
-+ * Revision 1.13 1999/05/25 02:41:06 rgb
-+ * Add ipsec_klipsdebug support for static linking.
-+ *
-+ * Revision 1.12 1999/05/05 22:02:32 rgb
-+ * Add a quick and dirty port to 2.2 kernels by Marc Boucher <marc@mbsi.ca>.
-+ *
-+ * Revision 1.11 1999/04/29 15:19:50 rgb
-+ * Add return values to init and cleanup functions.
-+ *
-+ * Revision 1.10 1999/04/16 16:02:39 rgb
-+ * Bump up macro to 4 ipsec I/Fs.
-+ *
-+ * Revision 1.9 1999/04/15 15:37:25 rgb
-+ * Forward check changes from POST1_00 branch.
-+ *
-+ * Revision 1.5.2.1 1999/04/02 04:26:14 rgb
-+ * Backcheck from HEAD, pre1.0.
-+ *
-+ * Revision 1.8 1999/04/11 00:29:01 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.7 1999/04/06 04:54:28 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.6 1999/03/31 05:44:48 rgb
-+ * Keep PMTU reduction private.
-+ *
-+ * Revision 1.5 1999/02/10 22:31:20 rgb
-+ * Change rebuild_header member to reflect generality of link layer.
-+ *
-+ * Revision 1.4 1998/12/01 13:22:04 rgb
-+ * Added support for debug printing of version info.
-+ *
-+ * Revision 1.3 1998/07/29 20:42:46 rgb
-+ * Add a macro for clearing all tunnel devices.
-+ * Rearrange structures and declarations for sharing with userspace.
-+ *
-+ * Revision 1.2 1998/06/25 20:01:45 rgb
-+ * Make prototypes available for ipsec_init and ipsec proc_dir_entries
-+ * for static linking.
-+ *
-+ * Revision 1.1 1998/06/18 21:27:50 henry
-+ * move sources from klips/src to klips/net/ipsec, to keep stupid
-+ * kernel-build scripts happier in the presence of symlinks
-+ *
-+ * Revision 1.3 1998/05/18 21:51:50 rgb
-+ * Added macros for num of I/F's and a procfs debug switch.
-+ *
-+ * Revision 1.2 1998/04/21 21:29:09 rgb
-+ * Rearrange debug switches to change on the fly debug output from user
-+ * space. Only kernel changes checked in at this time. radij.c was also
-+ * changed to temporarily remove buggy debugging code in rj_delete causing
-+ * an OOPS and hence, netlink device open errors.
-+ *
-+ * Revision 1.1 1998/04/09 03:06:13 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:05 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.5 1997/06/03 04:24:48 ji
-+ * Added transport mode.
-+ * Changed the way routing is done.
-+ * Lots of bug fixes.
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * No changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:39:04 ji
-+ * Minor cleanups.
-+ * Rationalized debugging code.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_xform.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,278 @@
-+/*
-+ * Definitions relevant to IPSEC transformations
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ * COpyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_xform.h,v 1.42 2005/08/05 08:50:45 mcr Exp $
-+ */
-+
-+#ifndef _IPSEC_XFORM_H_
-+
-+#include <openswan.h>
-+
-+#define XF_NONE 0 /* No transform set */
-+#define XF_IP4 1 /* IPv4 inside IPv4 */
-+#define XF_AHMD5 2 /* AH MD5 */
-+#define XF_AHSHA 3 /* AH SHA */
-+#define XF_ESP3DES 5 /* ESP DES3-CBC */
-+#define XF_AHHMACMD5 6 /* AH-HMAC-MD5 with opt replay prot */
-+#define XF_AHHMACSHA1 7 /* AH-HMAC-SHA1 with opt replay prot */
-+#define XF_ESP3DESMD5 9 /* triple DES, HMAC-MD-5, 128-bits of authentication */
-+#define XF_ESP3DESMD596 10 /* triple DES, HMAC-MD-5, 96-bits of authentication */
-+#define XF_ESPNULLMD596 12 /* NULL, HMAC-MD-5 with 96-bits of authentication */
-+#define XF_ESPNULLSHA196 13 /* NULL, HMAC-SHA-1 with 96-bits of authentication */
-+#define XF_ESP3DESSHA196 14 /* triple DES, HMAC-SHA-1, 96-bits of authentication */
-+#define XF_IP6 15 /* IPv6 inside IPv6 */
-+#define XF_COMPDEFLATE 16 /* IPCOMP deflate */
-+
-+#define XF_CLR 126 /* Clear SA table */
-+#define XF_DEL 127 /* Delete SA */
-+
-+/* IPsec AH transform values
-+ * RFC 2407
-+ * draft-ietf-ipsec-doi-tc-mib-02.txt
-+ */
-+
-+/* why are these hardcoded here? See ipsec_policy.h for their enums -- Paul*/
-+/* ---------- These really need to go from here ------------------ */
-+#define AH_NONE 0
-+#define AH_MD5 2
-+#define AH_SHA 3
-+/* draft-ietf-ipsec-ciph-aes-cbc-03.txt */
-+#define AH_SHA2_256 5
-+#define AH_SHA2_384 6
-+#define AH_SHA2_512 7
-+#define AH_RIPEMD 8
-+#define AH_AES 9
-+#define AH_NULL 251
-+#define AH_MAX 251
-+
-+/* IPsec ESP transform values */
-+
-+#define ESP_NONE 0
-+#define ESP_DES 2
-+#define ESP_3DES 3
-+#define ESP_RC5 4
-+#define ESP_IDEA 5
-+#define ESP_CAST 6
-+#define ESP_BLOWFISH 7
-+#define ESP_3IDEA 8
-+#define ESP_RC4 10
-+#define ESP_NULL 11
-+#define ESP_AES 12
-+#define ESP_AES_CTR 13
-+#define ESP_AES_CCM_A 14
-+#define ESP_AES_CCM_B 15
-+#define ESP_AES_CCM_C 16
-+#define ESP_ID17 17
-+#define ESP_AES_GCM_A 18
-+#define ESP_AES_GCM_B 19
-+#define ESP_AES_GCM_C 20
-+#define ESP_SEED_CBC 21
-+#define ESP_CAMELLIA 22
-+
-+/* as draft-ietf-ipsec-ciph-aes-cbc-02.txt */
-+#define ESP_MARS 249
-+#define ESP_RC6 250
-+#define ESP_SERPENT 252
-+#define ESP_TWOFISH 253
-+
-+/* IPCOMP transform values */
-+
-+#define IPCOMP_NONE 0
-+#define IPCOMP_OUI 1
-+#define IPCOMP_DEFLAT 2
-+#define IPCOMP_LZS 3
-+#define IPCOMP_V42BIS 4
-+
-+#define XFT_AUTH 0x0001
-+#define XFT_CONF 0x0100
-+
-+/* available if CONFIG_KLIPS_DEBUG is defined */
-+#define DB_XF_INIT 0x0001
-+
-+#define PROTO2TXT(x) \
-+ (x) == IPPROTO_AH ? "AH" : \
-+ (x) == IPPROTO_ESP ? "ESP" : \
-+ (x) == IPPROTO_IPIP ? "IPIP" : \
-+ (x) == IPPROTO_COMP ? "COMP" : \
-+ "UNKNOWN_proto"
-+static inline const char *enc_name_id (unsigned id) {
-+ static char buf[16];
-+ snprintf(buf, sizeof(buf), "_ID%d", id);
-+ return buf;
-+}
-+static inline const char *auth_name_id (unsigned id) {
-+ static char buf[16];
-+ snprintf(buf, sizeof(buf), "_ID%d", id);
-+ return buf;
-+}
-+#define IPS_XFORM_NAME(x) \
-+ PROTO2TXT((x)->ips_said.proto), \
-+ (x)->ips_said.proto == IPPROTO_COMP ? \
-+ ((x)->ips_encalg == SADB_X_CALG_DEFLATE ? \
-+ "_DEFLATE" : "_UNKNOWN_comp") : \
-+ (x)->ips_encalg == ESP_NONE ? "" : \
-+ (x)->ips_encalg == ESP_3DES ? "_3DES" : \
-+ (x)->ips_encalg == ESP_AES ? "_AES" : \
-+ (x)->ips_encalg == ESP_SERPENT ? "_SERPENT" : \
-+ (x)->ips_encalg == ESP_TWOFISH ? "_TWOFISH" : \
-+ enc_name_id(x->ips_encalg)/* "_UNKNOWN_encr" */, \
-+ (x)->ips_authalg == AH_NONE ? "" : \
-+ (x)->ips_authalg == AH_MD5 ? "_HMAC_MD5" : \
-+ (x)->ips_authalg == AH_SHA ? "_HMAC_SHA1" : \
-+ (x)->ips_authalg == AH_SHA2_256 ? "_HMAC_SHA2_256" : \
-+ (x)->ips_authalg == AH_SHA2_384 ? "_HMAC_SHA2_384" : \
-+ (x)->ips_authalg == AH_SHA2_512 ? "_HMAC_SHA2_512" : \
-+ auth_name_id(x->ips_authalg) /* "_UNKNOWN_auth" */ \
-+
-+#ifdef __KERNEL__
-+#include <linux/skbuff.h>
-+
-+struct ipsec_rcv_state;
-+struct ipsec_xmit_state;
-+
-+struct xform_functions {
-+ u8 protocol;
-+ enum ipsec_rcv_value (*rcv_checks)(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb);
-+ enum ipsec_rcv_value (*rcv_decrypt)(struct ipsec_rcv_state *irs);
-+
-+ enum ipsec_rcv_value (*rcv_setup_auth)(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb,
-+ __u32 *replay,
-+ unsigned char **authenticator);
-+ enum ipsec_rcv_value (*rcv_calc_auth)(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb);
-+
-+ enum ipsec_xmit_value (*xmit_setup)(struct ipsec_xmit_state *ixs);
-+ enum ipsec_xmit_value (*xmit_encrypt)(struct ipsec_xmit_state *ixs);
-+
-+ enum ipsec_xmit_value (*xmit_setup_auth)(struct ipsec_xmit_state *ixs,
-+ struct sk_buff *skb,
-+ __u32 *replay,
-+ unsigned char **authenticator);
-+ enum ipsec_xmit_value (*xmit_calc_auth)(struct ipsec_xmit_state *ixs,
-+ struct sk_buff *skb);
-+ int xmit_headroom;
-+ int xmit_needtailroom;
-+};
-+
-+#endif /* __KERNEL__ */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern void ipsec_dmp(char *s, caddr_t bb, int len);
-+#else /* CONFIG_KLIPS_DEBUG */
-+#define ipsec_dmp(_x, _y, _z)
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+
-+#define _IPSEC_XFORM_H_
-+#endif /* _IPSEC_XFORM_H_ */
-+
-+/*
-+ * $Log: ipsec_xform.h,v $
-+ * Revision 1.42 2005/08/05 08:50:45 mcr
-+ * move #include of skbuff.h to a place where
-+ * we know it will be kernel only code.
-+ *
-+ * Revision 1.41 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.40 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.39 2004/04/05 19:55:07 mcr
-+ * Moved from linux/include/freeswan/ipsec_xform.h,v
-+ *
-+ * Revision 1.38 2004/04/05 19:41:05 mcr
-+ * merged alg-branch code.
-+ *
-+ * Revision 1.37 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.36.34.1 2003/12/22 15:25:52 jjo
-+ * Merged algo-0.8.1-rc11-test1 into alg-branch
-+ *
-+ * Revision 1.36 2002/04/24 07:36:48 mcr
-+ * Moved from ./klips/net/ipsec/ipsec_xform.h,v
-+ *
-+ * Revision 1.35 2001/11/26 09:23:51 rgb
-+ * Merge MCR's ipsec_sa, eroute, proc and struct lifetime changes.
-+ *
-+ * Revision 1.33.2.1 2001/09/25 02:24:58 mcr
-+ * struct tdb -> struct ipsec_sa.
-+ * sa(tdb) manipulation functions renamed and moved to ipsec_sa.c
-+ * ipsec_xform.c removed. header file still contains useful things.
-+ *
-+ * Revision 1.34 2001/11/06 19:47:17 rgb
-+ * Changed lifetime_packets to uint32 from uint64.
-+ *
-+ * Revision 1.33 2001/09/08 21:13:34 rgb
-+ * Added pfkey ident extension support for ISAKMPd. (NetCelo)
-+ *
-+ * Revision 1.32 2001/07/06 07:40:01 rgb
-+ * Reformatted for readability.
-+ * Added inbound policy checking fields for use with IPIP SAs.
-+ *
-+ * Revision 1.31 2001/06/14 19:35:11 rgb
-+ * Update copyright date.
-+ *
-+ * Revision 1.30 2001/05/30 08:14:03 rgb
-+ * Removed vestiges of esp-null transforms.
-+ *
-+ * Revision 1.29 2001/01/30 23:42:47 rgb
-+ * Allow pfkey msgs from pid other than user context required for ACQUIRE
-+ * and subsequent ADD or UDATE.
-+ *
-+ * Revision 1.28 2000/11/06 04:30:40 rgb
-+ * Add Svenning's adaptive content compression.
-+ *
-+ * Revision 1.27 2000/09/19 00:38:25 rgb
-+ * Fixed algorithm name bugs introduced for ipcomp.
-+ *
-+ * Revision 1.26 2000/09/17 21:36:48 rgb
-+ * Added proto2txt macro.
-+ *
-+ * Revision 1.25 2000/09/17 18:56:47 rgb
-+ * Added IPCOMP support.
-+ *
-+ * Revision 1.24 2000/09/12 19:34:12 rgb
-+ * Defined XF_IP6 from Gerhard for ipv6 tunnel support.
-+ *
-+ * Revision 1.23 2000/09/12 03:23:14 rgb
-+ * Cleaned out now unused tdb_xform and tdb_xdata members of struct tdb.
-+ *
-+ * Revision 1.22 2000/09/08 19:12:56 rgb
-+ * Change references from DEBUG_IPSEC to CONFIG_IPSEC_DEBUG.
-+ *
-+ * Revision 1.21 2000/09/01 18:32:43 rgb
-+ * Added (disabled) sensitivity members to tdb struct.
-+ *
-+ * Revision 1.20 2000/08/30 05:31:01 rgb
-+ * Removed all the rest of the references to tdb_spi, tdb_proto, tdb_dst.
-+ * Kill remainder of tdb_xform, tdb_xdata, xformsw.
-+ *
-+ * Revision 1.19 2000/08/01 14:51:52 rgb
-+ * Removed _all_ remaining traces of DES.
-+ *
-+ * Revision 1.18 2000/01/21 06:17:45 rgb
-+ * Tidied up spacing.
-+ *
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/ipsec_xmit.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,267 @@
-+/*
-+ * IPSEC tunneling code
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_xmit.h,v 1.14 2005/05/11 01:00:26 mcr Exp $
-+ */
-+
-+#include "openswan/ipsec_sa.h"
-+
-+#ifdef CONFIG_KLIPS_OCF
-+#include <cryptodev.h>
-+#endif
-+
-+enum ipsec_xmit_value
-+{
-+ IPSEC_XMIT_STOLEN=2,
-+ IPSEC_XMIT_PASS=1,
-+ IPSEC_XMIT_OK=0,
-+ IPSEC_XMIT_ERRMEMALLOC=-1,
-+ IPSEC_XMIT_ESP_BADALG=-2,
-+ IPSEC_XMIT_BADPROTO=-3,
-+ IPSEC_XMIT_ESP_PUSHPULLERR=-4,
-+ IPSEC_XMIT_BADLEN=-5,
-+ IPSEC_XMIT_AH_BADALG=-6,
-+ IPSEC_XMIT_SAIDNOTFOUND=-7,
-+ IPSEC_XMIT_SAIDNOTLIVE=-8,
-+ IPSEC_XMIT_REPLAYROLLED=-9,
-+ IPSEC_XMIT_LIFETIMEFAILED=-10,
-+ IPSEC_XMIT_CANNOTFRAG=-11,
-+ IPSEC_XMIT_MSSERR=-12,
-+ IPSEC_XMIT_ERRSKBALLOC=-13,
-+ IPSEC_XMIT_ENCAPFAIL=-14,
-+ IPSEC_XMIT_NODEV=-15,
-+ IPSEC_XMIT_NOPRIVDEV=-16,
-+ IPSEC_XMIT_NOPHYSDEV=-17,
-+ IPSEC_XMIT_NOSKB=-18,
-+ IPSEC_XMIT_NOIPV6=-19,
-+ IPSEC_XMIT_NOIPOPTIONS=-20,
-+ IPSEC_XMIT_TTLEXPIRED=-21,
-+ IPSEC_XMIT_BADHHLEN=-22,
-+ IPSEC_XMIT_PUSHPULLERR=-23,
-+ IPSEC_XMIT_ROUTEERR=-24,
-+ IPSEC_XMIT_RECURSDETECT=-25,
-+ IPSEC_XMIT_IPSENDFAILURE=-26,
-+ IPSEC_XMIT_ESPUDP=-27,
-+ IPSEC_XMIT_ESPUDP_BADTYPE=-28,
-+ IPSEC_XMIT_PENDING=-29,
-+};
-+
-+
-+/*
-+ * state machine states
-+ */
-+
-+#define IPSEC_XSM_INIT1 0 /* make it easy, starting state is 0 */
-+#define IPSEC_XSM_INIT2 1
-+#define IPSEC_XSM_ENCAP_INIT 2
-+#define IPSEC_XSM_ENCAP_SELECT 3
-+#define IPSEC_XSM_ESP 4
-+#define IPSEC_XSM_ESP_AH 5
-+#define IPSEC_XSM_AH 6
-+#define IPSEC_XSM_IPIP 7
-+#define IPSEC_XSM_IPCOMP 8
-+#define IPSEC_XSM_CONT 9
-+#define IPSEC_XSM_DONE 100
-+
-+
-+struct ipsec_xmit_state
-+{
-+ struct sk_buff *skb; /* working skb pointer */
-+ struct net_device *dev; /* working dev pointer */
-+ struct ipsecpriv *prv; /* Our device' private space */
-+ struct sk_buff *oskb; /* Original skb pointer */
-+ struct net_device_stats *stats; /* This device's statistics */
-+ struct iphdr *iph; /* Our new IP header */
-+ __u32 newdst; /* The other SG's IP address */
-+ __u32 orgdst; /* Original IP destination address */
-+ __u32 orgedst; /* 1st SG's IP address */
-+ __u32 newsrc; /* The new source SG's IP address */
-+ __u32 orgsrc; /* Original IP source address */
-+ __u32 innersrc; /* Innermost IP source address */
-+ int iphlen; /* IP header length */
-+ int pyldsz; /* upper protocol payload size */
-+ int headroom;
-+ int tailroom;
-+ int authlen;
-+ int max_headroom; /* The extra header space needed */
-+ int max_tailroom; /* The extra stuffing needed */
-+ int ll_headroom; /* The extra link layer hard_header space needed */
-+ int tot_headroom; /* The total header space needed */
-+ int tot_tailroom; /* The totalstuffing needed */
-+ __u8 *saved_header; /* saved copy of the hard header */
-+ unsigned short sport, dport;
-+
-+ struct sockaddr_encap matcher; /* eroute search key */
-+ struct eroute *eroute;
-+ struct ipsec_sa *ipsp; /* ipsec_sa pointers */
-+ //struct ipsec_sa *ipsp_outer; /* last SA applied by encap_bundle */
-+ char sa_txt[SATOT_BUF];
-+ size_t sa_len;
-+ int hard_header_stripped; /* has the hard header been removed yet? */
-+ int hard_header_len;
-+ struct net_device *physdev;
-+/* struct device *virtdev; */
-+ short physmtu;
-+ short cur_mtu; /* copy of prv->mtu, cause prv may == NULL */
-+ short mtudiff;
-+#ifdef NET_21
-+ struct rtable *route;
-+#endif /* NET_21 */
-+ ip_said outgoing_said;
-+#ifdef NET_21
-+ int pass;
-+#endif /* NET_21 */
-+ uint32_t eroute_pid;
-+ struct ipsec_sa ips;
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ uint8_t natt_type;
-+ uint8_t natt_head;
-+ uint16_t natt_sport;
-+ uint16_t natt_dport;
-+#endif
-+
-+ /*
-+ * xmit state machine use
-+ */
-+ void (*xsm_complete)(struct ipsec_xmit_state *ixs,
-+ enum ipsec_xmit_value stat);
-+ int state;
-+ int next_state;
-+#ifdef CONFIG_KLIPS_OCF
-+ struct work_struct workq;
-+#ifdef DECLARE_TASKLET
-+ struct tasklet_struct tasklet;
-+#endif
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+ struct ipsec_alg_auth *ixt_a;
-+ struct ipsec_alg_enc *ixt_e;
-+#endif
-+#ifdef CONFIG_KLIPS_ESP
-+ struct esphdr *espp;
-+ unsigned char *idat;
-+#endif /* !CONFIG_KLIPS_ESP */
-+ int blocksize;
-+ int ilen, len;
-+ unsigned char *dat;
-+ __u8 frag_off, tos;
-+ __u16 ttl, check;
-+};
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs);
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs);
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_encap_bundle(struct ipsec_xmit_state *ixs);
-+
-+extern void ipsec_xsm(struct ipsec_xmit_state *ixs);
-+#ifdef HAVE_KMEM_CACHE_T
-+extern kmem_cache_t *ipsec_ixs_cache;
-+#else
-+extern struct kmem_cache *ipsec_ixs_cache;
-+#endif
-+extern int ipsec_ixs_max;
-+extern atomic_t ipsec_ixs_cnt;
-+
-+extern void ipsec_extract_ports(struct iphdr * iph, struct sockaddr_encap * er);
-+
-+extern enum ipsec_xmit_value
-+ipsec_xmit_send(struct ipsec_xmit_state*ixs, struct flowi *fl);
-+
-+extern enum ipsec_xmit_value
-+ipsec_nat_encap(struct ipsec_xmit_state*ixs);
-+
-+extern enum ipsec_xmit_value
-+ipsec_tunnel_send(struct ipsec_xmit_state *ixs);
-+
-+extern void ipsec_xmit_cleanup(struct ipsec_xmit_state*ixs);
-+
-+
-+extern int ipsec_xmit_trap_count;
-+extern int ipsec_xmit_trap_sendcount;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_xmit;
-+extern int debug_mast;
-+
-+#define ipsec_xmit_dmp(_x,_y, _z) if (debug_xmit && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z)
-+#else
-+#define ipsec_xmit_dmp(_x,_y, _z) do {} while(0)
-+
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+extern int sysctl_ipsec_debug_verbose;
-+extern int sysctl_ipsec_icmp;
-+extern int sysctl_ipsec_tos;
-+
-+
-+/*
-+ * $Log: ipsec_xmit.h,v $
-+ * Revision 1.14 2005/05/11 01:00:26 mcr
-+ * do not call debug routines if !defined KLIPS_DEBUG.
-+ *
-+ * Revision 1.13 2005/04/29 05:01:38 mcr
-+ * use ipsec_dmp_block.
-+ * added cur_mtu to ixs instead of using ixs->dev.
-+ *
-+ * Revision 1.12 2004/08/20 21:45:37 mcr
-+ * CONFIG_KLIPS_NAT_TRAVERSAL is not used in an attempt to
-+ * be 26sec compatible. But, some defines where changed.
-+ *
-+ * Revision 1.11 2004/08/03 18:18:21 mcr
-+ * in 2.6, use "net_device" instead of #define device->net_device.
-+ * this probably breaks 2.0 compiles.
-+ *
-+ * Revision 1.10 2004/07/10 19:08:41 mcr
-+ * CONFIG_IPSEC -> CONFIG_KLIPS.
-+ *
-+ * Revision 1.9 2004/04/06 02:49:08 mcr
-+ * pullup of algo code from alg-branch.
-+ *
-+ * Revision 1.8 2004/04/05 19:55:07 mcr
-+ * Moved from linux/include/freeswan/ipsec_xmit.h,v
-+ *
-+ * Revision 1.7 2004/02/03 03:11:40 mcr
-+ * new xmit type if the UDP encapsulation is wrong.
-+ *
-+ * Revision 1.6 2003/12/13 19:10:16 mcr
-+ * refactored rcv and xmit code - same as FS 2.05.
-+ *
-+ * Revision 1.5 2003/12/10 01:20:06 mcr
-+ * NAT-traversal patches to KLIPS.
-+ *
-+ * Revision 1.4 2003/12/06 16:37:04 mcr
-+ * 1.4.7a X.509 patch applied.
-+ *
-+ * Revision 1.3 2003/10/31 02:27:05 mcr
-+ * pulled up port-selector patches and sa_id elimination.
-+ *
-+ * Revision 1.2.4.2 2003/10/29 01:10:19 mcr
-+ * elimited "struct sa_id"
-+ *
-+ * Revision 1.2.4.1 2003/09/21 13:59:38 mcr
-+ * pre-liminary X.509 patch - does not yet pass tests.
-+ *
-+ * Revision 1.2 2003/06/20 01:42:13 mcr
-+ * added counters to measure how many ACQUIREs we send to pluto,
-+ * and how many are successfully sent.
-+ *
-+ * Revision 1.1 2003/02/12 19:31:03 rgb
-+ * Refactored from ipsec_tunnel.c
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/mast.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,33 @@
-+struct mast_callbacks {
-+ int (*packet_encap)(struct device *mast, void *context,
-+ struct sk_buff *skb, int flowref);
-+ int (*link_inquire)(struct device *mast, void *context);
-+};
-+
-+
-+struct device *mast_init (int family,
-+ struct mast_callbacks *callbacks,
-+ unsigned int flags,
-+ unsigned int desired_unit,
-+ unsigned int max_flowref,
-+ void *context);
-+
-+int mast_destroy(struct device *mast);
-+
-+int mast_recv(struct device *mast, struct sk_buff *skb, int flowref);
-+
-+/* free this skb as being useless, increment failure count. */
-+int mast_toast(struct device *mast, struct sk_buff *skb, int flowref);
-+
-+int mast_linkstat (struct device *mast, int flowref,
-+ int status);
-+
-+int mast_setreference (struct device *mast,
-+ int defaultSA);
-+
-+int mast_setneighbor (struct device *mast,
-+ struct sockaddr *source,
-+ struct sockaddr *destination,
-+ int flowref);
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/passert.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,75 @@
-+/*
-+ * sanitize a string into a printable format.
-+ *
-+ * Copyright (C) 1998-2002 D. Hugh Redelmeier.
-+ * Copyright (C) 2003 Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: passert.h,v 1.7.8.1 2007/09/05 02:32:24 paul Exp $
-+ */
-+
-+#include "openswan.h"
-+
-+#ifndef _OPENSWAN_PASSERT_H
-+#define _OPENSWAN_PASSERT_H
-+/* our versions of assert: log result */
-+
-+#ifdef DEBUG
-+
-+typedef void (*openswan_passert_fail_t)(const char *pred_str,
-+ const char *file_str,
-+ unsigned long line_no) NEVER_RETURNS;
-+
-+extern openswan_passert_fail_t openswan_passert_fail;
-+
-+extern void pexpect_log(const char *pred_str
-+ , const char *file_str, unsigned long line_no);
-+
-+# define impossible() do { \
-+ if(openswan_passert_fail) { \
-+ (*openswan_passert_fail)("impossible", __FILE__, __LINE__); \
-+ }} while(0)
-+
-+extern void openswan_switch_fail(int n
-+ , const char *file_str, unsigned long line_no) NEVER_RETURNS;
-+
-+# define bad_case(n) openswan_switch_fail((int) n, __FILE__, __LINE__)
-+
-+# define passert(pred) do { \
-+ if (!(pred)) \
-+ if(openswan_passert_fail) { \
-+ (*openswan_passert_fail)(#pred, __FILE__, __LINE__); \
-+ } \
-+ } while(0)
-+
-+# define pexpect(pred) do { \
-+ if (!(pred)) \
-+ pexpect_log(#pred, __FILE__, __LINE__); \
-+ } while(0)
-+
-+/* assert that an err_t is NULL; evaluate exactly once */
-+# define happy(x) { \
-+ err_t ugh = x; \
-+ if (ugh != NULL) \
-+ if(openswan_passert_fail) { (*openswan_passert_fail)(ugh, __FILE__, __LINE__); } \
-+ }
-+
-+#else /*!DEBUG*/
-+
-+# define impossible() abort()
-+# define bad_case(n) abort()
-+# define passert(pred) { } /* do nothing */
-+# define happy(x) { (void) x; } /* evaluate non-judgementally */
-+
-+#endif /*!DEBUG*/
-+
-+#endif /* _OPENSWAN_PASSERT_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/pfkey.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,384 @@
-+/*
-+ * Openswan specific PF_KEY headers
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs.
-+ * Copyright (C) 2006-2007 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: pfkey.h,v 1.52 2005/11/09 00:30:37 mcr Exp $
-+ */
-+
-+#ifndef __NET_IPSEC_PF_KEY_H
-+#define __NET_IPSEC_PF_KEY_H
-+
-+#include "pfkeyv2.h"
-+#ifdef __KERNEL__
-+extern struct proto_ops pfkey_proto_ops;
-+typedef struct sock pfkey_sock;
-+extern int debug_pfkey;
-+
-+extern /* void */ int pfkey_init(void);
-+extern /* void */ int pfkey_cleanup(void);
-+
-+struct socket_list
-+{
-+ struct socket *socketp;
-+ struct socket_list *next;
-+};
-+extern int pfkey_list_insert_socket(struct socket*, struct socket_list**);
-+extern int pfkey_list_remove_socket(struct socket*, struct socket_list**);
-+extern struct socket_list *pfkey_open_sockets;
-+extern struct socket_list *pfkey_registered_sockets[];
-+
-+struct ipsec_alg_supported
-+{
-+ uint16_t ias_exttype;
-+ uint8_t ias_id;
-+ uint8_t ias_ivlen;
-+ uint16_t ias_keyminbits;
-+ uint16_t ias_keymaxbits;
-+ const char *ias_name;
-+};
-+
-+extern struct supported_list *pfkey_supported_list[];
-+struct supported_list
-+{
-+ struct ipsec_alg_supported *supportedp;
-+ struct supported_list *next;
-+};
-+extern int pfkey_list_insert_supported(struct ipsec_alg_supported*, struct supported_list**);
-+extern int pfkey_list_remove_supported(struct ipsec_alg_supported*, struct supported_list**);
-+
-+struct sockaddr_key
-+{
-+ uint16_t key_family; /* PF_KEY */
-+ uint16_t key_pad; /* not used */
-+ uint32_t key_pid; /* process ID */
-+};
-+
-+struct pfkey_extracted_data
-+{
-+ struct ipsec_sa* ips;
-+ struct ipsec_sa* ips2;
-+ struct eroute *eroute;
-+ int outif;
-+ IPsecSAref_t sarefme;
-+ IPsecSAref_t sarefhim;
-+};
-+
-+/* forward reference */
-+struct sadb_ext;
-+struct sadb_msg;
-+struct sockaddr;
-+struct sadb_comb;
-+struct sadb_sadb;
-+struct sadb_alg;
-+
-+extern int
-+pfkey_alloc_eroute(struct eroute** eroute);
-+
-+extern int
-+pfkey_sa_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_lifetime_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_address_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_key_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_ident_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_sens_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_prop_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_supported_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_spirange_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_x_kmprivate_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_x_satype_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int
-+pfkey_x_debug_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data* extr);
-+
-+extern int pfkey_upmsg(struct socket *, struct sadb_msg *);
-+extern int pfkey_upmsgsk(struct sock *, struct sadb_msg *);
-+extern int pfkey_expire(struct ipsec_sa *, int);
-+extern int pfkey_acquire(struct ipsec_sa *);
-+#else /* ! __KERNEL__ */
-+
-+extern openswan_keying_debug_func_t pfkey_debug_func;
-+extern openswan_keying_debug_func_t pfkey_error_func;
-+extern void pfkey_print(struct sadb_msg *msg, FILE *out);
-+
-+
-+#endif /* __KERNEL__ */
-+
-+extern uint8_t satype2proto(uint8_t satype);
-+extern uint8_t proto2satype(uint8_t proto);
-+extern char* satype2name(uint8_t satype);
-+extern char* proto2name(uint8_t proto);
-+
-+struct key_opt
-+{
-+ uint32_t key_pid; /* process ID */
-+ struct sock *sk;
-+};
-+
-+#define key_pid(sk) ((struct key_opt*)&((sk)->sk_protinfo))->key_pid
-+
-+/* XXX-mcr this is not an alignment, this is because the count is in 64-bit
-+ * words.
-+ */
-+#define IPSEC_PFKEYv2_ALIGN (sizeof(uint64_t)/sizeof(uint8_t))
-+#define BITS_PER_OCTET 8
-+#define OCTETBITS 8
-+#define PFKEYBITS 64
-+#define DIVUP(x,y) ((x + y -1) / y) /* divide, rounding upwards */
-+#define ALIGN_N(x,y) (DIVUP(x,y) * y) /* align on y boundary */
-+
-+#define IPSEC_PFKEYv2_LEN(x) ((x) * IPSEC_PFKEYv2_ALIGN)
-+#define IPSEC_PFKEYv2_WORDS(x) (DIVUP(x,IPSEC_PFKEYv2_ALIGN))
-+
-+
-+#define PFKEYv2_MAX_MSGSIZE 4096
-+
-+/*
-+ * PF_KEYv2 permitted and required extensions in and out bitmaps
-+ */
-+struct pf_key_ext_parsers_def {
-+ int (*parser)(struct sadb_ext*);
-+ char *parser_name;
-+};
-+
-+enum pfkey_ext_required {
-+ EXT_BITS_IN=0,
-+ EXT_BITS_OUT=1
-+};
-+
-+enum pfkey_ext_perm {
-+ EXT_BITS_PERM=0,
-+ EXT_BITS_REQ=1
-+};
-+
-+
-+typedef uint64_t pfkey_ext_track;
-+static inline void pfkey_mark_extension(enum sadb_extension_t exttype,
-+ pfkey_ext_track *exten_track)
-+{
-+ *exten_track |= (1 << exttype);
-+}
-+
-+extern int pfkey_extensions_missing(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ pfkey_ext_track extensions_seen);
-+extern int pfkey_required_extension(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ enum sadb_extension_t exttype);
-+extern int pfkey_permitted_extension(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ enum sadb_extension_t exttype);
-+
-+
-+extern void pfkey_extensions_init(struct sadb_ext *extensions[]);
-+extern void pfkey_extensions_free(struct sadb_ext *extensions[]);
-+extern void pfkey_msg_free(struct sadb_msg **pfkey_msg);
-+
-+extern int pfkey_msg_parse(struct sadb_msg *pfkey_msg,
-+ struct pf_key_ext_parsers_def *ext_parsers[],
-+ struct sadb_ext **extensions,
-+ int dir);
-+
-+extern int pfkey_register_reply(int satype, struct sadb_msg *sadb_msg);
-+
-+/*
-+ * PF_KEYv2 build function prototypes
-+ */
-+
-+int
-+pfkey_msg_hdr_build(struct sadb_ext** pfkey_ext,
-+ uint8_t msg_type,
-+ uint8_t satype,
-+ uint8_t msg_errno,
-+ uint32_t seq,
-+ uint32_t pid);
-+
-+int
-+pfkey_sa_ref_build(struct sadb_ext ** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t spi, /* in network order */
-+ uint8_t replay_window,
-+ uint8_t sa_state,
-+ uint8_t auth,
-+ uint8_t encrypt,
-+ uint32_t flags,
-+ uint32_t/*IPsecSAref_t*/ ref);
-+
-+int
-+pfkey_sa_build(struct sadb_ext ** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t spi, /* in network order */
-+ uint8_t replay_window,
-+ uint8_t sa_state,
-+ uint8_t auth,
-+ uint8_t encrypt,
-+ uint32_t flags);
-+
-+extern int
-+pfkey_saref_build(struct sadb_ext **pfkey_ext,
-+ IPsecSAref_t in, IPsecSAref_t out);
-+
-+int
-+pfkey_lifetime_build(struct sadb_ext ** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t allocations,
-+ uint64_t bytes,
-+ uint64_t addtime,
-+ uint64_t usetime,
-+ uint32_t packets);
-+
-+int
-+pfkey_address_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint8_t proto,
-+ uint8_t prefixlen,
-+ struct sockaddr* address);
-+
-+int
-+pfkey_key_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint16_t key_bits,
-+ unsigned char *key);
-+
-+int
-+pfkey_ident_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint16_t ident_type,
-+ uint64_t ident_id,
-+ uint8_t ident_len,
-+ char* ident_string);
-+
-+#ifdef __KERNEL__
-+extern int pfkey_nat_t_new_mapping(struct ipsec_sa *, struct sockaddr *, __u16);
-+extern int pfkey_x_nat_t_type_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr);
-+extern int pfkey_x_nat_t_port_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr);
-+#endif /* __KERNEL__ */
-+int
-+pfkey_x_nat_t_type_build(struct sadb_ext** pfkey_ext,
-+ uint8_t type);
-+int
-+pfkey_x_nat_t_port_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint16_t port);
-+
-+int
-+pfkey_sens_build(struct sadb_ext** pfkey_ext,
-+ uint32_t dpd,
-+ uint8_t sens_level,
-+ uint8_t sens_len,
-+ uint64_t* sens_bitmap,
-+ uint8_t integ_level,
-+ uint8_t integ_len,
-+ uint64_t* integ_bitmap);
-+
-+int pfkey_x_protocol_build(struct sadb_ext **, uint8_t);
-+
-+
-+int
-+pfkey_prop_build(struct sadb_ext** pfkey_ext,
-+ uint8_t replay,
-+ unsigned int comb_num,
-+ struct sadb_comb* comb);
-+
-+int
-+pfkey_supported_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ unsigned int alg_num,
-+ struct sadb_alg* alg);
-+
-+int
-+pfkey_spirange_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t min,
-+ uint32_t max);
-+
-+int
-+pfkey_x_kmprivate_build(struct sadb_ext** pfkey_ext);
-+
-+int
-+pfkey_x_satype_build(struct sadb_ext** pfkey_ext,
-+ uint8_t satype);
-+
-+int
-+pfkey_x_debug_build(struct sadb_ext** pfkey_ext,
-+ uint32_t tunnel,
-+ uint32_t netlink,
-+ uint32_t xform,
-+ uint32_t eroute,
-+ uint32_t spi,
-+ uint32_t radij,
-+ uint32_t esp,
-+ uint32_t ah,
-+ uint32_t rcv,
-+ uint32_t pfkey,
-+ uint32_t ipcomp,
-+ uint32_t verbose);
-+
-+int
-+pfkey_msg_build(struct sadb_msg** pfkey_msg,
-+ struct sadb_ext* extensions[],
-+ int dir);
-+
-+/* in pfkey_v2_debug.c - routines to decode numbers -> strings */
-+const char *
-+pfkey_v2_sadb_ext_string(int extnum);
-+
-+const char *
-+pfkey_v2_sadb_type_string(int sadb_type);
-+
-+struct sadb_builds {
-+ struct k_sadb_sa sa_base;
-+};
-+
-+int
-+pfkey_sa_builds(struct sadb_ext **pfkey_ext,
-+ struct sadb_builds sab);
-+
-+extern int
-+pfkey_outif_build(struct sadb_ext **pfkey_ext,
-+ uint16_t outif);
-+
-+#endif /* __NET_IPSEC_PF_KEY_H */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/pfkey_debug.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,54 @@
-+/*
-+ * sanitize a string into a printable format.
-+ *
-+ * Copyright (C) 1998-2002 D. Hugh Redelmeier.
-+ * Copyright (C) 2003 Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: pfkey_debug.h,v 1.3 2004/04/05 19:55:07 mcr Exp $
-+ */
-+
-+#ifndef _FREESWAN_PFKEY_DEBUG_H
-+#define _FREESWAN_PFKEY_DEBUG_H
-+
-+#ifdef __KERNEL__
-+
-+/* note, kernel version ignores pfkey levels */
-+# define DEBUGGING(level,args...) \
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:" args)
-+
-+# define ERROR(args...) printk(KERN_ERR "klips:" args)
-+
-+#else
-+
-+extern unsigned int pfkey_lib_debug;
-+
-+extern int (*pfkey_debug_func)(const char *message, ...) PRINTF_LIKE(1);
-+extern int (*pfkey_error_func)(const char *message, ...) PRINTF_LIKE(1);
-+
-+#define DEBUGGING(level,args...) if(pfkey_lib_debug & level) { \
-+ if(pfkey_debug_func != NULL) { \
-+ (*pfkey_debug_func)("pfkey_lib_debug:" args); \
-+ } else { \
-+ printf("pfkey_lib_debug:" args); \
-+ } }
-+
-+#define ERROR(args...) if(pfkey_error_func != NULL) { \
-+ (*pfkey_error_func)("pfkey_lib_debug:" args); \
-+ }
-+
-+# define MALLOC(size) malloc(size)
-+# define FREE(obj) free(obj)
-+
-+#endif
-+
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/pfkeyv2.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,347 @@
-+/*
-+ * RCSID $Id: pfkeyv2.h,v 1.31 2005/04/14 01:14:54 mcr Exp $
-+ */
-+
-+/*
-+RFC 2367 PF_KEY Key Management API July 1998
-+
-+
-+Appendix D: Sample Header File
-+
-+This file defines structures and symbols for the PF_KEY Version 2
-+key management interface. It was written at the U.S. Naval Research
-+Laboratory. This file is in the public domain. The authors ask that
-+you leave this credit intact on any copies of this file.
-+*/
-+
-+#ifndef __PFKEY_V2_H
-+#define __PFKEY_V2_H 1
-+
-+#include <linux/pfkeyv2.h>
-+
-+#define PF_KEY_V2 2
-+#define PFKEYV2_REVISION 199806L
-+
-+enum sadb_msg_t {
-+ K_SADB_RESERVED=SADB_RESERVED,
-+ K_SADB_GETSPI=SADB_GETSPI,
-+ K_SADB_UPDATE=SADB_UPDATE,
-+ K_SADB_ADD=SADB_ADD,
-+ K_SADB_DELETE=SADB_DELETE,
-+ K_SADB_GET=SADB_GET,
-+ K_SADB_ACQUIRE=SADB_ACQUIRE,
-+ K_SADB_REGISTER=SADB_REGISTER,
-+ K_SADB_EXPIRE=SADB_EXPIRE,
-+ K_SADB_FLUSH=SADB_FLUSH,
-+ K_SADB_DUMP=SADB_DUMP,
-+ K_SADB_X_PROMISC=SADB_X_PROMISC,
-+ K_SADB_X_PCHANGE=SADB_X_PCHANGE,
-+ K_SADB_X_GRPSA=13,
-+ K_SADB_X_ADDFLOW=14,
-+ K_SADB_X_DELFLOW=15,
-+ K_SADB_X_DEBUG=16,
-+ K_SADB_X_NAT_T_NEW_MAPPING=17,
-+ K_SADB_X_PLUMBIF=18,
-+ K_SADB_X_UNPLUMBIF=19,
-+ K_SADB_MAX=19
-+};
-+
-+#define SADB_X_GRPSA K_SADB_X_GRPSA
-+#define SADB_X_ADDFLOW K_SADB_X_ADDFLOW
-+#define SADB_X_DELFLOW K_SADB_X_DELFLOW
-+#define SADB_X_DEBUG K_SADB_X_DEBUG
-+#define SADB_X_PLUMBIF K_SADB_X_PLUMBIF
-+#define SADB_X_UNPLUMBIF K_SADB_X_UNPLUMBIF
-+
-+struct k_sadb_sa {
-+ uint16_t sadb_sa_len;
-+ uint16_t sadb_sa_exttype;
-+ uint32_t sadb_sa_spi;
-+ uint8_t sadb_sa_replay;
-+ uint8_t sadb_sa_state;
-+ uint8_t sadb_sa_auth;
-+ uint8_t sadb_sa_encrypt;
-+ uint32_t sadb_sa_flags;
-+ uint32_t /*IPsecSAref_t*/ sadb_x_sa_ref; /* 32 bits */
-+ uint8_t sadb_x_reserved[4];
-+} __attribute__((packed));
-+
-+struct sadb_sa_v1 {
-+ uint16_t sadb_sa_len;
-+ uint16_t sadb_sa_exttype;
-+ uint32_t sadb_sa_spi;
-+ uint8_t sadb_sa_replay;
-+ uint8_t sadb_sa_state;
-+ uint8_t sadb_sa_auth;
-+ uint8_t sadb_sa_encrypt;
-+ uint32_t sadb_sa_flags;
-+} __attribute__((packed));
-+
-+struct sadb_x_satype {
-+ uint16_t sadb_x_satype_len;
-+ uint16_t sadb_x_satype_exttype;
-+ uint8_t sadb_x_satype_satype;
-+ uint8_t sadb_x_satype_reserved[3];
-+} __attribute__((packed));
-+
-+struct sadb_x_debug {
-+ uint16_t sadb_x_debug_len;
-+ uint16_t sadb_x_debug_exttype;
-+ uint32_t sadb_x_debug_tunnel;
-+ uint32_t sadb_x_debug_netlink;
-+ uint32_t sadb_x_debug_xform;
-+ uint32_t sadb_x_debug_eroute;
-+ uint32_t sadb_x_debug_spi;
-+ uint32_t sadb_x_debug_radij;
-+ uint32_t sadb_x_debug_esp;
-+ uint32_t sadb_x_debug_ah;
-+ uint32_t sadb_x_debug_rcv;
-+ uint32_t sadb_x_debug_pfkey;
-+ uint32_t sadb_x_debug_ipcomp;
-+ uint32_t sadb_x_debug_verbose;
-+ uint8_t sadb_x_debug_reserved[4];
-+} __attribute__((packed));
-+
-+/*
-+ * a plumbif extension can appear in
-+ * - a plumbif message to create the interface.
-+ * - a unplumbif message to delete the interface.
-+ * - a sadb add/replace to indicate which interface
-+ * a decrypted packet should emerge on.
-+ *
-+ * the create/delete part could/should be replaced with netlink equivalents,
-+ * or better yet, FORCES versions of same.
-+ *
-+ */
-+struct sadb_x_plumbif {
-+ uint16_t sadb_x_outif_len;
-+ uint16_t sadb_x_outif_exttype;
-+ uint16_t sadb_x_outif_ifnum;
-+} __attribute__((packed));
-+
-+/*
-+ * the ifnum describes a device that you wish to create refer to.
-+ *
-+ * devices 0-40959 are mastXXX devices.
-+ * devices 40960-49141 are mastXXX devices with transport set.
-+ * devices 49152-65536 are deprecated ipsecXXX devices.
-+ */
-+#define IPSECDEV_OFFSET (48*1024)
-+#define MASTTRANSPORT_OFFSET (40*1024)
-+
-+/*
-+ * an saref extension sets the SA's reference number, and
-+ * may also set the paired SA's reference number.
-+ *
-+ */
-+struct sadb_x_saref {
-+ uint16_t sadb_x_saref_len;
-+ uint16_t sadb_x_saref_exttype;
-+ uint32_t sadb_x_saref_me;
-+ uint32_t sadb_x_saref_him;
-+} __attribute__((packed));
-+
-+/*
-+ * A protocol structure for passing through the transport level
-+ * protocol. It contains more fields than are actually used/needed
-+ * but it is this way to be compatible with the structure used in
-+ * OpenBSD (http://www.openbsd.org/cgi-bin/cvsweb/src/sys/net/pfkeyv2.h)
-+ */
-+struct sadb_protocol {
-+ uint16_t sadb_protocol_len;
-+ uint16_t sadb_protocol_exttype;
-+ uint8_t sadb_protocol_proto;
-+ uint8_t sadb_protocol_direction;
-+ uint8_t sadb_protocol_flags;
-+ uint8_t sadb_protocol_reserved2;
-+} __attribute__((packed));
-+
-+/*
-+ * NOTE that there is a limit of 31 extensions due to current implementation
-+ * in pfkeyv2_ext_bits.c
-+ */
-+enum sadb_extension_t {
-+ K_SADB_EXT_RESERVED=SADB_RESERVED,
-+ K_SADB_EXT_SA= SADB_EXT_SA,
-+ K_SADB_EXT_LIFETIME_CURRENT=SADB_EXT_LIFETIME_CURRENT,
-+ K_SADB_EXT_LIFETIME_HARD= SADB_EXT_LIFETIME_HARD,
-+ K_SADB_EXT_LIFETIME_SOFT= SADB_EXT_LIFETIME_SOFT,
-+ K_SADB_EXT_ADDRESS_SRC= SADB_EXT_ADDRESS_SRC,
-+ K_SADB_EXT_ADDRESS_DST= SADB_EXT_ADDRESS_DST,
-+ K_SADB_EXT_ADDRESS_PROXY= SADB_EXT_ADDRESS_PROXY,
-+ K_SADB_EXT_KEY_AUTH= SADB_EXT_KEY_AUTH,
-+ K_SADB_EXT_KEY_ENCRYPT= SADB_EXT_KEY_ENCRYPT,
-+ K_SADB_EXT_IDENTITY_SRC= SADB_EXT_IDENTITY_SRC,
-+ K_SADB_EXT_IDENTITY_DST= SADB_EXT_IDENTITY_DST,
-+ K_SADB_EXT_SENSITIVITY= SADB_EXT_SENSITIVITY,
-+ K_SADB_EXT_PROPOSAL= SADB_EXT_PROPOSAL,
-+ K_SADB_EXT_SUPPORTED_AUTH= SADB_EXT_SUPPORTED_AUTH,
-+ K_SADB_EXT_SUPPORTED_ENCRYPT=SADB_EXT_SUPPORTED_ENCRYPT,
-+ K_SADB_EXT_SPIRANGE= SADB_EXT_SPIRANGE,
-+ K_SADB_X_EXT_KMPRIVATE= SADB_X_EXT_KMPRIVATE,
-+ K_SADB_X_EXT_SATYPE2= 18,
-+ K_SADB_X_EXT_POLICY= SADB_X_EXT_POLICY,
-+ K_SADB_X_EXT_SA2= SADB_X_EXT_SA2,
-+ K_SADB_X_EXT_ADDRESS_DST2= 20,
-+ K_SADB_X_EXT_ADDRESS_SRC_FLOW=21,
-+ K_SADB_X_EXT_ADDRESS_DST_FLOW=22,
-+ K_SADB_X_EXT_ADDRESS_SRC_MASK=23,
-+ K_SADB_X_EXT_ADDRESS_DST_MASK=24,
-+ K_SADB_X_EXT_DEBUG= 25,
-+ K_SADB_X_EXT_PROTOCOL= 26,
-+ K_SADB_X_EXT_NAT_T_TYPE= 27,
-+ K_SADB_X_EXT_NAT_T_SPORT= 28,
-+ K_SADB_X_EXT_NAT_T_DPORT= 29,
-+ K_SADB_X_EXT_NAT_T_OA= 30,
-+ K_SADB_X_EXT_PLUMBIF= 31,
-+ K_SADB_X_EXT_SAREF= 32,
-+ K_SADB_EXT_MAX= 32,
-+};
-+
-+
-+#define SADB_X_EXT_SATYPE2 K_SADB_X_EXT_SATYPE2
-+#define SADB_X_EXT_ADDRESS_DST2 K_SADB_X_EXT_ADDRESS_DST2
-+#define SADB_X_EXT_ADDRESS_SRC_FLOW K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+#define SADB_X_EXT_ADDRESS_DST_FLOW K_SADB_X_EXT_ADDRESS_DST_FLOW
-+#define SADB_X_EXT_ADDRESS_SRC_MASK K_SADB_X_EXT_ADDRESS_SRC_MASK
-+#define SADB_X_EXT_ADDRESS_DST_MASK K_SADB_X_EXT_ADDRESS_DST_MASK
-+#define SADB_X_EXT_DEBUG K_SADB_X_EXT_DEBUG
-+#define SADB_X_EXT_PROTOCOL K_SADB_X_EXT_PROTOCOL
-+
-+#undef SADB_X_EXT_NAT_T_TYPE
-+#undef SADB_X_EXT_NAT_T_SPORT
-+#undef SADB_X_EXT_NAT_T_DPORT
-+#undef SADB_X_EXT_NAT_T_OA
-+#define SADB_X_EXT_PLUMBIF K_SADB_X_EXT_PLUMBIF
-+
-+
-+
-+/* K_SADB_X_DELFLOW required over and above K_SADB_X_SAFLAGS_CLEARFLOW */
-+#define K_SADB_X_EXT_ADDRESS_DELFLOW \
-+ ( (1<<K_SADB_X_EXT_ADDRESS_SRC_FLOW) \
-+ | (1<<K_SADB_X_EXT_ADDRESS_DST_FLOW) \
-+ | (1<<K_SADB_X_EXT_ADDRESS_SRC_MASK) \
-+ | (1<<K_SADB_X_EXT_ADDRESS_DST_MASK))
-+
-+enum sadb_satype {
-+ K_SADB_SATYPE_UNSPEC=SADB_SATYPE_UNSPEC,
-+ K_SADB_SATYPE_AH=SADB_SATYPE_AH,
-+ K_SADB_SATYPE_ESP=SADB_SATYPE_ESP,
-+ K_SADB_SATYPE_RSVP=SADB_SATYPE_RSVP,
-+ K_SADB_SATYPE_OSPFV2=SADB_SATYPE_OSPFV2,
-+ K_SADB_SATYPE_RIPV2=SADB_SATYPE_RIPV2,
-+ K_SADB_SATYPE_MIP=SADB_SATYPE_MIP,
-+ K_SADB_X_SATYPE_IPIP=9,
-+ K_SADB_X_SATYPE_COMP=10,
-+ K_SADB_X_SATYPE_INT=11
-+};
-+#define K_SADB_SATYPE_MAX 11
-+
-+enum sadb_sastate {
-+ K_SADB_SASTATE_LARVAL=0,
-+ K_SADB_SASTATE_MATURE=1,
-+ K_SADB_SASTATE_DYING=2,
-+ K_SADB_SASTATE_DEAD=3
-+};
-+#undef SADB_SASTATE_LARVAL
-+#undef SADB_SASTATE_MATURE
-+#undef SADB_SASTATE_DYING
-+#undef SADB_SASTATE_DEAD
-+#define K_SADB_SASTATE_MAX 3
-+
-+#define SADB_SAFLAGS_PFS 1
-+#define SADB_X_SAFLAGS_REPLACEFLOW 2
-+#define SADB_X_SAFLAGS_CLEARFLOW 4
-+#define SADB_X_SAFLAGS_INFLOW 8
-+
-+/* not obvious, but these are the same values as used in isakmp,
-+ * and in freeswan/ipsec_policy.h. If you need to add any, they
-+ * should be added as according to
-+ * http://www.iana.org/assignments/isakmp-registry
-+ *
-+ * and if not, then please try to use a private-use value, and
-+ * consider asking IANA to assign a value.
-+ */
-+#define SADB_AALG_NONE 0
-+#define SADB_AALG_MD5HMAC 2
-+#define SADB_AALG_SHA1HMAC 3
-+#define SADB_X_AALG_SHA2_256HMAC 5
-+#define SADB_X_AALG_SHA2_384HMAC 6
-+#define SADB_X_AALG_SHA2_512HMAC 7
-+#define SADB_X_AALG_RIPEMD160HMAC 8
-+#define SADB_X_AALG_NULL 251 /* kame */
-+enum sadb_aalg {
-+ K_SADB_AALG_NONE= SADB_AALG_NONE,
-+ K_SADB_AALG_MD5HMAC= SADB_AALG_MD5HMAC,
-+ K_SADB_AALG_SHA1HMAC= SADB_AALG_SHA1HMAC,
-+ K_SADB_X_AALG_SHA2_256HMAC=SADB_X_AALG_SHA2_256HMAC,
-+ K_SADB_X_AALG_SHA2_384HMAC=SADB_X_AALG_SHA2_384HMAC,
-+ K_SADB_X_AALG_SHA2_512HMAC=SADB_X_AALG_SHA2_512HMAC,
-+ K_SADB_X_AALG_RIPEMD160HMAC=SADB_X_AALG_RIPEMD160HMAC,
-+};
-+#define K_SADB_AALG_MAX 251
-+
-+#define SADB_EALG_NONE 0
-+#define SADB_EALG_DESCBC 2
-+#define SADB_EALG_3DESCBC 3
-+#define SADB_X_EALG_CASTCBC 6
-+#define SADB_X_EALG_BLOWFISHCBC 7
-+#define SADB_EALG_NULL 11
-+#define SADB_X_EALG_AESCBC 12
-+#define SADB_X_EALG_AESCTR 13
-+#define SADB_X_EALG_AES_CCM_ICV8 14
-+#define SADB_X_EALG_AES_CCM_ICV12 15
-+#define SADB_X_EALG_AES_CCM_ICV16 16
-+#define SADB_X_EALG_AES_GCM_ICV8 18
-+#define SADB_X_EALG_AES_GCM_ICV12 19
-+#define SADB_X_EALG_AES_GCM_ICV16 20
-+#define SADB_X_EALG_CAMELLIACBC 22
-+
-+enum sadb_ealg {
-+ K_SADB_EALG_NONE=SADB_EALG_NONE,
-+ K_SADB_EALG_DESCBC=SADB_EALG_DESCBC,
-+ K_SADB_EALG_3DESCBC=SADB_EALG_3DESCBC,
-+ K_SADB_X_EALG_CASTCBC=SADB_X_EALG_CASTCBC,
-+ K_SADB_X_EALG_BLOWFISHCBC=SADB_X_EALG_BLOWFISHCBC,
-+ K_SADB_EALG_NULL=SADB_EALG_NULL,
-+ K_SADB_X_EALG_AESCBC=SADB_X_EALG_AESCBC,
-+ K_SADB_X_EALG_AESCTR=SADB_X_EALG_AESCTR,
-+ K_SADB_X_EALG_AES_CCM_ICV8=SADB_X_EALG_AES_CCM_ICV8,
-+ K_SADB_X_EALG_AES_CCM_ICV12=SADB_X_EALG_AES_CCM_ICV12,
-+ K_SADB_X_EALG_AES_CCM_ICV16=SADB_X_EALG_AES_CCM_ICV16,
-+ K_SADB_X_EALG_AES_GCM_ICV8=SADB_X_EALG_AES_GCM_ICV8,
-+ K_SADB_X_EALG_AES_GCM_ICV12=SADB_X_EALG_AES_GCM_ICV12,
-+ K_SADB_X_EALG_AES_GCM_ICV16=SADB_X_EALG_AES_GCM_ICV16,
-+ K_SADB_X_EALG_CAMELLIACBC=SADB_X_EALG_CAMELLIACBC
-+};
-+
-+#undef SADB_EALG_MAX
-+#define K_SADB_EALG_MAX 255
-+
-+#define SADB_X_CALG_NONE 0
-+#define SADB_X_CALG_OUI 1
-+#define SADB_X_CALG_DEFLATE 2
-+#define SADB_X_CALG_LZS 3
-+#define SADB_X_CALG_LZJH 4
-+#define SADB_X_CALG_MAX 4
-+
-+enum sadb_talg {
-+ K_SADB_X_TALG_NONE=0,
-+ K_SADB_X_TALG_IPv4_in_IPv4=1,
-+ K_SADB_X_TALG_IPv6_in_IPv4=2,
-+ K_SADB_X_TALG_IPv4_in_IPv6=3,
-+ K_SADB_X_TALG_IPv6_in_IPv6=4,
-+};
-+#define SADB_X_TALG_MAX 4
-+
-+
-+#define SADB_IDENTTYPE_RESERVED 0
-+#define SADB_IDENTTYPE_PREFIX 1
-+#define SADB_IDENTTYPE_FQDN 2
-+#define SADB_IDENTTYPE_USERFQDN 3
-+#define SADB_X_IDENTTYPE_CONNECTION 4
-+#define K_SADB_IDENTTYPE_MAX 4
-+
-+#define K_SADB_KEY_FLAGS_MAX 0
-+#endif /* __PFKEY_V2_H */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/openswan/radij.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,280 @@
-+/*
-+ * RCSID $Id: radij.h,v 1.13 2004/04/05 19:55:08 mcr Exp $
-+ */
-+
-+/*
-+ * This file is defived from ${SRC}/sys/net/radix.h of BSD 4.4lite
-+ *
-+ * Variable and procedure names have been modified so that they don't
-+ * conflict with the original BSD code, as a small number of modifications
-+ * have been introduced and we may want to reuse this code in BSD.
-+ *
-+ * The `j' in `radij' is pronounced as a voiceless guttural (like a Greek
-+ * chi or a German ch sound (as `doch', not as in `milch'), or even a
-+ * spanish j as in Juan. It is not as far back in the throat like
-+ * the corresponding Hebrew sound, nor is it a soft breath like the English h.
-+ * It has nothing to do with the Dutch ij sound.
-+ *
-+ * Here is the appropriate copyright notice:
-+ */
-+
-+/*
-+ * Copyright (c) 1988, 1989, 1993
-+ * The Regents of the University of California. All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * This product includes software developed by the University of
-+ * California, Berkeley and its contributors.
-+ * 4. Neither the name of the University nor the names of its contributors
-+ * may be used to endorse or promote products derived from this software
-+ * without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * @(#)radix.h 8.1 (Berkeley) 6/10/93
-+ */
-+
-+#ifndef _RADIJ_H_
-+#define _RADIJ_H_
-+
-+/*
-+#define RJ_DEBUG
-+*/
-+
-+#ifdef __KERNEL__
-+
-+#ifndef __P
-+#ifdef __STDC__
-+#define __P(x) x
-+#else
-+#define __P(x) ()
-+#endif
-+#endif
-+
-+/*
-+ * Radix search tree node layout.
-+ */
-+
-+struct radij_node
-+{
-+ struct radij_mask *rj_mklist; /* list of masks contained in subtree */
-+ struct radij_node *rj_p; /* parent */
-+ short rj_b; /* bit offset; -1-index(netmask) */
-+ char rj_bmask; /* node: mask for bit test*/
-+ u_char rj_flags; /* enumerated next */
-+#define RJF_NORMAL 1 /* leaf contains normal route */
-+#define RJF_ROOT 2 /* leaf is root leaf for tree */
-+#define RJF_ACTIVE 4 /* This node is alive (for rtfree) */
-+ union {
-+ struct { /* leaf only data: */
-+ caddr_t rj_Key; /* object of search */
-+ caddr_t rj_Mask; /* netmask, if present */
-+ struct radij_node *rj_Dupedkey;
-+ } rj_leaf;
-+ struct { /* node only data: */
-+ int rj_Off; /* where to start compare */
-+ struct radij_node *rj_L;/* progeny */
-+ struct radij_node *rj_R;/* progeny */
-+ }rj_node;
-+ } rj_u;
-+#ifdef RJ_DEBUG
-+ int rj_info;
-+ struct radij_node *rj_twin;
-+ struct radij_node *rj_ybro;
-+#endif
-+};
-+
-+#define rj_dupedkey rj_u.rj_leaf.rj_Dupedkey
-+#define rj_key rj_u.rj_leaf.rj_Key
-+#define rj_mask rj_u.rj_leaf.rj_Mask
-+#define rj_off rj_u.rj_node.rj_Off
-+#define rj_l rj_u.rj_node.rj_L
-+#define rj_r rj_u.rj_node.rj_R
-+
-+/*
-+ * Annotations to tree concerning potential routes applying to subtrees.
-+ */
-+
-+extern struct radij_mask {
-+ short rm_b; /* bit offset; -1-index(netmask) */
-+ char rm_unused; /* cf. rj_bmask */
-+ u_char rm_flags; /* cf. rj_flags */
-+ struct radij_mask *rm_mklist; /* more masks to try */
-+ caddr_t rm_mask; /* the mask */
-+ int rm_refs; /* # of references to this struct */
-+} *rj_mkfreelist;
-+
-+#define MKGet(m) {\
-+ if (rj_mkfreelist) {\
-+ m = rj_mkfreelist; \
-+ rj_mkfreelist = (m)->rm_mklist; \
-+ } else \
-+ R_Malloc(m, struct radij_mask *, sizeof (*(m))); }\
-+
-+#define MKFree(m) { (m)->rm_mklist = rj_mkfreelist; rj_mkfreelist = (m);}
-+
-+struct radij_node_head {
-+ struct radij_node *rnh_treetop;
-+ int rnh_addrsize; /* permit, but not require fixed keys */
-+ int rnh_pktsize; /* permit, but not require fixed keys */
-+#if 0
-+ struct radij_node *(*rnh_addaddr) /* add based on sockaddr */
-+ __P((void *v, void *mask,
-+ struct radij_node_head *head, struct radij_node nodes[]));
-+#endif
-+ int (*rnh_addaddr) /* add based on sockaddr */
-+ __P((void *v, void *mask,
-+ struct radij_node_head *head, struct radij_node nodes[]));
-+ struct radij_node *(*rnh_addpkt) /* add based on packet hdr */
-+ __P((void *v, void *mask,
-+ struct radij_node_head *head, struct radij_node nodes[]));
-+#if 0
-+ struct radij_node *(*rnh_deladdr) /* remove based on sockaddr */
-+ __P((void *v, void *mask, struct radij_node_head *head));
-+#endif
-+ int (*rnh_deladdr) /* remove based on sockaddr */
-+ __P((void *v, void *mask, struct radij_node_head *head, struct radij_node **node));
-+ struct radij_node *(*rnh_delpkt) /* remove based on packet hdr */
-+ __P((void *v, void *mask, struct radij_node_head *head));
-+ struct radij_node *(*rnh_matchaddr) /* locate based on sockaddr */
-+ __P((void *v, struct radij_node_head *head));
-+ struct radij_node *(*rnh_matchpkt) /* locate based on packet hdr */
-+ __P((void *v, struct radij_node_head *head));
-+ int (*rnh_walktree) /* traverse tree */
-+ __P((struct radij_node_head *head, int (*f)(struct radij_node *rn, void *w), void *w));
-+ struct radij_node rnh_nodes[3]; /* empty tree for common case */
-+};
-+
-+
-+#define Bcmp(a, b, n) memcmp(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n))
-+#define Bcopy(a, b, n) memmove(((caddr_t)(b)), ((caddr_t)(a)), (unsigned)(n))
-+#define Bzero(p, n) memset((caddr_t)(p), 0, (unsigned)(n))
-+#define R_Malloc(p, t, n) ((p = (t) kmalloc((size_t)(n), GFP_ATOMIC)), Bzero((p),(n)))
-+#define Free(p) kfree((caddr_t)p);
-+
-+void rj_init __P((void));
-+int rj_inithead __P((void **, int));
-+int rj_refines __P((void *, void *));
-+int rj_walktree __P((struct radij_node_head *head, int (*f)(struct radij_node *rn, void *w), void *w));
-+struct radij_node
-+ *rj_addmask __P((void *, int, int)) /* , rgb */ ;
-+int /* * */ rj_addroute __P((void *, void *, struct radij_node_head *,
-+ struct radij_node [2])) /* , rgb */ ;
-+int /* * */ rj_delete __P((void *, void *, struct radij_node_head *, struct radij_node **)) /* , rgb */ ;
-+struct radij_node /* rgb */
-+ *rj_insert __P((void *, struct radij_node_head *, int *,
-+ struct radij_node [2])),
-+ *rj_match __P((void *, struct radij_node_head *)),
-+ *rj_newpair __P((void *, int, struct radij_node[2])),
-+ *rj_search __P((void *, struct radij_node *)),
-+ *rj_search_m __P((void *, struct radij_node *, void *));
-+
-+void rj_deltree(struct radij_node_head *);
-+void rj_delnodes(struct radij_node *);
-+void rj_free_mkfreelist(void);
-+int radijcleartree(void);
-+int radijcleanup(void);
-+
-+extern struct radij_node_head *mask_rjhead;
-+extern int maj_keylen;
-+#endif /* __KERNEL__ */
-+
-+#endif /* _RADIJ_H_ */
-+
-+
-+/*
-+ * $Log: radij.h,v $
-+ * Revision 1.13 2004/04/05 19:55:08 mcr
-+ * Moved from linux/include/freeswan/radij.h,v
-+ *
-+ * Revision 1.12 2002/04/24 07:36:48 mcr
-+ * Moved from ./klips/net/ipsec/radij.h,v
-+ *
-+ * Revision 1.11 2001/09/20 15:33:00 rgb
-+ * Min/max cleanup.
-+ *
-+ * Revision 1.10 1999/11/18 04:09:20 rgb
-+ * Replaced all kernel version macros to shorter, readable form.
-+ *
-+ * Revision 1.9 1999/05/05 22:02:33 rgb
-+ * Add a quick and dirty port to 2.2 kernels by Marc Boucher <marc@mbsi.ca>.
-+ *
-+ * Revision 1.8 1999/04/29 15:24:58 rgb
-+ * Add check for existence of macros min/max.
-+ *
-+ * Revision 1.7 1999/04/11 00:29:02 henry
-+ * GPL boilerplate
-+ *
-+ * Revision 1.6 1999/04/06 04:54:29 rgb
-+ * Fix/Add RCSID Id: and Log: bits to make PHMDs happy. This includes
-+ * patch shell fixes.
-+ *
-+ * Revision 1.5 1999/01/22 06:30:32 rgb
-+ * 64-bit clean-up.
-+ *
-+ * Revision 1.4 1998/11/30 13:22:55 rgb
-+ * Rationalised all the klips kernel file headers. They are much shorter
-+ * now and won't conflict under RH5.2.
-+ *
-+ * Revision 1.3 1998/10/25 02:43:27 rgb
-+ * Change return type on rj_addroute and rj_delete and add an argument
-+ * to the latter to be able to transmit more information about errors.
-+ *
-+ * Revision 1.2 1998/07/14 18:09:51 rgb
-+ * Add a routine to clear eroute table.
-+ * Added #ifdef __KERNEL__ directives to restrict scope of header.
-+ *
-+ * Revision 1.1 1998/06/18 21:30:22 henry
-+ * move sources from klips/src to klips/net/ipsec to keep stupid kernel
-+ * build scripts happier about symlinks
-+ *
-+ * Revision 1.4 1998/05/25 20:34:16 rgb
-+ * Remove temporary ipsec_walk, rj_deltree and rj_delnodes functions.
-+ *
-+ * Rename ipsec_rj_walker (ipsec_walk) to ipsec_rj_walker_procprint and
-+ * add ipsec_rj_walker_delete.
-+ *
-+ * Recover memory for eroute table on unload of module.
-+ *
-+ * Revision 1.3 1998/04/22 16:51:37 rgb
-+ * Tidy up radij debug code from recent rash of modifications to debug code.
-+ *
-+ * Revision 1.2 1998/04/14 17:30:38 rgb
-+ * Fix up compiling errors for radij tree memory reclamation.
-+ *
-+ * Revision 1.1 1998/04/09 03:06:16 henry
-+ * sources moved up from linux/net/ipsec
-+ *
-+ * Revision 1.1.1.1 1998/04/08 05:35:04 henry
-+ * RGB's ipsec-0.8pre2.tar.gz ipsec-0.8
-+ *
-+ * Revision 0.4 1997/01/15 01:28:15 ji
-+ * No changes.
-+ *
-+ * Revision 0.3 1996/11/20 14:44:45 ji
-+ * Release update only.
-+ *
-+ * Revision 0.2 1996/11/02 00:18:33 ji
-+ * First limited release.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/zlib/zconf.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,309 @@
-+/* zconf.h -- configuration of the zlib compression library
-+ * Copyright (C) 1995-2002 Jean-loup Gailly.
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* @(#) $Id: zconf.h,v 1.4 2004/07/10 07:48:40 mcr Exp $ */
-+
-+#ifndef _ZCONF_H
-+#define _ZCONF_H
-+
-+/*
-+ * If you *really* need a unique prefix for all types and library functions,
-+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
-+ */
-+#ifdef IPCOMP_PREFIX
-+# define deflateInit_ ipcomp_deflateInit_
-+# define deflate ipcomp_deflate
-+# define deflateEnd ipcomp_deflateEnd
-+# define inflateInit_ ipcomp_inflateInit_
-+# define inflate ipcomp_inflate
-+# define inflateEnd ipcomp_inflateEnd
-+# define deflateInit2_ ipcomp_deflateInit2_
-+# define deflateSetDictionary ipcomp_deflateSetDictionary
-+# define deflateCopy ipcomp_deflateCopy
-+# define deflateReset ipcomp_deflateReset
-+# define deflateParams ipcomp_deflateParams
-+# define inflateInit2_ ipcomp_inflateInit2_
-+# define inflateSetDictionary ipcomp_inflateSetDictionary
-+# define inflateSync ipcomp_inflateSync
-+# define inflateSyncPoint ipcomp_inflateSyncPoint
-+# define inflateReset ipcomp_inflateReset
-+# define compress ipcomp_compress
-+# define compress2 ipcomp_compress2
-+# define uncompress ipcomp_uncompress
-+# define adler32 ipcomp_adler32
-+# define crc32 ipcomp_crc32
-+# define get_crc_table ipcomp_get_crc_table
-+/* SSS: these also need to be prefixed to avoid clash with ppp_deflate and ext2compression */
-+# define inflate_blocks ipcomp_deflate_blocks
-+# define inflate_blocks_free ipcomp_deflate_blocks_free
-+# define inflate_blocks_new ipcomp_inflate_blocks_new
-+# define inflate_blocks_reset ipcomp_inflate_blocks_reset
-+# define inflate_blocks_sync_point ipcomp_inflate_blocks_sync_point
-+# define inflate_set_dictionary ipcomp_inflate_set_dictionary
-+# define inflate_codes ipcomp_inflate_codes
-+# define inflate_codes_free ipcomp_inflate_codes_free
-+# define inflate_codes_new ipcomp_inflate_codes_new
-+# define inflate_fast ipcomp_inflate_fast
-+# define inflate_trees_bits ipcomp_inflate_trees_bits
-+# define inflate_trees_dynamic ipcomp_inflate_trees_dynamic
-+# define inflate_trees_fixed ipcomp_inflate_trees_fixed
-+# define inflate_flush ipcomp_inflate_flush
-+# define inflate_mask ipcomp_inflate_mask
-+# define _dist_code _ipcomp_dist_code
-+# define _length_code _ipcomp_length_code
-+# define _tr_align _ipcomp_tr_align
-+# define _tr_flush_block _ipcomp_tr_flush_block
-+# define _tr_init _ipcomp_tr_init
-+# define _tr_stored_block _ipcomp_tr_stored_block
-+# define _tr_tally _ipcomp_tr_tally
-+# define zError ipcomp_zError
-+# define z_errmsg ipcomp_z_errmsg
-+# define zlibVersion ipcomp_zlibVersion
-+# define match_init ipcomp_match_init
-+# define longest_match ipcomp_longest_match
-+#endif
-+
-+#ifdef Z_PREFIX
-+# define Byte z_Byte
-+# define uInt z_uInt
-+# define uLong z_uLong
-+# define Bytef z_Bytef
-+# define charf z_charf
-+# define intf z_intf
-+# define uIntf z_uIntf
-+# define uLongf z_uLongf
-+# define voidpf z_voidpf
-+# define voidp z_voidp
-+#endif
-+
-+#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
-+# define WIN32
-+#endif
-+#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
-+# ifndef __32BIT__
-+# define __32BIT__
-+# endif
-+#endif
-+#if defined(__MSDOS__) && !defined(MSDOS)
-+# define MSDOS
-+#endif
-+
-+/*
-+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
-+ * than 64k bytes at a time (needed on systems with 16-bit int).
-+ */
-+#if defined(MSDOS) && !defined(__32BIT__)
-+# define MAXSEG_64K
-+#endif
-+#ifdef MSDOS
-+# define UNALIGNED_OK
-+#endif
-+
-+#if (defined(MSDOS) || defined(_WINDOWS) || defined(WIN32)) && !defined(STDC)
-+# define STDC
-+#endif
-+#if defined(__STDC__) || defined(__cplusplus) || defined(__OS2__)
-+# ifndef STDC
-+# define STDC
-+# endif
-+#endif
-+
-+#ifndef STDC
-+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
-+# define const
-+# endif
-+#endif
-+
-+/* Some Mac compilers merge all .h files incorrectly: */
-+#if defined(__MWERKS__) || defined(applec) ||defined(THINK_C) ||defined(__SC__)
-+# define NO_DUMMY_DECL
-+#endif
-+
-+/* Old Borland C incorrectly complains about missing returns: */
-+#if defined(__BORLANDC__) && (__BORLANDC__ < 0x500)
-+# define NEED_DUMMY_RETURN
-+#endif
-+
-+
-+/* Maximum value for memLevel in deflateInit2 */
-+#ifndef MAX_MEM_LEVEL
-+# ifdef MAXSEG_64K
-+# define MAX_MEM_LEVEL 8
-+# else
-+# define MAX_MEM_LEVEL 9
-+# endif
-+#endif
-+
-+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
-+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
-+ * created by gzip. (Files created by minigzip can still be extracted by
-+ * gzip.)
-+ */
-+#ifndef MAX_WBITS
-+# define MAX_WBITS 15 /* 32K LZ77 window */
-+#endif
-+
-+/* The memory requirements for deflate are (in bytes):
-+ (1 << (windowBits+2)) + (1 << (memLevel+9))
-+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
-+ plus a few kilobytes for small objects. For example, if you want to reduce
-+ the default memory requirements from 256K to 128K, compile with
-+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
-+ Of course this will generally degrade compression (there's no free lunch).
-+
-+ The memory requirements for inflate are (in bytes) 1 << windowBits
-+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
-+ for small objects.
-+*/
-+
-+ /* Type declarations */
-+
-+#ifndef OF /* function prototypes */
-+# ifdef STDC
-+# define OF(args) args
-+# else
-+# define OF(args) ()
-+# endif
-+#endif
-+
-+/* The following definitions for FAR are needed only for MSDOS mixed
-+ * model programming (small or medium model with some far allocations).
-+ * This was tested only with MSC; for other MSDOS compilers you may have
-+ * to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
-+ * just define FAR to be empty.
-+ */
-+#if (defined(M_I86SM) || defined(M_I86MM)) && !defined(__32BIT__)
-+ /* MSC small or medium model */
-+# define SMALL_MEDIUM
-+# ifdef _MSC_VER
-+# define FAR _far
-+# else
-+# define FAR far
-+# endif
-+#endif
-+#if defined(__BORLANDC__) && (defined(__SMALL__) || defined(__MEDIUM__))
-+# ifndef __32BIT__
-+# define SMALL_MEDIUM
-+# define FAR _far
-+# endif
-+#endif
-+
-+/* Compile with -DZLIB_DLL for Windows DLL support */
-+#if defined(ZLIB_DLL)
-+# if defined(_WINDOWS) || defined(WINDOWS)
-+# ifdef FAR
-+# undef FAR
-+# endif
-+# include <windows.h>
-+# define ZEXPORT WINAPI
-+# ifdef WIN32
-+# define ZEXPORTVA WINAPIV
-+# else
-+# define ZEXPORTVA FAR _cdecl _export
-+# endif
-+# endif
-+# if defined (__BORLANDC__)
-+# if (__BORLANDC__ >= 0x0500) && defined (WIN32)
-+# include <windows.h>
-+# define ZEXPORT __declspec(dllexport) WINAPI
-+# define ZEXPORTRVA __declspec(dllexport) WINAPIV
-+# else
-+# if defined (_Windows) && defined (__DLL__)
-+# define ZEXPORT _export
-+# define ZEXPORTVA _export
-+# endif
-+# endif
-+# endif
-+#endif
-+
-+#if defined (__BEOS__)
-+# if defined (ZLIB_DLL)
-+# define ZEXTERN extern __declspec(dllexport)
-+# else
-+# define ZEXTERN extern __declspec(dllimport)
-+# endif
-+#endif
-+
-+#ifndef ZEXPORT
-+# define ZEXPORT
-+#endif
-+#ifndef ZEXPORTVA
-+# define ZEXPORTVA
-+#endif
-+#ifndef ZEXTERN
-+# define ZEXTERN extern
-+#endif
-+
-+#ifndef FAR
-+# define FAR
-+#endif
-+
-+#if !defined(MACOS) && !defined(TARGET_OS_MAC)
-+typedef unsigned char Byte; /* 8 bits */
-+#endif
-+typedef unsigned int uInt; /* 16 bits or more */
-+typedef unsigned long uLong; /* 32 bits or more */
-+
-+#ifdef SMALL_MEDIUM
-+ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
-+# define Bytef Byte FAR
-+#else
-+ typedef Byte FAR Bytef;
-+#endif
-+typedef char FAR charf;
-+typedef int FAR intf;
-+typedef uInt FAR uIntf;
-+typedef uLong FAR uLongf;
-+
-+#ifdef STDC
-+ typedef void FAR *voidpf;
-+ typedef void *voidp;
-+#else
-+ typedef Byte FAR *voidpf;
-+ typedef Byte *voidp;
-+#endif
-+
-+#ifdef HAVE_UNISTD_H
-+# include <sys/types.h> /* for off_t */
-+# include <unistd.h> /* for SEEK_* and off_t */
-+# define z_off_t off_t
-+#endif
-+#ifndef SEEK_SET
-+# define SEEK_SET 0 /* Seek from beginning of file. */
-+# define SEEK_CUR 1 /* Seek from current position. */
-+# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
-+#endif
-+#ifndef z_off_t
-+# define z_off_t long
-+#endif
-+
-+/* MVS linker does not support external names larger than 8 bytes */
-+#if defined(__MVS__)
-+# pragma map(deflateInit_,"DEIN")
-+# pragma map(deflateInit2_,"DEIN2")
-+# pragma map(deflateEnd,"DEEND")
-+# pragma map(inflateInit_,"ININ")
-+# pragma map(inflateInit2_,"ININ2")
-+# pragma map(inflateEnd,"INEND")
-+# pragma map(inflateSync,"INSY")
-+# pragma map(inflateSetDictionary,"INSEDI")
-+# pragma map(inflate_blocks,"INBL")
-+# pragma map(inflate_blocks_new,"INBLNE")
-+# pragma map(inflate_blocks_free,"INBLFR")
-+# pragma map(inflate_blocks_reset,"INBLRE")
-+# pragma map(inflate_codes_free,"INCOFR")
-+# pragma map(inflate_codes,"INCO")
-+# pragma map(inflate_fast,"INFA")
-+# pragma map(inflate_flush,"INFLU")
-+# pragma map(inflate_mask,"INMA")
-+# pragma map(inflate_set_dictionary,"INSEDI2")
-+# pragma map(ipcomp_inflate_copyright,"INCOPY")
-+# pragma map(inflate_trees_bits,"INTRBI")
-+# pragma map(inflate_trees_dynamic,"INTRDY")
-+# pragma map(inflate_trees_fixed,"INTRFI")
-+# pragma map(inflate_trees_free,"INTRFR")
-+#endif
-+
-+#endif /* _ZCONF_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/zlib/zlib.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,893 @@
-+/* zlib.h -- interface of the 'zlib' general purpose compression library
-+ version 1.1.4, March 11th, 2002
-+
-+ Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler
-+
-+ This software is provided 'as-is', without any express or implied
-+ warranty. In no event will the authors be held liable for any damages
-+ arising from the use of this software.
-+
-+ Permission is granted to anyone to use this software for any purpose,
-+ including commercial applications, and to alter it and redistribute it
-+ freely, subject to the following restrictions:
-+
-+ 1. The origin of this software must not be misrepresented; you must not
-+ claim that you wrote the original software. If you use this software
-+ in a product, an acknowledgment in the product documentation would be
-+ appreciated but is not required.
-+ 2. Altered source versions must be plainly marked as such, and must not be
-+ misrepresented as being the original software.
-+ 3. This notice may not be removed or altered from any source distribution.
-+
-+ Jean-loup Gailly Mark Adler
-+ jloup@gzip.org madler@alumni.caltech.edu
-+
-+
-+ The data format used by the zlib library is described by RFCs (Request for
-+ Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
-+ (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
-+*/
-+
-+#ifndef _ZLIB_H
-+#define _ZLIB_H
-+
-+#include "zconf.h"
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+#define ZLIB_VERSION "1.1.4"
-+
-+/*
-+ The 'zlib' compression library provides in-memory compression and
-+ decompression functions, including integrity checks of the uncompressed
-+ data. This version of the library supports only one compression method
-+ (deflation) but other algorithms will be added later and will have the same
-+ stream interface.
-+
-+ Compression can be done in a single step if the buffers are large
-+ enough (for example if an input file is mmap'ed), or can be done by
-+ repeated calls of the compression function. In the latter case, the
-+ application must provide more input and/or consume the output
-+ (providing more output space) before each call.
-+
-+ The library also supports reading and writing files in gzip (.gz) format
-+ with an interface similar to that of stdio.
-+
-+ The library does not install any signal handler. The decoder checks
-+ the consistency of the compressed data, so the library should never
-+ crash even in case of corrupted input.
-+*/
-+
-+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-+typedef void (*free_func) OF((voidpf opaque, voidpf address));
-+
-+struct internal_state;
-+
-+typedef struct z_stream_s {
-+ Bytef *next_in; /* next input byte */
-+ uInt avail_in; /* number of bytes available at next_in */
-+ uLong total_in; /* total nb of input bytes read so far */
-+
-+ Bytef *next_out; /* next output byte should be put there */
-+ uInt avail_out; /* remaining free space at next_out */
-+ uLong total_out; /* total nb of bytes output so far */
-+
-+ const char *msg; /* last error message, NULL if no error */
-+ struct internal_state FAR *state; /* not visible by applications */
-+
-+ alloc_func zalloc; /* used to allocate the internal state */
-+ free_func zfree; /* used to free the internal state */
-+ voidpf opaque; /* private data object passed to zalloc and zfree */
-+
-+ int data_type; /* best guess about the data type: ascii or binary */
-+ uLong adler; /* adler32 value of the uncompressed data */
-+ uLong reserved; /* reserved for future use */
-+} z_stream;
-+
-+typedef z_stream FAR *z_streamp;
-+
-+/*
-+ The application must update next_in and avail_in when avail_in has
-+ dropped to zero. It must update next_out and avail_out when avail_out
-+ has dropped to zero. The application must initialize zalloc, zfree and
-+ opaque before calling the init function. All other fields are set by the
-+ compression library and must not be updated by the application.
-+
-+ The opaque value provided by the application will be passed as the first
-+ parameter for calls of zalloc and zfree. This can be useful for custom
-+ memory management. The compression library attaches no meaning to the
-+ opaque value.
-+
-+ zalloc must return Z_NULL if there is not enough memory for the object.
-+ If zlib is used in a multi-threaded application, zalloc and zfree must be
-+ thread safe.
-+
-+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
-+ exactly 65536 bytes, but will not be required to allocate more than this
-+ if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
-+ pointers returned by zalloc for objects of exactly 65536 bytes *must*
-+ have their offset normalized to zero. The default allocation function
-+ provided by this library ensures this (see zutil.c). To reduce memory
-+ requirements and avoid any allocation of 64K objects, at the expense of
-+ compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
-+
-+ The fields total_in and total_out can be used for statistics or
-+ progress reports. After compression, total_in holds the total size of
-+ the uncompressed data and may be saved for use in the decompressor
-+ (particularly if the decompressor wants to decompress everything in
-+ a single step).
-+*/
-+
-+ /* constants */
-+
-+#define Z_NO_FLUSH 0
-+#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
-+#define Z_SYNC_FLUSH 2
-+#define Z_FULL_FLUSH 3
-+#define Z_FINISH 4
-+/* Allowed flush values; see deflate() below for details */
-+
-+#define Z_OK 0
-+#define Z_STREAM_END 1
-+#define Z_NEED_DICT 2
-+#define Z_ERRNO (-1)
-+#define Z_STREAM_ERROR (-2)
-+#define Z_DATA_ERROR (-3)
-+#define Z_MEM_ERROR (-4)
-+#define Z_BUF_ERROR (-5)
-+#define Z_VERSION_ERROR (-6)
-+/* Return codes for the compression/decompression functions. Negative
-+ * values are errors, positive values are used for special but normal events.
-+ */
-+
-+#define Z_NO_COMPRESSION 0
-+#define Z_BEST_SPEED 1
-+#define Z_BEST_COMPRESSION 9
-+#define Z_DEFAULT_COMPRESSION (-1)
-+/* compression levels */
-+
-+#define Z_FILTERED 1
-+#define Z_HUFFMAN_ONLY 2
-+#define Z_DEFAULT_STRATEGY 0
-+/* compression strategy; see deflateInit2() below for details */
-+
-+#define Z_BINARY 0
-+#define Z_ASCII 1
-+#define Z_UNKNOWN 2
-+/* Possible values of the data_type field */
-+
-+#define Z_DEFLATED 8
-+/* The deflate compression method (the only one supported in this version) */
-+
-+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
-+
-+#define zlib_version zlibVersion()
-+/* for compatibility with versions < 1.0.2 */
-+
-+ /* basic functions */
-+
-+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
-+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
-+ If the first character differs, the library code actually used is
-+ not compatible with the zlib.h header file used by the application.
-+ This check is automatically made by deflateInit and inflateInit.
-+ */
-+
-+/*
-+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
-+
-+ Initializes the internal stream state for compression. The fields
-+ zalloc, zfree and opaque must be initialized before by the caller.
-+ If zalloc and zfree are set to Z_NULL, deflateInit updates them to
-+ use default allocation functions.
-+
-+ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
-+ 1 gives best speed, 9 gives best compression, 0 gives no compression at
-+ all (the input data is simply copied a block at a time).
-+ Z_DEFAULT_COMPRESSION requests a default compromise between speed and
-+ compression (currently equivalent to level 6).
-+
-+ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
-+ enough memory, Z_STREAM_ERROR if level is not a valid compression level,
-+ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
-+ with the version assumed by the caller (ZLIB_VERSION).
-+ msg is set to null if there is no error message. deflateInit does not
-+ perform any compression: this will be done by deflate().
-+*/
-+
-+
-+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
-+/*
-+ deflate compresses as much data as possible, and stops when the input
-+ buffer becomes empty or the output buffer becomes full. It may introduce some
-+ output latency (reading input without producing any output) except when
-+ forced to flush.
-+
-+ The detailed semantics are as follows. deflate performs one or both of the
-+ following actions:
-+
-+ - Compress more input starting at next_in and update next_in and avail_in
-+ accordingly. If not all input can be processed (because there is not
-+ enough room in the output buffer), next_in and avail_in are updated and
-+ processing will resume at this point for the next call of deflate().
-+
-+ - Provide more output starting at next_out and update next_out and avail_out
-+ accordingly. This action is forced if the parameter flush is non zero.
-+ Forcing flush frequently degrades the compression ratio, so this parameter
-+ should be set only when necessary (in interactive applications).
-+ Some output may be provided even if flush is not set.
-+
-+ Before the call of deflate(), the application should ensure that at least
-+ one of the actions is possible, by providing more input and/or consuming
-+ more output, and updating avail_in or avail_out accordingly; avail_out
-+ should never be zero before the call. The application can consume the
-+ compressed output when it wants, for example when the output buffer is full
-+ (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
-+ and with zero avail_out, it must be called again after making room in the
-+ output buffer because there might be more output pending.
-+
-+ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
-+ flushed to the output buffer and the output is aligned on a byte boundary, so
-+ that the decompressor can get all input data available so far. (In particular
-+ avail_in is zero after the call if enough output space has been provided
-+ before the call.) Flushing may degrade compression for some compression
-+ algorithms and so it should be used only when necessary.
-+
-+ If flush is set to Z_FULL_FLUSH, all output is flushed as with
-+ Z_SYNC_FLUSH, and the compression state is reset so that decompression can
-+ restart from this point if previous compressed data has been damaged or if
-+ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
-+ the compression.
-+
-+ If deflate returns with avail_out == 0, this function must be called again
-+ with the same value of the flush parameter and more output space (updated
-+ avail_out), until the flush is complete (deflate returns with non-zero
-+ avail_out).
-+
-+ If the parameter flush is set to Z_FINISH, pending input is processed,
-+ pending output is flushed and deflate returns with Z_STREAM_END if there
-+ was enough output space; if deflate returns with Z_OK, this function must be
-+ called again with Z_FINISH and more output space (updated avail_out) but no
-+ more input data, until it returns with Z_STREAM_END or an error. After
-+ deflate has returned Z_STREAM_END, the only possible operations on the
-+ stream are deflateReset or deflateEnd.
-+
-+ Z_FINISH can be used immediately after deflateInit if all the compression
-+ is to be done in a single step. In this case, avail_out must be at least
-+ 0.1% larger than avail_in plus 12 bytes. If deflate does not return
-+ Z_STREAM_END, then it must be called again as described above.
-+
-+ deflate() sets strm->adler to the adler32 checksum of all input read
-+ so far (that is, total_in bytes).
-+
-+ deflate() may update data_type if it can make a good guess about
-+ the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered
-+ binary. This field is only for information purposes and does not affect
-+ the compression algorithm in any manner.
-+
-+ deflate() returns Z_OK if some progress has been made (more input
-+ processed or more output produced), Z_STREAM_END if all input has been
-+ consumed and all output has been produced (only when flush is set to
-+ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
-+ if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
-+ (for example avail_in or avail_out was zero).
-+*/
-+
-+
-+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
-+/*
-+ All dynamically allocated data structures for this stream are freed.
-+ This function discards any unprocessed input and does not flush any
-+ pending output.
-+
-+ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
-+ stream state was inconsistent, Z_DATA_ERROR if the stream was freed
-+ prematurely (some input or output was discarded). In the error case,
-+ msg may be set but then points to a static string (which must not be
-+ deallocated).
-+*/
-+
-+
-+/*
-+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
-+
-+ Initializes the internal stream state for decompression. The fields
-+ next_in, avail_in, zalloc, zfree and opaque must be initialized before by
-+ the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
-+ value depends on the compression method), inflateInit determines the
-+ compression method from the zlib header and allocates all data structures
-+ accordingly; otherwise the allocation will be deferred to the first call of
-+ inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
-+ use default allocation functions.
-+
-+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
-+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
-+ version assumed by the caller. msg is set to null if there is no error
-+ message. inflateInit does not perform any decompression apart from reading
-+ the zlib header if present: this will be done by inflate(). (So next_in and
-+ avail_in may be modified, but next_out and avail_out are unchanged.)
-+*/
-+
-+
-+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
-+/*
-+ inflate decompresses as much data as possible, and stops when the input
-+ buffer becomes empty or the output buffer becomes full. It may
-+ introduce some output latency (reading input without producing any output)
-+ except when forced to flush.
-+
-+ The detailed semantics are as follows. inflate performs one or both of the
-+ following actions:
-+
-+ - Decompress more input starting at next_in and update next_in and avail_in
-+ accordingly. If not all input can be processed (because there is not
-+ enough room in the output buffer), next_in is updated and processing
-+ will resume at this point for the next call of inflate().
-+
-+ - Provide more output starting at next_out and update next_out and avail_out
-+ accordingly. inflate() provides as much output as possible, until there
-+ is no more input data or no more space in the output buffer (see below
-+ about the flush parameter).
-+
-+ Before the call of inflate(), the application should ensure that at least
-+ one of the actions is possible, by providing more input and/or consuming
-+ more output, and updating the next_* and avail_* values accordingly.
-+ The application can consume the uncompressed output when it wants, for
-+ example when the output buffer is full (avail_out == 0), or after each
-+ call of inflate(). If inflate returns Z_OK and with zero avail_out, it
-+ must be called again after making room in the output buffer because there
-+ might be more output pending.
-+
-+ If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much
-+ output as possible to the output buffer. The flushing behavior of inflate is
-+ not specified for values of the flush parameter other than Z_SYNC_FLUSH
-+ and Z_FINISH, but the current implementation actually flushes as much output
-+ as possible anyway.
-+
-+ inflate() should normally be called until it returns Z_STREAM_END or an
-+ error. However if all decompression is to be performed in a single step
-+ (a single call of inflate), the parameter flush should be set to
-+ Z_FINISH. In this case all pending input is processed and all pending
-+ output is flushed; avail_out must be large enough to hold all the
-+ uncompressed data. (The size of the uncompressed data may have been saved
-+ by the compressor for this purpose.) The next operation on this stream must
-+ be inflateEnd to deallocate the decompression state. The use of Z_FINISH
-+ is never required, but can be used to inform inflate that a faster routine
-+ may be used for the single inflate() call.
-+
-+ If a preset dictionary is needed at this point (see inflateSetDictionary
-+ below), inflate sets strm->adler to the adler32 checksum of the
-+ dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise
-+ it sets strm->adler to the adler32 checksum of all output produced
-+ so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or
-+ an error code as described below. At the end of the stream, inflate()
-+ checks that its computed adler32 checksum is equal to that saved by the
-+ compressor and returns Z_STREAM_END only if the checksum is correct.
-+
-+ inflate() returns Z_OK if some progress has been made (more input processed
-+ or more output produced), Z_STREAM_END if the end of the compressed data has
-+ been reached and all uncompressed output has been produced, Z_NEED_DICT if a
-+ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
-+ corrupted (input stream not conforming to the zlib format or incorrect
-+ adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent
-+ (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not
-+ enough memory, Z_BUF_ERROR if no progress is possible or if there was not
-+ enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR
-+ case, the application may then call inflateSync to look for a good
-+ compression block.
-+*/
-+
-+
-+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
-+/*
-+ All dynamically allocated data structures for this stream are freed.
-+ This function discards any unprocessed input and does not flush any
-+ pending output.
-+
-+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
-+ was inconsistent. In the error case, msg may be set but then points to a
-+ static string (which must not be deallocated).
-+*/
-+
-+ /* Advanced functions */
-+
-+/*
-+ The following functions are needed only in some special applications.
-+*/
-+
-+/*
-+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
-+ int level,
-+ int method,
-+ int windowBits,
-+ int memLevel,
-+ int strategy));
-+
-+ This is another version of deflateInit with more compression options. The
-+ fields next_in, zalloc, zfree and opaque must be initialized before by
-+ the caller.
-+
-+ The method parameter is the compression method. It must be Z_DEFLATED in
-+ this version of the library.
-+
-+ The windowBits parameter is the base two logarithm of the window size
-+ (the size of the history buffer). It should be in the range 8..15 for this
-+ version of the library. Larger values of this parameter result in better
-+ compression at the expense of memory usage. The default value is 15 if
-+ deflateInit is used instead.
-+
-+ The memLevel parameter specifies how much memory should be allocated
-+ for the internal compression state. memLevel=1 uses minimum memory but
-+ is slow and reduces compression ratio; memLevel=9 uses maximum memory
-+ for optimal speed. The default value is 8. See zconf.h for total memory
-+ usage as a function of windowBits and memLevel.
-+
-+ The strategy parameter is used to tune the compression algorithm. Use the
-+ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
-+ filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no
-+ string match). Filtered data consists mostly of small values with a
-+ somewhat random distribution. In this case, the compression algorithm is
-+ tuned to compress them better. The effect of Z_FILTERED is to force more
-+ Huffman coding and less string matching; it is somewhat intermediate
-+ between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. The strategy parameter only affects
-+ the compression ratio but not the correctness of the compressed output even
-+ if it is not set appropriately.
-+
-+ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-+ memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
-+ method). msg is set to null if there is no error message. deflateInit2 does
-+ not perform any compression: this will be done by deflate().
-+*/
-+
-+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
-+ const Bytef *dictionary,
-+ uInt dictLength));
-+/*
-+ Initializes the compression dictionary from the given byte sequence
-+ without producing any compressed output. This function must be called
-+ immediately after deflateInit, deflateInit2 or deflateReset, before any
-+ call of deflate. The compressor and decompressor must use exactly the same
-+ dictionary (see inflateSetDictionary).
-+
-+ The dictionary should consist of strings (byte sequences) that are likely
-+ to be encountered later in the data to be compressed, with the most commonly
-+ used strings preferably put towards the end of the dictionary. Using a
-+ dictionary is most useful when the data to be compressed is short and can be
-+ predicted with good accuracy; the data can then be compressed better than
-+ with the default empty dictionary.
-+
-+ Depending on the size of the compression data structures selected by
-+ deflateInit or deflateInit2, a part of the dictionary may in effect be
-+ discarded, for example if the dictionary is larger than the window size
-+ selected by deflateInit or deflateInit2. Thus the strings most likely to be useful should be
-+ put at the end of the dictionary, not at the front.
-+
-+ Upon return of this function, strm->adler is set to the Adler32 value
-+ of the dictionary; the decompressor may later use this value to determine
-+ which dictionary has been used by the compressor. (The Adler32 value
-+ applies to the whole dictionary even if only a subset of the dictionary is
-+ actually used by the compressor.)
-+
-+ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-+ parameter is invalid (such as NULL dictionary) or the stream state is
-+ inconsistent (for example if deflate has already been called for this stream
-+ or if the compression method is bsort). deflateSetDictionary does not
-+ perform any compression: this will be done by deflate().
-+*/
-+
-+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
-+ z_streamp source));
-+/*
-+ Sets the destination stream as a complete copy of the source stream.
-+
-+ This function can be useful when several compression strategies will be
-+ tried, for example when there are several ways of pre-processing the input
-+ data with a filter. The streams that will be discarded should then be freed
-+ by calling deflateEnd. Note that deflateCopy duplicates the internal
-+ compression state which can be quite large, so this strategy is slow and
-+ can consume lots of memory.
-+
-+ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-+ (such as zalloc being NULL). msg is left unchanged in both source and
-+ destination.
-+*/
-+
-+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
-+/*
-+ This function is equivalent to deflateEnd followed by deflateInit,
-+ but does not free and reallocate all the internal compression state.
-+ The stream will keep the same compression level and any other attributes
-+ that may have been set by deflateInit2.
-+
-+ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-+ stream state was inconsistent (such as zalloc or state being NULL).
-+*/
-+
-+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
-+ int level,
-+ int strategy));
-+/*
-+ Dynamically update the compression level and compression strategy. The
-+ interpretation of level and strategy is as in deflateInit2. This can be
-+ used to switch between compression and straight copy of the input data, or
-+ to switch to a different kind of input data requiring a different
-+ strategy. If the compression level is changed, the input available so far
-+ is compressed with the old level (and may be flushed); the new level will
-+ take effect only at the next call of deflate().
-+
-+ Before the call of deflateParams, the stream state must be set as for
-+ a call of deflate(), since the currently available input may have to
-+ be compressed and flushed. In particular, strm->avail_out must be non-zero.
-+
-+ deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-+ stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
-+ if strm->avail_out was zero.
-+*/
-+
-+/*
-+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
-+ int windowBits));
-+
-+ This is another version of inflateInit with an extra parameter. The
-+ fields next_in, avail_in, zalloc, zfree and opaque must be initialized
-+ before by the caller.
-+
-+ The windowBits parameter is the base two logarithm of the maximum window
-+ size (the size of the history buffer). It should be in the range 8..15 for
-+ this version of the library. The default value is 15 if inflateInit is used
-+ instead. If a compressed stream with a larger window size is given as
-+ input, inflate() will return with the error code Z_DATA_ERROR instead of
-+ trying to allocate a larger window.
-+
-+ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-+ memory, Z_STREAM_ERROR if a parameter is invalid (such as a windowBits
-+ value out of range). msg is set to null if there is no error message. inflateInit2
-+ does not perform any decompression apart from reading the zlib header if
-+ present: this will be done by inflate(). (So next_in and avail_in may be
-+ modified, but next_out and avail_out are unchanged.)
-+*/
-+
-+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
-+ const Bytef *dictionary,
-+ uInt dictLength));
-+/*
-+ Initializes the decompression dictionary from the given uncompressed byte
-+ sequence. This function must be called immediately after a call of inflate
-+ if this call returned Z_NEED_DICT. The dictionary chosen by the compressor
-+ can be determined from the Adler32 value returned by this call of
-+ inflate. The compressor and decompressor must use exactly the same
-+ dictionary (see deflateSetDictionary).
-+
-+ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-+ parameter is invalid (such as NULL dictionary) or the stream state is
-+ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-+ expected one (incorrect Adler32 value). inflateSetDictionary does not
-+ perform any decompression: this will be done by subsequent calls of
-+ inflate().
-+*/
-+
-+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
-+/*
-+ Skips invalid compressed data until a full flush point (see above the
-+ description of deflate with Z_FULL_FLUSH) can be found, or until all
-+ available input is skipped. No output is provided.
-+
-+ inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
-+ if no more input was provided, Z_DATA_ERROR if no flush point has been found,
-+ or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
-+ case, the application may save the current value of total_in which
-+ indicates where valid compressed data was found. In the error case, the
-+ application may repeatedly call inflateSync, providing more input each time,
-+ until success or end of the input data.
-+*/
-+
-+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
-+/*
-+ This function is equivalent to inflateEnd followed by inflateInit,
-+ but does not free and reallocate all the internal decompression state.
-+ The stream will keep attributes that may have been set by inflateInit2.
-+
-+ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-+ stream state was inconsistent (such as zalloc or state being NULL).
-+*/
-+
-+
-+ /* utility functions */
-+
-+/*
-+ The following utility functions are implemented on top of the
-+ basic stream-oriented functions. To simplify the interface, some
-+ default options are assumed (compression level and memory usage,
-+ standard memory allocation functions). The source code of these
-+ utility functions can easily be modified if you need special options.
-+*/
-+
-+ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
-+ const Bytef *source, uLong sourceLen));
-+/*
-+ Compresses the source buffer into the destination buffer. sourceLen is
-+ the byte length of the source buffer. Upon entry, destLen is the total
-+ size of the destination buffer, which must be at least 0.1% larger than
-+ sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
-+ compressed buffer.
-+ This function can be used to compress a whole file at once if the
-+ input file is mmap'ed.
-+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
-+ enough memory, Z_BUF_ERROR if there was not enough room in the output
-+ buffer.
-+*/
-+
-+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen,
-+ const Bytef *source, uLong sourceLen,
-+ int level));
-+/*
-+ Compresses the source buffer into the destination buffer. The level
-+ parameter has the same meaning as in deflateInit. sourceLen is the byte
-+ length of the source buffer. Upon entry, destLen is the total size of the
-+ destination buffer, which must be at least 0.1% larger than sourceLen plus
-+ 12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
-+
-+ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-+ memory, Z_BUF_ERROR if there was not enough room in the output buffer,
-+ Z_STREAM_ERROR if the level parameter is invalid.
-+*/
-+
-+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
-+ const Bytef *source, uLong sourceLen));
-+/*
-+ Decompresses the source buffer into the destination buffer. sourceLen is
-+ the byte length of the source buffer. Upon entry, destLen is the total
-+ size of the destination buffer, which must be large enough to hold the
-+ entire uncompressed data. (The size of the uncompressed data must have
-+ been saved previously by the compressor and transmitted to the decompressor
-+ by some mechanism outside the scope of this compression library.)
-+ Upon exit, destLen is the actual size of the uncompressed buffer.
-+ This function can be used to decompress a whole file at once if the
-+ input file is mmap'ed.
-+
-+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
-+ enough memory, Z_BUF_ERROR if there was not enough room in the output
-+ buffer, or Z_DATA_ERROR if the input data was corrupted.
-+*/
-+
-+
-+typedef voidp gzFile;
-+
-+ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
-+/*
-+ Opens a gzip (.gz) file for reading or writing. The mode parameter
-+ is as in fopen ("rb" or "wb") but can also include a compression level
-+ ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
-+ Huffman only compression as in "wb1h". (See the description
-+ of deflateInit2 for more information about the strategy parameter.)
-+
-+ gzopen can be used to read a file which is not in gzip format; in this
-+ case gzread will directly read from the file without decompression.
-+
-+ gzopen returns NULL if the file could not be opened or if there was
-+ insufficient memory to allocate the (de)compression state; errno
-+ can be checked to distinguish the two cases (if errno is zero, the
-+ zlib error is Z_MEM_ERROR). */
-+
-+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
-+/*
-+ gzdopen() associates a gzFile with the file descriptor fd. File
-+ descriptors are obtained from calls like open, dup, creat, pipe or
-+ fileno (if the file has been previously opened with fopen).
-+ The mode parameter is as in gzopen.
-+ The next call of gzclose on the returned gzFile will also close the
-+ file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
-+ descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
-+ gzdopen returns NULL if there was insufficient memory to allocate
-+ the (de)compression state.
-+*/
-+
-+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
-+/*
-+ Dynamically update the compression level or strategy. See the description
-+ of deflateInit2 for the meaning of these parameters.
-+ gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
-+ opened for writing.
-+*/
-+
-+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
-+/*
-+ Reads the given number of uncompressed bytes from the compressed file.
-+ If the input file was not in gzip format, gzread copies the given number
-+ of bytes into the buffer.
-+ gzread returns the number of uncompressed bytes actually read (0 for
-+ end of file, -1 for error). */
-+
-+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
-+ const voidp buf, unsigned len));
-+/*
-+ Writes the given number of uncompressed bytes into the compressed file.
-+ gzwrite returns the number of uncompressed bytes actually written
-+ (0 in case of error).
-+*/
-+
-+ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...));
-+/*
-+ Converts, formats, and writes the args to the compressed file under
-+ control of the format string, as in fprintf. gzprintf returns the number of
-+ uncompressed bytes actually written (0 in case of error).
-+*/
-+
-+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
-+/*
-+ Writes the given null-terminated string to the compressed file, excluding
-+ the terminating null character.
-+ gzputs returns the number of characters written, or -1 in case of error.
-+*/
-+
-+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
-+/*
-+ Reads bytes from the compressed file until len-1 characters are read, or
-+ a newline character is read and transferred to buf, or an end-of-file
-+ condition is encountered. The string is then terminated with a null
-+ character.
-+ gzgets returns buf, or Z_NULL in case of error.
-+*/
-+
-+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
-+/*
-+ Writes c, converted to an unsigned char, into the compressed file.
-+ gzputc returns the value that was written, or -1 in case of error.
-+*/
-+
-+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
-+/*
-+ Reads one byte from the compressed file. gzgetc returns this byte
-+ or -1 in case of end of file or error.
-+*/
-+
-+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
-+/*
-+ Flushes all pending output into the compressed file. The parameter
-+ flush is as in the deflate() function. The return value is the zlib
-+ error number (see function gzerror below). gzflush returns Z_OK if
-+ the flush parameter is Z_FINISH and all output could be flushed.
-+ gzflush should be called only when strictly necessary because it can
-+ degrade compression.
-+*/
-+
-+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
-+ z_off_t offset, int whence));
-+/*
-+ Sets the starting position for the next gzread or gzwrite on the
-+ given compressed file. The offset represents a number of bytes in the
-+ uncompressed data stream. The whence parameter is defined as in lseek(2);
-+ the value SEEK_END is not supported.
-+ If the file is opened for reading, this function is emulated but can be
-+ extremely slow. If the file is opened for writing, only forward seeks are
-+ supported; gzseek then compresses a sequence of zeroes up to the new
-+ starting position.
-+
-+ gzseek returns the resulting offset location as measured in bytes from
-+ the beginning of the uncompressed stream, or -1 in case of error, in
-+ particular if the file is opened for writing and the new starting position
-+ would be before the current position.
-+*/
-+
-+ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
-+/*
-+ Rewinds the given file. This function is supported only for reading.
-+
-+ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
-+*/
-+
-+ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
-+/*
-+ Returns the starting position for the next gzread or gzwrite on the
-+ given compressed file. This position represents a number of bytes in the
-+ uncompressed data stream.
-+
-+ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
-+*/
-+
-+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
-+/*
-+ Returns 1 when EOF has previously been detected reading the given
-+ input stream, otherwise zero.
-+*/
-+
-+ZEXTERN int ZEXPORT gzclose OF((gzFile file));
-+/*
-+ Flushes all pending output if necessary, closes the compressed file
-+ and deallocates all the (de)compression state. The return value is the zlib
-+ error number (see function gzerror below).
-+*/
-+
-+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
-+/*
-+ Returns the error message for the last error which occurred on the
-+ given compressed file. errnum is set to zlib error number. If an
-+ error occurred in the file system and not in the compression library,
-+ errnum is set to Z_ERRNO and the application may consult errno
-+ to get the exact error code.
-+*/
-+
-+ /* checksum functions */
-+
-+/*
-+ These functions are not related to compression but are exported
-+ anyway because they might be useful in applications using the
-+ compression library.
-+*/
-+
-+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
-+
-+/*
-+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
-+ return the updated checksum. If buf is NULL, this function returns
-+ the required initial value for the checksum.
-+ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
-+ much faster. Usage example:
-+
-+ uLong adler = adler32(0L, Z_NULL, 0);
-+
-+ while (read_buffer(buffer, length) != EOF) {
-+ adler = adler32(adler, buffer, length);
-+ }
-+ if (adler != original_adler) error();
-+*/
-+
-+ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
-+/*
-+ Update a running crc with the bytes buf[0..len-1] and return the updated
-+ crc. If buf is NULL, this function returns the required initial value
-+ for the crc. Pre- and post-conditioning (one's complement) is performed
-+ within this function so it shouldn't be done by the application.
-+ Usage example:
-+
-+ uLong crc = crc32(0L, Z_NULL, 0);
-+
-+ while (read_buffer(buffer, length) != EOF) {
-+ crc = crc32(crc, buffer, length);
-+ }
-+ if (crc != original_crc) error();
-+*/
-+
-+
-+ /* various hacks, don't look :) */
-+
-+/* deflateInit and inflateInit are macros to allow checking the zlib version
-+ * and the compiler's view of z_stream:
-+ */
-+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
-+ const char *version, int stream_size));
-+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
-+ const char *version, int stream_size));
-+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
-+ int windowBits, int memLevel,
-+ int strategy, const char *version,
-+ int stream_size));
-+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
-+ const char *version, int stream_size));
-+#define deflateInit(strm, level) \
-+ deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream))
-+#define inflateInit(strm) \
-+ inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream))
-+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
-+ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
-+ (strategy), ZLIB_VERSION, sizeof(z_stream))
-+#define inflateInit2(strm, windowBits) \
-+ inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
-+
-+
-+#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL)
-+ struct internal_state {int dummy;}; /* hack for buggy compilers */
-+#endif
-+
-+ZEXTERN const char * ZEXPORT zError OF((int err));
-+ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z));
-+ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void));
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif /* _ZLIB_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/include/zlib/zutil.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,225 @@
-+/* zutil.h -- internal interface and configuration of the compression library
-+ * Copyright (C) 1995-2002 Jean-loup Gailly.
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+/* @(#) $Id: zutil.h,v 1.4 2002/04/24 07:36:48 mcr Exp $ */
-+
-+#ifndef _Z_UTIL_H
-+#define _Z_UTIL_H
-+
-+#include "zlib.h"
-+
-+#include <linux/string.h>
-+#define HAVE_MEMCPY
-+
-+#if 0 // #ifdef STDC
-+# include <stddef.h>
-+# include <string.h>
-+# include <stdlib.h>
-+#endif
-+#ifndef __KERNEL__
-+#ifdef NO_ERRNO_H
-+ extern int errno;
-+#else
-+# include <errno.h>
-+#endif
-+#endif
-+
-+#ifndef local
-+# define local static
-+#endif
-+/* compile with -Dlocal if your debugger can't find static symbols */
-+
-+typedef unsigned char uch;
-+typedef uch FAR uchf;
-+typedef unsigned short ush;
-+typedef ush FAR ushf;
-+typedef unsigned long ulg;
-+
-+extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */
-+/* (size given to avoid silly warnings with Visual C++) */
-+
-+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
-+
-+#define ERR_RETURN(strm,err) \
-+ return (strm->msg = ERR_MSG(err), (err))
-+/* To be used only when the state is known to be valid */
-+
-+ /* common constants */
-+
-+#ifndef DEF_WBITS
-+# define DEF_WBITS MAX_WBITS
-+#endif
-+/* default windowBits for decompression. MAX_WBITS is for compression only */
-+
-+#if MAX_MEM_LEVEL >= 8
-+# define DEF_MEM_LEVEL 8
-+#else
-+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
-+#endif
-+/* default memLevel */
-+
-+#define STORED_BLOCK 0
-+#define STATIC_TREES 1
-+#define DYN_TREES 2
-+/* The three kinds of block type */
-+
-+#define MIN_MATCH 3
-+#define MAX_MATCH 258
-+/* The minimum and maximum match lengths */
-+
-+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
-+
-+ /* target dependencies */
-+
-+#ifdef MSDOS
-+# define OS_CODE 0x00
-+# if defined(__TURBOC__) || defined(__BORLANDC__)
-+# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
-+ /* Allow compilation with ANSI keywords only enabled */
-+ void _Cdecl farfree( void *block );
-+ void *_Cdecl farmalloc( unsigned long nbytes );
-+# else
-+# include <alloc.h>
-+# endif
-+# else /* MSC or DJGPP */
-+# include <malloc.h>
-+# endif
-+#endif
-+
-+#ifdef OS2
-+# define OS_CODE 0x06
-+#endif
-+
-+#ifdef WIN32 /* Window 95 & Windows NT */
-+# define OS_CODE 0x0b
-+#endif
-+
-+#if defined(VAXC) || defined(VMS)
-+# define OS_CODE 0x02
-+# define F_OPEN(name, mode) \
-+ fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
-+#endif
-+
-+#ifdef AMIGA
-+# define OS_CODE 0x01
-+#endif
-+
-+#if defined(ATARI) || defined(atarist)
-+# define OS_CODE 0x05
-+#endif
-+
-+#if defined(MACOS) || defined(TARGET_OS_MAC)
-+# define OS_CODE 0x07
-+# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-+# include <unix.h> /* for fdopen */
-+# else
-+# ifndef fdopen
-+# define fdopen(fd,mode) NULL /* No fdopen() */
-+# endif
-+# endif
-+#endif
-+
-+#ifdef __50SERIES /* Prime/PRIMOS */
-+# define OS_CODE 0x0F
-+#endif
-+
-+#ifdef TOPS20
-+# define OS_CODE 0x0a
-+#endif
-+
-+#if defined(_BEOS_) || defined(RISCOS)
-+# define fdopen(fd,mode) NULL /* No fdopen() */
-+#endif
-+
-+#if (defined(_MSC_VER) && (_MSC_VER > 600))
-+# define fdopen(fd,type) _fdopen(fd,type)
-+#endif
-+
-+
-+ /* Common defaults */
-+
-+#ifndef OS_CODE
-+# define OS_CODE 0x03 /* assume Unix */
-+#endif
-+
-+#ifndef F_OPEN
-+# define F_OPEN(name, mode) fopen((name), (mode))
-+#endif
-+
-+ /* functions */
-+
-+#ifdef HAVE_STRERROR
-+ extern char *strerror OF((int));
-+# define zstrerror(errnum) strerror(errnum)
-+#else
-+# define zstrerror(errnum) ""
-+#endif
-+
-+#if defined(pyr)
-+# define NO_MEMCPY
-+#endif
-+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
-+ /* Use our own functions for small and medium model with MSC <= 5.0.
-+ * You may have to use the same strategy for Borland C (untested).
-+ * The __SC__ check is for Symantec.
-+ */
-+# define NO_MEMCPY
-+#endif
-+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
-+# define HAVE_MEMCPY
-+#endif
-+#ifdef HAVE_MEMCPY
-+# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
-+# define zmemcpy _fmemcpy
-+# define zmemcmp _fmemcmp
-+# define zmemzero(dest, len) _fmemset(dest, 0, len)
-+# else
-+# define zmemcpy memcpy
-+# define zmemcmp memcmp
-+# define zmemzero(dest, len) memset(dest, 0, len)
-+# endif
-+#else
-+ extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
-+ extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
-+ extern void zmemzero OF((Bytef* dest, uInt len));
-+#endif
-+
-+/* Diagnostic functions */
-+#ifdef DEBUG
-+# include <stdio.h>
-+ extern int z_verbose;
-+ extern void z_error OF((char *m));
-+# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
-+# define Trace(x) {if (z_verbose>=0) fprintf x ;}
-+# define Tracev(x) {if (z_verbose>0) fprintf x ;}
-+# define Tracevv(x) {if (z_verbose>1) fprintf x ;}
-+# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
-+# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
-+#else
-+# define Assert(cond,msg)
-+# define Trace(x)
-+# define Tracev(x)
-+# define Tracevv(x)
-+# define Tracec(c,x)
-+# define Tracecv(c,x)
-+#endif
-+
-+
-+typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf,
-+ uInt len));
-+voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
-+void zcfree OF((voidpf opaque, voidpf ptr));
-+
-+#define ZALLOC(strm, items, size) \
-+ (*((strm)->zalloc))((strm)->opaque, (items), (size))
-+#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
-+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
-+
-+#endif /* _Z_UTIL_H */
---- swan26/net/Kconfig.preipsec 2005-09-01 18:15:19.000000000 -0400
-+++ swan26/net/Kconfig 2005-09-03 16:51:17.000000000 -0400
-@@ -215,2 +215,6 @@
-
-+if INET
-+source "net/ipsec/Kconfig"
-+endif # if INET
-+
- endif # if NET
---- /distros/kernel/linux-2.6.3-rc4/net/Makefile Mon Feb 16 21:22:12 2004
-+++ ref26/net/Makefile Thu Feb 19 21:02:25 2004
-@@ -42,3 +42,6 @@
- ifeq ($(CONFIG_NET),y)
- obj-$(CONFIG_SYSCTL) += sysctl_net.o
- endif
-+
-+obj-$(CONFIG_KLIPS) += ipsec/
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/Kconfig Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,143 @@
-+#
-+# IPSEC configuration
-+# Copyright (C) 2004 Michael Richardson <mcr@freeswan.org>
-+#
-+# This program is free software; you can redistribute it and/or modify it
-+# under the terms of the GNU General Public License as published by the
-+# Free Software Foundation; either version 2 of the License, or (at your
-+# option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+#
-+# This program is distributed in the hope that it will be useful, but
-+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+# for more details.
-+#
-+
-+config KLIPS
-+ tristate "Openswan IPsec (KLIPS)"
-+ default n
-+# depends on NF_CONNTRACK && NETFILTER
-+ help
-+ KLIPS is the Openswan (www.openswan.org) Kernel Level IP Security
-+ system. It is extensively tested, and has interoperated with
-+ many other systems.
-+ It provides "ipsecX" devices on which one can do firewalling.
-+ The Openswan userland, is compatible with both KLIPS and NETKEY
-+ You cannot build KLIPS and NETKEY inline into the kernel.
-+
-+menu "KLIPS options"
-+ depends on KLIPS
-+
-+config KLIPS_ESP
-+ bool 'Encapsulating Security Payload - ESP ("VPN")'
-+ default y
-+ help
-+ This option provides support for the IPSEC Encapsulation Security
-+ Payload (IP protocol 50) which provides packet layer content
-+ hiding, and content authentication.
-+ It is recommended to enable this. RFC2406
-+
-+config KLIPS_AH
-+ bool 'Authentication Header - AH'
-+ default n
-+ help
-+ This option provides support for the IPSEC Authentication Header
-+ (IP protocol 51) which provides packet layer sender and content
-+ authentication. It does not provide for confidentiality.
-+ It is not recommended to enable this. RFC2402
-+
-+config KLIPS_AUTH_HMAC_MD5
-+ bool 'HMAC-MD5 authentication algorithm'
-+ default y
-+ help
-+ The HMAC-MD5 algorithm is used by ESP (and AH) to guarantee packet
-+ integrity. There is little reason not to include it.
-+
-+config KLIPS_AUTH_HMAC_SHA1
-+ bool 'HMAC-SHA1 authentication algorithm'
-+ default y
-+ help
-+ The HMAC-SHA1 algorithm is used by ESP (and AH) to guarantee packet
-+ integrity. SHA1 is a little slower than MD5, but is said to be
-+ a bit more secure. There is little reason not to include it.
-+
-+config KLIPS_ALG
-+ bool 'KLIPS_ALG software encryption'
-+ default y
-+ help
-+ This option provides support for loading new algorithms into the
-+ kernel for crypto use. You may disable this if using the
-+ CONFIG_KLIPS_OCF option for hardware offload.
-+
-+config KLIPS_ENC_CRYPTOAPI
-+ bool 'CryptoAPI algorithm interface'
-+ default n
-+ depends on KLIPS_ALG
-+ help
-+ Enable the algorithm interface to make all CryptoAPI 1.0 algorithms
-+ available to KLIPS.
-+
-+config KLIPS_ENC_1DES
-+ bool 'Include 1DES with CryptoAPI'
-+ default n
-+ depends on KLIPS_ENC_CRYPTOAPI
-+ help
-+ The CryptoAPI interface does not include support for every algorithm
-+ yet, and one that it doesn't support by default is the VERY WEAK
-+ 1DES. Select this if you are terminally stupid.
-+
-+config KLIPS_ENC_3DES
-+ bool '3DES encryption algorithm'
-+ default y
-+ help
-+ The 3DES algorithm is used by ESP to provide for packet privacy.
-+ 3DES is 3-repeats of the DES algorithm. 3DES is widely supported,
-+ and analyzed and is considered very secure. 1DES is not supported.
-+
-+config KLIPS_ENC_AES
-+ bool 'AES encryption algorithm'
-+ default y
-+ depends on KLIPS_ALG
-+ help
-+ The AES algorithm is used by ESP to provide for packet privacy.
-+ AES the NIST replacement for DES. AES is being widely analyzed,
-+ and is very fast.
-+
-+config KLIPS_IPCOMP
-+ bool 'IP compression'
-+ default y
-+ help
-+ The IPcomp protocol is used prior to ESP to make the packet
-+ smaller. Once encrypted, compression will fail, so any link
-+ layer efforts (e.g. PPP) will not work.
-+
-+config KLIPS_OCF
-+ bool 'IPsec OCF Acceleration Support'
-+ default n
-+ help
-+ OCF provides Asynchronous crypto acceleration for kernel and
-+ user applications. It supports various HW accelerators.
-+ If you have OCF support enabled and wish IPsec to utilise
-+ the hardware managed by OCF, then enable this option.
-+ OCF is a kernel patch, see http://ocf-linux.sourceforge.net/
-+
-+config KLIPS_DEBUG
-+ bool 'IPsec debugging'
-+ default y
-+ help
-+ KLIPS includes a lot of debugging code. Unless there is a real
-+ tangible benefit to removing this code, it should be left in place.
-+ Debugging connections without access to kernel level debugging is
-+ essentially impossible. Leave this on.
-+
-+config KLIPS_IF_MAX
-+ int 'Maximum number of virtual interfaces'
-+ default 64
-+ range 4 256
-+ help
-+ KLIPS creates virtual interfaces for tunnel purposes. At present
-+ it keeps track of certain items in an array (FIX ME), and needs
-+ to preallocate this array. Only a pointer is used per item.
-+
-+endmenu
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/Makefile Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,192 @@
-+# Makefile for KLIPS kernel code as a module for 2.6 kernels
-+#
-+# Makefile for KLIPS kernel code as a module
-+# Copyright (C) 1998, 1999, 2000,2001 Richard Guy Briggs.
-+# Copyright (C) 2002-2004 Michael Richardson <mcr@freeswan.org>
-+#
-+# This program is free software; you can redistribute it and/or modify it
-+# under the terms of the GNU General Public License as published by the
-+# Free Software Foundation; either version 2 of the License, or (at your
-+# option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+#
-+# This program is distributed in the hope that it will be useful, but
-+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+# for more details.
-+#
-+# RCSID $Id: Makefile.fs2_6,v 1.8.2.1 2006/04/20 16:33:06 mcr Exp $
-+#
-+# Note! Dependencies are done automagically by 'make dep', which also
-+# removes any old dependencies. DON'T put your own dependencies here
-+# unless it's something special (ie not a .c file).
-+#
-+
-+OPENSWANSRCDIR?=.
-+KLIPS_TOP?=.
-+# Pull in Makefile.ver if it exists; the leading '-' keeps a missing file from being an error.
-+-include ${OPENSWANSRCDIR}/Makefile.ver
-+# Objects that are appended unconditionally to ipsec-objs further down.
-+base-klips-objs :=
-+
-+base-klips-objs+= ipsec_init.o ipsec_sa.o ipsec_radij.o radij.o
-+base-klips-objs+= ipsec_life.o ipsec_proc.o
-+base-klips-objs+= ipsec_tunnel.o ipsec_xmit.o ipsec_rcv.o ipsec_ipip.o
-+base-klips-objs+= ipsec_snprintf.o
-+base-klips-objs+= ipsec_mast.o
-+base-klips-objs+= sysctl_net_ipsec.o
-+base-klips-objs+= pfkey_v2.o pfkey_v2_parser.o pfkey_v2_ext_process.o
-+base-klips-objs+= version.o
-+
-+base-klips-objs+= satot.o
-+base-klips-objs+= addrtot.o
-+base-klips-objs+= ultot.o
-+base-klips-objs+= addrtypeof.o
-+base-klips-objs+= anyaddr.o
-+base-klips-objs+= initaddr.o
-+base-klips-objs+= ultoa.o
-+base-klips-objs+= addrtoa.o
-+base-klips-objs+= subnettoa.o
-+base-klips-objs+= subnetof.o
-+base-klips-objs+= goodmask.o
-+base-klips-objs+= datatot.o
-+base-klips-objs+= rangetoa.o
-+base-klips-objs+= prng.o
-+base-klips-objs+= pfkey_v2_parse.o
-+base-klips-objs+= pfkey_v2_build.o
-+base-klips-objs+= pfkey_v2_debug.o
-+base-klips-objs+= pfkey_v2_ext_bits.o
-+base-klips-objs+= version.o
-+# NOTE(review): version.o is listed twice in base-klips-objs (also in the first group); confirm the duplicate is harmless.
-+obj-${CONFIG_KLIPS} += ipsec.o
-+
-+ipsec-objs += ${base-klips-objs}
-+# Optional pieces: each object is appended to ipsec-<value of its CONFIG_ symbol>.
-+ipsec-$(CONFIG_KLIPS_ESP) += ipsec_esp.o
-+ipsec-$(CONFIG_KLIPS_OCF) += ipsec_ocf.o
-+ipsec-$(CONFIG_KLIPS_IPCOMP) += ipsec_ipcomp.o
-+ipsec-$(CONFIG_KLIPS_AUTH_HMAC_MD5) += ipsec_md5c.o
-+ipsec-$(CONFIG_KLIPS_AUTH_HMAC_SHA1) += ipsec_sha1.o
-+
-+# AH, if you really think you need it.
-+ipsec-$(CONFIG_KLIPS_AH) += ipsec_ah.o
-+
-+ipsec-$(CONFIG_KLIPS_ALG) += ipsec_alg.o
-+
-+# include code from DES subdir
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/ipsec_alg_3des.o
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/cbc_enc.o
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/ecb_enc.o
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/set_key.o
-+# Fall back to ARCH when SUBARCH is unset or blank.
-+ifeq ($(strip ${SUBARCH}),)
-+SUBARCH:=${ARCH}
-+endif
-+
-+# the assembly version expects frame pointers, which are
-+# optional in many kernel builds. If you want speed, you should
-+# probably use cryptoapi code instead.
-+USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER}
-+ifeq (${USEASSEMBLY},i386y)
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/dx86unix.o
-+else
-+crypto-$(CONFIG_KLIPS_ENC_3DES) += des/des_enc.o
-+endif
-+
-+# include code from AES subdir
-+crypto-$(CONFIG_KLIPS_ENC_AES) += aes/ipsec_alg_aes.o
-+crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes_xcbc_mac.o
-+crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes_cbc.o
-+# NOTE(review): the SUBARCH default and USEASSEMBLY assignment below repeat the ones above verbatim.
-+ifeq ($(strip ${SUBARCH}),)
-+SUBARCH:=${ARCH}
-+endif
-+
-+USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER}
-+ifeq (${USEASSEMBLY},i386y)
-+crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes-i586.o
-+else
-+crypto-$(CONFIG_KLIPS_ENC_AES) += aes/aes.o
-+endif
-+# Add the crypto objects collected in crypto-y to ipsec-y.
-+ipsec-y += ${crypto-y}
-+
-+ipsec-$(CONFIG_KLIPS_ENC_CRYPTOAPI) += ipsec_alg_cryptoapi.o
-+
-+# IPcomp stuff: ipcomp.o plus the zlib objects; asm-ipcomp-obj-* picks match586.o or match686.o by CPU config symbol
-+base-ipcomp-objs := ipcomp.o
-+base-ipcomp-objs += adler32.o
-+base-ipcomp-objs += deflate.o
-+base-ipcomp-objs += infblock.o
-+base-ipcomp-objs += infcodes.o
-+base-ipcomp-objs += inffast.o
-+base-ipcomp-objs += inflate.o
-+base-ipcomp-objs += inftrees.o
-+base-ipcomp-objs += infutil.o
-+base-ipcomp-objs += trees.o
-+base-ipcomp-objs += zutil.o
-+asm-ipcomp-obj-$(CONFIG_M586) += match586.o
-+asm-ipcomp-obj-$(CONFIG_M586TSC) += match586.o
-+asm-ipcomp-obj-$(CONFIG_M586MMX) += match586.o
-+asm-ipcomp-obj-$(CONFIG_M686) += match686.o
-+asm-ipcomp-obj-$(CONFIG_MPENTIUMIII) += match686.o
-+asm-ipcomp-obj-$(CONFIG_MPENTIUM4) += match686.o
-+asm-ipcomp-obj-$(CONFIG_MK6) += match586.o
-+asm-ipcomp-obj-$(CONFIG_MK7) += match686.o
-+asm-ipcomp-obj-$(CONFIG_MCRUSOE) += match586.o
-+asm-ipcomp-obj-$(CONFIG_MWINCHIPC6) += match586.o
-+asm-ipcomp-obj-$(CONFIG_MWINCHIP2) += match686.o
-+asm-ipcomp-obj-$(CONFIG_MWINCHIP3D) += match686.o
-+base-ipcomp-objs += ${asm-ipcomp-obj-y}
-+
-+ipsec-$(CONFIG_KLIPS_IPCOMP) += ${base-ipcomp-objs}
-+# IPCOMP_PREFIX: README-zlib.freeswan says zconf.h prefixes the zlib global symbols when it is defined.
-+EXTRA_CFLAGS += -DIPCOMP_PREFIX -DKLIPS
-+EXTRA_CFLAGS += -Icrypto/ocf
-+
-+#
-+# $Log: Makefile.fs2_6,v $
-+# Revision 1.8.2.1 2006/04/20 16:33:06 mcr
-+# remove all of CONFIG_KLIPS_ALG --- one can no longer build without it.
-+# Fix in-kernel module compilation. Sub-makefiles do not work.
-+#
-+# Revision 1.8 2005/05/11 03:15:42 mcr
-+# adjusted makefiles to sanely build modules properly.
-+#
-+# Revision 1.7 2005/04/13 22:52:12 mcr
-+# moved KLIPS specific snprintf() wrapper to seperate file.
-+#
-+# Revision 1.6 2004/08/22 05:02:03 mcr
-+# organized symbols such that it is easier to build modules.
-+#
-+# Revision 1.5 2004/08/18 01:43:56 mcr
-+# adjusted makefile enumation so that it can be used by module
-+# wrapper.
-+#
-+# Revision 1.4 2004/08/17 03:27:23 mcr
-+# klips 2.6 edits.
-+#
-+# Revision 1.3 2004/08/04 16:50:13 mcr
-+# removed duplicate definition of dx86unix.o
-+#
-+# Revision 1.2 2004/08/03 18:21:09 mcr
-+# only set KLIPS_TOP and OPENSWANSRCDIR if not already set.
-+#
-+# Revision 1.1 2004/07/26 15:02:22 mcr
-+# makefile for KLIPS module for 2.6.
-+#
-+# Revision 1.3 2004/02/24 17:17:04 mcr
-+# s/CONFIG_IPSEC/CONFIG_KLIPS/ as 26sec uses "CONFIG_IPSEC" to
-+# turn it on/off as well.
-+#
-+# Revision 1.2 2004/02/22 06:50:42 mcr
-+# kernel 2.6 port - merged with 2.4 code.
-+#
-+# Revision 1.1.2.1 2004/02/20 02:07:53 mcr
-+# module configuration for KLIPS 2.6
-+#
-+#
-+# Local Variables:
-+# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)"
-+# End Variables:
-+#
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/README-zlib Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,147 @@
-+zlib 1.1.4 is a general purpose data compression library. All the code
-+is thread safe. The data format used by the zlib library
-+is described by RFCs (Request for Comments) 1950 to 1952 in the files
-+http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate
-+format) and rfc1952.txt (gzip format). These documents are also available in
-+other formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html
-+
-+All functions of the compression library are documented in the file zlib.h
-+(volunteer to write man pages welcome, contact jloup@gzip.org). A usage
-+example of the library is given in the file example.c which also tests that
-+the library is working correctly. Another example is given in the file
-+minigzip.c. The compression library itself is composed of all source files
-+except example.c and minigzip.c.
-+
-+To compile all files and run the test program, follow the instructions
-+given at the top of Makefile. In short "make test; make install"
-+should work for most machines. For Unix: "./configure; make test; make install"
-+For MSDOS, use one of the special makefiles such as Makefile.msc.
-+For VMS, use Make_vms.com or descrip.mms.
-+
-+Questions about zlib should be sent to <zlib@gzip.org>, or to
-+Gilles Vollant <info@winimage.com> for the Windows DLL version.
-+The zlib home page is http://www.zlib.org or http://www.gzip.org/zlib/
-+Before reporting a problem, please check this site to verify that
-+you have the latest version of zlib; otherwise get the latest version and
-+check whether the problem still exists or not.
-+
-+PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html
-+before asking for help.
-+
-+Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
-+issue of Dr. Dobb's Journal; a copy of the article is available in
-+http://dogma.net/markn/articles/zlibtool/zlibtool.htm
-+
-+The changes made in version 1.1.4 are documented in the file ChangeLog.
-+The only changes made since 1.1.3 are bug corrections:
-+
-+- ZFREE was repeated on same allocation on some error conditions.
-+ This creates a security problem described in
-+ http://www.zlib.org/advisory-2002-03-11.txt
-+- Returned incorrect error (Z_MEM_ERROR) on some invalid data
-+- Avoid accesses before window for invalid distances with inflate window
-+ less than 32K.
-+- force windowBits > 8 to avoid a bug in the encoder for a window size
-+ of 256 bytes. (A complete fix will be available in 1.1.5).
-+
-+The beta version 1.1.5beta includes many more changes. A new official
-+version 1.1.5 will be released as soon as extensive testing has been
-+completed on it.
-+
-+
-+Unsupported third party contributions are provided in directory "contrib".
-+
-+A Java implementation of zlib is available in the Java Development Kit
-+http://www.javasoft.com/products/JDK/1.1/docs/api/Package-java.util.zip.html
-+See the zlib home page http://www.zlib.org for details.
-+
-+A Perl interface to zlib written by Paul Marquess <pmarquess@bfsec.bt.co.uk>
-+is in the CPAN (Comprehensive Perl Archive Network) sites
-+http://www.cpan.org/modules/by-module/Compress/
-+
-+A Python interface to zlib written by A.M. Kuchling <amk@magnet.com>
-+is available in Python 1.5 and later versions, see
-+http://www.python.org/doc/lib/module-zlib.html
-+
-+A zlib binding for TCL written by Andreas Kupries <a.kupries@westend.com>
-+is available at http://www.westend.com/~kupries/doc/trf/man/man.html
-+
-+An experimental package to read and write files in .zip format,
-+written on top of zlib by Gilles Vollant <info@winimage.com>, is
-+available at http://www.winimage.com/zLibDll/unzip.html
-+and also in the contrib/minizip directory of zlib.
-+
-+
-+Notes for some targets:
-+
-+- To build a Windows DLL version, include in a DLL project zlib.def, zlib.rc
-+ and all .c files except example.c and minigzip.c; compile with -DZLIB_DLL
-+ The zlib DLL support was initially done by Alessandro Iacopetti and is
-+ now maintained by Gilles Vollant <info@winimage.com>. Check the zlib DLL
-+ home page at http://www.winimage.com/zLibDll
-+
-+ From Visual Basic, you can call the DLL functions which do not take
-+ a structure as argument: compress, uncompress and all gz* functions.
-+ See contrib/visual-basic.txt for more information, or get
-+ http://www.tcfb.com/dowseware/cmp-z-it.zip
-+
-+- For 64-bit Irix, deflate.c must be compiled without any optimization.
-+ With -O, one libpng test fails. The test works in 32 bit mode (with
-+ the -n32 compiler flag). The compiler bug has been reported to SGI.
-+
-+- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1
-+ it works when compiled with cc.
-+
-+- on Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1
-+ is necessary to get gzprintf working correctly. This is done by configure.
-+
-+- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works
-+ with other compilers. Use "make test" to check your compiler.
-+
-+- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers.
-+
-+- For Turbo C the small model is supported only with reduced performance to
-+ avoid any far allocation; it was tested with -DMAX_WBITS=11 -DMAX_MEM_LEVEL=3
-+
-+- For PalmOs, see http://www.cs.uit.no/~perm/PASTA/pilot/software.html
-+ Per Harald Myrvang <perm@stud.cs.uit.no>
-+
-+
-+Acknowledgments:
-+
-+ The deflate format used by zlib was defined by Phil Katz. The deflate
-+ and zlib specifications were written by L. Peter Deutsch. Thanks to all the
-+ people who reported problems and suggested various improvements in zlib;
-+ they are too numerous to cite here.
-+
-+Copyright notice:
-+
-+ (C) 1995-2002 Jean-loup Gailly and Mark Adler
-+
-+ This software is provided 'as-is', without any express or implied
-+ warranty. In no event will the authors be held liable for any damages
-+ arising from the use of this software.
-+
-+ Permission is granted to anyone to use this software for any purpose,
-+ including commercial applications, and to alter it and redistribute it
-+ freely, subject to the following restrictions:
-+
-+ 1. The origin of this software must not be misrepresented; you must not
-+ claim that you wrote the original software. If you use this software
-+ in a product, an acknowledgment in the product documentation would be
-+ appreciated but is not required.
-+ 2. Altered source versions must be plainly marked as such, and must not be
-+ misrepresented as being the original software.
-+ 3. This notice may not be removed or altered from any source distribution.
-+
-+ Jean-loup Gailly Mark Adler
-+ jloup@gzip.org madler@alumni.caltech.edu
-+
-+If you use the zlib library in a product, we would appreciate *not*
-+receiving lengthy legal documents to sign. The sources are provided
-+for free but without warranty of any kind. The library has been
-+entirely written by Jean-loup Gailly and Mark Adler; it does not
-+include third-party code.
-+
-+If you redistribute modified sources, we would appreciate that you include
-+in the file ChangeLog history information documenting your changes.
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/README-zlib.freeswan Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,13 @@
-+The only changes made to these files for use in FreeS/WAN are:
-+
-+ - In zconf.h, macros are defined to prefix global symbols with "ipcomp_"
-+ (or "_ipcomp"), when compiled with -DIPCOMP_PREFIX.
-+ - The copyright strings are defined local (static)
-+
-+ The above changes are made to avoid name collisions with ppp_deflate
-+ and ext2compr.
-+
-+ - Files not needed for FreeS/WAN have been removed
-+
-+ See the "README" file for information about where to obtain the complete
-+ zlib package.
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/addrtoa.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,67 @@
-+/*
-+ * addresses to ASCII
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: addrtoa.c,v 1.10 2004/07/10 07:43:47 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+#define NBYTES 4 /* bytes in an address */
-+#define PERBYTE 4 /* three digits plus a dot or NUL */
-+#define BUFLEN (NBYTES*PERBYTE)
-+
-+#if BUFLEN != ADDRTOA_BUF
-+#error "ADDRTOA_BUF in openswan.h inconsistent with addrtoa() code"
-+#endif
-+
-+/*
-+ - addrtoa - convert binary IPv4 address to ASCII dotted decimal; only format 0 is accepted, any other returns 0
-+ */
-+size_t /* space needed for full conversion */
-+addrtoa(addr, format, dst, dstlen)
-+struct in_addr addr;
-+int format; /* character */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ unsigned long a = ntohl(addr.s_addr); /* host order: the most significant byte is the first octet printed */
-+ int i;
-+ size_t n;
-+ unsigned long byte;
-+ char buf[BUFLEN];
-+ char *p;
-+ /* reject every format except 0 before doing any work */
-+ switch (format) {
-+ case 0:
-+ break;
-+ default:
-+ return 0;
-+ break;
-+ }
-+ /* build the full text in buf, most significant octet first */
-+ p = buf;
-+ for (i = NBYTES-1; i >= 0; i--) {
-+ byte = (a >> (i*8)) & 0xff;
-+ p += ultoa(byte, 10, p, PERBYTE); /* presumably the return counts the NUL, which the dot below replaces */
-+ if (i != 0)
-+ *(p-1) = '.';
-+ }
-+ n = p - buf;
-+ /* copy out only when the caller supplied room; n is returned either way */
-+ if (dstlen > 0) {
-+ if (n > dstlen)
-+ buf[dstlen - 1] = '\0'; /* truncate so strcpy writes at most dstlen bytes */
-+ strcpy(dst, buf);
-+ }
-+ return n;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/addrtot.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,344 @@
-+/*
-+ * addresses to text
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ */
-+
-+#if defined(__KERNEL__) && defined(__HAVE_ARCH_STRSTR)
-+#include <linux/string.h>
-+#endif
-+
-+#include "openswan.h"
-+
-+#define IP4BYTES 4 /* bytes in an IPv4 address */
-+#define PERBYTE 4 /* three digits plus a dot or NUL */
-+#define IP6BYTES 16 /* bytes in an IPv6 address */
-+
-+/* forwards */
-+static size_t normal4(const unsigned char *s, size_t len, char *b, char **dp);
-+static size_t normal6(const unsigned char *s, size_t len, char *b, char **dp, int squish);
-+static size_t reverse4(const unsigned char *s, size_t len, char *b, char **dp);
-+static size_t reverse6(const unsigned char *s, size_t len, char *b, char **dp);
-+
-+#if defined(__KERNEL__) && !defined(__HAVE_ARCH_STRSTR)
-+#define strstr ipsec_strstr
-+/*
-+ * Find the first occurrence of find in s; returns s itself when find is empty, NULL when there is no match.
-+ * (from NetBSD 1.6's /src/lib/libc/string/strstr.c)
-+ */
-+
-+static char *
-+ipsec_strstr(s, find)
-+ const char *s, *find;
-+{
-+ char c, sc;
-+ size_t len;
-+
-+ if ((c = *find++) != 0) { /* c = first character of find; find now points at the rest */
-+ len = strlen(find);
-+ do {
-+ do { /* scan s for the next occurrence of c */
-+ if ((sc = *s++) == 0)
-+ return (NULL);
-+ } while (sc != c);
-+ } while (strncmp(s, find, len) != 0); /* then compare the remainder of find */
-+ s--; /* step back onto the matched first character */
-+ }
-+ /* LINTED interface specification */
-+ return ((char *)s);
-+}
-+#endif
-+
-+/*
-+ - addrtot - convert binary address to text; format 0 (normal), 'Q' (IPv6 without zero squishing) or 'r' (reverse-lookup name), anything else gives "<invalid>"
-+ */
-+size_t /* space needed for full conversion */
-+addrtot(src, format, dst, dstlen)
-+const ip_address *src;
-+int format; /* character */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ const unsigned char *b;
-+ size_t n;
-+ char buf[1+ADDRTOT_BUF+1]; /* :address: */
-+ char *p;
-+ int t = addrtypeof(src);
-+# define TF(t, f) (((t)<<8) | (f))
-+ /* TF packs address family and format character into one value for the switch below */
-+ n = addrbytesptr(src, &b);
-+ if (n == 0) {
-+ bad:
-+ if (dstlen > 0) dst[0]='\0'; /* dst need not be valid if dstlen is 0, so do not touch it then */
-+ if (dstlen > 0) strncat(dst, "<invalid>", dstlen - 1); /* strncat appends a NUL after the copied chars; keep the total within dstlen */
-+ return sizeof("<invalid>");
-+ }
-+ /* each helper fills buf and points p at the start of the finished text */
-+ switch (TF(t, format)) {
-+ case TF(AF_INET, 0):
-+ n = normal4(b, n, buf, &p);
-+ break;
-+ case TF(AF_INET6, 0):
-+ n = normal6(b, n, buf, &p, 1);
-+ break;
-+ case TF(AF_INET, 'Q'):
-+ n = normal4(b, n, buf, &p);
-+ break;
-+ case TF(AF_INET6, 'Q'):
-+ n = normal6(b, n, buf, &p, 0);
-+ break;
-+ case TF(AF_INET, 'r'):
-+ n = reverse4(b, n, buf, &p);
-+ break;
-+ case TF(AF_INET6, 'r'):
-+ n = reverse6(b, n, buf, &p);
-+ break;
-+ default: /* including (AF_INET, 'R') */
-+ goto bad;
-+ break;
-+ }
-+ /* copy out only when the caller supplied room; n (size including NUL) is returned either way */
-+ if (dstlen > 0) {
-+ if (dstlen < n)
-+ p[dstlen - 1] = '\0'; /* truncate so strcpy writes at most dstlen bytes */
-+ strcpy(dst, p);
-+ }
-+ return n;
-+}
-+
-+/*
-+ - normal4 - normal IPv4 address-text conversion (dotted decimal, bytes in the order given)
-+ */
-+static size_t /* size of text, including NUL */
-+normal4(srcp, srclen, buf, dstp)
-+const unsigned char *srcp;
-+size_t srclen;
-+char *buf; /* guaranteed large enough */
-+char **dstp; /* where to put result pointer */
-+{
-+ int i;
-+ char *p;
-+
-+ if (srclen != IP4BYTES) /* "can't happen" */
-+ return 0; /* note: *dstp is left unset on this path */
-+ p = buf;
-+ for (i = 0; i < IP4BYTES; i++) {
-+ p += ultot(srcp[i], 10, p, PERBYTE);
-+ if (i != IP4BYTES - 1)
-+ *(p-1) = '.'; /* overwrites the NUL */
-+ }
-+ *dstp = buf;
-+ return p - buf;
-+}
-+
-+/*
-+ - normal6 - normal IPv6 address-text conversion (eight hex groups; optionally the first run of zero groups becomes "::")
-+ */
-+static size_t /* size of text, including NUL */
-+normal6(srcp, srclen, buf, dstp, squish)
-+const unsigned char *srcp;
-+size_t srclen;
-+char *buf; /* guaranteed large enough, plus 2 */
-+char **dstp; /* where to put result pointer */
-+int squish; /* whether to squish out 0:0 */
-+{
-+ int i;
-+ unsigned long piece;
-+ char *p;
-+ char *q;
-+
-+ if (srclen != IP6BYTES) /* "can't happen" */
-+ return 0; /* note: *dstp is left unset on this path */
-+ p = buf;
-+ *p++ = ':'; /* leading ':' lets the ":0:0:" search match the first group too */
-+ for (i = 0; i < IP6BYTES/2; i++) {
-+ piece = (srcp[2*i] << 8) + srcp[2*i + 1]; /* two bytes per group, high byte first */
-+ p += ultot(piece, 16, p, 5); /* 5 = abcd + NUL */
-+ *(p-1) = ':'; /* overwrites the NUL */
-+ }
-+ *p = '\0';
-+ q = strstr(buf, ":0:0:"); /* first run of at least two zero groups; searched even when squish is 0 */
-+ if (squish && q != NULL) { /* zero squishing is possible */
-+ p = q + 1;
-+ while (*p == '0' && *(p+1) == ':') /* skip every consecutive "0:" group */
-+ p += 2;
-+ q++;
-+ *q++ = ':'; /* overwrite first 0 */
-+ while (*p != '\0') /* slide the rest of the text down after the "::" */
-+ *q++ = *p++;
-+ *q = '\0';
-+ if (!(*(q-1) == ':' && *(q-2) == ':'))
-+ *--q = '\0'; /* strip final : unless :: */
-+ p = buf;
-+ if (!(*p == ':' && *(p+1) == ':'))
-+ p++; /* skip initial : unless :: */
-+ } else {
-+ q = p;
-+ *--q = '\0'; /* strip final : */
-+ p = buf + 1; /* skip initial : */
-+ }
-+ *dstp = p; /* result may start at buf or buf+1 */
-+ return q - p + 1;
-+}
-+
-+/*
-+ - reverse4 - IPv4 reverse-lookup conversion (bytes last to first, then "IN-ADDR.ARPA.")
-+ */
-+static size_t /* size of text, including NUL */
-+reverse4(srcp, srclen, buf, dstp)
-+const unsigned char *srcp;
-+size_t srclen;
-+char *buf; /* guaranteed large enough */
-+char **dstp; /* where to put result pointer */
-+{
-+ int i;
-+ char *p;
-+
-+ if (srclen != IP4BYTES) /* "can't happen" */
-+ return 0; /* note: *dstp is left unset on this path */
-+ p = buf;
-+ for (i = IP4BYTES-1; i >= 0; i--) { /* last address byte is emitted first */
-+ p += ultot(srcp[i], 10, p, PERBYTE);
-+ *(p-1) = '.'; /* overwrites the NUL */
-+ }
-+ strcpy(p, "IN-ADDR.ARPA.");
-+ *dstp = buf;
-+ return strlen(buf) + 1;
-+}
-+
-+/*
-+ - reverse6 - IPv6 reverse-lookup conversion (RFC 1886); NOTE(review): RFC 1886 names IP6.INT, the IP6.ARPA suffix below matches RFC 3596 - confirm citation
-+ * A trifle inefficient, really shouldn't use ultot...
-+ */
-+static size_t /* size of text, including NUL */
-+reverse6(srcp, srclen, buf, dstp)
-+const unsigned char *srcp;
-+size_t srclen;
-+char *buf; /* guaranteed large enough */
-+char **dstp; /* where to put result pointer */
-+{
-+ int i;
-+ unsigned long piece;
-+ char *p;
-+
-+ if (srclen != IP6BYTES) /* "can't happen" */
-+ return 0; /* note: *dstp is left unset on this path */
-+ p = buf;
-+ for (i = IP6BYTES-1; i >= 0; i--) { /* last address byte is emitted first */
-+ piece = srcp[i];
-+ p += ultot(piece&0xf, 16, p, 2); /* low nibble first; 2 = one hex digit + NUL */
-+ *(p-1) = '.'; /* overwrites the NUL */
-+ p += ultot(piece>>4, 16, p, 2); /* then the high nibble */
-+ *(p-1) = '.'; /* overwrites the NUL */
-+ }
-+ strcpy(p, "IP6.ARPA.");
-+ *dstp = buf;
-+ return strlen(buf) + 1;
-+}
-+
-+/*
-+ - reverse6 - modern IPv6 reverse-lookup conversion (RFC 2874)
-+ * this version removed as it was obsoleted in the end.
-+ */
-+
-+#ifdef ADDRTOT_MAIN
-+
-+#include <stdio.h>
-+#include <sys/socket.h>
-+#include <netinet/in.h>
-+#include <arpa/inet.h>
-+
-+void regress(void);
-+
-+int
-+main(int argc, char *argv[]) /* test driver, built only when ADDRTOT_MAIN is defined */
-+{
-+ if (argc < 2) {
-+ fprintf(stderr, "Usage: %s {addr|net/mask|begin...end|-r}\n",
-+ argv[0]);
-+ exit(2);
-+ }
-+ /* only -r is acted on; any other argument falls through to exit(0) without converting anything */
-+ if (strcmp(argv[1], "-r") == 0) {
-+ regress(); /* exits with the test status itself */
-+ fprintf(stderr, "regress() returned?!?\n");
-+ exit(1);
-+ }
-+ exit(0);
-+}
-+
-+struct rtab { /* one regression case: text in, format character, expected text out */
-+ char *input;
-+ char format;
-+ char *output; /* NULL means error expected */
-+} rtab[] = {
-+ {"1.2.3.0", 0, "1.2.3.0"},
-+ {"1:2::3:4", 0, "1:2::3:4"},
-+ {"1:2::3:4", 'Q', "1:2:0:0:0:0:3:4"},
-+ {"1:2:0:0:3:4:0:0", 0, "1:2::3:4:0:0"},
-+ {"1.2.3.4", 'r' , "4.3.2.1.IN-ADDR.ARPA."},
-+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-+ {"1:2::3:4", 'r', "4.0.0.0.3.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.2.0.0.0.1.0.0.0.IP6.ARPA."},
-+ {NULL, 0, NULL}
-+};
-+/* regress - run every rtab case through ttoaddr() and addrtot(), then exit(0) if all matched, exit(1) otherwise */
-+void
-+regress()
-+{
-+ struct rtab *r;
-+ int status = 0;
-+ ip_address a;
-+ char in[100];
-+ char buf[100];
-+ const char *oops;
-+ size_t n;
-+
-+ for (r = rtab; r->input != NULL; r++) {
-+ strcpy(in, r->input);
-+
-+ /* convert it *to* internal format */
-+ oops = ttoaddr(in, strlen(in), 0, &a); /* NOTE(review): oops is never checked afterwards */
-+
-+ /* now convert it back */
-+ /* NOTE(review): addrtot's error path returns sizeof("<invalid>"), not 0, so n == 0 only arises from the helpers' "can't happen" returns */
-+ n = addrtot(&a, r->format, buf, sizeof(buf));
-+
-+ if (n == 0 && r->output == NULL)
-+ {} /* okay, error expected */
-+
-+ else if (n == 0) {
-+ printf("`%s' atoasr failed\n", r->input);
-+ status = 1;
-+
-+ } else if (r->output == NULL) {
-+ printf("`%s' atoasr succeeded unexpectedly '%c'\n",
-+ r->input, r->format);
-+ status = 1;
-+ } else {
-+ if (strcasecmp(r->output, buf) != 0) { /* comparison ignores letter case */
-+ printf("`%s' '%c' gave `%s', expected `%s'\n",
-+ r->input, r->format, buf, r->output);
-+ status = 1;
-+ }
-+ }
-+ }
-+ exit(status);
-+}
-+
-+#endif /* ADDRTOT_MAIN */
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/addrtypeof.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,123 @@
-+/*
-+ * extract parts of an ip_address
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: addrtypeof.c,v 1.10 2004/07/10 07:43:47 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - addrtypeof - get the type of an ip_address (returns its sin_family field)
-+ */
-+int
-+addrtypeof(src)
-+const ip_address *src;
-+{
-+ return src->u.v4.sin_family; /* family is read through the v4 member whatever the address type */
-+}
-+
-+/*
-+ - addrbytesptr - get read-only pointer to the address bytes of an ip_address; returns 4 (AF_INET), 16 (AF_INET6) or 0
-+ */
-+size_t /* 0 for error */
-+addrbytesptr(src, dstp)
-+const ip_address *src;
-+const unsigned char **dstp; /* NULL means just a size query */
-+{
-+ const unsigned char *p;
-+ size_t n;
-+
-+ switch (src->u.v4.sin_family) {
-+ case AF_INET:
-+ p = (const unsigned char *)&src->u.v4.sin_addr.s_addr;
-+ n = 4;
-+ break;
-+ case AF_INET6:
-+ p = (const unsigned char *)&src->u.v6.sin6_addr;
-+ n = 16;
-+ break;
-+ default:
-+ return 0; /* unknown family: *dstp is left untouched */
-+ break;
-+ }
-+
-+ if (dstp != NULL)
-+ *dstp = p;
-+ return n;
-+
-+}
-+/*
-+ - addrbytesptr_write - get writable pointer to the address bytes of an ip_address; returns 4 (AF_INET), 16 (AF_INET6) or 0
-+ */
-+size_t /* 0 for error */
-+addrbytesptr_write(src, dstp)
-+ip_address *src;
-+unsigned char **dstp; /* NULL means just a size query */
-+{
-+ unsigned char *p;
-+ size_t n;
-+
-+ switch (src->u.v4.sin_family) {
-+ case AF_INET:
-+ p = (unsigned char *)&src->u.v4.sin_addr.s_addr;
-+ n = 4;
-+ break;
-+ case AF_INET6:
-+ p = (unsigned char *)&src->u.v6.sin6_addr;
-+ n = 16;
-+ break;
-+ default:
-+ return 0; /* unknown family: *dstp is left untouched */
-+ break;
-+ }
-+
-+ if (dstp != NULL)
-+ *dstp = p;
-+ return n;
-+}
-+
-+/*
-+ - addrlenof - get length of the address bytes of an ip_address (a size-only addrbytesptr query)
-+ */
-+size_t /* 0 for error */
-+addrlenof(src)
-+const ip_address *src;
-+{
-+ return addrbytesptr(src, NULL); /* NULL destination: only the length is wanted */
-+}
-+
-+/*
-+ - addrbytesof - copy the address bytes of an ip_address into dst (at most dstlen bytes); returns the full address length
-+ */
-+size_t /* 0 for error */
-+addrbytesof(src, dst, dstlen)
-+const ip_address *src;
-+unsigned char *dst;
-+size_t dstlen;
-+{
-+ const unsigned char *p;
-+ size_t n;
-+ size_t ncopy;
-+
-+ n = addrbytesptr(src, &p);
-+ if (n == 0)
-+ return 0;
-+ /* dst is not touched when dstlen is 0 */
-+ if (dstlen > 0) {
-+ ncopy = n;
-+ if (ncopy > dstlen)
-+ ncopy = dstlen; /* short buffer: copy a truncated prefix */
-+ memcpy(dst, p, ncopy);
-+ }
-+ return n; /* full length even when the copy was truncated */
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/adler32.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,49 @@
-+/* adler32.c -- compute the Adler-32 checksum of a data stream
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* @(#) $Id: adler32.c,v 1.6 2004/07/10 19:11:18 mcr Exp $ */
-+
-+#include <zlib/zlib.h>
-+#include <zlib/zconf.h>
-+
-+#define BASE 65521L /* largest prime smaller than 65536 */
-+#define NMAX 5552
-+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-+/* DO1 adds one byte to s1 and then s1 to s2; DO2..DO16 unroll it over 2..16 consecutive bytes */
-+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
-+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
-+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
-+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
-+#define DO16(buf) DO8(buf,0); DO8(buf,8);
-+
-+/* adler32 - fold len bytes at buf into the running checksum adler; returns 1 when buf is Z_NULL */
-+uLong ZEXPORT adler32(adler, buf, len)
-+ uLong adler;
-+ const Bytef *buf;
-+ uInt len;
-+{
-+ unsigned long s1 = adler & 0xffff; /* low 16 bits of the incoming checksum */
-+ unsigned long s2 = (adler >> 16) & 0xffff; /* high 16 bits of the incoming checksum */
-+ int k;
-+
-+ if (buf == Z_NULL) return 1L;
-+
-+ while (len > 0) {
-+ k = len < NMAX ? len : NMAX; /* process at most NMAX bytes between modulo reductions */
-+ len -= k;
-+ while (k >= 16) {
-+ DO16(buf);
-+ buf += 16;
-+ k -= 16;
-+ }
-+ if (k != 0) do { /* remaining 1..15 bytes of this chunk */
-+ s1 += *buf++;
-+ s2 += s1;
-+ } while (--k);
-+ s1 %= BASE;
-+ s2 %= BASE;
-+ }
-+ return (s2 << 16) | s1; /* s2 in the high half, s1 in the low half */
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/Makefile Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,56 @@
-+# Makefile for KLIPS AES kernel code as a module for 2.6 kernels
-+#
-+# Makefile for KLIPS kernel code as a module
-+# Copyright (C) 2002-2004 Michael Richardson <mcr@xelerance.com>
-+#
-+# This program is free software; you can redistribute it and/or modify it
-+# under the terms of the GNU General Public License as published by the
-+# Free Software Foundation; either version 2 of the License, or (at your
-+# option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+#
-+# This program is distributed in the hope that it will be useful, but
-+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+# for more details.
-+#
-+# RCSID $Id: Makefile.fs2_6,v 1.2 2005/08/12 14:13:58 mcr Exp $
-+#
-+# Note! Dependencies are done automagically by 'make dep', which also
-+# removes any old dependencies. DON'T put your own dependencies here
-+# unless it's something special (ie not a .c file).
-+#
-+
-+obj-$(CONFIG_KLIPS_ENC_AES) += ipsec_alg_aes.o
-+obj-$(CONFIG_KLIPS_ENC_AES) += aes_xcbc_mac.o
-+obj-$(CONFIG_KLIPS_ENC_AES) += aes_cbc.o
-+# Fall back to ARCH when SUBARCH is unset or blank.
-+ifeq ($(strip ${SUBARCH}),)
-+SUBARCH:=${ARCH}
-+endif
-+# USEASSEMBLY is SUBARCH followed by CONFIG_FRAME_POINTER, so it is i386y only for i386 with frame pointers set to y.
-+# the assembly version expects frame pointers, which are
-+# optional in many kernel builds. If you want speed, you should
-+# probably use cryptoapi code instead.
-+USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER}
-+ifeq (${USEASSEMBLY},i386y)
-+obj-$(CONFIG_KLIPS_ENC_AES) += aes-i586.o
-+else
-+obj-$(CONFIG_KLIPS_ENC_AES) += aes.o
-+endif
-+
-+
-+#
-+# $Log: Makefile.fs2_6,v $
-+# Revision 1.2 2005/08/12 14:13:58 mcr
-+# do not use assembly code with there are no frame pointers,
-+# as it does not have the right linkages.
-+#
-+# Revision 1.1 2004/08/17 03:31:34 mcr
-+# klips 2.6 edits.
-+#
-+#
-+# Local Variables:
-+# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)"
-+# End:
-+#
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/aes-i586.S Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,892 @@
-+//
-+// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
-+// All rights reserved.
-+//
-+// TERMS
-+//
-+// Redistribution and use in source and binary forms, with or without
-+// modification, are permitted subject to the following conditions:
-+//
-+// 1. Redistributions of source code must retain the above copyright
-+// notice, this list of conditions and the following disclaimer.
-+//
-+// 2. Redistributions in binary form must reproduce the above copyright
-+// notice, this list of conditions and the following disclaimer in the
-+// documentation and/or other materials provided with the distribution.
-+//
-+// 3. The copyright holder's name must not be used to endorse or promote
-+// any products derived from this software without his specific prior
-+// written permission.
-+//
-+// This software is provided 'as is' with no express or implied warranties
-+// of correctness or fitness for purpose.
-+
-+// Modified by Jari Ruusu, December 24 2001
-+// - Converted syntax to GNU CPP/assembler syntax
-+// - C programming interface converted back to "old" API
-+// - Minor portability cleanups and speed optimizations
-+
-+// An AES (Rijndael) implementation for the Pentium. This version only
-+// implements the standard AES block length (128 bits, 16 bytes). This code
-+// does not preserve the eax, ecx or edx registers or the arithmetic status
-+// flags. However, the ebx, esi, edi, and ebp registers are preserved across
-+// calls.
-+
-+// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
-+// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
-+// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
-+
-+#if defined(USE_UNDERLINE)
-+# define aes_set_key _aes_set_key
-+# define aes_encrypt _aes_encrypt
-+# define aes_decrypt _aes_decrypt
-+#endif
-+#if !defined(ALIGN32BYTES)
-+# define ALIGN32BYTES 32
-+#endif
-+
-+ .file "aes-i586.S"
-+ .globl aes_set_key
-+ .globl aes_encrypt
-+ .globl aes_decrypt
-+
-+#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
-+
-+// offsets to parameters with one register pushed onto stack
-+
-+#define ctx 8 // AES context structure
-+#define in_blk 12 // input byte array address parameter
-+#define out_blk 16 // output byte array address parameter
-+
-+// offsets in context structure
-+
-+#define nkey 0 // key length, size 4
-+#define nrnd 4 // number of rounds, size 4
-+#define ekey 8 // encryption key schedule base address, size 256
-+#define dkey 264 // decryption key schedule base address, size 256
-+
-+// This macro performs one forward encryption round. It is entered with
-+// the previous round's four column values in %eax, %ebx, %esi and %edi and
-+// exits with the new column values in the same registers.
-+
-+#define fwd_rnd(p1,p2) \
-+ mov %ebx,(%esp) ;\
-+ movzbl %al,%edx ;\
-+ mov %eax,%ecx ;\
-+ mov p2(%ebp),%eax ;\
-+ mov %edi,4(%esp) ;\
-+ mov p2+12(%ebp),%edi ;\
-+ xor p1(,%edx,4),%eax ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ mov p2+4(%ebp),%ebx ;\
-+ xor p1+tlen(,%edx,4),%edi ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+3*tlen(,%ecx,4),%ebx ;\
-+ mov %esi,%ecx ;\
-+ mov p1+2*tlen(,%edx,4),%esi ;\
-+ movzbl %cl,%edx ;\
-+ xor p1(,%edx,4),%esi ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ xor p1+tlen(,%edx,4),%ebx ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+2*tlen(,%edx,4),%eax ;\
-+ mov (%esp),%edx ;\
-+ xor p1+3*tlen(,%ecx,4),%edi ;\
-+ movzbl %dl,%ecx ;\
-+ xor p2+8(%ebp),%esi ;\
-+ xor p1(,%ecx,4),%ebx ;\
-+ movzbl %dh,%ecx ;\
-+ shr $16,%edx ;\
-+ xor p1+tlen(,%ecx,4),%eax ;\
-+ movzbl %dl,%ecx ;\
-+ movzbl %dh,%edx ;\
-+ xor p1+2*tlen(,%ecx,4),%edi ;\
-+ mov 4(%esp),%ecx ;\
-+ xor p1+3*tlen(,%edx,4),%esi ;\
-+ movzbl %cl,%edx ;\
-+ xor p1(,%edx,4),%edi ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ xor p1+tlen(,%edx,4),%esi ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+2*tlen(,%edx,4),%ebx ;\
-+ xor p1+3*tlen(,%ecx,4),%eax
-+
-+// This macro performs one inverse (decryption) round. It is entered with
-+// the previous round's four column values in %eax, %ebx, %esi and %edi and
-+// exits with the new column values in the same registers.
-+
-+#define inv_rnd(p1,p2) \
-+ movzbl %al,%edx ;\
-+ mov %ebx,(%esp) ;\
-+ mov %eax,%ecx ;\
-+ mov p2(%ebp),%eax ;\
-+ mov %edi,4(%esp) ;\
-+ mov p2+4(%ebp),%ebx ;\
-+ xor p1(,%edx,4),%eax ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ mov p2+12(%ebp),%edi ;\
-+ xor p1+tlen(,%edx,4),%ebx ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+3*tlen(,%ecx,4),%edi ;\
-+ mov %esi,%ecx ;\
-+ mov p1+2*tlen(,%edx,4),%esi ;\
-+ movzbl %cl,%edx ;\
-+ xor p1(,%edx,4),%esi ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ xor p1+tlen(,%edx,4),%edi ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+2*tlen(,%edx,4),%eax ;\
-+ mov (%esp),%edx ;\
-+ xor p1+3*tlen(,%ecx,4),%ebx ;\
-+ movzbl %dl,%ecx ;\
-+ xor p2+8(%ebp),%esi ;\
-+ xor p1(,%ecx,4),%ebx ;\
-+ movzbl %dh,%ecx ;\
-+ shr $16,%edx ;\
-+ xor p1+tlen(,%ecx,4),%esi ;\
-+ movzbl %dl,%ecx ;\
-+ movzbl %dh,%edx ;\
-+ xor p1+2*tlen(,%ecx,4),%edi ;\
-+ mov 4(%esp),%ecx ;\
-+ xor p1+3*tlen(,%edx,4),%eax ;\
-+ movzbl %cl,%edx ;\
-+ xor p1(,%edx,4),%edi ;\
-+ movzbl %ch,%edx ;\
-+ shr $16,%ecx ;\
-+ xor p1+tlen(,%edx,4),%eax ;\
-+ movzbl %cl,%edx ;\
-+ movzbl %ch,%ecx ;\
-+ xor p1+2*tlen(,%edx,4),%ebx ;\
-+ xor p1+3*tlen(,%ecx,4),%esi
-+
-+// AES (Rijndael) Encryption Subroutine: encrypts one 16-byte block (stack arguments: cx, in_blk, out_blk)
-+
-+ .text
-+ .align ALIGN32BYTES
-+aes_encrypt:
-+ push %ebp // the ctx/in_blk/out_blk offsets assume exactly this one push
-+ mov ctx(%esp),%ebp // pointer to context
-+ mov in_blk(%esp),%ecx // address of the input block
-+ push %ebx
-+ push %esi
-+ push %edi
-+ mov nrnd(%ebp),%edx // number of rounds
-+ lea ekey+16(%ebp),%ebp // key pointer
-+
-+// input four columns and xor in first round key
-+
-+ mov (%ecx),%eax
-+ mov 4(%ecx),%ebx
-+ mov 8(%ecx),%esi
-+ mov 12(%ecx),%edi
-+ xor -16(%ebp),%eax // -16..-4(%ebp) is the first round key (ebp = ekey+16)
-+ xor -12(%ebp),%ebx
-+ xor -8(%ebp),%esi
-+ xor -4(%ebp),%edi
-+
-+ sub $8,%esp // space for register saves on stack
-+
-+ sub $10,%edx
-+ je aes_15 // nrnd == 10: enter at the 10-round label, key pointer unchanged
-+ add $32,%ebp // two extra rounds: advance the key pointer by 2 x 16 bytes
-+ sub $2,%edx
-+ je aes_13 // nrnd == 12: enter at the 12-round label
-+ add $32,%ebp // otherwise fall through into the 14-round sequence
-+
-+ fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
-+ fwd_rnd(aes_ft_tab,-48)
-+aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
-+ fwd_rnd(aes_ft_tab,-16)
-+aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
-+ fwd_rnd(aes_ft_tab,16)
-+ fwd_rnd(aes_ft_tab,32)
-+ fwd_rnd(aes_ft_tab,48)
-+ fwd_rnd(aes_ft_tab,64)
-+ fwd_rnd(aes_ft_tab,80)
-+ fwd_rnd(aes_ft_tab,96)
-+ fwd_rnd(aes_ft_tab,112)
-+ fwd_rnd(aes_ft_tab,128)
-+ fwd_rnd(aes_fl_tab,144) // last round uses a different table
-+
-+// move final values to the output array.
-+
-+ mov out_blk+20(%esp),%ebp // +20 = three further pushes (12 bytes) + the 8-byte scratch area
-+ add $8,%esp // release the scratch area
-+ mov %eax,(%ebp)
-+ mov %ebx,4(%ebp)
-+ mov %esi,8(%ebp)
-+ mov %edi,12(%ebp)
-+ pop %edi
-+ pop %esi
-+ pop %ebx
-+ pop %ebp
-+ ret
-+
-+
-+// AES (Rijndael) Decryption Subroutine: decrypts one 16-byte block (stack arguments: cx, in_blk, out_blk)
-+
-+ .align ALIGN32BYTES
-+aes_decrypt:
-+ push %ebp // the ctx/in_blk/out_blk offsets assume exactly this one push
-+ mov ctx(%esp),%ebp // pointer to context
-+ mov in_blk(%esp),%ecx // address of the input block
-+ push %ebx
-+ push %esi
-+ push %edi
-+ mov nrnd(%ebp),%edx // number of rounds
-+ lea dkey+16(%ebp),%ebp // key pointer
-+
-+// input four columns and xor in first round key
-+
-+ mov (%ecx),%eax
-+ mov 4(%ecx),%ebx
-+ mov 8(%ecx),%esi
-+ mov 12(%ecx),%edi
-+ xor -16(%ebp),%eax // -16..-4(%ebp) is the first entry of the decryption schedule (ebp = dkey+16)
-+ xor -12(%ebp),%ebx
-+ xor -8(%ebp),%esi
-+ xor -4(%ebp),%edi
-+
-+ sub $8,%esp // space for register saves on stack
-+
-+ sub $10,%edx
-+ je aes_25 // nrnd == 10: enter at the 10-round label, key pointer unchanged
-+ add $32,%ebp // two extra rounds: advance the key pointer by 2 x 16 bytes
-+ sub $2,%edx
-+ je aes_23 // nrnd == 12: enter at the 12-round label
-+ add $32,%ebp // otherwise fall through into the 14-round sequence
-+
-+ inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
-+ inv_rnd(aes_it_tab,-48)
-+aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
-+ inv_rnd(aes_it_tab,-16)
-+aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
-+ inv_rnd(aes_it_tab,16)
-+ inv_rnd(aes_it_tab,32)
-+ inv_rnd(aes_it_tab,48)
-+ inv_rnd(aes_it_tab,64)
-+ inv_rnd(aes_it_tab,80)
-+ inv_rnd(aes_it_tab,96)
-+ inv_rnd(aes_it_tab,112)
-+ inv_rnd(aes_it_tab,128)
-+ inv_rnd(aes_il_tab,144) // last round uses a different table
-+
-+// move final values to the output array.
-+
-+ mov out_blk+20(%esp),%ebp // +20 = three further pushes (12 bytes) + the 8-byte scratch area
-+ add $8,%esp // release the scratch area
-+ mov %eax,(%ebp)
-+ mov %ebx,4(%ebp)
-+ mov %esi,8(%ebp)
-+ mov %edi,12(%ebp)
-+ pop %edi
-+ pop %esi
-+ pop %ebx
-+ pop %ebp
-+ ret
-+
-+// AES (Rijndael) Key Schedule Subroutine
-+
-+// input/output parameters
-+
-+#define aes_cx 12 // AES context
-+#define in_key 16 // key input array address
-+#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
-+#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
-+
-+// offsets for locals
-+
-+#define cnt -4
-+#define kpf -8
-+#define slen 8
-+
-+// This macro performs a column mixing operation on an input 32-bit
-+// word to give a 32-bit result. It uses each of the 4 bytes in
-+// the input column to index 4 different tables of 256 32-bit words
-+// that are xored together to form the output value.
-+
-+#define mix_col(p1) \
-+ movzbl %bl,%ecx ;\
-+ mov p1(,%ecx,4),%eax ;\
-+ movzbl %bh,%ecx ;\
-+ ror $16,%ebx ;\
-+ xor p1+tlen(,%ecx,4),%eax ;\
-+ movzbl %bl,%ecx ;\
-+ xor p1+2*tlen(,%ecx,4),%eax ;\
-+ movzbl %bh,%ecx ;\
-+ xor p1+3*tlen(,%ecx,4),%eax
-+
-+// Key Schedule Macros
-+
-+#define ksc4(p1) \
-+ rol $24,%ebx ;\
-+ mix_col(aes_fl_tab) ;\
-+ ror $8,%ebx ;\
-+ xor 4*p1+aes_rcon_tab,%eax ;\
-+ xor %eax,%esi ;\
-+ xor %esi,%ebp ;\
-+ mov %esi,16*p1(%edi) ;\
-+ mov %ebp,16*p1+4(%edi) ;\
-+ xor %ebp,%edx ;\
-+ xor %edx,%ebx ;\
-+ mov %edx,16*p1+8(%edi) ;\
-+ mov %ebx,16*p1+12(%edi)
-+
-+#define ksc6(p1) \
-+ rol $24,%ebx ;\
-+ mix_col(aes_fl_tab) ;\
-+ ror $8,%ebx ;\
-+ xor 4*p1+aes_rcon_tab,%eax ;\
-+ xor 24*p1-24(%edi),%eax ;\
-+ mov %eax,24*p1(%edi) ;\
-+ xor 24*p1-20(%edi),%eax ;\
-+ mov %eax,24*p1+4(%edi) ;\
-+ xor %eax,%esi ;\
-+ xor %esi,%ebp ;\
-+ mov %esi,24*p1+8(%edi) ;\
-+ mov %ebp,24*p1+12(%edi) ;\
-+ xor %ebp,%edx ;\
-+ xor %edx,%ebx ;\
-+ mov %edx,24*p1+16(%edi) ;\
-+ mov %ebx,24*p1+20(%edi)
-+
-+#define ksc8(p1) \
-+ rol $24,%ebx ;\
-+ mix_col(aes_fl_tab) ;\
-+ ror $8,%ebx ;\
-+ xor 4*p1+aes_rcon_tab,%eax ;\
-+ xor 32*p1-32(%edi),%eax ;\
-+ mov %eax,32*p1(%edi) ;\
-+ xor 32*p1-28(%edi),%eax ;\
-+ mov %eax,32*p1+4(%edi) ;\
-+ xor 32*p1-24(%edi),%eax ;\
-+ mov %eax,32*p1+8(%edi) ;\
-+ xor 32*p1-20(%edi),%eax ;\
-+ mov %eax,32*p1+12(%edi) ;\
-+ push %ebx ;\
-+ mov %eax,%ebx ;\
-+ mix_col(aes_fl_tab) ;\
-+ pop %ebx ;\
-+ xor %eax,%esi ;\
-+ xor %esi,%ebp ;\
-+ mov %esi,32*p1+16(%edi) ;\
-+ mov %ebp,32*p1+20(%edi) ;\
-+ xor %ebp,%edx ;\
-+ xor %edx,%ebx ;\
-+ mov %edx,32*p1+24(%edi) ;\
-+ mov %ebx,32*p1+28(%edi)
-+
-+ .align ALIGN32BYTES
-+aes_set_key:
-+ pushfl // save EFLAGS: cld below changes the direction flag, popfl restores it on exit
-+ push %ebp
-+ mov %esp,%ebp // frame: 4(%ebp)=saved flags, 8(%ebp)=return address, arguments from 12(%ebp)
-+ sub $slen,%esp // reserve slen bytes for locals (cnt is the one used below)
-+ push %ebx
-+ push %esi
-+ push %edi
-+
-+ mov aes_cx(%ebp),%edx // edx -> AES context
-+
-+ mov key_ln(%ebp),%ecx // key length
-+ cmpl $128,%ecx // values below 128 are taken as a byte count
-+ jb aes_30
-+ shr $3,%ecx // 128 and above is taken as a bit count: bits -> bytes
-+aes_30: cmpl $32,%ecx
-+ je aes_32
-+ cmpl $24,%ecx
-+ je aes_32
-+ mov $16,%ecx // anything other than 32 or 24 bytes falls back to 16
-+aes_32: shr $2,%ecx // bytes -> 32-bit words (4, 6 or 8)
-+ mov %ecx,nkey(%edx)
-+
-+ lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
-+ mov %eax,nrnd(%edx)
-+
-+ mov in_key(%ebp),%esi // key input array
-+ lea ekey(%edx),%edi // key position in AES context
-+ cld // make the string instructions below advance esi/edi upwards
-+ push %ebp // ebp is loaded with a key word below; the frame pointer is restored at aes_37
-+ mov %ecx,%eax // save key length in eax
-+ rep ; movsl // words in the key schedule
-+ mov -4(%esi),%ebx // put some values in registers
-+ mov -8(%esi),%edx // to allow faster code
-+ mov -12(%esi),%ebp
-+ mov -16(%esi),%esi // esi, ebp, edx, ebx now hold the last four key words copied, in that order
-+
-+ cmpl $4,%eax // jump on key size
-+ je aes_36
-+ cmpl $6,%eax
-+ je aes_35
-+
-+ ksc8(0) // 8-word key (neither 4 nor 6 words)
-+ ksc8(1)
-+ ksc8(2)
-+ ksc8(3)
-+ ksc8(4)
-+ ksc8(5)
-+ ksc8(6)
-+ jmp aes_37
-+aes_35: ksc6(0) // 6-word key
-+ ksc6(1)
-+ ksc6(2)
-+ ksc6(3)
-+ ksc6(4)
-+ ksc6(5)
-+ ksc6(6)
-+ ksc6(7)
-+ jmp aes_37
-+aes_36: ksc4(0) // 4-word key
-+ ksc4(1)
-+ ksc4(2)
-+ ksc4(3)
-+ ksc4(4)
-+ ksc4(5)
-+ ksc4(6)
-+ ksc4(7)
-+ ksc4(8)
-+ ksc4(9)
-+aes_37: pop %ebp // restore the frame pointer pushed before the key copy
-+ mov aes_cx(%ebp),%edx // edx -> AES context
-+ cmpl $0,ed_flg(%ebp)
-+ jne aes_39 // nonzero ed_flg: encryption key only, skip the decryption schedule
-+
-+// compile decryption key schedule from encryption schedule - reverse
-+// order and do mix_column operation on round keys except first and last
-+
-+ mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
-+ shl $2,%eax // eax = 4 * nrnd
-+ lea dkey(%edx,%eax,4),%edi // edi = cx + dkey + 16 * nrnd
-+ lea ekey(%edx),%esi // kf = cx->e_key
-+
-+ movsl // copy first round key (unmodified)
-+ movsl
-+ movsl
-+ movsl
-+ sub $32,%edi // the copy advanced edi by 16; step back to the preceding 16-byte slot
-+ movl $1,cnt(%ebp) // cnt = 1; the loop below repeats while cnt < nrnd
-+aes_38: // do mix column on each column of
-+ lodsl // each round key
-+ mov %eax,%ebx
-+ mix_col(aes_im_tab)
-+ stosl
-+ lodsl
-+ mov %eax,%ebx
-+ mix_col(aes_im_tab)
-+ stosl
-+ lodsl
-+ mov %eax,%ebx
-+ mix_col(aes_im_tab)
-+ stosl
-+ lodsl
-+ mov %eax,%ebx
-+ mix_col(aes_im_tab)
-+ stosl
-+ sub $32,%edi // step the destination back to the preceding 16-byte slot
-+
-+ incl cnt(%ebp)
-+ mov cnt(%ebp),%eax
-+ cmp nrnd(%edx),%eax
-+ jb aes_38
-+
-+ movsl // copy last round key (unmodified)
-+ movsl
-+ movsl
-+ movsl
-+aes_39: pop %edi
-+ pop %esi
-+ pop %ebx
-+ mov %ebp,%esp // discard the locals reserved with slen
-+ pop %ebp
-+ popfl // restore the caller's EFLAGS saved on entry
-+ ret
-+
-+
-+// finite field multiplies by {02}, {04} and {08}
-+
-+#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
-+#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
-+#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
-+
-+// finite field multiplies required in table generation
-+
-+#define f3(x) (f2(x) ^ x)
-+#define f9(x) (f8(x) ^ x)
-+#define fb(x) (f8(x) ^ f2(x) ^ x)
-+#define fd(x) (f8(x) ^ f4(x) ^ x)
-+#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
-+
-+// These defines generate the forward table entries
-+
-+#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
-+#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
-+#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
-+#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
-+
-+// These defines generate the inverse table entries
-+
-+#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
-+#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
-+#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
-+#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
-+
-+// These defines generate entries for the last round tables
-+
-+#define w0(x) (x)
-+#define w1(x) (x << 8)
-+#define w2(x) (x << 16)
-+#define w3(x) (x << 24)
-+
-+// macro to generate inverse mix column tables (needed for the key schedule)
-+
-+#define im_data0(p1) \
-+ .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
-+ .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
-+ .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
-+ .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
-+#define im_data1(p1) \
-+ .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
-+ .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
-+ .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
-+ .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
-+#define im_data2(p1) \
-+ .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
-+ .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
-+ .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
-+ .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
-+#define im_data3(p1) \
-+ .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
-+ .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
-+ .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
-+ .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
-+#define im_data4(p1) \
-+ .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
-+ .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
-+ .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
-+ .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
-+#define im_data5(p1) \
-+ .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
-+ .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
-+ .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
-+ .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
-+#define im_data6(p1) \
-+ .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
-+ .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
-+ .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
-+ .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
-+#define im_data7(p1) \
-+ .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
-+ .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
-+ .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
-+ .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
-+
-+// S-box data - 256 entries
-+
-+#define sb_data0(p1) \
-+ .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
-+ .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
-+ .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
-+ .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
-+#define sb_data1(p1) \
-+ .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
-+ .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
-+ .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
-+ .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
-+#define sb_data2(p1) \
-+ .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
-+ .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
-+ .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
-+ .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
-+#define sb_data3(p1) \
-+ .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
-+ .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
-+ .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
-+ .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
-+#define sb_data4(p1) \
-+ .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
-+ .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
-+ .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
-+ .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
-+#define sb_data5(p1) \
-+ .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
-+ .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
-+ .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
-+ .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
-+#define sb_data6(p1) \
-+ .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
-+ .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
-+ .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
-+ .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
-+#define sb_data7(p1) \
-+ .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
-+ .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
-+ .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
-+ .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
-+
-+// Inverse S-box data - 256 entries
-+
-+#define ib_data0(p1) \
-+ .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
-+ .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
-+ .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
-+ .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
-+#define ib_data1(p1) \
-+ .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
-+ .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
-+ .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
-+ .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
-+#define ib_data2(p1) \
-+ .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
-+ .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
-+ .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
-+ .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
-+#define ib_data3(p1) \
-+ .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
-+ .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
-+ .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
-+ .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
-+#define ib_data4(p1) \
-+ .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
-+ .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
-+ .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
-+ .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
-+#define ib_data5(p1) \
-+ .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
-+ .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
-+ .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
-+ .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
-+#define ib_data6(p1) \
-+ .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
-+ .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
-+ .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
-+ .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
-+#define ib_data7(p1) \
-+ .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
-+ .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
-+ .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
-+ .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
-+
-+// The rcon_table (needed for the key schedule)
-+//
-+// Here is original Dr Brian Gladman's source code:
-+// _rcon_tab:
-+// %assign x 1
-+// %rep 29
-+// dd x
-+// %assign x f2(x)
-+// %endrep
-+//
-+// Here is precomputed output (it's more portable this way):
-+
-+ .align ALIGN32BYTES
-+aes_rcon_tab:
-+ .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
-+ .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
-+ .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
-+ .long 0xb3,0x7d,0xfa,0xef,0xc5
-+
-+// The forward xor tables
-+
-+ .align ALIGN32BYTES
-+aes_ft_tab:
-+ sb_data0(u0)
-+ sb_data1(u0)
-+ sb_data2(u0)
-+ sb_data3(u0)
-+ sb_data4(u0)
-+ sb_data5(u0)
-+ sb_data6(u0)
-+ sb_data7(u0)
-+
-+ sb_data0(u1)
-+ sb_data1(u1)
-+ sb_data2(u1)
-+ sb_data3(u1)
-+ sb_data4(u1)
-+ sb_data5(u1)
-+ sb_data6(u1)
-+ sb_data7(u1)
-+
-+ sb_data0(u2)
-+ sb_data1(u2)
-+ sb_data2(u2)
-+ sb_data3(u2)
-+ sb_data4(u2)
-+ sb_data5(u2)
-+ sb_data6(u2)
-+ sb_data7(u2)
-+
-+ sb_data0(u3)
-+ sb_data1(u3)
-+ sb_data2(u3)
-+ sb_data3(u3)
-+ sb_data4(u3)
-+ sb_data5(u3)
-+ sb_data6(u3)
-+ sb_data7(u3)
-+
-+ .align ALIGN32BYTES
-+aes_fl_tab:
-+ sb_data0(w0)
-+ sb_data1(w0)
-+ sb_data2(w0)
-+ sb_data3(w0)
-+ sb_data4(w0)
-+ sb_data5(w0)
-+ sb_data6(w0)
-+ sb_data7(w0)
-+
-+ sb_data0(w1)
-+ sb_data1(w1)
-+ sb_data2(w1)
-+ sb_data3(w1)
-+ sb_data4(w1)
-+ sb_data5(w1)
-+ sb_data6(w1)
-+ sb_data7(w1)
-+
-+ sb_data0(w2)
-+ sb_data1(w2)
-+ sb_data2(w2)
-+ sb_data3(w2)
-+ sb_data4(w2)
-+ sb_data5(w2)
-+ sb_data6(w2)
-+ sb_data7(w2)
-+
-+ sb_data0(w3)
-+ sb_data1(w3)
-+ sb_data2(w3)
-+ sb_data3(w3)
-+ sb_data4(w3)
-+ sb_data5(w3)
-+ sb_data6(w3)
-+ sb_data7(w3)
-+
-+// The inverse xor tables
-+
-+ .align ALIGN32BYTES
-+aes_it_tab:
-+ ib_data0(v0)
-+ ib_data1(v0)
-+ ib_data2(v0)
-+ ib_data3(v0)
-+ ib_data4(v0)
-+ ib_data5(v0)
-+ ib_data6(v0)
-+ ib_data7(v0)
-+
-+ ib_data0(v1)
-+ ib_data1(v1)
-+ ib_data2(v1)
-+ ib_data3(v1)
-+ ib_data4(v1)
-+ ib_data5(v1)
-+ ib_data6(v1)
-+ ib_data7(v1)
-+
-+ ib_data0(v2)
-+ ib_data1(v2)
-+ ib_data2(v2)
-+ ib_data3(v2)
-+ ib_data4(v2)
-+ ib_data5(v2)
-+ ib_data6(v2)
-+ ib_data7(v2)
-+
-+ ib_data0(v3)
-+ ib_data1(v3)
-+ ib_data2(v3)
-+ ib_data3(v3)
-+ ib_data4(v3)
-+ ib_data5(v3)
-+ ib_data6(v3)
-+ ib_data7(v3)
-+
-+ .align ALIGN32BYTES
-+aes_il_tab:
-+ ib_data0(w0)
-+ ib_data1(w0)
-+ ib_data2(w0)
-+ ib_data3(w0)
-+ ib_data4(w0)
-+ ib_data5(w0)
-+ ib_data6(w0)
-+ ib_data7(w0)
-+
-+ ib_data0(w1)
-+ ib_data1(w1)
-+ ib_data2(w1)
-+ ib_data3(w1)
-+ ib_data4(w1)
-+ ib_data5(w1)
-+ ib_data6(w1)
-+ ib_data7(w1)
-+
-+ ib_data0(w2)
-+ ib_data1(w2)
-+ ib_data2(w2)
-+ ib_data3(w2)
-+ ib_data4(w2)
-+ ib_data5(w2)
-+ ib_data6(w2)
-+ ib_data7(w2)
-+
-+ ib_data0(w3)
-+ ib_data1(w3)
-+ ib_data2(w3)
-+ ib_data3(w3)
-+ ib_data4(w3)
-+ ib_data5(w3)
-+ ib_data6(w3)
-+ ib_data7(w3)
-+
-+// The inverse mix column tables
-+
-+ .align ALIGN32BYTES
-+aes_im_tab:
-+ im_data0(v0)
-+ im_data1(v0)
-+ im_data2(v0)
-+ im_data3(v0)
-+ im_data4(v0)
-+ im_data5(v0)
-+ im_data6(v0)
-+ im_data7(v0)
-+
-+ im_data0(v1)
-+ im_data1(v1)
-+ im_data2(v1)
-+ im_data3(v1)
-+ im_data4(v1)
-+ im_data5(v1)
-+ im_data6(v1)
-+ im_data7(v1)
-+
-+ im_data0(v2)
-+ im_data1(v2)
-+ im_data2(v2)
-+ im_data3(v2)
-+ im_data4(v2)
-+ im_data5(v2)
-+ im_data6(v2)
-+ im_data7(v2)
-+
-+ im_data0(v3)
-+ im_data1(v3)
-+ im_data2(v3)
-+ im_data3(v3)
-+ im_data4(v3)
-+ im_data5(v3)
-+ im_data6(v3)
-+ im_data7(v3)
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/aes.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1427 @@
-+// I retain copyright in this code but I encourage its free use provided
-+// that I don't carry any responsibility for the results. I am especially
-+// happy to see it used in free and open source software. If you do use
-+// it I would appreciate an acknowledgement of its origin in the code or
-+// the product that results and I would also appreciate knowing a little
-+// about the use to which it is being put. I am grateful to Frank Yellin
-+// for some ideas that are used in this implementation.
-+//
-+// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
-+//
-+// This is an implementation of the AES encryption algorithm (Rijndael)
-+// designed by Joan Daemen and Vincent Rijmen. This version is designed
-+// to provide both fixed and dynamic block and key lengths and can also
-+// run with either big or little endian internal byte order (see aes.h).
-+// It inputs block and key lengths in bytes with the legal values being
-+// 16, 24 and 32.
-+
-+/*
-+ * Modified by Jari Ruusu, May 1 2001
-+ * - Fixed some compile warnings, code was ok but gcc warned anyway.
-+ * - Changed basic types: byte -> unsigned char, word -> u_int32_t
-+ * - Major name space cleanup: Names visible to outside now begin
-+ * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
-+ * - Removed C++ and DLL support as part of name space cleanup.
-+ * - Eliminated unnecessary recomputation of tables. (actual bug fix)
-+ * - Merged precomputed constant tables to aes.c file.
-+ * - Removed data alignment restrictions for portability reasons.
-+ * - Made block and key lengths accept bit count (128/192/256)
-+ * as well byte count (16/24/32).
-+ * - Removed all error checks. This change also eliminated the need
-+ * to preinitialize the context struct to zero.
-+ * - Removed some totally unused constants.
-+ */
-+
-+#include "klips-crypto/aes.h"
-+
-+#ifdef OCF_ASSIST
-+#include "klips-crypto/ocf_assist.h"
-+#endif
-+
-+// CONFIGURATION OPTIONS (see also aes.h)
-+//
-+// 1. Define UNROLL for full loop unrolling in encryption and decryption.
-+// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
-+// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
-+// 4. Define FF_TABLES to use tables for field multiplies and inverses.
-+// Do not enable this without understanding stack space requirements.
-+// 5. Define ARRAYS to use arrays to hold the local state block. If this
-+// is not defined, individually declared 32-bit words are used.
-+// 6. Define FAST_VARIABLE if a high speed variable block implementation
-+// is needed (essentially three separate fixed block size code sequences)
-+// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
-+// version using 1 table (2 kbytes of table space) or 4 tables (8
-+// kbytes of table space) for higher speed.
-+// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
-+// increase by using tables for the last rounds but with more table
-+// space (2 or 8 kbytes extra).
-+// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
-+// slower version is provided.
-+// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
-+// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
-+
-+#define UNROLL
-+//#define PARTIAL_UNROLL
-+
-+#define FIXED_TABLES
-+//#define FF_TABLES
-+//#define ARRAYS
-+#define FAST_VARIABLE
-+
-+//#define ONE_TABLE
-+#define FOUR_TABLES
-+
-+//#define ONE_LR_TABLE
-+#define FOUR_LR_TABLES
-+
-+//#define ONE_IM_TABLE
-+#define FOUR_IM_TABLES
-+
-+#if defined(UNROLL) && defined (PARTIAL_UNROLL)
-+#error both UNROLL and PARTIAL_UNROLL are defined
-+#endif
-+
-+#if defined(ONE_TABLE) && defined (FOUR_TABLES)
-+#error both ONE_TABLE and FOUR_TABLES are defined
-+#endif
-+
-+#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
-+#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
-+#endif
-+
-+#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
-+#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
-+#endif
-+
-+#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
-+#error an illegal block size has been specified
-+#endif
-+
-+// upr(x,n): rotates bytes within words by n positions, moving bytes
-+// to higher index positions with wrap around into low positions
-+// ups(x,n): moves bytes by n positions to higher index positions in
-+// words but without wrap around
-+// bval(x,n): extracts a byte from a word
-+
-+#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
-+#define ups(x,n) ((x) << 8 * (n))
-+#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
-+#define bytes2word(b0, b1, b2, b3) \
-+ ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
-+
-+
-+/* little endian processor without data alignment restrictions: AES_LE_OK */
-+/* original code: i386 */
-+#if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386)
-+#define AES_LE_OK 1
-+/* added (tested): alpha --jjo */
-+#elif defined(__alpha__)|| defined (__alpha)
-+#define AES_LE_OK 1
-+/* added (tested): ia64 --jjo */
-+#elif defined(__ia64__)|| defined (__ia64)
-+#define AES_LE_OK 1
-+#endif
-+
-+#ifdef AES_LE_OK
-+/* little endian processor without data alignment restrictions */
-+#define word_in(x) *(u_int32_t*)(x)
-+#define const_word_in(x) *(const u_int32_t*)(x)
-+#define word_out(x,v) *(u_int32_t*)(x) = (v)
-+#define const_word_out(x,v) *(const u_int32_t*)(x) = (v)
-+#else
-+/* slower but generic big endian or with data alignment restrictions */
-+/* some additional "const" touches to stop "gcc -Wcast-qual" complains --jjo */
-+#define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
-+#define const_word_in(x) ((const u_int32_t)(((const unsigned char *)(x))[0])|((const u_int32_t)(((const unsigned char *)(x))[1])<<8)|((const u_int32_t)(((const unsigned char *)(x))[2])<<16)|((const u_int32_t)(((const unsigned char *)(x))[3])<<24))
-+#define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24)
-+#define const_word_out(x,v) ((const unsigned char *)(x))[0]=(v),((const unsigned char *)(x))[1]=((v)>>8),((const unsigned char *)(x))[2]=((v)>>16),((const unsigned char *)(x))[3]=((v)>>24)
-+#endif
-+
-+// Disable at least some poor combinations of options
-+
-+#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
-+#define FIXED_TABLES
-+#undef UNROLL
-+#undef ONE_LR_TABLE
-+#undef FOUR_LR_TABLES
-+#undef ONE_IM_TABLE
-+#undef FOUR_IM_TABLES
-+#elif !defined(FOUR_TABLES)
-+#ifdef FOUR_LR_TABLES
-+#undef FOUR_LR_TABLES
-+#define ONE_LR_TABLE
-+#endif
-+#ifdef FOUR_IM_TABLES
-+#undef FOUR_IM_TABLES
-+#define ONE_IM_TABLE
-+#endif
-+#elif !defined(AES_BLOCK_SIZE)
-+#if defined(UNROLL)
-+#define PARTIAL_UNROLL
-+#undef UNROLL
-+#endif
-+#endif
-+
-+// the finite field modular polynomial and elements
-+
-+#define ff_poly 0x011b
-+#define ff_hi 0x80
-+
-+// multiply four bytes in GF(2^8) by 'x' {02} in parallel
-+
-+#define m1 0x80808080
-+#define m2 0x7f7f7f7f
-+#define m3 0x0000001b
-+#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
-+
-+// The following defines provide alternative definitions of FFmulX that might
-+// give improved performance if a fast 32-bit multiply is not available. Note
-+// that a temporary variable u needs to be defined where FFmulX is used.
-+
-+// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
-+// #define m4 0x1b1b1b1b
-+// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
-+
-+// perform column mix operation on four bytes in parallel
-+
-+#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr(x ^ f2,3) ^ upr(x,2) ^ upr(x,1))
-+
-+#if defined(FIXED_TABLES)
-+
-+// the S-Box table
-+
-+static const unsigned char s_box[256] =
-+{
-+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
-+ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
-+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
-+ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
-+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
-+ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
-+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
-+ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
-+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
-+ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
-+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
-+ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
-+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
-+ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
-+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
-+ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
-+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
-+ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
-+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
-+ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
-+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
-+ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
-+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
-+ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
-+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
-+ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
-+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
-+ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
-+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
-+ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
-+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
-+ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-+};
-+
-+// the inverse S-Box table
-+
-+static const unsigned char inv_s_box[256] =
-+{
-+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
-+ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
-+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
-+ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
-+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
-+ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
-+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
-+ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
-+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
-+ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
-+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
-+ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
-+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
-+ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
-+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
-+ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
-+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
-+ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
-+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
-+ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
-+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
-+ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
-+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
-+ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
-+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
-+ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
-+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
-+ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
-+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
-+ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
-+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
-+ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-+};
-+
-+#define w0(p) 0x000000##p
-+
-+// Number of elements required in this table for different
-+// block and key lengths is:
-+//
-+// Nk = 4 6 8
-+// ----------
-+// Nb = 4 | 10 8 7
-+// 6 | 19 12 11
-+// 8 | 29 19 14
-+//
-+// this table can be a table of bytes if the key schedule
-+// code is adjusted accordingly
-+
-+static const u_int32_t rcon_tab[29] =
-+{
-+ w0(01), w0(02), w0(04), w0(08),
-+ w0(10), w0(20), w0(40), w0(80),
-+ w0(1b), w0(36), w0(6c), w0(d8),
-+ w0(ab), w0(4d), w0(9a), w0(2f),
-+ w0(5e), w0(bc), w0(63), w0(c6),
-+ w0(97), w0(35), w0(6a), w0(d4),
-+ w0(b3), w0(7d), w0(fa), w0(ef),
-+ w0(c5)
-+};
-+
-+#undef w0
-+
-+#define r0(p,q,r,s) 0x##p##q##r##s
-+#define r1(p,q,r,s) 0x##q##r##s##p
-+#define r2(p,q,r,s) 0x##r##s##p##q
-+#define r3(p,q,r,s) 0x##s##p##q##r
-+#define w0(p) 0x000000##p
-+#define w1(p) 0x0000##p##00
-+#define w2(p) 0x00##p##0000
-+#define w3(p) 0x##p##000000
-+
-+#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
-+
-+// data for forward tables (other than last round)
-+
-+#define f_table \
-+ r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
-+ r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
-+ r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
-+ r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
-+ r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
-+ r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
-+ r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
-+ r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
-+ r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
-+ r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
-+ r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
-+ r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
-+ r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
-+ r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
-+ r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
-+ r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
-+ r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
-+ r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
-+ r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
-+ r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
-+ r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
-+ r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
-+ r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
-+ r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
-+ r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
-+ r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
-+ r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
-+ r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
-+ r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
-+ r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
-+ r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
-+ r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
-+ r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
-+ r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
-+ r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
-+ r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
-+ r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
-+ r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
-+ r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
-+ r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
-+ r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
-+ r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
-+ r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
-+ r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
-+ r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
-+ r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
-+ r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
-+ r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
-+ r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
-+ r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
-+ r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
-+ r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
-+ r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
-+ r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
-+ r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
-+ r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
-+ r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
-+ r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
-+ r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
-+ r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
-+ r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
-+ r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
-+ r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
-+ r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
-+
-+// data for inverse tables (other than last round)
-+
-+#define i_table \
-+ r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
-+ r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
-+ r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
-+ r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
-+ r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
-+ r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
-+ r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
-+ r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
-+ r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
-+ r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
-+ r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
-+ r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
-+ r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
-+ r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
-+ r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
-+ r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
-+ r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
-+ r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
-+ r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
-+ r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
-+ r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
-+ r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
-+ r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
-+ r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
-+ r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
-+ r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
-+ r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
-+ r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
-+ r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
-+ r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
-+ r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
-+ r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
-+ r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
-+ r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
-+ r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
-+ r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
-+ r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
-+ r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
-+ r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
-+ r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
-+ r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
-+ r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
-+ r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
-+ r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
-+ r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
-+ r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
-+ r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
-+ r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
-+ r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
-+ r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
-+ r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
-+ r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
-+ r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
-+ r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
-+ r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
-+ r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
-+ r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
-+ r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
-+ r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
-+ r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
-+ r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
-+ r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
-+ r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
-+ r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
-+
-+// generate the required tables in the desired endian format
-+
-+#undef r
-+#define r r0
-+
-+#if defined(ONE_TABLE)
-+static const u_int32_t ft_tab[256] =
-+ { f_table };
-+#elif defined(FOUR_TABLES)
-+static const u_int32_t ft_tab[4][256] =
-+{ { f_table },
-+#undef r
-+#define r r1
-+ { f_table },
-+#undef r
-+#define r r2
-+ { f_table },
-+#undef r
-+#define r r3
-+ { f_table }
-+};
-+#endif
-+
-+#undef r
-+#define r r0
-+#if defined(ONE_TABLE)
-+static const u_int32_t it_tab[256] =
-+ { i_table };
-+#elif defined(FOUR_TABLES)
-+static const u_int32_t it_tab[4][256] =
-+{ { i_table },
-+#undef r
-+#define r r1
-+ { i_table },
-+#undef r
-+#define r r2
-+ { i_table },
-+#undef r
-+#define r r3
-+ { i_table }
-+};
-+#endif
-+
-+#endif
-+
-+#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
-+
-+// data for inverse tables (last round)
-+
-+#define li_table \
-+ w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
-+ w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
-+ w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
-+ w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
-+ w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
-+ w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
-+ w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
-+ w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
-+ w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
-+ w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
-+ w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
-+ w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
-+ w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
-+ w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
-+ w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
-+ w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
-+ w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
-+ w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
-+ w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
-+ w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
-+ w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
-+ w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
-+ w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
-+ w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
-+ w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
-+ w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
-+ w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
-+ w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
-+ w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
-+ w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
-+ w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
-+ w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
-+
-+// generate the required tables in the desired endian format
-+
-+#undef r
-+#define r(p,q,r,s) w0(q)
-+#if defined(ONE_LR_TABLE)
-+static const u_int32_t fl_tab[256] =
-+ { f_table };
-+#elif defined(FOUR_LR_TABLES)
-+static const u_int32_t fl_tab[4][256] =
-+{ { f_table },
-+#undef r
-+#define r(p,q,r,s) w1(q)
-+ { f_table },
-+#undef r
-+#define r(p,q,r,s) w2(q)
-+ { f_table },
-+#undef r
-+#define r(p,q,r,s) w3(q)
-+ { f_table }
-+};
-+#endif
-+
-+#undef w
-+#define w w0
-+#if defined(ONE_LR_TABLE)
-+static const u_int32_t il_tab[256] =
-+ { li_table };
-+#elif defined(FOUR_LR_TABLES)
-+static const u_int32_t il_tab[4][256] =
-+{ { li_table },
-+#undef w
-+#define w w1
-+ { li_table },
-+#undef w
-+#define w w2
-+ { li_table },
-+#undef w
-+#define w w3
-+ { li_table }
-+};
-+#endif
-+
-+#endif
-+
-+#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
-+
-+#define m_table \
-+ r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
-+ r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
-+ r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
-+ r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
-+ r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
-+ r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
-+ r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
-+ r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
-+ r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
-+ r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
-+ r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
-+ r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
-+ r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
-+ r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
-+ r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
-+ r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
-+ r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
-+ r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
-+ r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
-+ r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
-+ r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
-+ r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
-+ r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
-+ r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
-+ r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
-+ r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
-+ r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
-+ r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
-+ r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
-+ r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
-+ r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
-+ r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
-+ r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
-+ r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
-+ r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
-+ r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
-+ r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
-+ r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
-+ r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
-+ r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
-+ r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
-+ r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
-+ r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
-+ r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
-+ r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
-+ r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
-+ r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
-+ r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
-+ r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
-+ r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
-+ r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
-+ r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
-+ r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
-+ r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
-+ r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
-+ r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
-+ r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
-+ r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
-+ r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
-+ r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
-+ r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
-+ r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
-+ r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
-+ r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
-+
-+#undef r
-+#define r r0
-+
-+#if defined(ONE_IM_TABLE)
-+static const u_int32_t im_tab[256] =
-+ { m_table };
-+#elif defined(FOUR_IM_TABLES)
-+static const u_int32_t im_tab[4][256] =
-+{ { m_table },
-+#undef r
-+#define r r1
-+ { m_table },
-+#undef r
-+#define r r2
-+ { m_table },
-+#undef r
-+#define r r3
-+ { m_table }
-+};
-+#endif
-+
-+#endif
-+
-+#else
-+
-+static int tab_gen = 0;
-+
-+static unsigned char s_box[256]; // the S box
-+static unsigned char inv_s_box[256]; // the inverse S box
-+static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
-+
-+#if defined(ONE_TABLE)
-+static u_int32_t ft_tab[256];
-+static u_int32_t it_tab[256];
-+#elif defined(FOUR_TABLES)
-+static u_int32_t ft_tab[4][256];
-+static u_int32_t it_tab[4][256];
-+#endif
-+
-+#if defined(ONE_LR_TABLE)
-+static u_int32_t fl_tab[256];
-+static u_int32_t il_tab[256];
-+#elif defined(FOUR_LR_TABLES)
-+static u_int32_t fl_tab[4][256];
-+static u_int32_t il_tab[4][256];
-+#endif
-+
-+#if defined(ONE_IM_TABLE)
-+static u_int32_t im_tab[256];
-+#elif defined(FOUR_IM_TABLES)
-+static u_int32_t im_tab[4][256];
-+#endif
-+
-+// Generate the tables for the dynamic table option
-+
-+#if !defined(FF_TABLES)
-+
-+// It will generally be sensible to use tables to compute finite
-+// field multiplies and inverses but where memory is scarse this
-+// code might sometimes be better.
-+
-+// return 2 ^ (n - 1) where n is the bit number of the highest bit
-+// set in x with x in the range 1 < x < 0x00000200. This form is
-+// used so that locals within FFinv can be bytes rather than words
-+
-+static unsigned char hibit(const u_int32_t x)
-+{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2));
-+
-+ r |= (r >> 2);
-+ r |= (r >> 4);
-+ return (r + 1) >> 1;
-+}
-+
-+// return the inverse of the finite field element x
-+
-+static unsigned char FFinv(const unsigned char x)
-+{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
-+
-+ if(x < 2) return x;
-+
-+ for(;;)
-+ {
-+ if(!n1) return v1;
-+
-+ while(n2 >= n1)
-+ {
-+ n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
-+ }
-+
-+ if(!n2) return v2;
-+
-+ while(n1 >= n2)
-+ {
-+ n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
-+ }
-+ }
-+}
-+
-+// define the finite field multiplies required for Rijndael
-+
-+#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0))
-+#define FFmul03(x) ((x) ^ FFmul02(x))
-+#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x))))
-+#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x))))
-+#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x))))
-+#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x)))
-+
-+#else
-+
-+#define FFinv(x) ((x) ? pow[255 - log[x]]: 0)
-+
-+#define FFmul02(x) (x ? pow[log[x] + 0x19] : 0)
-+#define FFmul03(x) (x ? pow[log[x] + 0x01] : 0)
-+#define FFmul09(x) (x ? pow[log[x] + 0xc7] : 0)
-+#define FFmul0b(x) (x ? pow[log[x] + 0x68] : 0)
-+#define FFmul0d(x) (x ? pow[log[x] + 0xee] : 0)
-+#define FFmul0e(x) (x ? pow[log[x] + 0xdf] : 0)
-+
-+#endif
-+
-+// The forward and inverse affine transformations used in the S-box
-+
-+#define fwd_affine(x) \
-+ (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))
-+
-+#define inv_affine(x) \
-+ (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
-+
-+static void gen_tabs(void)
-+{ u_int32_t i, w;
-+
-+#if defined(FF_TABLES)
-+
-+ unsigned char pow[512], log[256];
-+
-+ // log and power tables for GF(2^8) finite field with
-+ // 0x011b as modular polynomial - the simplest primitive
-+ // root is 0x03, used here to generate the tables
-+
-+ i = 0; w = 1;
-+ do
-+ {
-+ pow[i] = (unsigned char)w;
-+ pow[i + 255] = (unsigned char)w;
-+ log[w] = (unsigned char)i++;
-+ w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
-+ }
-+ while (w != 1);
-+
-+#endif
-+
-+ for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
-+ {
-+ rcon_tab[i] = bytes2word(w, 0, 0, 0);
-+ w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
-+ }
-+
-+ for(i = 0; i < 256; ++i)
-+ { unsigned char b;
-+
-+ s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
-+
-+ w = bytes2word(b, 0, 0, 0);
-+#if defined(ONE_LR_TABLE)
-+ fl_tab[i] = w;
-+#elif defined(FOUR_LR_TABLES)
-+ fl_tab[0][i] = w;
-+ fl_tab[1][i] = upr(w,1);
-+ fl_tab[2][i] = upr(w,2);
-+ fl_tab[3][i] = upr(w,3);
-+#endif
-+ w = bytes2word(FFmul02(b), b, b, FFmul03(b));
-+#if defined(ONE_TABLE)
-+ ft_tab[i] = w;
-+#elif defined(FOUR_TABLES)
-+ ft_tab[0][i] = w;
-+ ft_tab[1][i] = upr(w,1);
-+ ft_tab[2][i] = upr(w,2);
-+ ft_tab[3][i] = upr(w,3);
-+#endif
-+ inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i));
-+
-+ w = bytes2word(b, 0, 0, 0);
-+#if defined(ONE_LR_TABLE)
-+ il_tab[i] = w;
-+#elif defined(FOUR_LR_TABLES)
-+ il_tab[0][i] = w;
-+ il_tab[1][i] = upr(w,1);
-+ il_tab[2][i] = upr(w,2);
-+ il_tab[3][i] = upr(w,3);
-+#endif
-+ w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b));
-+#if defined(ONE_TABLE)
-+ it_tab[i] = w;
-+#elif defined(FOUR_TABLES)
-+ it_tab[0][i] = w;
-+ it_tab[1][i] = upr(w,1);
-+ it_tab[2][i] = upr(w,2);
-+ it_tab[3][i] = upr(w,3);
-+#endif
-+#if defined(ONE_IM_TABLE)
-+ im_tab[b] = w;
-+#elif defined(FOUR_IM_TABLES)
-+ im_tab[0][b] = w;
-+ im_tab[1][b] = upr(w,1);
-+ im_tab[2][b] = upr(w,2);
-+ im_tab[3][b] = upr(w,3);
-+#endif
-+
-+ }
-+}
-+
-+#endif
-+
-+#define no_table(x,box,vf,rf,c) bytes2word( \
-+ box[bval(vf(x,0,c),rf(0,c))], \
-+ box[bval(vf(x,1,c),rf(1,c))], \
-+ box[bval(vf(x,2,c),rf(2,c))], \
-+ box[bval(vf(x,3,c),rf(3,c))])
-+
-+#define one_table(x,op,tab,vf,rf,c) \
-+ ( tab[bval(vf(x,0,c),rf(0,c))] \
-+ ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
-+ ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
-+ ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
-+
-+#define four_tables(x,tab,vf,rf,c) \
-+ ( tab[0][bval(vf(x,0,c),rf(0,c))] \
-+ ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-+ ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-+ ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-+
-+#define vf1(x,r,c) (x)
-+#define rf1(r,c) (r)
-+#define rf2(r,c) ((r-c)&3)
-+
-+#if defined(FOUR_LR_TABLES)
-+#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
-+#elif defined(ONE_LR_TABLE)
-+#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
-+#else
-+#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
-+#endif
-+
-+#if defined(FOUR_IM_TABLES)
-+#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
-+#elif defined(ONE_IM_TABLE)
-+#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
-+#else
-+#define inv_mcol(x) \
-+ (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
-+ f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
-+#endif
-+
-+// Subroutine to set the block size (if variable) in bytes, legal
-+// values being 16, 24 and 32.
-+
-+#if defined(AES_BLOCK_SIZE)
-+#define nc (AES_BLOCK_SIZE / 4)
-+#else
-+#define nc (cx->aes_Ncol)
-+
-+void aes_set_blk(aes_context *cx, int n_bytes)
-+{
-+#if !defined(FIXED_TABLES)
-+ if(!tab_gen) { gen_tabs(); tab_gen = 1; }
-+#endif
-+
-+ switch(n_bytes) {
-+ case 32: /* bytes */
-+ case 256: /* bits */
-+ nc = 8;
-+ break;
-+ case 24: /* bytes */
-+ case 192: /* bits */
-+ nc = 6;
-+ break;
-+ case 16: /* bytes */
-+ case 128: /* bits */
-+ default:
-+ nc = 4;
-+ break;
-+ }
-+}
-+
-+#endif
-+
-+// Initialise the key schedule from the user supplied key. The key
-+// length is now specified in bytes - 16, 24 or 32 as appropriate.
-+// This corresponds to bit lengths of 128, 192 and 256 bits, and
-+// to Nk values of 4, 6 and 8 respectively.
-+
-+#define mx(t,f) (*t++ = inv_mcol(*f),f++)
-+#define cp(t,f) *t++ = *f++
-+
-+#if AES_BLOCK_SIZE == 16
-+#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
-+#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
-+#elif AES_BLOCK_SIZE == 24
-+#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
-+ cp(d,s); cp(d,s)
-+#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
-+ mx(d,s); mx(d,s)
-+#elif AES_BLOCK_SIZE == 32
-+#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
-+ cp(d,s); cp(d,s); cp(d,s); cp(d,s)
-+#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
-+ mx(d,s); mx(d,s); mx(d,s); mx(d,s)
-+#else
-+
-+#define cpy(d,s) \
-+switch(nc) \
-+{ case 8: cp(d,s); cp(d,s); \
-+ case 6: cp(d,s); cp(d,s); \
-+ case 4: cp(d,s); cp(d,s); \
-+ cp(d,s); cp(d,s); \
-+}
-+
-+#define mix(d,s) \
-+switch(nc) \
-+{ case 8: mx(d,s); mx(d,s); \
-+ case 6: mx(d,s); mx(d,s); \
-+ case 4: mx(d,s); mx(d,s); \
-+ mx(d,s); mx(d,s); \
-+}
-+
-+#endif
-+
-+void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f)
-+{ u_int32_t *kf, *kt, rci;
-+// Builds cx->aes_e_key from in_key; when f == 0 it also derives cx->aes_d_key.
-+#if !defined(FIXED_TABLES)
-+ if(!tab_gen) { gen_tabs(); tab_gen = 1; }
-+#endif
-+
-+/* only need to do a special set_key for the cryptodev hw acceleration */
-+#ifdef OCF_ASSIST
-+ if (ocf_aes_assist() & OCF_PROVIDES_AES) {
-+ ocf_aes_set_key(cx, in_key, n_bytes, f);
-+ return;
-+ }
-+#endif
-+// n_bytes may be a byte count or a bit count; anything unrecognised selects Nkey = 4
-+ switch(n_bytes) {
-+ case 32: /* bytes */
-+ case 256: /* bits */
-+ cx->aes_Nkey = 8;
-+ break;
-+ case 24: /* bytes */
-+ case 192: /* bits */
-+ cx->aes_Nkey = 6;
-+ break;
-+ case 16: /* bytes */
-+ case 128: /* bits */
-+ default:
-+ cx->aes_Nkey = 4;
-+ break;
-+ }
-+
-+ cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; // rounds = max(Nkey, nc) + 6
-+// the first four key words are always loaded; cases 6 and 8 below load the rest
-+ cx->aes_e_key[0] = const_word_in(in_key );
-+ cx->aes_e_key[1] = const_word_in(in_key + 4);
-+ cx->aes_e_key[2] = const_word_in(in_key + 8);
-+ cx->aes_e_key[3] = const_word_in(in_key + 12);
-+
-+ kf = cx->aes_e_key;
-+ kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; // the expansion loops stop once kf reaches kt
-+ rci = 0; // index of the next round constant in rcon_tab
-+
-+ switch(cx->aes_Nkey)
-+ {
-+ case 4: do
-+ { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
-+ kf[5] = kf[1] ^ kf[4];
-+ kf[6] = kf[2] ^ kf[5];
-+ kf[7] = kf[3] ^ kf[6];
-+ kf += 4;
-+ }
-+ while(kf < kt);
-+ break;
-+
-+ case 6: cx->aes_e_key[4] = const_word_in(in_key + 16);
-+ cx->aes_e_key[5] = const_word_in(in_key + 20);
-+ do
-+ { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
-+ kf[ 7] = kf[1] ^ kf[ 6];
-+ kf[ 8] = kf[2] ^ kf[ 7];
-+ kf[ 9] = kf[3] ^ kf[ 8];
-+ kf[10] = kf[4] ^ kf[ 9];
-+ kf[11] = kf[5] ^ kf[10];
-+ kf += 6;
-+ }
-+ while(kf < kt);
-+ break;
-+
-+ case 8: cx->aes_e_key[4] = const_word_in(in_key + 16);
-+ cx->aes_e_key[5] = const_word_in(in_key + 20);
-+ cx->aes_e_key[6] = const_word_in(in_key + 24);
-+ cx->aes_e_key[7] = const_word_in(in_key + 28);
-+ do
-+ { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
-+ kf[ 9] = kf[1] ^ kf[ 8];
-+ kf[10] = kf[2] ^ kf[ 9];
-+ kf[11] = kf[3] ^ kf[10];
-+ kf[12] = kf[4] ^ ls_box(kf[11],0); // 8-word keys apply ls_box a second time, without a round constant
-+ kf[13] = kf[5] ^ kf[12];
-+ kf[14] = kf[6] ^ kf[13];
-+ kf[15] = kf[7] ^ kf[14];
-+ kf += 8;
-+ }
-+ while (kf < kt);
-+ break;
-+ }
-+// when f == 0, derive the decryption schedule from the encryption schedule just built
-+ if(!f)
-+ { u_int32_t i;
-+
-+ kt = cx->aes_d_key + nc * cx->aes_Nrnd;
-+ kf = cx->aes_e_key;
-+// kf walks e_key upwards while kt walks d_key downwards, one nc-word round key at a time
-+ cpy(kt, kf); kt -= 2 * nc; // cpy advanced kt by nc, so the net step is -nc
-+
-+ for(i = 1; i < cx->aes_Nrnd; ++i)
-+ {
-+#if defined(ONE_TABLE) || defined(FOUR_TABLES)
-+#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
-+ u_int32_t f2, f4, f8, f9; // scratch words used by the non-table form of inv_mcol
-+#endif
-+ mix(kt, kf); // middle round keys pass through inv_mcol in the table-driven builds
-+#else
-+ cpy(kt, kf);
-+#endif
-+ kt -= 2 * nc;
-+ }
-+// final copy: the last encryption round key lands, unchanged, at the start of d_key
-+ cpy(kt, kf);
-+ }
-+}
-+
-+// y = output word, x = input word, r = row, c = column
-+// for r = 0, 1, 2 and 3 = column accessed for row r
-+
-+#if defined(ARRAYS)
-+#define s(x,c) x[c]
-+#else
-+#define s(x,c) x##c
-+#endif
-+
-+// I am grateful to Frank Yellin for the following constructions
-+// which, given the column (c) of the output state variable that
-+// is being computed, return the input state variables which are
-+// needed for each row (r) of the state
-+
-+// For the fixed block size options, compilers reduce these two
-+// expressions to fixed variable references. For variable block
-+// size code conditional clauses will sometimes be returned
-+
-+#define unused 77 // Sunset Strip
-+
-+#define fwd_var(x,r,c) \
-+ ( r==0 ? \
-+ ( c==0 ? s(x,0) \
-+ : c==1 ? s(x,1) \
-+ : c==2 ? s(x,2) \
-+ : c==3 ? s(x,3) \
-+ : c==4 ? s(x,4) \
-+ : c==5 ? s(x,5) \
-+ : c==6 ? s(x,6) \
-+ : s(x,7)) \
-+ : r==1 ? \
-+ ( c==0 ? s(x,1) \
-+ : c==1 ? s(x,2) \
-+ : c==2 ? s(x,3) \
-+ : c==3 ? nc==4 ? s(x,0) : s(x,4) \
-+ : c==4 ? s(x,5) \
-+ : c==5 ? nc==8 ? s(x,6) : s(x,0) \
-+ : c==6 ? s(x,7) \
-+ : s(x,0)) \
-+ : r==2 ? \
-+ ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
-+ : c==1 ? nc==8 ? s(x,4) : s(x,3) \
-+ : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
-+ : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
-+ : c==4 ? nc==8 ? s(x,7) : s(x,0) \
-+ : c==5 ? nc==8 ? s(x,0) : s(x,1) \
-+ : c==6 ? s(x,1) \
-+ : s(x,2)) \
-+ : \
-+ ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
-+ : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
-+ : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
-+ : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
-+ : c==4 ? nc==8 ? s(x,0) : s(x,1) \
-+ : c==5 ? nc==8 ? s(x,1) : s(x,2) \
-+ : c==6 ? s(x,2) \
-+ : s(x,3)))
-+
-+#define inv_var(x,r,c) \
-+ ( r==0 ? \
-+ ( c==0 ? s(x,0) \
-+ : c==1 ? s(x,1) \
-+ : c==2 ? s(x,2) \
-+ : c==3 ? s(x,3) \
-+ : c==4 ? s(x,4) \
-+ : c==5 ? s(x,5) \
-+ : c==6 ? s(x,6) \
-+ : s(x,7)) \
-+ : r==1 ? \
-+ ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
-+ : c==1 ? s(x,0) \
-+ : c==2 ? s(x,1) \
-+ : c==3 ? s(x,2) \
-+ : c==4 ? s(x,3) \
-+ : c==5 ? s(x,4) \
-+ : c==6 ? s(x,5) \
-+ : s(x,6)) \
-+ : r==2 ? \
-+ ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
-+ : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
-+ : c==2 ? nc==8 ? s(x,7) : s(x,0) \
-+ : c==3 ? nc==8 ? s(x,0) : s(x,1) \
-+ : c==4 ? nc==8 ? s(x,1) : s(x,2) \
-+ : c==5 ? nc==8 ? s(x,2) : s(x,3) \
-+ : c==6 ? s(x,3) \
-+ : s(x,4)) \
-+ : \
-+ ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
-+ : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
-+ : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
-+ : c==3 ? nc==8 ? s(x,7) : s(x,0) \
-+ : c==4 ? nc==8 ? s(x,0) : s(x,1) \
-+ : c==5 ? nc==8 ? s(x,1) : s(x,2) \
-+ : c==6 ? s(x,2) \
-+ : s(x,3)))
-+
-+#define si(y,x,k,c) s(y,c) = const_word_in(x + 4 * c) ^ k[c]
-+#define so(y,x,c) word_out(y + 4 * c, s(x,c))
-+
-+#if defined(FOUR_TABLES)
-+#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
-+#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
-+#elif defined(ONE_TABLE)
-+#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
-+#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
-+#else
-+#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
-+#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
-+#endif
-+
-+#if defined(FOUR_LR_TABLES)
-+#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
-+#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
-+#elif defined(ONE_LR_TABLE)
-+#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
-+#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
-+#else
-+#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
-+#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
-+#endif
-+
-+#if AES_BLOCK_SIZE == 16
-+
-+#if defined(ARRAYS)
-+#define locals(y,x) x[4],y[4]
-+#else
-+#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
-+// the following defines prevent the compiler requiring the declaration
-+// of generated but unused variables in the fwd_var and inv_var macros
-+#define b04 unused
-+#define b05 unused
-+#define b06 unused
-+#define b07 unused
-+#define b14 unused
-+#define b15 unused
-+#define b16 unused
-+#define b17 unused
-+#endif
-+#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
-+ s(y,2) = s(x,2); s(y,3) = s(x,3);
-+#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
-+#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
-+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
-+
-+#elif AES_BLOCK_SIZE == 24
-+
-+#if defined(ARRAYS)
-+#define locals(y,x) x[6],y[6]
-+#else
-+#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
-+ y##0,y##1,y##2,y##3,y##4,y##5
-+#define b06 unused
-+#define b07 unused
-+#define b16 unused
-+#define b17 unused
-+#endif
-+#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
-+ s(y,2) = s(x,2); s(y,3) = s(x,3); \
-+ s(y,4) = s(x,4); s(y,5) = s(x,5);
-+#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
-+ si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
-+#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
-+ so(y,x,3); so(y,x,4); so(y,x,5)
-+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
-+ rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
-+#else
-+
-+#if defined(ARRAYS)
-+#define locals(y,x) x[8],y[8]
-+#else
-+#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
-+ y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
-+#endif
-+#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
-+ s(y,2) = s(x,2); s(y,3) = s(x,3); \
-+ s(y,4) = s(x,4); s(y,5) = s(x,5); \
-+ s(y,6) = s(x,6); s(y,7) = s(x,7);
-+
-+#if AES_BLOCK_SIZE == 32
-+
-+#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
-+ si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
-+#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
-+ so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
-+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
-+ rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
-+#else
-+
-+#define state_in(y,x,k) \
-+switch(nc) \
-+{ case 8: si(y,x,k,7); si(y,x,k,6); \
-+ case 6: si(y,x,k,5); si(y,x,k,4); \
-+ case 4: si(y,x,k,3); si(y,x,k,2); \
-+ si(y,x,k,1); si(y,x,k,0); \
-+}
-+
-+#define state_out(y,x) \
-+switch(nc) \
-+{ case 8: so(y,x,7); so(y,x,6); \
-+ case 6: so(y,x,5); so(y,x,4); \
-+ case 4: so(y,x,3); so(y,x,2); \
-+ so(y,x,1); so(y,x,0); \
-+}
-+
-+#if defined(FAST_VARIABLE)
-+
-+#define round(rm,y,x,k) \
-+switch(nc) \
-+{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
-+ rm(y,x,k,5); rm(y,x,k,4); \
-+ rm(y,x,k,3); rm(y,x,k,2); \
-+ rm(y,x,k,1); rm(y,x,k,0); \
-+ break; \
-+ case 6: rm(y,x,k,5); rm(y,x,k,4); \
-+ rm(y,x,k,3); rm(y,x,k,2); \
-+ rm(y,x,k,1); rm(y,x,k,0); \
-+ break; \
-+ case 4: rm(y,x,k,3); rm(y,x,k,2); \
-+ rm(y,x,k,1); rm(y,x,k,0); \
-+ break; \
-+}
-+#else
-+
-+#define round(rm,y,x,k) \
-+switch(nc) \
-+{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
-+ case 6: rm(y,x,k,5); rm(y,x,k,4); \
-+ case 4: rm(y,x,k,3); rm(y,x,k,2); \
-+ rm(y,x,k,1); rm(y,x,k,0); \
-+}
-+
-+#endif
-+
-+#endif
-+#endif
-+
-+void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
-+{ u_int32_t locals(b0, b1);
-+ const u_int32_t *kp = cx->aes_e_key;
-+// kp walks the encryption key schedule one round key (nc words) at a time
-+#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
-+ u_int32_t f2; // NOTE(review): presumably scratch for the fwd_mcol macro - confirm
-+#endif
-+// load the input block into b0, XORed with the first round key
-+ state_in(b0, in_blk, kp); kp += nc;
-+
-+#if defined(UNROLL)
-+// cases fall through on purpose: 14 and 12 rounds run extra round pairs before the common ten
-+ switch(cx->aes_Nrnd)
-+ {
-+ case 14: round(fwd_rnd, b1, b0, kp );
-+ round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
-+ case 12: round(fwd_rnd, b1, b0, kp );
-+ round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
-+ case 10: round(fwd_rnd, b1, b0, kp );
-+ round(fwd_rnd, b0, b1, kp + nc);
-+ round(fwd_rnd, b1, b0, kp + 2 * nc);
-+ round(fwd_rnd, b0, b1, kp + 3 * nc);
-+ round(fwd_rnd, b1, b0, kp + 4 * nc);
-+ round(fwd_rnd, b0, b1, kp + 5 * nc);
-+ round(fwd_rnd, b1, b0, kp + 6 * nc);
-+ round(fwd_rnd, b0, b1, kp + 7 * nc);
-+ round(fwd_rnd, b1, b0, kp + 8 * nc);
-+ round(fwd_lrnd, b0, b1, kp + 9 * nc);
-+ }
-+
-+#elif defined(PARTIAL_UNROLL)
-+ { u_int32_t rnd;
-+// two rounds per iteration, alternating between b1 and b0 as the destination
-+ for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
-+ {
-+ round(fwd_rnd, b1, b0, kp);
-+ round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
-+ }
-+// final pair: one normal round, then the last round (fwd_lrnd)
-+ round(fwd_rnd, b1, b0, kp);
-+ round(fwd_lrnd, b0, b1, kp + nc);
-+ }
-+#else
-+ { u_int32_t rnd;
-+// one round per iteration, with the result copied back into b0 each time
-+ for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
-+ {
-+ round(fwd_rnd, b1, b0, kp);
-+ l_copy(b0, b1); kp += nc;
-+ }
-+// the last round uses fwd_lrnd instead of fwd_rnd
-+ round(fwd_lrnd, b0, b1, kp);
-+ }
-+#endif
-+// write the final state held in b0 to out_blk
-+ state_out(out_blk, b0);
-+}
-+
-+void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
-+{ u_int32_t locals(b0, b1);
-+ const u_int32_t *kp = cx->aes_d_key;
-+// kp walks the decryption key schedule one round key (nc words) at a time
-+#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
-+ u_int32_t f2, f4, f8, f9; // scratch words used by the non-table form of inv_mcol
-+#endif
-+// load the input block into b0, XORed with the first decryption round key
-+ state_in(b0, in_blk, kp); kp += nc;
-+
-+#if defined(UNROLL)
-+// cases fall through on purpose: 14 and 12 rounds run extra round pairs before the common ten
-+ switch(cx->aes_Nrnd)
-+ {
-+ case 14: round(inv_rnd, b1, b0, kp );
-+ round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
-+ case 12: round(inv_rnd, b1, b0, kp );
-+ round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
-+ case 10: round(inv_rnd, b1, b0, kp );
-+ round(inv_rnd, b0, b1, kp + nc);
-+ round(inv_rnd, b1, b0, kp + 2 * nc);
-+ round(inv_rnd, b0, b1, kp + 3 * nc);
-+ round(inv_rnd, b1, b0, kp + 4 * nc);
-+ round(inv_rnd, b0, b1, kp + 5 * nc);
-+ round(inv_rnd, b1, b0, kp + 6 * nc);
-+ round(inv_rnd, b0, b1, kp + 7 * nc);
-+ round(inv_rnd, b1, b0, kp + 8 * nc);
-+ round(inv_lrnd, b0, b1, kp + 9 * nc);
-+ }
-+
-+#elif defined(PARTIAL_UNROLL)
-+ { u_int32_t rnd;
-+// two rounds per iteration, alternating between b1 and b0 as the destination
-+ for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
-+ {
-+ round(inv_rnd, b1, b0, kp);
-+ round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
-+ }
-+// final pair: one normal round, then the last round (inv_lrnd)
-+ round(inv_rnd, b1, b0, kp);
-+ round(inv_lrnd, b0, b1, kp + nc);
-+ }
-+#else
-+ { u_int32_t rnd;
-+// one round per iteration, with the result copied back into b0 each time
-+ for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
-+ {
-+ round(inv_rnd, b1, b0, kp);
-+ l_copy(b0, b1); kp += nc;
-+ }
-+// the last round uses inv_lrnd instead of inv_rnd
-+ round(inv_lrnd, b0, b1, kp);
-+ }
-+#endif
-+// write the final state held in b0 to out_blk
-+ state_out(out_blk, b0);
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/aes_cbc.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,67 @@
-+/*
-+// I retain copyright in this code but I encourage its free use provided
-+// that I don't carry any responsibility for the results. I am especially
-+// happy to see it used in free and open source software. If you do use
-+// it I would appreciate an acknowledgement of its origin in the code or
-+// the product that results and I would also appreciate knowing a little
-+// about the use to which it is being put. I am grateful to Frank Yellin
-+// for some ideas that are used in this implementation.
-+//
-+// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
-+//
-+// This is an implementation of the AES encryption algorithm (Rijndael)
-+// designed by Joan Daemen and Vincent Rijmen. This version is designed
-+// to provide both fixed and dynamic block and key lengths and can also
-+// run with either big or little endian internal byte order (see aes.h).
-+// It inputs block and key lengths in bytes with the legal values being
-+// 16, 24 and 32.
-+*
-+*/
-+
-+#ifdef __KERNEL__
-+#include <linux/types.h>
-+#else
-+#include <sys/types.h>
-+#endif
-+#include "klips-crypto/aes_cbc.h"
-+#include "klips-crypto/cbc_generic.h"
-+#ifdef OCF_ASSIST
-+#include "klips-crypto/ocf_assist.h"
-+#endif
-+
-+/* returns bool success; always 1 here, because aes_set_key() returns void and reports no errors */
-+int AES_set_key(aes_context *aes_ctx, const u_int8_t *key, int keysize) {
-+ aes_set_key(aes_ctx, key, keysize, 0); /* f = 0: the decryption schedule is derived as well */
-+ return 1;
-+}
-+
-+#ifdef OCF_ASSIST
-+/* NOTE(review): CBC_IMPL_BLK16 presumably expands to a CBC function definition (cbc_generic.h) - confirm */
-+CBC_IMPL_BLK16(_AES_cbc_encrypt, aes_context, u_int8_t *, aes_encrypt, aes_decrypt);
-+
-+int
-+AES_cbc_encrypt(aes_context *ctx, const u_int8_t *in, u_int8_t *out, int ilen,
-+ const u_int8_t *iv, int encrypt)
-+{
-+ if (ocf_aes_assist() & OCF_PROVIDES_AES) { /* OCF reports that it provides AES: hand the request to it */
-+ return ocf_aes_cbc_encrypt(ctx, in, out, ilen, iv, encrypt);
-+ } else { /* otherwise use the macro-generated _AES_cbc_encrypt */
-+ return _AES_cbc_encrypt(ctx, in, out, ilen, iv, encrypt);
-+ }
-+}
-+
-+#else
-+CBC_IMPL_BLK16(AES_cbc_encrypt, aes_context, u_int8_t *, aes_encrypt, aes_decrypt);
-+#endif
-+
-+
-+/*
-+ * $Log: aes_cbc.c,v $
-+ * Revision 1.2 2004/07/10 07:48:40 mcr
-+ * Moved from linux/crypto/ciphers/aes/aes_cbc.c,v
-+ *
-+ * Revision 1.1 2004/04/06 02:48:12 mcr
-+ * pullup of AES cipher from alg-branch.
-+ *
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/aes_xcbc_mac.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,67 @@
-+#ifdef __KERNEL__
-+#include <linux/types.h>
-+#include <linux/kernel.h>
-+#define AES_DEBUG(x)
-+#else
-+#include <stdio.h>
-+#include <sys/types.h>
-+#define AES_DEBUG(x) x
-+#endif
-+
-+#include "klips-crypto/aes.h"
-+#include "klips-crypto/aes_xcbc_mac.h"
-+
-+int AES_xcbc_mac_set_key(aes_context_mac *ctxm, const u_int8_t *key, int keylen)
-+{
-+ int ret=1; /* never modified: this function always returns 1 */
-+ aes_block kn[3] = { /* constant blocks that are encrypted under key to derive the three subkeys */
-+ { 0x01010101, 0x01010101, 0x01010101, 0x01010101 },
-+ { 0x02020202, 0x02020202, 0x02020202, 0x02020202 },
-+ { 0x03030303, 0x03030303, 0x03030303, 0x03030303 },
-+ };
-+ aes_set_key(&ctxm->ctx_k1, key, keylen, 0); /* temporarily key the context with the caller's key */
-+ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[0], (u_int8_t *) kn[0]); /* in place: kn[0] now holds the first subkey */
-+ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[1], (u_int8_t *) ctxm->k2);
-+ aes_encrypt(&ctxm->ctx_k1, (u_int8_t *) kn[2], (u_int8_t *) ctxm->k3);
-+ aes_set_key(&ctxm->ctx_k1, (u_int8_t *) kn[0], 16, 0); /* re-key the context with the derived 16-byte subkey */
-+ return ret;
-+}
-+/* XOR up to 16 bytes of in into out; when len < 16, XOR 0x80 into the byte after the data and stop. */
-+static void do_pad_xor(u_int8_t *out, const u_int8_t *in, int len) {
-+	int idx;
-+	for (idx=0; idx < 16; idx++) {
-+		if (idx >= len) {
-+			AES_DEBUG(printf("put 0x80 at pos=%d\n", idx+1));
-+			out[idx] ^= 0x80;
-+			return;
-+		}
-+		out[idx] ^= in[idx];
-+	}
-+}
-+/* res ^= op, over the four words of the block */
-+static void xor_block(aes_block res, const aes_block op) {
-+	int w;
-+	for (w = 0; w < 4; w++)
-+		res[w] ^= op[w];
-+}
-+/* AES-XCBC-MAC (RFC 3566) of in[0..ilen) using the keys prepared in *ctxm: */
-+/* writes the 16-byte tag to hash and returns the original ilen. */
-+int AES_xcbc_mac_hash(const aes_context_mac *ctxm, const u_int8_t * in, int ilen, u_int8_t hash[16]) {
-+	int ret=ilen;
-+	u_int32_t out[4] = { 0, 0, 0, 0 };	/* running CBC state, E[0] = 0 */
-+	for (; ilen > 16 ; ilen-=16) {	/* every block except the last one */
-+		do_pad_xor((u_int8_t *)out, in, 16);	/* bytewise out ^= M[i]; avoids an unaligned word read of in */
-+		aes_encrypt(&ctxm->ctx_k1, (u_int8_t *)out, (u_int8_t *)out);	/* E[i] = AES(K1, M[i] ^ E[i-1]) */
-+		in+=16;
-+	}
-+	do_pad_xor((u_int8_t *)out, in, ilen);	/* last block, padded with 0x80 when shorter than 16 bytes */
-+	if (ilen==16) {	/* complete last block: RFC 3566 step 4a uses K2 */
-+		AES_DEBUG(printf("using k2\n"));
-+		xor_block(out, ctxm->k2);
-+	} else {	/* padded (or empty) last block: step 4b uses K3 */
-+		AES_DEBUG(printf("using k3\n"));
-+		xor_block(out, ctxm->k3);
-+	}
-+	aes_encrypt(&ctxm->ctx_k1, (u_int8_t *)out, hash);
-+	return ret;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/aes/ipsec_alg_aes.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,300 @@
-+/*
-+ * ipsec_alg AES cipher stubs
-+ *
-+ * Author: JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ *
-+ * ipsec_alg_aes.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * Fixes by:
-+ * PK: Pawel Krawczyk <kravietz@aba.krakow.pl>
-+ * Fixes list:
-+ * PK: make XCBC comply with latest draft (keylength)
-+ *
-+ */
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+/*
-+ * special case: ipsec core modular with this static algo inside:
-+ * must avoid MODULE magic for this file
-+ */
-+#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_AES)
-+#undef MODULE
-+#endif
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+
-+#include <linux/kernel.h> /* printk() */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/string.h>
-+
-+/* Check if __exit is defined, if not null it */
-+#ifndef __exit
-+#define __exit
-+#endif
-+
-+/* Low freeswan header coupling */
-+#include <openswan.h>
-+#include "openswan/ipsec_alg.h"
-+#include "klips-crypto/aes_cbc.h"
-+
-+#define CONFIG_KLIPS_ENC_AES_MAC 1
-+
-+#define AES_CONTEXT_T aes_context
-+static int debug_aes=0;
-+static int test_aes=0;
-+static int excl_aes=0;
-+static int keyminbits=0;
-+static int keymaxbits=0;
-+#if defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+MODULE_AUTHOR("JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>");
-+#ifdef module_param
-+module_param(debug_aes,int,0664);
-+module_param(test_aes,int,0664);
-+module_param(excl_aes,int,0664);
-+module_param(keyminbits,int,0664);
-+module_param(keymaxbits,int,0664);
-+#else
-+MODULE_PARM(debug_aes, "i");
-+MODULE_PARM(test_aes, "i");
-+MODULE_PARM(excl_aes, "i");
-+MODULE_PARM(keyminbits, "i");
-+MODULE_PARM(keymaxbits, "i");
-+#endif
-+#endif
-+
-+#if CONFIG_KLIPS_ENC_AES_MAC
-+#include "klips-crypto/aes_xcbc_mac.h"
-+
-+/*
-+ * Not IANA number yet (draft-ietf-ipsec-ciph-aes-xcbc-mac-00.txt).
-+ * We use 9 for non-modular algorithm and none for modular, thus
-+ * forcing user to specify one on module load. -kravietz
-+ */
-+#ifdef MODULE
-+static int auth_id=0;
-+#else
-+static int auth_id=9;
-+#endif
-+#if 0
-+#ifdef MODULE_PARM
-+MODULE_PARM(auth_id, "i");
-+#else
-+module_param(auth_id,int,0664);
-+#endif
-+#endif
-+#endif
-+
-+#define ESP_AES 12 /* id used as ias_id of ipsec_alg_AES; truly _constant_ :) */
-+
-+/* key sizes are in bytes, i.e. 128, 192 or 256 bits */
-+#define ESP_AES_KEY_SZ_MIN 16 /* 128 bit secret key */
-+#define ESP_AES_KEY_SZ_MAX 32 /* 256 bit secret key */
-+#define ESP_AES_CBC_BLK_LEN 16 /* AES-CBC block size */
-+
-+/* Values according to draft-ietf-ipsec-ciph-aes-xcbc-mac-02.txt
-+ * -kravietz
-+ */
-+#define ESP_AES_MAC_KEY_SZ 16 /* 128 bit MAC key */
-+#define ESP_AES_MAC_BLK_LEN 16 /* 128 bit block */
-+
-+static int _aes_set_key(struct ipsec_alg_enc *alg,
-+		__u8 * key_e, const __u8 * key,
-+		size_t keysize)
-+{	/* Builds the AES context stored in key_e from key; returns 0 or -EINVAL. alg is unused. */
-+	int ret;
-+	AES_CONTEXT_T *ctx=(AES_CONTEXT_T*)key_e;
-+	ret=AES_set_key(ctx, key, keysize)!=0? 0: -EINVAL;
-+	if (debug_aes > 0)
-+		printk(KERN_DEBUG "klips_debug:_aes_set_key:"
-+				"ret=%d key_e=%p key=%p keysize=%lu\n",	/* %lu matches the unsigned long cast below */
-+				ret, key_e, key, (unsigned long int) keysize);
-+	return ret;
-+}
-+
-+static int _aes_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e,
-+ const __u8 * in, int ilen, const __u8 * iv,
-+ int encrypt)
-+{ /* Runs AES_cbc_encrypt over ilen bytes at in with the context stored in key_e; alg is unused. */
-+ AES_CONTEXT_T *ctx=(AES_CONTEXT_T*)key_e;
-+ if (debug_aes > 0)
-+ printk(KERN_DEBUG "klips_debug:_aes_cbc_encrypt:"
-+ "key_e=%p in=%p ilen=%d iv=%p encrypt=%d\n",
-+ key_e, in, ilen, iv, encrypt);
-+ return AES_cbc_encrypt(ctx, in, in, ilen, iv, encrypt); /* in is both source and destination: the data is processed in place */
-+}
-+#if CONFIG_KLIPS_ENC_AES_MAC
-+static int _aes_mac_set_key(struct ipsec_alg_auth *alg, __u8 * key_a, const __u8 * key, int keylen) { /* alg is unused */
-+ aes_context_mac *ctxm=(aes_context_mac *)key_a; /* key_a is treated as storage for the MAC context */
-+ return AES_xcbc_mac_set_key(ctxm, key, keylen)? 0 : -EINVAL; /* a non-zero result from the helper maps to 0 (success) */
-+}
-+/* Computes the 16-byte XCBC tag of dat[0..len) and copies at most 16 bytes of it to hash; alg is unused. */
-+static int _aes_mac_hash(struct ipsec_alg_auth *alg, __u8 * key_a, const __u8 * dat, int len, __u8 * hash, int hashlen) {
-+	__u8 hash_buf[16];	/* unsigned bytes, matching the u_int8_t hash[16] parameter of AES_xcbc_mac_hash */
-+	int ret=AES_xcbc_mac_hash((aes_context_mac *)key_a, dat, len, hash_buf);
-+	if (hashlen > (int) sizeof(hash_buf)) hashlen=sizeof(hash_buf);	/* never read past the tag buffer */
-+	memcpy(hash, hash_buf, hashlen);
-+	return ret;
-+}
-+static struct ipsec_alg_auth ipsec_alg_AES_MAC = { /* descriptor that ipsec_aes_init() registers when auth_id != 0 */
-+ ixt_common: { ixt_version: IPSEC_ALG_VERSION,
-+ ixt_refcnt: ATOMIC_INIT(0),
-+ ixt_name: "aes_mac",
-+ ixt_blocksize: ESP_AES_MAC_BLK_LEN,
-+ ixt_support: {
-+ ias_exttype: IPSEC_ALG_TYPE_AUTH,
-+ ias_id: 0, /* placeholder: ipsec_aes_init() stores auth_id here before registering */
-+ ias_keyminbits: ESP_AES_MAC_KEY_SZ*8, /* min == max: only one key size is offered */
-+ ias_keymaxbits: ESP_AES_MAC_KEY_SZ*8,
-+ },
-+ },
-+#if defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+ ixt_module: THIS_MODULE,
-+#endif
-+ ixt_a_keylen: ESP_AES_MAC_KEY_SZ, /* in bytes */
-+ ixt_a_ctx_size: sizeof(aes_context_mac),
-+ ixt_a_hmac_set_key: _aes_mac_set_key,
-+ ixt_a_hmac_hash:_aes_mac_hash,
-+};
-+#endif /* CONFIG_KLIPS_ENC_AES_MAC */
-+static struct ipsec_alg_enc ipsec_alg_AES = { /* descriptor that ipsec_aes_init() registers for the AES cipher */
-+ ixt_common: { ixt_version: IPSEC_ALG_VERSION,
-+ ixt_refcnt: ATOMIC_INIT(0),
-+ ixt_name: "aes",
-+ ixt_blocksize: ESP_AES_CBC_BLK_LEN,
-+ ixt_support: {
-+ ias_exttype: IPSEC_ALG_TYPE_ENCRYPT,
-+ //ias_ivlen: 128,
-+ ias_id: ESP_AES,
-+ ias_keyminbits: ESP_AES_KEY_SZ_MIN*8, /* defaults; ipsec_aes_init() overrides them from keyminbits/keymaxbits */
-+ ias_keymaxbits: ESP_AES_KEY_SZ_MAX*8,
-+ },
-+ },
-+#if defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+ ixt_module: THIS_MODULE,
-+#endif
-+ ixt_e_keylen: ESP_AES_KEY_SZ_MAX, /* in bytes */
-+ ixt_e_ctx_size: sizeof(AES_CONTEXT_T),
-+ ixt_e_set_key: _aes_set_key,
-+ ixt_e_cbc_encrypt:_aes_cbc_encrypt,
-+};
-+
-+#if defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+IPSEC_ALG_MODULE_INIT_MOD( ipsec_aes_init )
-+#else
-+IPSEC_ALG_MODULE_INIT_STATIC( ipsec_aes_init )
-+#endif
-+{
-+	int ret, test_ret;
-+/* Registers the AES cipher and, when auth_id != 0, the AES MAC; returns the cipher's registration result. */
-+	if (keyminbits)
-+		ipsec_alg_AES.ixt_common.ixt_support.ias_keyminbits=keyminbits;
-+	if (keymaxbits) {
-+		ipsec_alg_AES.ixt_common.ixt_support.ias_keymaxbits=keymaxbits;
-+		/* ixt_e_keylen is in bytes (it starts as ESP_AES_KEY_SZ_MAX) while keymaxbits is in bits */
-+		if (keymaxbits>0) ipsec_alg_AES.ixt_e_keylen=keymaxbits/8;
-+	}
-+	if (excl_aes) ipsec_alg_AES.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL;
-+	ret=register_ipsec_alg_enc(&ipsec_alg_AES);
-+	printk("ipsec_aes_init(alg_type=%d alg_id=%d name=%s): ret=%d\n",
-+			ipsec_alg_AES.ixt_common.ixt_support.ias_exttype,
-+			ipsec_alg_AES.ixt_common.ixt_support.ias_id,
-+			ipsec_alg_AES.ixt_common.ixt_name,
-+			ret);
-+	if (ret==0 && test_aes) {	/* optional self-test, only after a successful registration */
-+		test_ret=ipsec_alg_test(
-+				ipsec_alg_AES.ixt_common.ixt_support.ias_exttype ,
-+				ipsec_alg_AES.ixt_common.ixt_support.ias_id,
-+				test_aes);
-+		printk("ipsec_aes_init(alg_type=%d alg_id=%d): test_ret=%d\n",
-+				ipsec_alg_AES.ixt_common.ixt_support.ias_exttype ,
-+				ipsec_alg_AES.ixt_common.ixt_support.ias_id,
-+				test_ret);
-+	}
-+#if CONFIG_KLIPS_ENC_AES_MAC
-+	if (auth_id!=0){
-+		int mac_ret;	/* kept apart from ret so that the cipher's result is what gets returned */
-+		ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id=auth_id;
-+		mac_ret=register_ipsec_alg_auth(&ipsec_alg_AES_MAC);
-+		printk("ipsec_aes_init(alg_type=%d alg_id=%d name=%s): ret=%d\n",
-+				ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype,
-+				ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id,
-+				ipsec_alg_AES_MAC.ixt_common.ixt_name,
-+				mac_ret);
-+		if (mac_ret==0 && test_aes) {
-+			test_ret=ipsec_alg_test(
-+					ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype,
-+					ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id,
-+					test_aes);
-+			printk("ipsec_aes_init(alg_type=%d alg_id=%d): test_ret=%d\n",
-+					ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_exttype,
-+					ipsec_alg_AES_MAC.ixt_common.ixt_support.ias_id,
-+					test_ret);
-+		}
-+	} else {
-+		printk(KERN_DEBUG "klips_debug: experimental ipsec_alg_AES_MAC not registered [Ok] (auth_id=%d)\n", auth_id);
-+	}
-+#endif /* CONFIG_KLIPS_ENC_AES_MAC */
-+	return ret;
-+}
-+
-+#if defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+IPSEC_ALG_MODULE_EXIT_MOD( ipsec_aes_fini )
-+#else
-+IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_aes_fini )
-+#endif
-+{ /* unregisters the MAC (when auth_id is non-zero) and then the AES cipher */
-+#if CONFIG_KLIPS_ENC_AES_MAC
-+ if (auth_id) unregister_ipsec_alg_auth(&ipsec_alg_AES_MAC); /* same auth_id test that ipsec_aes_init() uses before registering */
-+#endif /* CONFIG_KLIPS_ENC_AES_MAC */
-+ unregister_ipsec_alg_enc(&ipsec_alg_AES);
-+ return;
-+}
-+#ifdef MODULE_LICENSE
-+MODULE_LICENSE("GPL");
-+#endif
-+
-+#if 0 /* +NOT_YET */
-+#ifndef MODULE
-+/*
-+ * This is intended for static module setups, currently
-+ * doesn't work for modular ipsec.o with static algos inside
-+ */
-+static int setup_keybits(const char *str)
-+{
-+ unsigned aux;
-+ char *end;
-+// Parses "min[,max]" from str; returns 0 if a value is not 128, 192 or 256, otherwise 1.
-+ aux = simple_strtoul(str,&end,0);
-+ if (aux != 128 && aux != 192 && aux != 256)
-+ return 0;
-+ keyminbits = aux;
-+// no ",max" part follows: only the minimum is set
-+ if (*end == 0 || *end != ',') // equivalent to (*end != ',')
-+ return 1;
-+ str=end+1;
-+ aux = simple_strtoul(str, NULL, 0);
-+ if (aux != 128 && aux != 192 && aux != 256)
-+ return 0;
-+ if (aux >= keyminbits) // a maximum below the minimum is silently ignored
-+ keymaxbits = aux;
-+ return 1;
-+}
-+__setup("ipsec_aes_keybits=", setup_keybits);
-+#endif
-+#endif
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/Config.alg_aes.in Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,3 @@
-+if [ "$CONFIG_IPSEC_ALG" = "y" ]; then
-+ tristate ' AES encryption algorithm' CONFIG_IPSEC_ENC_AES
-+fi
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/Config.alg_cryptoapi.in Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,6 @@
-+if [ "$CONFIG_IPSEC_ALG" = "y" ]; then
-+ dep_tristate ' CRYPTOAPI ciphers support (needs cryptoapi patch)' CONFIG_IPSEC_ALG_CRYPTOAPI $CONFIG_CRYPTO
-+ if [ "$CONFIG_IPSEC_ALG_CRYPTOAPI" != "n" ]; then
-+ bool ' CRYPTOAPI proprietary ciphers ' CONFIG_IPSEC_ALG_NON_LIBRE
-+ fi
-+fi
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/Config.in Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,3 @@
-+#Placeholder
-+source net/ipsec/alg/Config.alg_aes.in
-+source net/ipsec/alg/Config.alg_cryptoapi.in
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/Makefile.alg_aes Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,18 @@
-+MOD_AES := ipsec_aes.o
-+# Append the AES module and its library directory to the shared ALG_* lists.
-+ALG_MODULES += $(MOD_AES)
-+ALG_SUBDIRS += libaes
-+# NOTE(review): Config.alg_aes.in declares CONFIG_IPSEC_ENC_AES, not CONFIG_IPSEC_ALG_AES - confirm which symbol is intended.
-+obj-$(CONFIG_IPSEC_ALG_AES) += $(MOD_AES)
-+static_init-func-$(CONFIG_IPSEC_ALG_AES)+= ipsec_aes_init
-+alg_obj-$(CONFIG_IPSEC_ALG_AES) += ipsec_alg_aes.o
-+# Objects that the $(MOD_AES) rule links together with $(LD) -r.
-+AES_OBJS := ipsec_alg_aes.o $(LIBCRYPTO)/libaes/libaes.a
-+
-+
-+$(MOD_AES): $(AES_OBJS)
-+ $(LD) $(EXTRA_LDFLAGS) -r $(AES_OBJS) -o $@
-+
-+$(LIBCRYPTO)/libaes/libaes.a:
-+ $(MAKE) -C $(LIBCRYPTO)/libaes CC='$(CC)' 'ARCH_ASM=$(ARCH_ASM)' CFLAGS='$(CFLAGS) $(EXTRA_CFLAGS)' libaes.a
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/Makefile.alg_cryptoapi Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,14 @@
-+MOD_CRYPTOAPI := ipsec_cryptoapi.o
-+
-+ifneq ($(wildcard $(TOPDIR)/include/linux/crypto.h),)
-+ALG_MODULES += $(MOD_CRYPTOAPI)
-+obj-$(CONFIG_IPSEC_ALG_CRYPTOAPI) += $(MOD_CRYPTOAPI)
-+static_init-func-$(CONFIG_IPSEC_ALG_CRYPTOAPI)+= ipsec_cryptoapi_init
-+alg_obj-$(CONFIG_IPSEC_ALG_CRYPTOAPI) += ipsec_alg_cryptoapi.o
-+else
-+$(warning "Linux CryptoAPI (2.4.22+ or 2.6.x) not found, not building ipsec_cryptoapi.o")
-+endif
-+
-+CRYPTOAPI_OBJS := ipsec_alg_cryptoapi.o
-+$(MOD_CRYPTOAPI): $(CRYPTOAPI_OBJS)
-+ $(LD) -r $(CRYPTOAPI_OBJS) -o $@
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/ipsec_alg_cryptoapi.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,442 @@
-+/*
-+ * ipsec_alg to linux cryptoapi GLUE
-+ *
-+ * Authors: CODE.ar TEAM
-+ * Harpo MAxx <harpo@linuxmendoza.org.ar>
-+ * JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ * Luciano Ruete <docemeses@softhome.net>
-+ *
-+ * ipsec_alg_cryptoapi.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * Example usage:
-+ * modinfo -p ipsec_cryptoapi (quite useful info, including supported algos)
-+ * modprobe ipsec_cryptoapi
-+ * modprobe ipsec_cryptoapi test=1
-+ * modprobe ipsec_cryptoapi excl=1 (exclusive cipher/algo)
-+ * modprobe ipsec_cryptoapi noauto=1 aes=1 twofish=1 (only these ciphers)
-+ * modprobe ipsec_cryptoapi aes=128,128 (force these keylens)
-+ * modprobe ipsec_cryptoapi des_ede3=0 (everything but 3DES)
-+ */
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+/*
-+ * special case: ipsec core modular with this static algo inside:
-+ * must avoid MODULE magic for this file
-+ */
-+#if CONFIG_IPSEC_MODULE && CONFIG_IPSEC_ALG_CRYPTOAPI
-+#undef MODULE
-+#endif
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+
-+#include <linux/kernel.h> /* printk() */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/string.h>
-+
-+/* Check if __exit is defined, if not null it */
-+#ifndef __exit
-+#define __exit
-+#endif
-+
-+/* warn the innocent */
-+#if !defined (CONFIG_CRYPTO) && !defined (CONFIG_CRYPTO_MODULE)
-+#warning "No linux CryptoAPI found, install 2.4.22+ or 2.6.x"
-+#define NO_CRYPTOAPI_SUPPORT
-+#endif
-+/* Low freeswan header coupling */
-+#include "openswan/ipsec_alg.h"
-+
-+#include <linux/crypto.h>
-+#ifdef CRYPTO_API_VERSION_CODE
-+#warning "Old CryptoAPI is not supported. Only linux-2.4.22+ or linux-2.6.x are supported"
-+#define NO_CRYPTOAPI_SUPPORT
-+#endif
-+
-+#ifdef NO_CRYPTOAPI_SUPPORT
-+#warning "Building an unusable module :P"
-+/* Catch old CryptoAPI by not allowing module to load */
-+IPSEC_ALG_MODULE_INIT( ipsec_cryptoapi_init )
-+{ /* stub init compiled when NO_CRYPTOAPI_SUPPORT is defined: it only warns and fails */
-+ printk(KERN_WARNING "ipsec_cryptoapi.o was not built on stock Linux CryptoAPI (2.4.22+ or 2.6.x), not loading.\n");
-+ return -EINVAL; /* always an error, so the module is never loaded in this configuration */
-+}
-+#else
-+#include <asm/scatterlist.h>
-+#include <asm/pgtable.h>
-+#include <linux/mm.h>
-+
-+#define CIPHERNAME_AES "aes"
-+#define CIPHERNAME_3DES "des3_ede"
-+#define CIPHERNAME_BLOWFISH "blowfish"
-+#define CIPHERNAME_CAST "cast5"
-+#define CIPHERNAME_SERPENT "serpent"
-+#define CIPHERNAME_TWOFISH "twofish"
-+
-+#define ESP_3DES 3
-+#define ESP_AES 12
-+#define ESP_BLOWFISH 7 /* truely _constant_ :) */
-+#define ESP_CAST 6 /* quite constant :) */
-+#define ESP_SERPENT 252 /* from ipsec drafts */
-+#define ESP_TWOFISH 253 /* from ipsec drafts */
-+
-+#define AH_MD5 2
-+#define AH_SHA 3
-+#define DIGESTNAME_MD5 "md5"
-+#define DIGESTNAME_SHA1 "sha1"
-+
-+MODULE_AUTHOR("Juanjo Ciarlante, Harpo MAxx, Luciano Ruete");
-+static int debug=0;
-+static int test=0;
-+static int excl=0;
-+#ifdef module_param
-+module_param(debug, int, 0664);
-+module_param(test, int, 0664);
-+module_param(excl, int, 0664);
-+#else
-+MODULE_PARM(debug, "i");
-+MODULE_PARM(test, "i");
-+MODULE_PARM(excl, "i");
-+#endif
-+
-+static int noauto = 0;
-+#ifdef module_param
-+module_param(noauto,int, 0664);
-+#else
-+MODULE_PARM(noauto,"i");
-+#endif
-+MODULE_PARM_DESC(noauto, "Dont try all known algos, just setup enabled ones");
-+
-+static int des_ede3[] = {-1, -1};
-+static int aes[] = {-1, -1};
-+static int blowfish[] = {-1, -1};
-+static int cast[] = {-1, -1};
-+static int serpent[] = {-1, -1};
-+static int twofish[] = {-1, -1};
-+
-+#ifdef module_param_array
-+module_param_array(des_ede3,int,NULL,0);
-+module_param_array(aes,int,NULL,0);
-+module_param_array(blowfish,int,NULL,0);
-+module_param_array(cast,int,NULL,0);
-+module_param_array(serpent,int,NULL,0);
-+module_param_array(twofish,int,NULL,0);
-+#else
-+MODULE_PARM(des_ede3,"1-2i");
-+MODULE_PARM(aes,"1-2i");
-+MODULE_PARM(blowfish,"1-2i");
-+MODULE_PARM(cast,"1-2i");
-+MODULE_PARM(serpent,"1-2i");
-+MODULE_PARM(twofish,"1-2i");
-+#endif
-+MODULE_PARM_DESC(des_ede3, "0: disable | 1: force_enable | min,max: dontuse");
-+MODULE_PARM_DESC(aes, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(blowfish, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(cast, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(serpent, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(twofish, "0: disable | 1: force_enable | min,max: keybitlens");
-+
-+struct ipsec_alg_capi_cipher {
-+ const char *ciphername; /* cryptoapi's ciphername */
-+ unsigned blocksize;
-+ unsigned short minbits;
-+ unsigned short maxbits;
-+ int *parm; /* lkm param for this cipher */
-+ struct ipsec_alg_enc alg; /* note it's not a pointer */
-+};
-+static struct ipsec_alg_capi_cipher alg_capi_carray[] = {
-+ { CIPHERNAME_AES , 16, 128, 256, aes , { ixt_alg_id: ESP_AES, }},
-+ { CIPHERNAME_TWOFISH , 16, 128, 256, twofish, { ixt_alg_id: ESP_TWOFISH, }},
-+ { CIPHERNAME_SERPENT , 16, 128, 256, serpent, { ixt_alg_id: ESP_SERPENT, }},
-+ { CIPHERNAME_CAST , 8, 128, 128, cast , { ixt_alg_id: ESP_CAST, }},
-+ { CIPHERNAME_BLOWFISH , 8, 96, 448, blowfish,{ ixt_alg_id: ESP_BLOWFISH, }},
-+ { CIPHERNAME_3DES , 8, 192, 192, des_ede3,{ ixt_alg_id: ESP_3DES, }},
-+ { NULL, 0, 0, 0, NULL, {} }
-+};
-+#ifdef NOT_YET
-+struct ipsec_alg_capi_digest {
-+ const char *digestname; /* cryptoapi's digestname */
-+ struct digest_implementation *di;
-+ struct ipsec_alg_auth alg; /* note it's not a pointer */
-+};
-+static struct ipsec_alg_capi_digest alg_capi_darray[] = { /* digest table, ends with a NULL digestname entry */
-+ { DIGESTNAME_MD5, NULL, { ixt_alg_id: AH_MD5, }},
-+ { DIGESTNAME_SHA1, NULL, { ixt_alg_id: AH_SHA, }},
-+ { NULL, NULL, {} }
-+};
-+#endif
-+/*
-+ * "generic" linux cryptoapi setup_cipher() function
-+ */
-+int setup_cipher(const char *ciphername)
-+{
-+ return crypto_alg_available(ciphername, 0);
-+}
-+
-+/*
-+ * setups ipsec_alg_capi_cipher "hyper" struct components, calling
-+ * register_ipsec_alg for contained ipsec_alg object
-+ */
-+static void _capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e);
-+static __u8 * _capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen);
-+static int _capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt);
-+
-+static int /* returns the result of register_ipsec_alg_enc() */
-+setup_ipsec_alg_capi_cipher(struct ipsec_alg_capi_cipher *cptr)
-+{
-+ int ret;
-+ cptr->alg.ixt_version = IPSEC_ALG_VERSION;
-+ cptr->alg.ixt_module = THIS_MODULE;
-+ atomic_set (& cptr->alg.ixt_refcnt, 0);
-+ strncpy (cptr->alg.ixt_name , cptr->ciphername, sizeof (cptr->alg.ixt_name) - 1);
-+ cptr->alg.ixt_name[sizeof (cptr->alg.ixt_name) - 1] = '\0'; /* strncpy() does not terminate on truncation */
-+ cptr->alg.ixt_blocksize=cptr->blocksize;
-+ cptr->alg.ixt_keyminbits=cptr->minbits;
-+ cptr->alg.ixt_keymaxbits=cptr->maxbits;
-+ cptr->alg.ixt_state = 0;
-+ if (excl) cptr->alg.ixt_state |= IPSEC_ALG_ST_EXCL;
-+ cptr->alg.ixt_e_keylen=cptr->alg.ixt_keymaxbits/8;
-+ cptr->alg.ixt_e_ctx_size = 0;
-+ cptr->alg.ixt_alg_type = IPSEC_ALG_TYPE_ENCRYPT;
-+ cptr->alg.ixt_e_new_key = _capi_new_key;
-+ cptr->alg.ixt_e_destroy_key = _capi_destroy_key;
-+ cptr->alg.ixt_e_cbc_encrypt = _capi_cbc_encrypt;
-+ cptr->alg.ixt_data = cptr; /* lets the callbacks find this cipher entry again */
-+
-+ ret=register_ipsec_alg_enc(&cptr->alg);
-+ printk("setup_ipsec_alg_capi_cipher(): "
-+ "alg_type=%d alg_id=%d name=%s "
-+ "keyminbits=%d keymaxbits=%d, ret=%d\n",
-+ cptr->alg.ixt_alg_type,
-+ cptr->alg.ixt_alg_id,
-+ cptr->alg.ixt_name,
-+ cptr->alg.ixt_keyminbits,
-+ cptr->alg.ixt_keymaxbits,
-+ ret);
-+ return ret;
-+}
-+/*
-+ * called in ipsec_sa_wipe() time, will destroy key contexts
-+ * and do 1 unbind()
-+ */
-+static void
-+_capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e)
-+{
-+ struct crypto_tfm *tfm=(struct crypto_tfm*)key_e;
-+
-+ if (debug > 0)
-+ printk(KERN_DEBUG "klips_debug: _capi_destroy_key:"
-+ "name=%s key_e=%p \n",
-+ alg->ixt_name, key_e);
-+ if (!key_e) {
-+ printk(KERN_ERR "klips_debug: _capi_destroy_key:"
-+ "name=%s NULL key_e!\n",
-+ alg->ixt_name);
-+ return;
-+ }
-+ crypto_free_tfm(tfm);
-+}
-+
-+/*
-+ * create a new key context: allocates a CBC crypto_tfm for the cipher found via
-+ * alg->ixt_data and keys it; returns the tfm cast to __u8 *, or NULL on failure
-+ */
-+static __u8 *
-+_capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ struct crypto_tfm *tfm=NULL;
-+
-+ cptr = alg->ixt_data;
-+ if (!cptr) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "NULL ixt_data (?!) for \"%s\" algo\n"
-+ , alg->ixt_name);
-+ goto err;
-+ }
-+ if (debug > 0)
-+ printk(KERN_DEBUG "klips_debug:_capi_new_key:"
-+ "name=%s cptr=%p key=%p keysize=%d\n",
-+ alg->ixt_name, cptr, key, (int)keylen); /* keylen is size_t, format is %d */
-+
-+ /*
-+ * alloc tfm
-+ */
-+ tfm = crypto_alloc_tfm(cptr->ciphername, CRYPTO_TFM_MODE_CBC);
-+ if (!tfm) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "NULL tfm for \"%s\" cryptoapi (\"%s\") algo\n"
-+ , alg->ixt_name, cptr->ciphername);
-+ goto err;
-+ }
-+ if (crypto_cipher_setkey(tfm, key, keylen) < 0) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "failed new_key() for \"%s\" cryptoapi algo (keylen=%d)\n"
-+ , alg->ixt_name, (int)keylen);
-+ crypto_free_tfm(tfm); /* do not hand back a tfm without a key */
-+ tfm=NULL;
-+ }
-+err:
-+ if (debug > 0)
-+ printk(KERN_DEBUG "klips_debug:_capi_new_key:"
-+ "name=%s key=%p keylen=%d tfm=%p\n",
-+ alg->ixt_name, key, (int)keylen, tfm);
-+ return (__u8 *) tfm;
-+}
-+/*
-+ * core encryption function: uses the crypto_tfm passed in as key_e to run
-+ * the cipher's CBC encrypt/decrypt over the buffer in place
-+ */
-+static int
-+_capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, const __u8 * iv, int encrypt) {
-+ int error =0;
-+ struct crypto_tfm *tfm=(struct crypto_tfm *)key_e;
-+ struct scatterlist sg = {
-+ .page = virt_to_page(in),
-+ .offset = (unsigned long)(in) % PAGE_SIZE,
-+ .length=ilen,
-+ };
-+ if (debug > 1)
-+ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:"
-+ "key_e=%p "
-+ "in=%p out=%p ilen=%d iv=%p encrypt=%d\n"
-+ , key_e
-+ , in, in, ilen, iv, encrypt);
-+ crypto_cipher_set_iv(tfm, iv, crypto_tfm_alg_ivsize(tfm));
-+ if (encrypt)
-+ error = crypto_cipher_encrypt (tfm, &sg, &sg, ilen);
-+ else
-+ error = crypto_cipher_decrypt (tfm, &sg, &sg, ilen);
-+ if (debug > 1)
-+ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:"
-+ "error=%d\n"
-+ , error);
-+ return (error<0)? error : ilen;
-+}
-+/*
-+ * main initialization loop: for each cipher in list, do
-+ * 1) setup cryptoapi cipher else continue
-+ * 2) register ipsec_alg object
-+ */
-+static int
-+setup_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ /*
-+ * see if cipher has been disabled (0) or
-+ * if noauto set and not enabled (1)
-+ */
-+ if (cptr->parm[0] == 0 || (noauto && cptr->parm[0] < 0)) {
-+ if (debug>0)
-+ printk(KERN_INFO "setup_cipher_list(): "
-+ "ciphername=%s skipped at user request: "
-+ "noauto=%d parm[0]=%d parm[1]=%d\n"
-+ , cptr->ciphername
-+ , noauto
-+ , cptr->parm[0]
-+ , cptr->parm[1]);
-+ continue;
-+ }
-+ /*
-+ * use a local ci to avoid touching cptr->ci,
-+ * if register ipsec_alg success then bind cipher
-+ */
-+ if( setup_cipher(cptr->ciphername) ) {
-+ if (debug > 0)
-+ printk(KERN_DEBUG "klips_debug:"
-+ "setup_cipher_list():"
-+ "ciphername=%s found\n"
-+ , cptr->ciphername);
-+ if (setup_ipsec_alg_capi_cipher(cptr) == 0) {
-+
-+
-+ } else {
-+ printk(KERN_ERR "klips_debug:"
-+ "setup_cipher_list():"
-+ "ciphername=%s failed ipsec_alg_register\n"
-+ , cptr->ciphername);
-+ }
-+ } else {
-+ if (debug>0)
-+ printk(KERN_INFO "setup_cipher_list(): lookup for ciphername=%s: not found \n",
-+ cptr->ciphername);
-+ }
-+ }
-+ return 0;
-+}
-+/*
-+ * deregister ipsec_alg objects and unbind ciphers
-+ */
-+static int
-+unsetup_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ if (cptr->alg.ixt_state & IPSEC_ALG_ST_REGISTERED) {
-+ unregister_ipsec_alg_enc(&cptr->alg);
-+ }
-+ }
-+ return 0;
-+}
-+/*
-+ * test loop for registered algos
-+ */
-+static int
-+test_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ int test_ret;
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ if (cptr->alg.ixt_state & IPSEC_ALG_ST_REGISTERED) {
-+ test_ret=ipsec_alg_test(
-+ cptr->alg.ixt_alg_type,
-+ cptr->alg.ixt_alg_id,
-+ test);
-+ printk("test_cipher_list(alg_type=%d alg_id=%d): test_ret=%d\n",
-+ cptr->alg.ixt_alg_type,
-+ cptr->alg.ixt_alg_id,
-+ test_ret);
-+ }
-+ }
-+ return 0;
-+}
-+
-+IPSEC_ALG_MODULE_INIT( ipsec_cryptoapi_init )
-+{
-+ int ret, test_ret;
-+ if ((ret=setup_cipher_list(alg_capi_carray)) < 0)
-+ return -EPROTONOSUPPORT;
-+ if (ret==0 && test) {
-+ test_ret=test_cipher_list(alg_capi_carray);
-+ }
-+ return ret;
-+}
-+IPSEC_ALG_MODULE_EXIT( ipsec_cryptoapi_fini )
-+{
-+ unsetup_cipher_list(alg_capi_carray);
-+ return;
-+}
-+#ifdef MODULE_LICENSE
-+MODULE_LICENSE("GPL");
-+#endif
-+
-+EXPORT_NO_SYMBOLS;
-+#endif /* NO_CRYPTOAPI_SUPPORT */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/alg/scripts/mk-static_init.c.sh Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,18 @@
-+#!/bin/sh
-+cat << EOF
-+#include <linux/kernel.h>
-+#include <linux/list.h>
-+#include "freeswan/ipsec_alg.h"
-+$(for i in $*; do
-+ test -z "$i" && continue
-+ echo "extern int $i(void);"
-+done)
-+void ipsec_alg_static_init(void){
-+ int __attribute__ ((unused)) err=0;
-+$(for i in $*; do
-+ test -z "$i" && continue
-+ echo " if ((err=$i()) < 0)"
-+ echo " printk(KERN_WARNING \"$i() returned %d\", err);"
-+done)
-+}
-+EOF
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/anyaddr.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,150 @@
-+/*
-+ * special addresses
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: anyaddr.c,v 1.10 2004/07/10 07:43:47 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/* these are mostly fallbacks for the no-IPv6-support-in-library case */
-+#ifndef IN6ADDR_ANY_INIT
-+#define IN6ADDR_ANY_INIT {{{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }}}
-+#endif
-+#ifndef IN6ADDR_LOOPBACK_INIT
-+#define IN6ADDR_LOOPBACK_INIT {{{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 }}}
-+#endif
-+
-+static struct in6_addr v6any = IN6ADDR_ANY_INIT;
-+static struct in6_addr v6loop = IN6ADDR_LOOPBACK_INIT;
-+
-+/*
-+ - anyaddr - initialize to the any-address value
-+ */
-+err_t /* NULL for success, else string literal */
-+anyaddr(af, dst)
-+int af; /* address family */
-+ip_address *dst;
-+{
-+ uint32_t v4any = htonl(INADDR_ANY);
-+
-+ switch (af) {
-+ case AF_INET:
-+ return initaddr((unsigned char *)&v4any, sizeof(v4any), af, dst);
-+ break;
-+ case AF_INET6:
-+ return initaddr((unsigned char *)&v6any, sizeof(v6any), af, dst);
-+ break;
-+ default:
-+ return "unknown address family in anyaddr/unspecaddr";
-+ break;
-+ }
-+}
-+
-+/*
-+ - unspecaddr - initialize to the unspecified-address value
-+ */
-+err_t /* NULL for success, else string literal */
-+unspecaddr(af, dst)
-+int af; /* address family */
-+ip_address *dst;
-+{
-+ return anyaddr(af, dst);
-+}
-+
-+/*
-+ - loopbackaddr - initialize to the loopback-address value
-+ */
-+err_t /* NULL for success, else string literal */
-+loopbackaddr(af, dst)
-+int af; /* address family */
-+ip_address *dst;
-+{
-+ uint32_t v4loop = htonl(INADDR_LOOPBACK);
-+
-+ switch (af) {
-+ case AF_INET:
-+ return initaddr((unsigned char *)&v4loop, sizeof(v4loop), af, dst);
-+ break;
-+ case AF_INET6:
-+ return initaddr((unsigned char *)&v6loop, sizeof(v6loop), af, dst);
-+ break;
-+ default:
-+ return "unknown address family in loopbackaddr";
-+ break;
-+ }
-+}
-+
-+/*
-+ - isanyaddr - test for the any-address value
-+ */
-+int
-+isanyaddr(src)
-+const ip_address *src;
-+{
-+ uint32_t v4any = htonl(INADDR_ANY);
-+ int cmp;
-+
-+ switch (src->u.v4.sin_family) {
-+ case AF_INET:
-+ cmp = memcmp(&src->u.v4.sin_addr.s_addr, &v4any, sizeof(v4any));
-+ break;
-+ case AF_INET6:
-+ cmp = memcmp(&src->u.v6.sin6_addr, &v6any, sizeof(v6any));
-+ break;
-+
-+ case 0:
-+ /* a zeroed structure is considered any address */
-+ return 1;
-+
-+ default:
-+ return 0;
-+ break;
-+ }
-+
-+ return (cmp == 0) ? 1 : 0;
-+}
-+
-+/*
-+ - isunspecaddr - test for the unspecified-address value
-+ */
-+int
-+isunspecaddr(src)
-+const ip_address *src;
-+{
-+ return isanyaddr(src);
-+}
-+
-+/*
-+ - isloopbackaddr - test for the loopback-address value
-+ */
-+int
-+isloopbackaddr(src)
-+const ip_address *src;
-+{
-+ uint32_t v4loop = htonl(INADDR_LOOPBACK);
-+ int cmp;
-+
-+ switch (src->u.v4.sin_family) {
-+ case AF_INET:
-+ cmp = memcmp(&src->u.v4.sin_addr.s_addr, &v4loop, sizeof(v4loop));
-+ break;
-+ case AF_INET6:
-+ cmp = memcmp(&src->u.v6.sin6_addr, &v6loop, sizeof(v6loop));
-+ break;
-+ default:
-+ return 0;
-+ break;
-+ }
-+
-+ return (cmp == 0) ? 1 : 0;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/datatot.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,234 @@
-+/*
-+ * convert from binary data (e.g. key) to text form
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: datatot.c,v 1.9 2005/08/30 21:15:26 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+static void convert(const char *src, size_t nreal, int format, char *out);
-+
-+/*
-+ - datatot - convert data bytes to text
-+ */
-+size_t /* true length (with NUL) for success, 0 for unknown format or empty input */
-+datatot(src, srclen, format, dst, dstlen)
-+const unsigned char *src;
-+size_t srclen;
-+int format; /* character indicating what format */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ size_t inblocksize; /* process this many bytes at a time */
-+ size_t outblocksize; /* producing this many */
-+ size_t breakevery; /* add a _ every this many (0 means don't) */
-+ size_t sincebreak; /* output bytes since last _ */
-+ char breakchar; /* character used to break between groups */
-+ unsigned char inblock[10]; /* enough for any format */
-+ char outblock[10]; /* enough for any format */
-+ char fake[1]; /* fake output area for dstlen == 0 */
-+ size_t needed; /* return value */
-+ char *stop; /* where the terminating NUL will go */
-+ size_t ntodo; /* remaining input */
-+ size_t nreal; /* prefix length at first, then real (unpadded) bytes in the current input block */
-+ char *out; /* where the current block is converted to: dst or outblock */
-+ char *prefix; /* text emitted before the data: "0x", "0s" or "" */
-+
-+ breakevery = 0;
-+ breakchar = '_';
-+
-+ switch (format) {
-+ case 0:
-+ case 'h':
-+ format = 'x';
-+ breakevery = 8;
-+ /* FALLTHROUGH */
-+ case 'x':
-+ inblocksize = 1;
-+ outblocksize = 2;
-+ prefix = "0x";
-+ break;
-+ case ':':
-+ format = 'x';
-+ breakevery = 2;
-+ breakchar = ':';
-+ /* FALLTHROUGH */
-+ case 16:
-+ inblocksize = 1;
-+ outblocksize = 2;
-+ prefix = "";
-+ format = 'x';
-+ break;
-+ case 's':
-+ inblocksize = 3;
-+ outblocksize = 4;
-+ prefix = "0s";
-+ break;
-+ case 64: /* beware, equals '@' */
-+ inblocksize = 3;
-+ outblocksize = 4;
-+ prefix = "";
-+ format = 's';
-+ break;
-+ default:
-+ return 0; /* unknown format */
-+ break;
-+ }
-+
-+ user_assert(inblocksize < sizeof(inblock));
-+ user_assert(outblocksize < sizeof(outblock));
-+ user_assert(breakevery % outblocksize == 0);
-+
-+ if (srclen == 0)
-+ return 0; /* nothing to convert */
-+ ntodo = srclen;
-+
-+ if (dstlen == 0) { /* dispose of awkward special case */
-+ dst = fake;
-+ dstlen = 1;
-+ }
-+ stop = dst + dstlen - 1;
-+
-+ nreal = strlen(prefix);
-+ needed = nreal; /* for starters */
-+ if (dstlen <= nreal) { /* prefix won't fit */
-+ strncpy(dst, prefix, dstlen - 1);
-+ dst += dstlen - 1;
-+ } else {
-+ strcpy(dst, prefix);
-+ dst += nreal;
-+ }
-+
-+ user_assert(dst <= stop);
-+ sincebreak = 0;
-+
-+ while (ntodo > 0) {
-+ if (ntodo < inblocksize) { /* incomplete input */
-+ memset(inblock, 0, sizeof(inblock));
-+ memcpy(inblock, src, ntodo);
-+ src = inblock;
-+ nreal = ntodo;
-+ ntodo = inblocksize;
-+ } else
-+ nreal = inblocksize;
-+ out = (outblocksize > stop - dst) ? outblock : dst; /* use scratch block if a whole output block won't fit */
-+
-+ convert((const char *)src, nreal, format, out);
-+ needed += outblocksize;
-+ sincebreak += outblocksize;
-+ if (dst < stop) {
-+ if (out != dst) {
-+ user_assert(outblocksize > stop - dst);
-+ memcpy(dst, out, stop - dst); /* copy only the part that fits */
-+ dst = stop;
-+ } else
-+ dst += outblocksize;
-+ }
-+
-+ src += inblocksize;
-+ ntodo -= inblocksize;
-+ if (breakevery != 0 && sincebreak >= breakevery && ntodo > 0) {
-+ if (dst < stop)
-+ *dst++ = breakchar;
-+ needed++;
-+ sincebreak = 0;
-+ }
-+ }
-+
-+ user_assert(dst <= stop);
-+ *dst++ = '\0';
-+ needed++;
-+
-+ return needed;
-+}
-+
-+/*
-+ - convert - convert one input block to one output block
-+ */
-+static void
-+convert(src, nreal, format, out)
-+const char *src;
-+size_t nreal; /* how much of the input block is real */
-+int format;
-+char *out;
-+{
-+ static char hex[] = "0123456789abcdef";
-+ static char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-+ "abcdefghijklmnopqrstuvwxyz"
-+ "0123456789+/";
-+ unsigned char c;
-+ unsigned char c1, c2, c3;
-+
-+ user_assert(nreal > 0);
-+ switch (format) {
-+ case 'x':
-+ user_assert(nreal == 1);
-+ c = (unsigned char)*src;
-+ *out++ = hex[c >> 4];
-+ *out++ = hex[c & 0xf];
-+ break;
-+ case 's':
-+ c1 = (unsigned char)*src++;
-+ c2 = (unsigned char)*src++;
-+ c3 = (unsigned char)*src++;
-+ *out++ = base64[c1 >> 2]; /* top 6 bits of c1 */
-+ c = (c1 & 0x3) << 4; /* bottom 2 of c1... */
-+ c |= c2 >> 4; /* ...top 4 of c2 */
-+ *out++ = base64[c];
-+ if (nreal == 1)
-+ *out++ = '=';
-+ else {
-+ c = (c2 & 0xf) << 2; /* bottom 4 of c2... */
-+ c |= c3 >> 6; /* ...top 2 of c3 */
-+ *out++ = base64[c];
-+ }
-+ if (nreal <= 2)
-+ *out++ = '=';
-+ else
-+ *out++ = base64[c3 & 0x3f]; /* bottom 6 of c3 */
-+ break;
-+ default:
-+ user_assert(nreal == 0); /* unknown format */
-+ break;
-+ }
-+}
-+
-+/*
-+ - datatoa - convert data to ASCII
-+ * backward-compatibility synonym for datatot
-+ */
-+size_t /* true length (with NUL) for success */
-+datatoa(src, srclen, format, dst, dstlen)
-+const unsigned char *src;
-+size_t srclen;
-+int format; /* character indicating what format */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ return datatot(src, srclen, format, dst, dstlen);
-+}
-+
-+/*
-+ - bytestoa - convert data bytes to ASCII
-+ * backward-compatibility synonym for datatot
-+ */
-+size_t /* true length (with NUL) for success */
-+bytestoa(src, srclen, format, dst, dstlen)
-+const unsigned char *src;
-+size_t srclen;
-+int format; /* character indicating what format */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ return datatot(src, srclen, format, dst, dstlen);
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/defconfig Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,63 @@
-+
-+#
-+# RCSID $Id: defconfig,v 1.30 2005/09/15 02:31:12 paul Exp $
-+#
-+
-+#
-+# Openswan IPSec implementation, KLIPS kernel config defaults
-+#
-+
-+#
-+# First, let's override stuff already set or not in the kernel config.
-+#
-+# We can't even think about leaving this off...
-+CONFIG_INET=y
-+
-+#
-+# This must be on for subnet protection.
-+CONFIG_IP_FORWARD=y
-+
-+# Shut off IPSEC masquerading if it has been enabled, since it will
-+# break the compile. IPPROTO_ESP and IPPROTO_AH were included in
-+# net/ipv4/ip_masq.c when they should have gone into include/linux/in.h.
-+CONFIG_IP_MASQUERADE_IPSEC=n
-+
-+#
-+# Next, let's set the recommended FreeS/WAN configuration.
-+#
-+
-+# To config as static (preferred), 'y'. To config as module, 'm'.
-+CONFIG_KLIPS=m
-+
-+# To do tunnel mode IPSec, this must be enabled.
-+CONFIG_KLIPS_IPIP=y
-+
-+# To enable authentication, say 'y'. (Highly recommended)
-+CONFIG_KLIPS_AH=y
-+
-+# Authentication algorithm(s):
-+CONFIG_KLIPS_AUTH_HMAC_MD5=y
-+CONFIG_KLIPS_AUTH_HMAC_SHA1=y
-+
-+# To enable encryption, say 'y'. (Highly recommended)
-+CONFIG_KLIPS_ESP=y
-+
-+# modular algo extensions (and new ALGOs)
-+CONFIG_KLIPS_ALG=y
-+
-+# Encryption algorithm(s):
-+CONFIG_KLIPS_ENC_3DES=y
-+CONFIG_KLIPS_ENC_AES=y
-+
-+# Use CryptoAPI for ALG? - by default, no.
-+CONFIG_KLIPS_ENC_CRYPTOAPI=n
-+
-+# IP Compression: new, probably still has minor bugs.
-+CONFIG_KLIPS_IPCOMP=y
-+
-+# To enable userspace-switchable KLIPS debugging, say 'y'.
-+CONFIG_KLIPS_DEBUG=y
-+
-+# OCF HW offloading, requires kernel patch
-+# CONFIG_KLIPS_OCF is not set
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/deflate.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1351 @@
-+/* deflate.c -- compress data using the deflation algorithm
-+ * Copyright (C) 1995-2002 Jean-loup Gailly.
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/*
-+ * ALGORITHM
-+ *
-+ * The "deflation" process depends on being able to identify portions
-+ * of the input text which are identical to earlier input (within a
-+ * sliding window trailing behind the input currently being processed).
-+ *
-+ * The most straightforward technique turns out to be the fastest for
-+ * most input files: try all possible matches and select the longest.
-+ * The key feature of this algorithm is that insertions into the string
-+ * dictionary are very simple and thus fast, and deletions are avoided
-+ * completely. Insertions are performed at each input character, whereas
-+ * string matches are performed only when the previous match ends. So it
-+ * is preferable to spend more time in matches to allow very fast string
-+ * insertions and avoid deletions. The matching algorithm for small
-+ * strings is inspired from that of Rabin & Karp. A brute force approach
-+ * is used to find longer strings when a small match has been found.
-+ * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
-+ * (by Leonid Broukhis).
-+ * A previous version of this file used a more sophisticated algorithm
-+ * (by Fiala and Greene) which is guaranteed to run in linear amortized
-+ * time, but has a larger average cost, uses more memory and is patented.
-+ * However the F&G algorithm may be faster for some highly redundant
-+ * files if the parameter max_chain_length (described below) is too large.
-+ *
-+ * ACKNOWLEDGEMENTS
-+ *
-+ * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
-+ * I found it in 'freeze' written by Leonid Broukhis.
-+ * Thanks to many people for bug reports and testing.
-+ *
-+ * REFERENCES
-+ *
-+ * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
-+ * Available in ftp://ds.internic.net/rfc/rfc1951.txt
-+ *
-+ * A description of the Rabin and Karp algorithm is given in the book
-+ * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
-+ *
-+ * Fiala,E.R., and Greene,D.H.
-+ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
-+ *
-+ */
-+
-+/* @(#) $Id: deflate.c,v 1.4 2004/07/10 07:48:37 mcr Exp $ */
-+
-+#include "deflate.h"
-+
-+local const char deflate_copyright[] =
-+ " deflate 1.1.4 Copyright 1995-2002 Jean-loup Gailly ";
-+/*
-+ If you use the zlib library in a product, an acknowledgment is welcome
-+ in the documentation of your product. If for some reason you cannot
-+ include such an acknowledgment, I would appreciate that you keep this
-+ copyright string in the executable of your product.
-+ */
-+
-+/* ===========================================================================
-+ * Function prototypes.
-+ */
-+typedef enum {
-+ need_more, /* block not completed, need more input or more output */
-+ block_done, /* block flush performed */
-+ finish_started, /* finish started, need only more output at next deflate */
-+ finish_done /* finish done, accept no more input or output */
-+} block_state;
-+
-+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
-+/* Compression function. Returns the block state after the call. */
-+
-+local void fill_window OF((deflate_state *s));
-+local block_state deflate_stored OF((deflate_state *s, int flush));
-+local block_state deflate_fast OF((deflate_state *s, int flush));
-+local block_state deflate_slow OF((deflate_state *s, int flush));
-+local void lm_init OF((deflate_state *s));
-+local void putShortMSB OF((deflate_state *s, uInt b));
-+local void flush_pending OF((z_streamp strm));
-+local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
-+#ifdef ASMV
-+ void match_init OF((void)); /* asm code initialization */
-+ uInt longest_match OF((deflate_state *s, IPos cur_match));
-+#else
-+local uInt longest_match OF((deflate_state *s, IPos cur_match));
-+#endif
-+
-+#ifdef DEBUG
-+local void check_match OF((deflate_state *s, IPos start, IPos match,
-+ int length));
-+#endif
-+
-+/* ===========================================================================
-+ * Local data
-+ */
-+
-+#define NIL 0
-+/* Tail of hash chains */
-+
-+#ifndef TOO_FAR
-+# define TOO_FAR 4096
-+#endif
-+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-+
-+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-+/* Minimum amount of lookahead, except at the end of the input file.
-+ * See deflate.c for comments about the MIN_MATCH+1.
-+ */
-+
-+/* Values for max_lazy_match, good_match and max_chain_length, depending on
-+ * the desired pack level (0..9). The values given below have been tuned to
-+ * exclude worst case performance for pathological files. Better values may be
-+ * found for specific files.
-+ */
-+typedef struct config_s {
-+ ush good_length; /* reduce lazy search above this match length */
-+ ush max_lazy; /* do not perform lazy search above this match length */
-+ ush nice_length; /* quit search above this match length */
-+ ush max_chain;
-+ compress_func func;
-+} config;
-+
-+local const config configuration_table[10] = {
-+/* good lazy nice chain */
-+/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
-+/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */
-+/* 2 */ {4, 5, 16, 8, deflate_fast},
-+/* 3 */ {4, 6, 32, 32, deflate_fast},
-+
-+/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
-+/* 5 */ {8, 16, 32, 32, deflate_slow},
-+/* 6 */ {8, 16, 128, 128, deflate_slow},
-+/* 7 */ {8, 32, 128, 256, deflate_slow},
-+/* 8 */ {32, 128, 258, 1024, deflate_slow},
-+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
-+
-+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
-+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
-+ * meaning.
-+ */
-+
-+#define EQUAL 0
-+/* result of memcmp for equal strings */
-+
-+struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
-+
-+/* ===========================================================================
-+ * Update a hash value with the given input byte
-+ * IN assertion: all calls to UPDATE_HASH are made with consecutive
-+ * input characters, so that a running hash key can be computed from the
-+ * previous key instead of complete recalculation each time.
-+ */
-+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
-+
-+
-+/* ===========================================================================
-+ * Insert string str in the dictionary and set match_head to the previous head
-+ * of the hash chain (the most recent string with same hash key). Return
-+ * the previous length of the hash chain.
-+ * If this file is compiled with -DFASTEST, the compression level is forced
-+ * to 1, and no hash chains are maintained.
-+ * IN assertion: all calls to INSERT_STRING are made with consecutive
-+ * input characters and the first MIN_MATCH bytes of str are valid
-+ * (except for the last MIN_MATCH-1 bytes of the input file).
-+ */
-+#ifdef FASTEST
-+#define INSERT_STRING(s, str, match_head) \
-+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
-+ match_head = s->head[s->ins_h], \
-+ s->head[s->ins_h] = (Pos)(str))
-+#else
-+#define INSERT_STRING(s, str, match_head) \
-+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
-+ s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
-+ s->head[s->ins_h] = (Pos)(str))
-+#endif
-+
-+/* ===========================================================================
-+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
-+ * prev[] will be initialized on the fly. Expands to a single statement;
-+ * invoke as "CLEAR_HASH(s);" so it is safe inside an unbraced if/else.
-+#define CLEAR_HASH(s) do { \
-+ s->head[s->hash_size-1] = NIL; \
-+ zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); } while (0)
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateInit_(strm, level, version, stream_size)
-+ z_streamp strm;
-+ int level;
-+ const char *version;
-+ int stream_size;
-+{
-+ return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
-+ Z_DEFAULT_STRATEGY, version, stream_size);
-+ /* To do: ignore strm->next_in if we use it as window */
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
-+ version, stream_size)
-+ z_streamp strm;
-+ int level;
-+ int method;
-+ int windowBits;
-+ int memLevel;
-+ int strategy;
-+ const char *version;
-+ int stream_size;
-+{
-+ deflate_state *s;
-+ int noheader = 0;
-+ static const char* my_version = ZLIB_VERSION;
-+
-+ ushf *overlay;
-+ /* We overlay pending_buf and d_buf+l_buf. This works since the average
-+ * output size for (length,distance) codes is <= 24 bits.
-+ */
-+
-+ if (version == Z_NULL || version[0] != my_version[0] ||
-+ stream_size != sizeof(z_stream)) {
-+ return Z_VERSION_ERROR;
-+ }
-+ if (strm == Z_NULL) return Z_STREAM_ERROR;
-+
-+ strm->msg = Z_NULL;
-+ if (strm->zalloc == Z_NULL) {
-+ return Z_STREAM_ERROR;
-+/* strm->zalloc = zcalloc;
-+ strm->opaque = (voidpf)0;*/
-+ }
-+ if (strm->zfree == Z_NULL) return Z_STREAM_ERROR; /* strm->zfree = zcfree; */
-+
-+ if (level == Z_DEFAULT_COMPRESSION) level = 6;
-+#ifdef FASTEST
-+ level = 1;
-+#endif
-+
-+ if (windowBits < 0) { /* undocumented feature: suppress zlib header */
-+ noheader = 1;
-+ windowBits = -windowBits;
-+ }
-+ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
-+ windowBits < 9 || windowBits > 15 || level < 0 || level > 9 ||
-+ strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-+ return Z_STREAM_ERROR;
-+ }
-+ s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
-+ if (s == Z_NULL) return Z_MEM_ERROR;
-+ strm->state = (struct internal_state FAR *)s;
-+ s->strm = strm;
-+
-+ s->noheader = noheader;
-+ s->w_bits = windowBits;
-+ s->w_size = 1 << s->w_bits;
-+ s->w_mask = s->w_size - 1;
-+
-+ s->hash_bits = memLevel + 7;
-+ s->hash_size = 1 << s->hash_bits;
-+ s->hash_mask = s->hash_size - 1;
-+ s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
-+
-+ s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
-+ s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
-+ s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
-+
-+ s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
-+
-+ overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
-+ s->pending_buf = (uchf *) overlay;
-+ s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
-+
-+ if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
-+ s->pending_buf == Z_NULL) {
-+ strm->msg = ERR_MSG(Z_MEM_ERROR);
-+ deflateEnd (strm);
-+ return Z_MEM_ERROR;
-+ }
-+ s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
-+ s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
-+
-+ s->level = level;
-+ s->strategy = strategy;
-+ s->method = (Byte)method;
-+
-+ return deflateReset(strm);
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
-+ z_streamp strm;
-+ const Bytef *dictionary;
-+ uInt dictLength;
-+{
-+ deflate_state *s;
-+ uInt length = dictLength;
-+ uInt n;
-+ IPos hash_head = 0;
-+
-+ if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
-+ strm->state->status != INIT_STATE) return Z_STREAM_ERROR;
-+
-+ s = strm->state;
-+ strm->adler = adler32(strm->adler, dictionary, dictLength);
-+
-+ if (length < MIN_MATCH) return Z_OK;
-+ if (length > MAX_DIST(s)) {
-+ length = MAX_DIST(s);
-+#ifndef USE_DICT_HEAD
-+ dictionary += dictLength - length; /* use the tail of the dictionary */
-+#endif
-+ }
-+ zmemcpy(s->window, dictionary, length);
-+ s->strstart = length;
-+ s->block_start = (long)length;
-+
-+ /* Insert all strings in the hash table (except for the last two bytes).
-+ * s->lookahead stays null, so s->ins_h will be recomputed at the next
-+ * call of fill_window.
-+ */
-+ s->ins_h = s->window[0];
-+ UPDATE_HASH(s, s->ins_h, s->window[1]);
-+ for (n = 0; n <= length - MIN_MATCH; n++) {
-+ INSERT_STRING(s, n, hash_head);
-+ }
-+ if (hash_head) hash_head = 0; /* to make compiler happy */
-+ return Z_OK;
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateReset (strm)
-+ z_streamp strm;
-+{
-+ deflate_state *s;
-+
-+ if (strm == Z_NULL || strm->state == Z_NULL ||
-+ strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR;
-+
-+ strm->total_in = strm->total_out = 0;
-+ strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
-+ strm->data_type = Z_UNKNOWN;
-+
-+ s = (deflate_state *)strm->state;
-+ s->pending = 0;
-+ s->pending_out = s->pending_buf;
-+
-+ if (s->noheader < 0) {
-+ s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */
-+ }
-+ s->status = s->noheader ? BUSY_STATE : INIT_STATE;
-+ strm->adler = 1;
-+ s->last_flush = Z_NO_FLUSH;
-+
-+ _tr_init(s);
-+ lm_init(s);
-+
-+ return Z_OK;
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateParams(strm, level, strategy)
-+ z_streamp strm;
-+ int level;
-+ int strategy;
-+{
-+ deflate_state *s;
-+ compress_func func;
-+ int err = Z_OK;
-+
-+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-+ s = strm->state;
-+
-+ if (level == Z_DEFAULT_COMPRESSION) {
-+ level = 6;
-+ }
-+ if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-+ return Z_STREAM_ERROR;
-+ }
-+ func = configuration_table[s->level].func;
-+
-+ if (func != configuration_table[level].func && strm->total_in != 0) {
-+ /* Flush the last buffer: */
-+ err = deflate(strm, Z_PARTIAL_FLUSH);
-+ }
-+ if (s->level != level) {
-+ s->level = level;
-+ s->max_lazy_match = configuration_table[level].max_lazy;
-+ s->good_match = configuration_table[level].good_length;
-+ s->nice_match = configuration_table[level].nice_length;
-+ s->max_chain_length = configuration_table[level].max_chain;
-+ }
-+ s->strategy = strategy;
-+ return err;
-+}
-+
-+/* =========================================================================
-+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
-+ * IN assertion: the stream state is correct and there is enough room in
-+ * pending_buf.
-+ */
-+local void putShortMSB (s, b)
-+ deflate_state *s;
-+ uInt b;
-+{
-+ put_byte(s, (Byte)(b >> 8));
-+ put_byte(s, (Byte)(b & 0xff));
-+}
-+
-+/* =========================================================================
-+ * Flush as much pending output as possible. All deflate() output goes
-+ * through this function so some applications may wish to modify it
-+ * to avoid allocating a large strm->next_out buffer and copying into it.
-+ * (See also read_buf()).
-+ */
-+local void flush_pending(strm)
-+ z_streamp strm;
-+{
-+ unsigned len = strm->state->pending;
-+
-+ if (len > strm->avail_out) len = strm->avail_out;
-+ if (len == 0) return;
-+
-+ zmemcpy(strm->next_out, strm->state->pending_out, len);
-+ strm->next_out += len;
-+ strm->state->pending_out += len;
-+ strm->total_out += len;
-+ strm->avail_out -= len;
-+ strm->state->pending -= len;
-+ if (strm->state->pending == 0) {
-+ strm->state->pending_out = strm->state->pending_buf;
-+ }
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflate (strm, flush)
-+ z_streamp strm;
-+ int flush;
-+{
-+ int old_flush; /* value of flush param for previous deflate call */
-+ deflate_state *s;
-+
-+ if (strm == Z_NULL || strm->state == Z_NULL ||
-+ flush > Z_FINISH || flush < 0) {
-+ return Z_STREAM_ERROR;
-+ }
-+ s = strm->state;
-+
-+ if (strm->next_out == Z_NULL ||
-+ (strm->next_in == Z_NULL && strm->avail_in != 0) ||
-+ (s->status == FINISH_STATE && flush != Z_FINISH)) {
-+ ERR_RETURN(strm, Z_STREAM_ERROR);
-+ }
-+ if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
-+
-+ s->strm = strm; /* just in case */
-+ old_flush = s->last_flush;
-+ s->last_flush = flush;
-+
-+ /* Write the zlib header */
-+ if (s->status == INIT_STATE) {
-+
-+ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
-+ uInt level_flags = (s->level-1) >> 1;
-+
-+ if (level_flags > 3) level_flags = 3;
-+ header |= (level_flags << 6);
-+ if (s->strstart != 0) header |= PRESET_DICT;
-+ header += 31 - (header % 31);
-+
-+ s->status = BUSY_STATE;
-+ putShortMSB(s, header);
-+
-+ /* Save the adler32 of the preset dictionary: */
-+ if (s->strstart != 0) {
-+ putShortMSB(s, (uInt)(strm->adler >> 16));
-+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
-+ }
-+ strm->adler = 1L;
-+ }
-+
-+ /* Flush as much pending output as possible */
-+ if (s->pending != 0) {
-+ flush_pending(strm);
-+ if (strm->avail_out == 0) {
-+ /* Since avail_out is 0, deflate will be called again with
-+ * more output space, but possibly with both pending and
-+ * avail_in equal to zero. There won't be anything to do,
-+ * but this is not an error situation so make sure we
-+ * return OK instead of BUF_ERROR at next call of deflate:
-+ */
-+ s->last_flush = -1;
-+ return Z_OK;
-+ }
-+
-+ /* Make sure there is something to do and avoid duplicate consecutive
-+ * flushes. For repeated and useless calls with Z_FINISH, we keep
-+ * returning Z_STREAM_END instead of Z_BUFF_ERROR.
-+ */
-+ } else if (strm->avail_in == 0 && flush <= old_flush &&
-+ flush != Z_FINISH) {
-+ ERR_RETURN(strm, Z_BUF_ERROR);
-+ }
-+
-+ /* User must not provide more input after the first FINISH: */
-+ if (s->status == FINISH_STATE && strm->avail_in != 0) {
-+ ERR_RETURN(strm, Z_BUF_ERROR);
-+ }
-+
-+ /* Start a new block or continue the current one.
-+ */
-+ if (strm->avail_in != 0 || s->lookahead != 0 ||
-+ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
-+ block_state bstate;
-+
-+ bstate = (*(configuration_table[s->level].func))(s, flush);
-+
-+ if (bstate == finish_started || bstate == finish_done) {
-+ s->status = FINISH_STATE;
-+ }
-+ if (bstate == need_more || bstate == finish_started) {
-+ if (strm->avail_out == 0) {
-+ s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
-+ }
-+ return Z_OK;
-+ /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
-+ * of deflate should use the same flush parameter to make sure
-+ * that the flush is complete. So we don't have to output an
-+ * empty block here, this will be done at next call. This also
-+ * ensures that for a very small output buffer, we emit at most
-+ * one empty block.
-+ */
-+ }
-+ if (bstate == block_done) {
-+ if (flush == Z_PARTIAL_FLUSH) {
-+ _tr_align(s);
-+ } else { /* FULL_FLUSH or SYNC_FLUSH */
-+ _tr_stored_block(s, (char*)0, 0L, 0);
-+ /* For a full flush, this empty block will be recognized
-+ * as a special marker by inflate_sync().
-+ */
-+ if (flush == Z_FULL_FLUSH) {
-+ CLEAR_HASH(s); /* forget history */
-+ }
-+ }
-+ flush_pending(strm);
-+ if (strm->avail_out == 0) {
-+ s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
-+ return Z_OK;
-+ }
-+ }
-+ }
-+ Assert(strm->avail_out > 0, "bug2");
-+
-+ if (flush != Z_FINISH) return Z_OK;
-+ if (s->noheader) return Z_STREAM_END;
-+
-+ /* Write the zlib trailer (adler32) */
-+ putShortMSB(s, (uInt)(strm->adler >> 16));
-+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
-+ flush_pending(strm);
-+ /* If avail_out is zero, the application will call deflate again
-+ * to flush the rest.
-+ */
-+ s->noheader = -1; /* write the trailer only once! */
-+ return s->pending != 0 ? Z_OK : Z_STREAM_END;
-+}
-+
-+/* ========================================================================= */
-+int ZEXPORT deflateEnd (strm)
-+ z_streamp strm;
-+{
-+ int status;
-+
-+ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-+
-+ status = strm->state->status;
-+ if (status != INIT_STATE && status != BUSY_STATE &&
-+ status != FINISH_STATE) {
-+ return Z_STREAM_ERROR;
-+ }
-+
-+ /* Deallocate in reverse order of allocations: */
-+ TRY_FREE(strm, strm->state->pending_buf);
-+ TRY_FREE(strm, strm->state->head);
-+ TRY_FREE(strm, strm->state->prev);
-+ TRY_FREE(strm, strm->state->window);
-+
-+ ZFREE(strm, strm->state);
-+ strm->state = Z_NULL;
-+
-+ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
-+}
-+
-+/* =========================================================================
-+ * Copy the source state to the destination state.
-+ * To simplify the source, this is not supported for 16-bit MSDOS (which
-+ * doesn't have enough memory anyway to duplicate compression states).
-+ */
-+int ZEXPORT deflateCopy (dest, source)
-+ z_streamp dest;
-+ z_streamp source;
-+{
-+#ifdef MAXSEG_64K
-+ return Z_STREAM_ERROR;
-+#else
-+ deflate_state *ds;
-+ deflate_state *ss;
-+ ushf *overlay;
-+
-+
-+ if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
-+ return Z_STREAM_ERROR;
-+ }
-+
-+ ss = source->state;
-+
-+ *dest = *source;
-+
-+ ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
-+ if (ds == Z_NULL) return Z_MEM_ERROR;
-+ dest->state = (struct internal_state FAR *) ds;
-+ *ds = *ss;
-+ ds->strm = dest;
-+
-+ ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
-+ ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
-+ ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
-+ overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
-+ ds->pending_buf = (uchf *) overlay;
-+
-+ if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
-+ ds->pending_buf == Z_NULL) {
-+ deflateEnd (dest);
-+ return Z_MEM_ERROR;
-+ }
-+ /* following zmemcpy do not work for 16-bit MSDOS */
-+ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-+ zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-+ zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-+ zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-+
-+ ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-+ ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-+ ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-+
-+ ds->l_desc.dyn_tree = ds->dyn_ltree;
-+ ds->d_desc.dyn_tree = ds->dyn_dtree;
-+ ds->bl_desc.dyn_tree = ds->bl_tree;
-+
-+ return Z_OK;
-+#endif
-+}
-+
-+/* ===========================================================================
-+ * Read a new buffer from the current input stream, update the adler32
-+ * and total number of bytes read. All deflate() input goes through
-+ * this function so some applications may wish to modify it to avoid
-+ * allocating a large strm->next_in buffer and copying from it.
-+ * (See also flush_pending()).
-+ */
-+local int read_buf(strm, buf, size)
-+ z_streamp strm;
-+ Bytef *buf;
-+ unsigned size;
-+{
-+ unsigned len = strm->avail_in;
-+
-+ if (len > size) len = size;
-+ if (len == 0) return 0;
-+
-+ strm->avail_in -= len;
-+
-+ if (!strm->state->noheader) {
-+ strm->adler = adler32(strm->adler, strm->next_in, len);
-+ }
-+ zmemcpy(buf, strm->next_in, len);
-+ strm->next_in += len;
-+ strm->total_in += len;
-+
-+ return (int)len;
-+}
-+
-+/* ===========================================================================
-+ * Initialize the "longest match" routines for a new zlib stream
-+ */
-+local void lm_init (s)
-+ deflate_state *s;
-+{
-+ s->window_size = (ulg)2L*s->w_size;
-+
-+ CLEAR_HASH(s);
-+
-+ /* Set the default configuration parameters:
-+ */
-+ s->max_lazy_match = configuration_table[s->level].max_lazy;
-+ s->good_match = configuration_table[s->level].good_length;
-+ s->nice_match = configuration_table[s->level].nice_length;
-+ s->max_chain_length = configuration_table[s->level].max_chain;
-+
-+ s->strstart = 0;
-+ s->block_start = 0L;
-+ s->lookahead = 0;
-+ s->match_length = s->prev_length = MIN_MATCH-1;
-+ s->match_available = 0;
-+ s->ins_h = 0;
-+#ifdef ASMV
-+ match_init(); /* initialize the asm code */
-+#endif
-+}
-+
-+/* ===========================================================================
-+ * Set match_start to the longest match starting at the given string and
-+ * return its length. Matches shorter or equal to prev_length are discarded,
-+ * in which case the result is equal to prev_length and match_start is
-+ * garbage.
-+ * IN assertions: cur_match is the head of the hash chain for the current
-+ * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
-+ * OUT assertion: the match length is not greater than s->lookahead.
-+ */
-+#ifndef ASMV
-+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
-+ * match.S. The code will be functionally equivalent.
-+ */
-+#ifndef FASTEST
-+local uInt longest_match(s, cur_match)
-+ deflate_state *s;
-+ IPos cur_match; /* current match */
-+{
-+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
-+ register Bytef *scan = s->window + s->strstart; /* current string */
-+ register Bytef *match; /* matched string */
-+ register int len; /* length of current match */
-+ int best_len = s->prev_length; /* best match length so far */
-+ int nice_match = s->nice_match; /* stop if match long enough */
-+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
-+ s->strstart - (IPos)MAX_DIST(s) : NIL;
-+ /* Stop when cur_match becomes <= limit. To simplify the code,
-+ * we prevent matches with the string of window index 0.
-+ */
-+ Posf *prev = s->prev;
-+ uInt wmask = s->w_mask;
-+
-+#ifdef UNALIGNED_OK
-+ /* Compare two bytes at a time. Note: this is not always beneficial.
-+ * Try with and without -DUNALIGNED_OK to check.
-+ */
-+ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
-+ register ush scan_start = *(ushf*)scan;
-+ register ush scan_end = *(ushf*)(scan+best_len-1);
-+#else
-+ register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-+ register Byte scan_end1 = scan[best_len-1];
-+ register Byte scan_end = scan[best_len];
-+#endif
-+
-+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
-+ * It is easy to get rid of this optimization if necessary.
-+ */
-+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-+
-+ /* Do not waste too much time if we already have a good match: */
-+ if (s->prev_length >= s->good_match) {
-+ chain_length >>= 2;
-+ }
-+ /* Do not look for matches beyond the end of the input. This is necessary
-+ * to make deflate deterministic.
-+ */
-+ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
-+
-+ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-+
-+ do {
-+ Assert(cur_match < s->strstart, "no future");
-+ match = s->window + cur_match;
-+
-+ /* Skip to next match if the match length cannot increase
-+ * or if the match length is less than 2:
-+ */
-+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
-+ /* This code assumes sizeof(unsigned short) == 2. Do not use
-+ * UNALIGNED_OK if your compiler uses a different size.
-+ */
-+ if (*(ushf*)(match+best_len-1) != scan_end ||
-+ *(ushf*)match != scan_start) continue;
-+
-+ /* It is not necessary to compare scan[2] and match[2] since they are
-+ * always equal when the other bytes match, given that the hash keys
-+ * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
-+ * strstart+3, +5, ... up to strstart+257. We check for insufficient
-+ * lookahead only every 4th comparison; the 128th check will be made
-+ * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
-+ * necessary to put more guard bytes at the end of the window, or
-+ * to check more often for insufficient lookahead.
-+ */
-+ Assert(scan[2] == match[2], "scan[2]?");
-+ scan++, match++;
-+ do {
-+ } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-+ *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-+ scan < strend);
-+ /* The funny "do {}" generates better code on most compilers */
-+
-+ /* Here, scan <= window+strstart+257 */
-+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-+ if (*scan == *match) scan++;
-+
-+ len = (MAX_MATCH - 1) - (int)(strend-scan);
-+ scan = strend - (MAX_MATCH-1);
-+
-+#else /* UNALIGNED_OK */
-+
-+ if (match[best_len] != scan_end ||
-+ match[best_len-1] != scan_end1 ||
-+ *match != *scan ||
-+ *++match != scan[1]) continue;
-+
-+ /* The check at best_len-1 can be removed because it will be made
-+ * again later. (This heuristic is not always a win.)
-+ * It is not necessary to compare scan[2] and match[2] since they
-+ * are always equal when the other bytes match, given that
-+ * the hash keys are equal and that HASH_BITS >= 8.
-+ */
-+ scan += 2, match++;
-+ Assert(*scan == *match, "match[2]?");
-+
-+ /* We check for insufficient lookahead only every 8th comparison;
-+ * the 256th check will be made at strstart+258.
-+ */
-+ do {
-+ } while (*++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ scan < strend);
-+
-+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-+
-+ len = MAX_MATCH - (int)(strend - scan);
-+ scan = strend - MAX_MATCH;
-+
-+#endif /* UNALIGNED_OK */
-+
-+ if (len > best_len) {
-+ s->match_start = cur_match;
-+ best_len = len;
-+ if (len >= nice_match) break;
-+#ifdef UNALIGNED_OK
-+ scan_end = *(ushf*)(scan+best_len-1);
-+#else
-+ scan_end1 = scan[best_len-1];
-+ scan_end = scan[best_len];
-+#endif
-+ }
-+ } while ((cur_match = prev[cur_match & wmask]) > limit
-+ && --chain_length != 0);
-+
-+ if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
-+ return s->lookahead;
-+}
-+
-+#else /* FASTEST */
-+/* ---------------------------------------------------------------------------
-+ * Optimized version for level == 1 only
-+ */
-+local uInt longest_match(s, cur_match)
-+ deflate_state *s;
-+ IPos cur_match; /* current match */
-+{
-+ register Bytef *scan = s->window + s->strstart; /* current string */
-+ register Bytef *match; /* matched string */
-+ register int len; /* length of current match */
-+ register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-+
-+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
-+ * It is easy to get rid of this optimization if necessary.
-+ */
-+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-+
-+ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-+
-+ Assert(cur_match < s->strstart, "no future");
-+
-+ match = s->window + cur_match;
-+
-+ /* Return failure if the match length is less than 2:
-+ */
-+ if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
-+
-+ /* The check at best_len-1 can be removed because it will be made
-+ * again later. (This heuristic is not always a win.)
-+ * It is not necessary to compare scan[2] and match[2] since they
-+ * are always equal when the other bytes match, given that
-+ * the hash keys are equal and that HASH_BITS >= 8.
-+ */
-+ scan += 2, match += 2;
-+ Assert(*scan == *match, "match[2]?");
-+
-+ /* We check for insufficient lookahead only every 8th comparison;
-+ * the 256th check will be made at strstart+258.
-+ */
-+ do {
-+ } while (*++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ *++scan == *++match && *++scan == *++match &&
-+ scan < strend);
-+
-+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-+
-+ len = MAX_MATCH - (int)(strend - scan);
-+
-+ if (len < MIN_MATCH) return MIN_MATCH - 1;
-+
-+ s->match_start = cur_match;
-+ return len <= s->lookahead ? len : s->lookahead;
-+}
-+#endif /* FASTEST */
-+#endif /* ASMV */
-+
-+#ifdef DEBUG
-+/* ===========================================================================
-+ * Check that the match at match_start is indeed a match.
-+ */
-+local void check_match(s, start, match, length)
-+ deflate_state *s;
-+ IPos start, match;
-+ int length;
-+{
-+ /* check that the match is indeed a match */
-+ if (zmemcmp(s->window + match,
-+ s->window + start, length) != EQUAL) {
-+ fprintf(stderr, " start %u, match %u, length %d\n",
-+ start, match, length);
-+ do {
-+ fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
-+ } while (--length != 0);
-+ z_error("invalid match");
-+ }
-+ if (z_verbose > 1) {
-+ fprintf(stderr,"\\[%d,%d]", start-match, length);
-+ do { putc(s->window[start++], stderr); } while (--length != 0);
-+ }
-+}
-+#else
-+# define check_match(s, start, match, length)
-+#endif
-+
-+/* ===========================================================================
-+ * Fill the window when the lookahead becomes insufficient.
-+ * Updates strstart and lookahead.
-+ *
-+ * IN assertion: lookahead < MIN_LOOKAHEAD
-+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
-+ * At least one byte has been read, or avail_in == 0; reads are
-+ * performed for at least two bytes (required for the zip translate_eol
-+ * option -- not supported here).
-+ */
-+local void fill_window(s)
-+ deflate_state *s;
-+{
-+ register unsigned n, m;
-+ register Posf *p;
-+ unsigned more; /* Amount of free space at the end of the window. */
-+ uInt wsize = s->w_size;
-+
-+ do {
-+ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-+
-+ /* Deal with !@#$% 64K limit: */
-+ if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-+ more = wsize;
-+
-+ } else if (more == (unsigned)(-1)) {
-+ /* Very unlikely, but possible on 16 bit machine if strstart == 0
-+ * and lookahead == 1 (input done one byte at time)
-+ */
-+ more--;
-+
-+ /* If the window is almost full and there is insufficient lookahead,
-+ * move the upper half to the lower one to make room in the upper half.
-+ */
-+ } else if (s->strstart >= wsize+MAX_DIST(s)) {
-+
-+ zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
-+ s->match_start -= wsize;
-+ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
-+ s->block_start -= (long) wsize;
-+
-+ /* Slide the hash table (could be avoided with 32 bit values
-+ at the expense of memory usage). We slide even when level == 0
-+ to keep the hash table consistent if we switch back to level > 0
-+ later. (Using level 0 permanently is not an optimal usage of
-+ zlib, so we don't care about this pathological case.)
-+ */
-+ n = s->hash_size;
-+ p = &s->head[n];
-+ do {
-+ m = *--p;
-+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
-+ } while (--n);
-+
-+ n = wsize;
-+#ifndef FASTEST
-+ p = &s->prev[n];
-+ do {
-+ m = *--p;
-+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
-+ /* If n is not on any hash chain, prev[n] is garbage but
-+ * its value will never be used.
-+ */
-+ } while (--n);
-+#endif
-+ more += wsize;
-+ }
-+ if (s->strm->avail_in == 0) return;
-+
-+ /* If there was no sliding:
-+ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-+ * more == window_size - lookahead - strstart
-+ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-+ * => more >= window_size - 2*WSIZE + 2
-+ * In the BIG_MEM or MMAP case (not yet supported),
-+ * window_size == input_size + MIN_LOOKAHEAD &&
-+ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-+ * Otherwise, window_size == 2*WSIZE so more >= 2.
-+ * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-+ */
-+ Assert(more >= 2, "more < 2");
-+
-+ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-+ s->lookahead += n;
-+
-+ /* Initialize the hash value now that we have some input: */
-+ if (s->lookahead >= MIN_MATCH) {
-+ s->ins_h = s->window[s->strstart];
-+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-+#if MIN_MATCH != 3
-+ Call UPDATE_HASH() MIN_MATCH-3 more times
-+#endif
-+ }
-+ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-+ * but this is not important since only literal bytes will be emitted.
-+ */
-+
-+ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-+}
-+
-+/* ===========================================================================
-+ * Flush the current block, with given end-of-file flag.
-+ * IN assertion: strstart is set to the end of the current match.
-+ */
-+#define FLUSH_BLOCK_ONLY(s, eof) { \
-+ _tr_flush_block(s, (s->block_start >= 0L ? \
-+ (charf *)&s->window[(unsigned)s->block_start] : \
-+ (charf *)Z_NULL), \
-+ (ulg)((long)s->strstart - s->block_start), \
-+ (eof)); \
-+ s->block_start = s->strstart; \
-+ flush_pending(s->strm); \
-+ Tracev((stderr,"[FLUSH]")); \
-+}
-+
-+/* Same but force premature exit if necessary. */
-+#define FLUSH_BLOCK(s, eof) { \
-+ FLUSH_BLOCK_ONLY(s, eof); \
-+ if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
-+}
-+
-+/* ===========================================================================
-+ * Copy without compression as much as possible from the input stream, return
-+ * the current block state.
-+ * This function does not insert new strings in the dictionary since
-+ * uncompressible data is probably not useful. This function is used
-+ * only for the level=0 compression option.
-+ * NOTE: this function should be optimized to avoid extra copying from
-+ * window to pending_buf.
-+ */
-+local block_state deflate_stored(s, flush)
-+ deflate_state *s;
-+ int flush;
-+{
-+ /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
-+ * to pending_buf_size, and each stored block has a 5 byte header:
-+ */
-+ ulg max_block_size = 0xffff;
-+ ulg max_start;
-+
-+ if (max_block_size > s->pending_buf_size - 5) {
-+ max_block_size = s->pending_buf_size - 5;
-+ }
-+
-+ /* Copy as much as possible from input to output: */
-+ for (;;) {
-+ /* Fill the window as much as possible: */
-+ if (s->lookahead <= 1) {
-+
-+ Assert(s->strstart < s->w_size+MAX_DIST(s) ||
-+ s->block_start >= (long)s->w_size, "slide too late");
-+
-+ fill_window(s);
-+ if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
-+
-+ if (s->lookahead == 0) break; /* flush the current block */
-+ }
-+ Assert(s->block_start >= 0L, "block gone");
-+
-+ s->strstart += s->lookahead;
-+ s->lookahead = 0;
-+
-+ /* Emit a stored block if pending_buf will be full: */
-+ max_start = s->block_start + max_block_size;
-+ if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
-+ /* strstart == 0 is possible when wraparound on 16-bit machine */
-+ s->lookahead = (uInt)(s->strstart - max_start);
-+ s->strstart = (uInt)max_start;
-+ FLUSH_BLOCK(s, 0);
-+ }
-+ /* Flush if we may have to slide, otherwise block_start may become
-+ * negative and the data will be gone:
-+ */
-+ if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
-+ FLUSH_BLOCK(s, 0);
-+ }
-+ }
-+ FLUSH_BLOCK(s, flush == Z_FINISH);
-+ return flush == Z_FINISH ? finish_done : block_done;
-+}
-+
-+/* ===========================================================================
-+ * Compress as much as possible from the input stream, return the current
-+ * block state.
-+ * This function does not perform lazy evaluation of matches and inserts
-+ * new strings in the dictionary only for unmatched strings or for short
-+ * matches. It is used only for the fast compression options.
-+ */
-+local block_state deflate_fast(s, flush)
-+ deflate_state *s;
-+ int flush;
-+{
-+ IPos hash_head = NIL; /* head of the hash chain */
-+ int bflush; /* set if current block must be flushed */
-+
-+ for (;;) {
-+ /* Make sure that we always have enough lookahead, except
-+ * at the end of the input file. We need MAX_MATCH bytes
-+ * for the next match, plus MIN_MATCH bytes to insert the
-+ * string following the next match.
-+ */
-+ if (s->lookahead < MIN_LOOKAHEAD) {
-+ fill_window(s);
-+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
-+ return need_more;
-+ }
-+ if (s->lookahead == 0) break; /* flush the current block */
-+ }
-+
-+ /* Insert the string window[strstart .. strstart+2] in the
-+ * dictionary, and set hash_head to the head of the hash chain:
-+ */
-+ if (s->lookahead >= MIN_MATCH) {
-+ INSERT_STRING(s, s->strstart, hash_head);
-+ }
-+
-+ /* Find the longest match, discarding those <= prev_length.
-+ * At this point we have always match_length < MIN_MATCH
-+ */
-+ if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
-+ /* To simplify the code, we prevent matches with the string
-+ * of window index 0 (in particular we have to avoid a match
-+ * of the string with itself at the start of the input file).
-+ */
-+ if (s->strategy != Z_HUFFMAN_ONLY) {
-+ s->match_length = longest_match (s, hash_head);
-+ }
-+ /* longest_match() sets match_start */
-+ }
-+ if (s->match_length >= MIN_MATCH) {
-+ check_match(s, s->strstart, s->match_start, s->match_length);
-+
-+ _tr_tally_dist(s, s->strstart - s->match_start,
-+ s->match_length - MIN_MATCH, bflush);
-+
-+ s->lookahead -= s->match_length;
-+
-+ /* Insert new strings in the hash table only if the match length
-+ * is not too large. This saves time but degrades compression.
-+ */
-+#ifndef FASTEST
-+ if (s->match_length <= s->max_insert_length &&
-+ s->lookahead >= MIN_MATCH) {
-+ s->match_length--; /* string at strstart already in hash table */
-+ do {
-+ s->strstart++;
-+ INSERT_STRING(s, s->strstart, hash_head);
-+ /* strstart never exceeds WSIZE-MAX_MATCH, so there are
-+ * always MIN_MATCH bytes ahead.
-+ */
-+ } while (--s->match_length != 0);
-+ s->strstart++;
-+ } else
-+#endif
-+ {
-+ s->strstart += s->match_length;
-+ s->match_length = 0;
-+ s->ins_h = s->window[s->strstart];
-+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-+#if MIN_MATCH != 3
-+ Call UPDATE_HASH() MIN_MATCH-3 more times
-+#endif
-+ /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
-+ * matter since it will be recomputed at next deflate call.
-+ */
-+ }
-+ } else {
-+ /* No match, output a literal byte */
-+ Tracevv((stderr,"%c", s->window[s->strstart]));
-+ _tr_tally_lit (s, s->window[s->strstart], bflush);
-+ s->lookahead--;
-+ s->strstart++;
-+ }
-+ if (bflush) FLUSH_BLOCK(s, 0);
-+ }
-+ FLUSH_BLOCK(s, flush == Z_FINISH);
-+ return flush == Z_FINISH ? finish_done : block_done;
-+}
-+
-+/* ===========================================================================
-+ * Same as above, but achieves better compression. We use a lazy
-+ * evaluation for matches: a match is finally adopted only if there is
-+ * no better match at the next window position.
-+ */
-+local block_state deflate_slow(s, flush)
-+ deflate_state *s;
-+ int flush;
-+{
-+ IPos hash_head = NIL; /* head of hash chain */
-+ int bflush; /* set if current block must be flushed */
-+
-+ /* Process the input block. */
-+ for (;;) {
-+ /* Make sure that we always have enough lookahead, except
-+ * at the end of the input file. We need MAX_MATCH bytes
-+ * for the next match, plus MIN_MATCH bytes to insert the
-+ * string following the next match.
-+ */
-+ if (s->lookahead < MIN_LOOKAHEAD) {
-+ fill_window(s);
-+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
-+ return need_more;
-+ }
-+ if (s->lookahead == 0) break; /* flush the current block */
-+ }
-+
-+ /* Insert the string window[strstart .. strstart+2] in the
-+ * dictionary, and set hash_head to the head of the hash chain:
-+ */
-+ if (s->lookahead >= MIN_MATCH) {
-+ INSERT_STRING(s, s->strstart, hash_head);
-+ }
-+
-+ /* Find the longest match, discarding those <= prev_length.
-+ */
-+ s->prev_length = s->match_length, s->prev_match = s->match_start;
-+ s->match_length = MIN_MATCH-1;
-+
-+ if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
-+ s->strstart - hash_head <= MAX_DIST(s)) {
-+ /* To simplify the code, we prevent matches with the string
-+ * of window index 0 (in particular we have to avoid a match
-+ * of the string with itself at the start of the input file).
-+ */
-+ if (s->strategy != Z_HUFFMAN_ONLY) {
-+ s->match_length = longest_match (s, hash_head);
-+ }
-+ /* longest_match() sets match_start */
-+
-+ if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
-+ (s->match_length == MIN_MATCH &&
-+ s->strstart - s->match_start > TOO_FAR))) {
-+
-+ /* If prev_match is also MIN_MATCH, match_start is garbage
-+ * but we will ignore the current match anyway.
-+ */
-+ s->match_length = MIN_MATCH-1;
-+ }
-+ }
-+ /* If there was a match at the previous step and the current
-+ * match is not better, output the previous match:
-+ */
-+ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
-+ uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
-+ /* Do not insert strings in hash table beyond this. */
-+
-+ check_match(s, s->strstart-1, s->prev_match, s->prev_length);
-+
-+ _tr_tally_dist(s, s->strstart -1 - s->prev_match,
-+ s->prev_length - MIN_MATCH, bflush);
-+
-+ /* Insert in hash table all strings up to the end of the match.
-+ * strstart-1 and strstart are already inserted. If there is not
-+ * enough lookahead, the last two strings are not inserted in
-+ * the hash table.
-+ */
-+ s->lookahead -= s->prev_length-1;
-+ s->prev_length -= 2;
-+ do {
-+ if (++s->strstart <= max_insert) {
-+ INSERT_STRING(s, s->strstart, hash_head);
-+ }
-+ } while (--s->prev_length != 0);
-+ s->match_available = 0;
-+ s->match_length = MIN_MATCH-1;
-+ s->strstart++;
-+
-+ if (bflush) FLUSH_BLOCK(s, 0);
-+
-+ } else if (s->match_available) {
-+ /* If there was no match at the previous position, output a
-+ * single literal. If there was a match but the current match
-+ * is longer, truncate the previous match to a single literal.
-+ */
-+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
-+ _tr_tally_lit(s, s->window[s->strstart-1], bflush);
-+ if (bflush) {
-+ FLUSH_BLOCK_ONLY(s, 0);
-+ }
-+ s->strstart++;
-+ s->lookahead--;
-+ if (s->strm->avail_out == 0) return need_more;
-+ } else {
-+ /* There is no previous match to compare with, wait for
-+ * the next step to decide.
-+ */
-+ s->match_available = 1;
-+ s->strstart++;
-+ s->lookahead--;
-+ }
-+ }
-+ Assert (flush != Z_NO_FLUSH, "no flush?");
-+ if (s->match_available) {
-+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
-+ _tr_tally_lit(s, s->window[s->strstart-1], bflush);
-+ s->match_available = 0;
-+ }
-+ FLUSH_BLOCK(s, flush == Z_FINISH);
-+ return flush == Z_FINISH ? finish_done : block_done;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/deflate.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,318 @@
-+/* deflate.h -- internal compression state
-+ * Copyright (C) 1995-2002 Jean-loup Gailly
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+/* @(#) $Id: deflate.h,v 1.5 2004/07/10 07:48:38 mcr Exp $ */
-+
-+#ifndef _DEFLATE_H
-+#define _DEFLATE_H
-+
-+#include "zlib/zutil.h"
-+
-+/* ===========================================================================
-+ * Internal compression state.
-+ */
-+
-+#define LENGTH_CODES 29
-+/* number of length codes, not counting the special END_BLOCK code */
-+
-+#define LITERALS 256
-+/* number of literal bytes 0..255 */
-+
-+#define L_CODES (LITERALS+1+LENGTH_CODES)
-+/* number of Literal or Length codes, including the END_BLOCK code */
-+
-+#define D_CODES 30
-+/* number of distance codes */
-+
-+#define BL_CODES 19
-+/* number of codes used to transfer the bit lengths */
-+
-+#define HEAP_SIZE (2*L_CODES+1)
-+/* maximum heap size */
-+
-+#define MAX_BITS 15
-+/* All codes must not exceed MAX_BITS bits */
-+
-+#define INIT_STATE 42
-+#define BUSY_STATE 113
-+#define FINISH_STATE 666
-+/* Stream status */
-+
-+
-+/* Data structure describing a single value and its code string. */
-+typedef struct ct_data_s {
-+ union {
-+ ush freq; /* frequency count */
-+ ush code; /* bit string */
-+ } fc;
-+ union {
-+ ush dad; /* father node in Huffman tree */
-+ ush len; /* length of bit string */
-+ } dl;
-+} FAR ct_data;
-+
-+#define Freq fc.freq
-+#define Code fc.code
-+#define Dad dl.dad
-+#define Len dl.len
-+
-+typedef struct static_tree_desc_s static_tree_desc;
-+
-+typedef struct tree_desc_s {
-+ ct_data *dyn_tree; /* the dynamic tree */
-+ int max_code; /* largest code with non zero frequency */
-+ static_tree_desc *stat_desc; /* the corresponding static tree */
-+} FAR tree_desc;
-+
-+typedef ush Pos;
-+typedef Pos FAR Posf;
-+typedef unsigned IPos;
-+
-+/* A Pos is an index in the character window. We use short instead of int to
-+ * save space in the various tables. IPos is used only for parameter passing.
-+ */
-+
-+typedef struct internal_state {
-+ z_streamp strm; /* pointer back to this zlib stream */
-+ int status; /* as the name implies */
-+ Bytef *pending_buf; /* output still pending */
-+ ulg pending_buf_size; /* size of pending_buf */
-+ Bytef *pending_out; /* next pending byte to output to the stream */
-+ int pending; /* nb of bytes in the pending buffer */
-+ int noheader; /* suppress zlib header and adler32 */
-+ Byte data_type; /* UNKNOWN, BINARY or ASCII */
-+ Byte method; /* STORED (for zip only) or DEFLATED */
-+ int last_flush; /* value of flush param for previous deflate call */
-+
-+ /* used by deflate.c: */
-+
-+ uInt w_size; /* LZ77 window size (32K by default) */
-+ uInt w_bits; /* log2(w_size) (8..16) */
-+ uInt w_mask; /* w_size - 1 */
-+
-+ Bytef *window;
-+ /* Sliding window. Input bytes are read into the second half of the window,
-+ * and move to the first half later to keep a dictionary of at least wSize
-+ * bytes. With this organization, matches are limited to a distance of
-+ * wSize-MAX_MATCH bytes, but this ensures that IO is always
-+ * performed with a length multiple of the block size. Also, it limits
-+ * the window size to 64K, which is quite useful on MSDOS.
-+ * To do: use the user input buffer as sliding window.
-+ */
-+
-+ ulg window_size;
-+ /* Actual size of window: 2*wSize, except when the user input buffer
-+ * is directly used as sliding window.
-+ */
-+
-+ Posf *prev;
-+ /* Link to older string with same hash index. To limit the size of this
-+ * array to 64K, this link is maintained only for the last 32K strings.
-+ * An index in this array is thus a window index modulo 32K.
-+ */
-+
-+ Posf *head; /* Heads of the hash chains or NIL. */
-+
-+ uInt ins_h; /* hash index of string to be inserted */
-+ uInt hash_size; /* number of elements in hash table */
-+ uInt hash_bits; /* log2(hash_size) */
-+ uInt hash_mask; /* hash_size-1 */
-+
-+ uInt hash_shift;
-+ /* Number of bits by which ins_h must be shifted at each input
-+ * step. It must be such that after MIN_MATCH steps, the oldest
-+ * byte no longer takes part in the hash key, that is:
-+ * hash_shift * MIN_MATCH >= hash_bits
-+ */
-+
-+ long block_start;
-+ /* Window position at the beginning of the current output block. Gets
-+ * negative when the window is moved backwards.
-+ */
-+
-+ uInt match_length; /* length of best match */
-+ IPos prev_match; /* previous match */
-+ int match_available; /* set if previous match exists */
-+ uInt strstart; /* start of string to insert */
-+ uInt match_start; /* start of matching string */
-+ uInt lookahead; /* number of valid bytes ahead in window */
-+
-+ uInt prev_length;
-+ /* Length of the best match at previous step. Matches not greater than this
-+ * are discarded. This is used in the lazy match evaluation.
-+ */
-+
-+ uInt max_chain_length;
-+ /* To speed up deflation, hash chains are never searched beyond this
-+ * length. A higher limit improves compression ratio but degrades the
-+ * speed.
-+ */
-+
-+ uInt max_lazy_match;
-+ /* Attempt to find a better match only when the current match is strictly
-+ * smaller than this value. This mechanism is used only for compression
-+ * levels >= 4.
-+ */
-+# define max_insert_length max_lazy_match
-+ /* Insert new strings in the hash table only if the match length is not
-+ * greater than this length. This saves time but degrades compression.
-+ * max_insert_length is used only for compression levels <= 3.
-+ */
-+
-+ int level; /* compression level (1..9) */
-+ int strategy; /* favor or force Huffman coding*/
-+
-+ uInt good_match;
-+ /* Use a faster search when the previous match is longer than this */
-+
-+ int nice_match; /* Stop searching when current match exceeds this */
-+
-+ /* used by trees.c: */
-+ /* Didn't use ct_data typedef below to supress compiler warning */
-+ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
-+ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
-+ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
-+
-+ struct tree_desc_s l_desc; /* desc. for literal tree */
-+ struct tree_desc_s d_desc; /* desc. for distance tree */
-+ struct tree_desc_s bl_desc; /* desc. for bit length tree */
-+
-+ ush bl_count[MAX_BITS+1];
-+ /* number of codes at each bit length for an optimal tree */
-+
-+ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
-+ int heap_len; /* number of elements in the heap */
-+ int heap_max; /* element of largest frequency */
-+ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
-+ * The same heap array is used to build all trees.
-+ */
-+
-+ uch depth[2*L_CODES+1];
-+ /* Depth of each subtree used as tie breaker for trees of equal frequency
-+ */
-+
-+ uchf *l_buf; /* buffer for literals or lengths */
-+
-+ uInt lit_bufsize;
-+ /* Size of match buffer for literals/lengths. There are 4 reasons for
-+ * limiting lit_bufsize to 64K:
-+ * - frequencies can be kept in 16 bit counters
-+ * - if compression is not successful for the first block, all input
-+ * data is still in the window so we can still emit a stored block even
-+ * when input comes from standard input. (This can also be done for
-+ * all blocks if lit_bufsize is not greater than 32K.)
-+ * - if compression is not successful for a file smaller than 64K, we can
-+ * even emit a stored file instead of a stored block (saving 5 bytes).
-+ * This is applicable only for zip (not gzip or zlib).
-+ * - creating new Huffman trees less frequently may not provide fast
-+ * adaptation to changes in the input data statistics. (Take for
-+ * example a binary file with poorly compressible code followed by
-+ * a highly compressible string table.) Smaller buffer sizes give
-+ * fast adaptation but have of course the overhead of transmitting
-+ * trees more frequently.
-+ * - I can't count above 4
-+ */
-+
-+ uInt last_lit; /* running index in l_buf */
-+
-+ ushf *d_buf;
-+ /* Buffer for distances. To simplify the code, d_buf and l_buf have
-+ * the same number of elements. To use different lengths, an extra flag
-+ * array would be necessary.
-+ */
-+
-+ ulg opt_len; /* bit length of current block with optimal trees */
-+ ulg static_len; /* bit length of current block with static trees */
-+ uInt matches; /* number of string matches in current block */
-+ int last_eob_len; /* bit length of EOB code for last block */
-+
-+#ifdef DEBUG
-+ ulg compressed_len; /* total bit length of compressed file mod 2^32 */
-+ ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
-+#endif
-+
-+ ush bi_buf;
-+ /* Output buffer. bits are inserted starting at the bottom (least
-+ * significant bits).
-+ */
-+ int bi_valid;
-+ /* Number of valid bits in bi_buf. All bits above the last valid bit
-+ * are always zero.
-+ */
-+
-+} FAR deflate_state;
-+
-+/* Output a byte on the stream.
-+ * IN assertion: there is enough room in pending_buf.
-+ */
-+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
-+
-+
-+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-+/* Minimum amount of lookahead, except at the end of the input file.
-+ * See deflate.c for comments about the MIN_MATCH+1.
-+ */
-+
-+#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
-+/* In order to simplify the code, particularly on 16 bit machines, match
-+ * distances are limited to MAX_DIST instead of WSIZE.
-+ */
-+
-+ /* in trees.c */
-+void _tr_init OF((deflate_state *s));
-+int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
-+void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
-+ int eof));
-+void _tr_align OF((deflate_state *s));
-+void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
-+ int eof));
-+
-+#define d_code(dist) \
-+ ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
-+/* Mapping from a distance to a distance code. dist is the distance - 1 and
-+ * must not have side effects. _dist_code[256] and _dist_code[257] are never
-+ * used.
-+ */
-+
-+#ifndef DEBUG
-+/* Inline versions of _tr_tally for speed: */
-+
-+#if defined(GEN_TREES_H) || !defined(STDC)
-+ extern uch _length_code[];
-+ extern uch _dist_code[];
-+#else
-+ extern const uch _length_code[];
-+ extern const uch _dist_code[];
-+#endif
-+
-+# define _tr_tally_lit(s, c, flush) \
-+ { uch cc = (c); \
-+ s->d_buf[s->last_lit] = 0; \
-+ s->l_buf[s->last_lit++] = cc; \
-+ s->dyn_ltree[cc].Freq++; \
-+ flush = (s->last_lit == s->lit_bufsize-1); \
-+ }
-+# define _tr_tally_dist(s, distance, length, flush) \
-+ { uch len = (length); \
-+ ush dist = (distance); \
-+ s->d_buf[s->last_lit] = dist; \
-+ s->l_buf[s->last_lit++] = len; \
-+ dist--; \
-+ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
-+ s->dyn_dtree[d_code(dist)].Freq++; \
-+ flush = (s->last_lit == s->lit_bufsize-1); \
-+ }
-+#else
-+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
-+# define _tr_tally_dist(s, distance, length, flush) \
-+ flush = _tr_tally(s, distance, length)
-+#endif
-+
-+#endif /* _DEFLATE_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/COPYRIGHT Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,50 @@
-+Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+All rights reserved.
-+
-+This package is an DES implementation written by Eric Young (eay@cryptsoft.com).
-+The implementation was written so as to conform with MIT's libdes.
-+
-+This library is free for commercial and non-commercial use as long as
-+the following conditions are aheared to. The following conditions
-+apply to all code found in this distribution.
-+
-+Copyright remains Eric Young's, and as such any Copyright notices in
-+the code are not to be removed.
-+If this package is used in a product, Eric Young should be given attribution
-+as the author of that the SSL library. This can be in the form of a textual
-+message at program startup or in documentation (online or textual) provided
-+with the package.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions
-+are met:
-+1. Redistributions of source code must retain the copyright
-+ notice, this list of conditions and the following disclaimer.
-+2. Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+3. All advertising materials mentioning features or use of this software
-+ must display the following acknowledgement:
-+ This product includes software developed by Eric Young (eay@cryptsoft.com)
-+
-+THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+SUCH DAMAGE.
-+
-+The license and distribution terms for any publically available version or
-+derivative of this code cannot be changed. i.e. this code cannot simply be
-+copied and put under another distrubution license
-+[including the GNU Public License.]
-+
-+The reason behind this being stated in this direct manner is past
-+experience in code simply being copied and the attribution removed
-+from it and then being distributed as part of other packages. This
-+implementation was a non-trivial and unpaid effort.
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/INSTALL Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,69 @@
-+Check the CC and CFLAGS lines in the makefile
-+
-+If your C library does not support the times(3) function, change the
-+#define TIMES to
-+#undef TIMES in speed.c
-+If it does, check the HZ value for the times(3) function.
-+If your system does not define CLK_TCK it will be assumed to
-+be 100.0.
-+
-+If possible use gcc v 2.7.?
-+Turn on the maximum optimising (normally '-O3 -fomit-frame-pointer' for gcc)
-+In recent times, some system compilers give better performace.
-+
-+type 'make'
-+
-+run './destest' to check things are ok.
-+run './rpw' to check the tty code for reading passwords works.
-+run './speed' to see how fast those optimisations make the library run :-)
-+run './des_opts' to determin the best compile time options.
-+
-+The output from des_opts should be put in the makefile options and des_enc.c
-+should be rebuilt. For 64 bit computers, do not use the DES_PTR option.
-+For the DEC Alpha, edit des.h and change DES_LONG to 'unsigned int'
-+and then you can use the 'DES_PTR' option.
-+
-+The file options.txt has the options listed for best speed on quite a
-+few systems. Look and the options (UNROLL, PTR, RISC2 etc) and then
-+turn on the relevent option in the Makefile
-+
-+There are some special Makefile targets that make life easier.
-+make cc - standard cc build
-+make gcc - standard gcc build
-+make x86-elf - x86 assembler (elf), linux-elf.
-+make x86-out - x86 assembler (a.out), FreeBSD
-+make x86-solaris- x86 assembler
-+make x86-bsdi - x86 assembler (a.out with primative assembler).
-+
-+If at all possible use the assembler (for Windows NT/95, use
-+asm/win32.obj to link with). The x86 assembler is very very fast.
-+
-+A make install will by default install
-+libdes.a in /usr/local/lib/libdes.a
-+des in /usr/local/bin/des
-+des_crypt.man in /usr/local/man/man3/des_crypt.3
-+des.man in /usr/local/man/man1/des.1
-+des.h in /usr/include/des.h
-+
-+des(1) should be compatible with sunOS's but I have been unable to
-+test it.
-+
-+These routines should compile on MSDOS, most 32bit and 64bit version
-+of Unix (BSD and SYSV) and VMS, without modification.
-+The only problems should be #include files that are in the wrong places.
-+
-+These routines can be compiled under MSDOS.
-+I have successfully encrypted files using des(1) under MSDOS and then
-+decrypted the files on a SparcStation.
-+I have been able to compile and test the routines with
-+Microsoft C v 5.1 and Turbo C v 2.0.
-+The code in this library is in no way optimised for the 16bit
-+operation of MSDOS.
-+
-+When building for glibc, ignore all of the above and just unpack into
-+glibc-1.??/des and then gmake as per normal.
-+
-+As a final note on performace. Certain CPUs like sparcs and Alpha often give
-+a %10 speed difference depending on the link order. It is rather anoying
-+when one program reports 'x' DES encrypts a second and another reports
-+'x*0.9' the speed.
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/Makefile Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,60 @@
-+# Makefile for KLIPS kernel code as a module for 2.6 kernels
-+#
-+# Makefile for KLIPS kernel code as a module
-+# Copyright (C) 1998, 1999, 2000,2001 Richard Guy Briggs.
-+# Copyright (C) 2002-2004 Michael Richardson <mcr@freeswan.org>
-+#
-+# This program is free software; you can redistribute it and/or modify it
-+# under the terms of the GNU General Public License as published by the
-+# Free Software Foundation; either version 2 of the License, or (at your
-+# option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+#
-+# This program is distributed in the hope that it will be useful, but
-+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+# for more details.
-+#
-+# RCSID $Id: Makefile.fs2_6,v 1.3 2005/08/12 14:13:59 mcr Exp $
-+#
-+# Note! Dependencies are done automagically by 'make dep', which also
-+# removes any old dependencies. DON'T put your own dependencies here
-+# unless it's something special (ie not a .c file).
-+#
-+
-+obj-$(CONFIG_KLIPS_ENC_3DES) += ipsec_alg_3des.o
-+obj-$(CONFIG_KLIPS_ENC_3DES) += cbc_enc.o
-+obj-$(CONFIG_KLIPS_ENC_3DES) += ecb_enc.o
-+obj-$(CONFIG_KLIPS_ENC_3DES) += set_key.o
-+
-+ifeq ($(strip ${SUBARCH}),)
-+SUBARCH:=${ARCH}
-+endif
-+
-+# the assembly version expects frame pointers, which are
-+# optional in many kernel builds. If you want speed, you should
-+# probably use cryptoapi code instead.
-+USEASSEMBLY=${SUBARCH}${CONFIG_FRAME_POINTER}
-+ifeq (${USEASSEMBLY},i386y)
-+obj-$(CONFIG_KLIPS_ENC_3DES) += dx86unix.o
-+else
-+obj-$(CONFIG_KLIPS_ENC_3DES) += des_enc.o
-+endif
-+
-+#
-+# $Log: Makefile.fs2_6,v $
-+# Revision 1.3 2005/08/12 14:13:59 mcr
-+# do not use assembly code with there are no frame pointers,
-+# as it does not have the right linkages.
-+#
-+# Revision 1.2 2005/04/29 05:13:07 mcr
-+# 3DES algorithm code.
-+#
-+# Revision 1.1 2004/08/17 03:27:30 mcr
-+# klips 2.6 edits.
-+#
-+#
-+# Local Variables:
-+# compile-command: "(cd ../../.. && source umlsetup.sh && make -C ${POOLSPACE} module/ipsec.o)"
-+# End Variables:
-+#
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/README Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,54 @@
-+
-+ libdes, Version 4.01 10-Jan-97
-+
-+ Copyright (c) 1997, Eric Young
-+ All rights reserved.
-+
-+ This program is free software; you can redistribute it and/or modify
-+ it under the terms specified in COPYRIGHT.
-+
-+--
-+The primary ftp site for this library is
-+ftp://ftp.psy.uq.oz.au/pub/Crypto/DES/libdes-x.xx.tar.gz
-+libdes is now also shipped with SSLeay. Primary ftp site of
-+ftp://ftp.psy.uq.oz.au/pub/Crypto/SSL/SSLeay-x.x.x.tar.gz
-+
-+The best way to build this library is to build it as part of SSLeay.
-+
-+This kit builds a DES encryption library and a DES encryption program.
-+It supports ecb, cbc, ofb, cfb, triple ecb, triple cbc, triple ofb,
-+triple cfb, desx, and MIT's pcbc encryption modes and also has a fast
-+implementation of crypt(3).
-+It contains support routines to read keys from a terminal,
-+generate a random key, generate a key from an arbitrary length string,
-+read/write encrypted data from/to a file descriptor.
-+
-+The implementation was written so as to conform with the manual entry
-+for the des_crypt(3) library routines from MIT's project Athena.
-+
-+destest should be run after compilation to test the des routines.
-+rpw should be run after compilation to test the read password routines.
-+The des program is a replacement for the sun des command. I believe it
-+conforms to the sun version.
-+
-+The Imakefile is setup for use in the kerberos distribution.
-+
-+These routines are best compiled with gcc or any other good
-+optimising compiler.
-+Just turn you optimiser up to the highest settings and run destest
-+after the build to make sure everything works.
-+
-+I believe these routines are close to the fastest and most portable DES
-+routines that use small lookup tables (4.5k) that are publicly available.
-+The fcrypt routine is faster than ufc's fcrypt (when compiling with
-+gcc2 -O2) on the sparc 2 (1410 vs 1270) but is not so good on other machines
-+(on a sun3/260 168 vs 336). It is a function of CPU on chip cache size.
-+[ 10-Jan-97 and a function of an incorrect speed testing program in
-+ ufc which gave much better test figures that reality ].
-+
-+It is worth noting that on sparc and Alpha CPUs, performance of the DES
-+library can vary by upto %10 due to the positioning of files after application
-+linkage.
-+
-+Eric Young (eay@cryptsoft.com)
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/README.freeswan Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,33 @@
-+The only changes the FreeS/WAN project has made to libdes-lite 4.04b are:
-+
-+We #ifdef-ed the declaration of DES_LONG in des.h, so it's more efficient
-+on the Alpha, instead of just noting the issue in a comment.
-+
-+We #ifdef-ed out the des_options() function in ecb_enc.c, because we don't
-+use it, and its call to sprintf() can cause subtle difficulties when KLIPS
-+is built as a module (depending on details of Linux configuration options).
-+
-+We changed some instances of CC=$(CC) in the Makefile to CC='$(CC)' to make
-+it cope better with Linux kernel Makefile stupidities, and took out an
-+explicit CC=gcc (unwise on systems with strange compilers).
-+
-+We deleted some references to <stdio.h> and <stdlib.h>, and a declaration
-+of one function found only in the full libdes (not in libdes-lite), to
-+avoid dragging in bits of stdio/stdlib unnecessarily. (Our thanks to Hans
-+Schultz for spotting this and pointing out the fixes.)
-+
-+We deleted a couple of .obj files in the asm subdirectory, which appear to
-+have been included in the original library by accident.
-+
-+We have added an include of our Makefile.inc file, to permit overriding
-+things like choice of compiler (although the libdes Makefile would
-+probably need some work to make this effective).
-+
-+
-+
-+Note that Eric Young is no longer at the email address listed in these
-+files, and is (alas) no longer working on free crypto software.
-+
-+
-+
-+This file is RCSID $Id: README.freeswan,v 1.12 2004/07/10 08:06:51 mcr Exp $
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/VERSION Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,406 @@
-+Version 4.04
-+ Fixed a few tests in destest. Also added x86 assember for
-+ des_ncbc_encrypt() which is the standard cbc mode function.
-+ This makes a very very large performace difference.
-+ Ariel Glenn ariel@columbia.edu reports that the terminal
-+ 'turn echo off' can return (errno == EINVAL) under solaris
-+ when redirection is used. So I now catch that as well as ENOTTY.
-+
-+
-+Version 4.03
-+ Left a static out of enc_write.c, which caused to buffer to be
-+ continiously malloc()ed. Does anyone use these functions? I keep
-+ on feeling like removing them since I only had these in there
-+ for a version of kerberised login. Anyway, this was pointed out
-+ by Theo de Raadt <deraadt@cvs.openbsd.org>
-+ The 'n' bit ofb code was wrong, it was not shifting the shift
-+ register. It worked correctly for n == 64. Thanks to
-+ Gigi Ankeny <Gigi.Ankeny@Eng.Sun.COM> for pointing this one out.
-+
-+Version 4.02
-+ I was doing 'if (memcmp(weak_keys[i],key,sizeof(key)) == 0)'
-+ when checking for weak keys which is wrong :-(, pointed out by
-+ Markus F.X.J. Oberhumer <markus.oberhumer@jk.uni-linz.ac.at>.
-+
-+Version 4.01
-+ Even faster inner loop in the DES assembler for x86 and a modification
-+ for IP/FP which is faster on x86. Both of these changes are
-+ from Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>. His
-+ changes make the assembler run %40 faster on a pentium. This is just
-+ a case of getting the instruction sequence 'just right'.
-+ All credit to 'Svend' :-)
-+ Quite a few special x86 'make' targets.
-+ A libdes-l (lite) distribution.
-+
-+Version 4.00
-+ After a bit of a pause, I'll up the major version number since this
-+ is mostly a performace release. I've added x86 assembler and
-+ added more options for performance. A %28 speedup for gcc
-+ on a pentium and the assembler is a %50 speedup.
-+ MIPS CPU's, sparc and Alpha are the main CPU's with speedups.
-+ Run des_opts to work out which options should be used.
-+ DES_RISC1/DES_RISC2 use alternative inner loops which use
-+ more registers but should give speedups on any CPU that does
-+ dual issue (pentium). DES_UNROLL unrolls the inner loop,
-+ which costs in code size.
-+
-+Version 3.26
-+ I've finally removed one of the shifts in D_ENCRYPT. This
-+ meant I've changed the des_SPtrans table (spr.h), the set_key()
-+ function and some things in des_enc.c. This has definitly
-+ made things faster :-). I've known about this one for some
-+ time but I've been too lazy to follow it up :-).
-+ Noticed that in the D_ENCRYPT() macro, we can just do L^=(..)^(..)^..
-+ instead of L^=((..)|(..)|(..).. This should save a register at
-+ least.
-+ Assember for x86. The file to replace is des_enc.c, which is replaced
-+ by one of the assembler files found in asm. Look at des/asm/readme
-+ for more info.
-+
-+ /* Modification to fcrypt so it can be compiled to support
-+ HPUX 10.x's long password format, define -DLONGCRYPT to use this.
-+ Thanks to Jens Kupferschmidt <bt1cu@hpboot.rz.uni-leipzig.de>. */
-+
-+ SIGWINCH case put in des_read_passwd() so the function does not
-+ 'exit' if this signal is received.
-+
-+Version 3.25 17/07/96
-+ Modified read_pwd.c so that stdin can be read if not a tty.
-+ Thanks to Jeff Barber <jeffb@issl.atl.hp.com> for the patches.
-+ des_init_random_number_generator() shortened due to VMS linker
-+ limits.
-+ Added RSA's DESX cbc mode. It is a form of cbc encryption, with 2
-+ 8 byte quantities xored before and after encryption.
-+ des_xcbc_encryption() - the name is funny to preserve the des_
-+ prefix on all functions.
-+
-+Version 3.24 20/04/96
-+ The DES_PTR macro option checked and used by SSLeay configuration
-+
-+Version 3.23 11/04/96
-+ Added DES_LONG. If defined to 'unsigned int' on the DEC Alpha,
-+ it gives a %20 speedup :-)
-+ Fixed the problem with des.pl under perl5. The patches were
-+ sent by Ed Kubaitis (ejk@uiuc.edu).
-+ In fcrypt.c, changed values to handle illegal salt values the way
-+ normal crypt() implementations do. Some programs apparently use
-+ them :-(. The patch was sent by Bjorn Gronvall <bg@sics.se>
-+
-+Version 3.22 29/11/95
-+ Bug in des(1), an error with the uuencoding stuff when the
-+ 'data' is small, thanks to Geoff Keating <keagchon@mehta.anu.edu.au>
-+ for the patch.
-+
-+Version 3.21 22/11/95
-+ After some emailing back and forth with
-+ Colin Plumb <colin@nyx10.cs.du.edu>, I've tweaked a few things
-+ and in a future version I will probably put in some of the
-+ optimisation he suggested for use with the DES_USE_PTR option.
-+ Extra routines from Mark Murray <mark@grondar.za> for use in
-+ freeBSD. They mostly involve random number generation for use
-+ with kerberos. They involve evil machine specific system calls
-+ etc so I would normally suggest pushing this stuff into the
-+ application and/or using RAND_seed()/RAND_bytes() if you are
-+ using this DES library as part of SSLeay.
-+ Redone the read_pw() function so that it is cleaner and
-+ supports termios, thanks to Sameer Parekh <sameer@c2.org>
-+ for the initial patches for this.
-+ Renamed 3ecb_encrypt() to ecb3_encrypt(). This has been
-+ done just to make things more consistent.
-+ I have also now added triple DES versions of cfb and ofb.
-+
-+Version 3.20
-+ Damn, Damn, Damn, as pointed out by Mike_Spreitzer.PARC@xerox.com,
-+ my des_random_seed() function was only copying 4 bytes of the
-+ passed seed into the init structure. It is now fixed to copy 8.
-+ My own suggestion is to use something like MD5 :-)
-+
-+Version 3.19
-+ While looking at my code one day, I thought, why do I keep on
-+ calling des_encrypt(in,out,ks,enc) when every function that
-+ calls it has in and out the same. So I dropped the 'out'
-+ parameter, people should not be using this function.
-+
-+Version 3.18 30/08/95
-+ Fixed a few bits with the distribution and the filenames.
-+ 3.17 had been munged via a move to DOS and back again.
-+ NO CODE CHANGES
-+
-+Version 3.17 14/07/95
-+ Fixed ede3 cbc which I had broken in 3.16. I have also
-+ removed some unneeded variables in 7-8 of the routines.
-+
-+Version 3.16 26/06/95
-+ Added des_encrypt2() which does not use IP/FP, used by triple
-+ des routines. Tweaked things a bit elsewhere. %13 speedup on
-+ sparc and %6 on a R4400 for ede3 cbc mode.
-+
-+Version 3.15 06/06/95
-+ Added des_ncbc_encrypt(), it is des_cbc mode except that it is
-+ 'normal' and copies the new iv value back over the top of the
-+ passed parameter.
-+ CHANGED des_ede3_cbc_encrypt() so that it too now overwrites
-+ the iv. THIS WILL BREAK EXISTING CODE, but since this function
-+ only new, I feel I can change it, not so with des_cbc_encrypt :-(.
-+ I need to update the documentation.
-+
-+Version 3.14 31/05/95
-+ New release upon the world, as part of my SSL implementation.
-+ New copyright and usage stuff. Basically free for all to use
-+ as long as you say it came from me :-)
-+
-+Version 3.13 31/05/95
-+ A fix in speed.c, if HZ is not defined, I set it to 100.0
-+ which is reasonable for most unixes except SunOS 4.x.
-+ I now have a #ifdef sun but timing for SunOS 4.x looked very
-+ good :-(. At my last job where I used SunOS 4.x, it was
-+ defined to be 60.0 (look at the old INSTALL documentation), at
-+ the last release had it changed to 100.0 since I now work with
-+ Solaris2 and SVR4 boxes.
-+ Thanks to Rory Chisholm <rchishol@math.ethz.ch> for pointing this
-+ one out.
-+
-+Version 3.12 08/05/95
-+ As pointed out by The Crypt Keeper <tck@bend.UCSD.EDU>,
-+ my D_ENCRYPT macro in crypt() had an un-necessary variable.
-+ It has been removed.
-+
-+Version 3.11 03/05/95
-+ Added des_ede3_cbc_encrypt() which is cbc mode des with 3 keys
-+ and one iv. It is a standard and I needed it for my SSL code.
-+ It makes more sense to use this for triple DES than
-+ 3cbc_encrypt(). I have also added (or should I say tested :-)
-+ cfb64_encrypt() which is cfb64 but it will encrypt a partial
-+ number of bytes - 3 bytes in 3 bytes out. Again this is for
-+ my SSL library, as a form of encryption to use with SSL
-+ telnet.
-+
-+Version 3.10 22/03/95
-+ Fixed a bug in 3cbc_encrypt() :-(. When making repeated calls
-+ to cbc3_encrypt, the 2 iv values that were being returned to
-+ be used in the next call were reversed :-(.
-+ Many thanks to Bill Wade <wade@Stoner.COM> for pointing out
-+ this error.
-+
-+Version 3.09 01/02/95
-+ Fixed des_random_key to be far more random, it was rather feeble
-+ with regards to picking the initial seed. The problem was
-+ pointed out by Olaf Kirch <okir@monad.swb.de>.
-+
-+Version 3.08 14/12/94
-+ Added Makefile.PL so libdes can be built into perl5.
-+ Changed des_locl.h so RAND is always defined.
-+
-+Version 3.07 05/12/94
-+ Added GNUmake and stuff so the library can be built with
-+ glibc.
-+
-+Version 3.06 30/08/94
-+ Added rpc_enc.c which contains _des_crypt. This is for use in
-+ secure_rpc v 4.0
-+ Finally fixed the cfb_enc problems.
-+ Fixed a few parameter parsing bugs in des (-3 and -b), thanks
-+ to Rob McMillan <R.McMillan@its.gu.edu.au>
-+
-+Version 3.05 21/04/94
-+ for unsigned long l; gcc does not produce ((l>>34) == 0)
-+ This causes bugs in cfb_enc.
-+ Thanks to Hadmut Danisch <danisch@ira.uka.de>
-+
-+Version 3.04 20/04/94
-+ Added a version number to des.c and libdes.a
-+
-+Version 3.03 12/01/94
-+ Fixed a bug in non zero iv in 3cbc_enc.
-+
-+Version 3.02 29/10/93
-+ I now work in a place where there are 6+ architectures and 14+
-+ OS versions :-).
-+ Fixed TERMIO definition so the most sys V boxes will work :-)
-+
-+Release upon comp.sources.misc
-+Version 3.01 08/10/93
-+ Added des_3cbc_encrypt()
-+
-+Version 3.00 07/10/93
-+ Fixed up documentation.
-+ quad_cksum definitely compatible with MIT's now.
-+
-+Version 2.30 24/08/93
-+ Triple DES now defaults to triple cbc but can do triple ecb
-+ with the -b flag.
-+ Fixed some MSDOS uuen/uudecoding problems, thanks to
-+ Added prototypes.
-+
-+Version 2.22 29/06/93
-+ Fixed a bug in des_is_weak_key() which stopped it working :-(
-+ thanks to engineering@MorningStar.Com.
-+
-+Version 2.21 03/06/93
-+ des(1) with no arguments gives quite a bit of help.
-+ Added -c (generate checksum) flag to des(1).
-+ Added -3 (triple DES) flag to des(1).
-+ Added cfb and ofb routines to the library.
-+
-+Version 2.20 11/03/93
-+ Added -u (uuencode) flag to des(1).
-+ I have been playing with byte order in quad_cksum to make it
-+ compatible with MIT's version. All I can say is avoid this
-+ function if possible since MIT's output is endian dependent.
-+
-+Version 2.12 14/10/92
-+ Added MSDOS specific macro in ecb_encrypt which gives a %70
-+ speed up when the code is compiled with turbo C.
-+
-+Version 2.11 12/10/92
-+ Speedup in set_key (recoding of PC-1)
-+ I now do it in 47 simple operations, down from 60.
-+ Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov)
-+ for motivating me to look for a faster system :-)
-+ The speedup is probably less than 1% but it is still 13
-+ instructions less :-).
-+
-+Version 2.10 06/10/92
-+ The code now works on the 64bit ETA10 and CRAY without modifications or
-+ #defines. I believe the code should work on any machine that
-+ defines long, int or short to be 8 bytes long.
-+ Thanks to Shabbir J. Safdar (shabby@mentor.cc.purdue.edu)
-+ for helping me fix the code to run on 64bit machines (he had
-+ access to an ETA10).
-+ Thanks also to John Fletcher <john_fletcher@lccmail.ocf.llnl.gov>
-+ for testing the routines on a CRAY.
-+ read_password.c has been renamed to read_passwd.c
-+ string_to_key.c has been renamed to string2key.c
-+
-+Version 2.00 14/09/92
-+ Made mods so that the library should work on 64bit CPU's.
-+ Removed all my uchar and ulong defs. Too many different
-+ versions of unix define them in their header files in too many
-+ different combinations :-)
-+ IRIX - Silicon Graphics mods (mostly in read_password.c).
-+ Thanks to Andrew Daviel (advax@erich.triumf.ca)
-+
-+Version 1.99 26/08/92
-+ Fixed a bug or 2 in enc_read.c
-+ Fixed a bug in enc_write.c
-+ Fixed a pseudo bug in fcrypt.c (very obscure).
-+
-+Version 1.98 31/07/92
-+ Support for the ETA10. This is a strange machine that defines
-+ longs and ints as 8 bytes and shorts as 4 bytes.
-+ Since I do evil things with long * that assume that they are 4
-+ bytes. Look in the Makefile for the option to compile for
-+ this machine. quad_cksum appears to have problems but I
-+ don't have the time to fix it right now, and this is not
-+ a function that uses DES and so will not affect the main uses
-+ of the library.
-+
-+Version 1.97 20/05/92 eay
-+ Fixed the Imakefile and made some changes to des.h to fix some
-+ problems when building this package with Kerberos v 4.
-+
-+Version 1.96 18/05/92 eay
-+ Fixed a small bug in string_to_key() where problems could
-+ occur if des_check_key was set to true and the string
-+ generated a weak key.
-+
-+Patch2 posted to comp.sources.misc
-+Version 1.95 13/05/92 eay
-+ Added an alternative version of the D_ENCRYPT macro in
-+ ecb_encrypt and fcrypt. Depending on the compiler, one version or the
-+ other will be faster. This was inspired by
-+ Dana How <how@isl.stanford.edu>, and her pointers about doing the
-+ *(ulong *)((uchar *)ptr+(value&0xfc))
-+ vs
-+ ptr[value&0x3f]
-+ to stop the C compiler doing a <<2 to convert the long array index.
-+
-+Version 1.94 05/05/92 eay
-+ Fixed an incompatibility between my string_to_key and the MIT
-+ version. When the key is longer than 8 chars, I was wrapping
-+ with a different method. To use the old version, define
-+ OLD_STR_TO_KEY in the makefile. Thanks to
-+ viktor@newsu.shearson.com (Viktor Dukhovni).
-+
-+Version 1.93 28/04/92 eay
-+ Fixed the VMS mods so that echo is now turned off in
-+ read_password. Thanks again to brennan@coco.cchs.su.oz.AU.
-+ MSDOS support added. The routines can be compiled with
-+ Turbo C (v2.0) and MSC (v5.1). Make sure MSDOS is defined.
-+
-+Patch1 posted to comp.sources.misc
-+Version 1.92 13/04/92 eay
-+ Changed D_ENCRYPT so that the rotation of R occurs outside of
-+ the loop. This required rotating all the longs in sp.h (now
-+ called spr.h). Thanks to Richard Outerbridge <71755.204@CompuServe.COM>
-+ speed.c has been changed so it will work without SIGALRM. If
-+ times(3) is not present it will try to use ftime() instead.
-+
-+Version 1.91 08/04/92 eay
-+ Added -E/-D options to des(1) so it can use string_to_key.
-+ Added SVR4 mods suggested by witr@rwwa.COM
-+ Added VMS mods suggested by brennan@coco.cchs.su.oz.AU. If
-+ anyone knows how to turn off tty echo in VMS please tell me or
-+ implement it yourself :-).
-+ Changed FILE *IN/*OUT to *DES_IN/*DES_OUT since it appears VMS
-+ does not like IN/OUT being used.
-+
-+Libdes posted to comp.sources.misc
-+Version 1.9 24/03/92 eay
-+ Now contains a fast small crypt replacement.
-+ Added des(1) command.
-+ Added des_rw_mode so people can use cbc encryption with
-+ enc_read and enc_write.
-+
-+Version 1.8 15/10/91 eay
-+ Bug in cbc_cksum.
-+ Many thanks to Keith Reynolds (keithr@sco.COM) for pointing this
-+ one out.
-+
-+Version 1.7 24/09/91 eay
-+ Fixed set_key :-)
-+ set_key is 4 times faster and takes less space.
-+ There are a few minor changes that could be made.
-+
-+Version 1.6 19/09/1991 eay
-+ Finally got IP and FP finished.
-+ Now I need to fix set_key.
-+ This version is quite a bit faster than 1.51
-+
-+Version 1.52 15/06/1991 eay
-+ 20% speedup in ecb_encrypt by changing the E bit selection
-+ to use 2 32bit words. This also required modification of the
-+ sp table. There is still a way to speedup the IP and IP-1
-+ (hints from outer@sq.com) still working on this one :-(.
-+
-+Version 1.51 07/06/1991 eay
-+ Faster des_encrypt by loop unrolling
-+ Fixed bug in quad_cksum.c (thanks to hughes@logos.ucs.indiana.edu)
-+
-+Version 1.50 28/05/1991 eay
-+ Optimised the code a bit more for the sparc. I have improved the
-+ speed of the inner des_encrypt by speeding up the initial and
-+ final permutations.
-+
-+Version 1.40 23/10/1990 eay
-+ Fixed des_random_key, it did not produce a random key :-(
-+
-+Version 1.30 2/10/1990 eay
-+ Have made des_quad_cksum the same as MIT's, the full package
-+ should be compatible with MIT's
-+ Have tested on a DECstation 3100
-+ Still need to fix des_set_key (make it faster).
-+ Does des_cbc_encrypts at 70.5k/sec on a 3100.
-+
-+Version 1.20 18/09/1990 eay
-+ Fixed byte order dependencies.
-+ Fixed (I hope) all the word alignment problems.
-+ Speedup in des_ecb_encrypt.
-+
-+Version 1.10 11/09/1990 eay
-+ Added des_enc_read and des_enc_write.
-+ Still need to fix des_quad_cksum.
-+ Still need to document des_enc_read and des_enc_write.
-+
-+Version 1.00 27/08/1990 eay
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/asm/des-586.pl Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,251 @@
-+#!/usr/local/bin/perl
-+#
-+# The inner loop instruction sequence and the IP/FP modifications are from
-+# Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk>
-+#
-+
-+push(@INC,"perlasm","../../perlasm"); # extra search directories for the scripts required below
-+require "x86asm.pl"; # presumably supplies the &mov/&xor/&push... emitters used below (not defined in this file)
-+require "cbc.pl"; # presumably supplies &cbc
-+require "desboth.pl"; # presumably supplies &des_encrypt3
-+
-+# base code is in Microsoft
-+# op dest, source
-+# format.
-+#
-+
-+&asm_init($ARGV[0],"des-586.pl"); # first command-line argument is passed through to the backend
-+
-+$L="edi"; # register holding the left half
-+$R="esi"; # register holding the right half
-+
-+&external_label("des_SPtrans");
-+&des_encrypt("des_encrypt",1); # $do_ip=1: with IP/FP
-+&des_encrypt("des_encrypt2",0); # $do_ip=0: no IP/FP, rotate-by-3 fixups only
-+&des_encrypt3("des_encrypt3",1);
-+&des_encrypt3("des_decrypt3",0);
-+&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1);
-+&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5);
-+
-+&asm_finish();
-+
-+sub des_encrypt # emit function $name: 16 D_ENCRYPT rounds, wrapped in IP/FP when $do_ip is true
-+ {
-+ local($name,$do_ip)=@_; # $name: emitted function name; $do_ip: true => IP_new before and FP_new after the rounds
-+
-+ &function_begin_B($name,"EXTRN _des_SPtrans:DWORD");
-+
-+ &push("esi");
-+ &push("edi");
-+
-+ &comment("");
-+ &comment("Load the 2 words");
-+ $ks="ebp"; # global: register used for the key schedule pointer
-+
-+ if ($do_ip)
-+ {
-+ &mov($R,&wparam(0)); # $R temporarily holds the data pointer
-+ &xor( "ecx", "ecx" );
-+
-+ &push("ebx");
-+ &push("ebp");
-+
-+ &mov("eax",&DWP(0,$R,"",0));
-+ &mov("ebx",&wparam(2)); # get encrypt flag
-+ &mov($L,&DWP(4,$R,"",0));
-+ &comment("");
-+ &comment("IP");
-+ &IP_new("eax",$L,$R,3);
-+ }
-+ else
-+ {
-+ &mov("eax",&wparam(0));
-+ &xor( "ecx", "ecx" );
-+
-+ &push("ebx");
-+ &push("ebp");
-+
-+ &mov($R,&DWP(0,"eax","",0));
-+ &mov("ebx",&wparam(2)); # get encrypt flag
-+ &rotl($R,3); # no IP here: both words are just rotated left by 3 (undone at "Fixup" below)
-+ &mov($L,&DWP(4,"eax","",0));
-+ &rotl($L,3);
-+ }
-+
-+ &mov( $ks, &wparam(1) );
-+ &cmp("ebx","0");
-+ &je(&label("start_decrypt")); # encrypt flag == 0 selects the decrypt round order
-+
-+ for ($i=0; $i<16; $i+=2) # encrypt: key schedule entries 0..15 in ascending order, two rounds per iteration
-+ {
-+ &comment("");
-+ &comment("Round $i");
-+ &D_ENCRYPT($i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
-+
-+ &comment("");
-+ &comment("Round ".sprintf("%d",$i+1));
-+ &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
-+ }
-+ &jmp(&label("end"));
-+
-+ &set_label("start_decrypt");
-+
-+ for ($i=15; $i>0; $i-=2) # decrypt: key schedule entries 15..0 in descending order
-+ {
-+ &comment("");
-+ &comment("Round $i");
-+ &D_ENCRYPT(15-$i,$L,$R,$i*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
-+ &comment("");
-+ &comment("Round ".sprintf("%d",$i-1));
-+ &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$ks,"des_SPtrans","eax","ebx","ecx","edx");
-+ }
-+
-+ &set_label("end");
-+
-+ if ($do_ip)
-+ {
-+ &comment("");
-+ &comment("FP");
-+ &mov("edx",&wparam(0)); # data pointer for the store below
-+ &FP_new($L,$R,"eax",3);
-+
-+ &mov(&DWP(0,"edx","",0),"eax");
-+ &mov(&DWP(4,"edx","",0),$R);
-+ }
-+ else
-+ {
-+ &comment("");
-+ &comment("Fixup");
-+ &rotr($L,3); # r
-+ &mov("eax",&wparam(0));
-+ &rotr($R,3); # l
-+ &mov(&DWP(0,"eax","",0),$L);
-+ &mov(&DWP(4,"eax","",0),$R);
-+ }
-+
-+ &pop("ebp"); # pops mirror the pushes above in reverse order
-+ &pop("ebx");
-+ &pop("edi");
-+ &pop("esi");
-+ &ret();
-+
-+ &function_end_B($name);
-+ }
-+
-+sub D_ENCRYPT # emit one round: XORs eight $desSP table lookups, indexed by bytes of ($R ^ key words), into $L
-+ {
-+ local($r,$L,$R,$S,$ks,$desSP,$u,$tmp1,$tmp2,$t)=@_; # $r is accepted but not used in this body; $S indexes the key schedule in 4-byte words
-+
-+ &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); # first key word of this round
-+ &xor( $tmp1, $tmp1);
-+ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); # second key word of this round
-+ &xor( $u, $R);
-+ &xor( $t, $R);
-+ &and( $u, "0xfcfcfcfc" );
-+ &and( $t, "0xcfcfcfcf" );
-+ &movb( &LB($tmp1), &LB($u) );
-+ &movb( &LB($tmp2), &HB($u) );
-+ &rotr( $t, 4 );
-+ &mov( $ks, &DWP(" $desSP",$tmp1,"",0)); # from here $ks is reused as a scratch register for table values
-+ &movb( &LB($tmp1), &LB($t) );
-+ &xor( $L, $ks);
-+ &mov( $ks, &DWP("0x200+$desSP",$tmp2,"",0));
-+ &xor( $L, $ks); ######
-+ &movb( &LB($tmp2), &HB($t) );
-+ &shr( $u, 16);
-+ &mov( $ks, &DWP("0x100+$desSP",$tmp1,"",0));
-+ &xor( $L, $ks); ######
-+ &movb( &LB($tmp1), &HB($u) );
-+ &shr( $t, 16);
-+ &mov( $ks, &DWP("0x300+$desSP",$tmp2,"",0));
-+ &xor( $L, $ks);
-+ &mov( $ks, &wparam(1) ); # reload the key schedule pointer that was clobbered above
-+ &movb( &LB($tmp2), &HB($t) );
-+ &and( $u, "0xff" );
-+ &and( $t, "0xff" );
-+ &mov( $tmp1, &DWP("0x600+$desSP",$tmp1,"",0));
-+ &xor( $L, $tmp1);
-+ &mov( $tmp1, &DWP("0x700+$desSP",$tmp2,"",0));
-+ &xor( $L, $tmp1);
-+ &mov( $tmp1, &DWP("0x400+$desSP",$u,"",0));
-+ &xor( $L, $tmp1);
-+ &mov( $tmp1, &DWP("0x500+$desSP",$t,"",0));
-+ &xor( $L, $tmp1);
-+ }
-+
-+sub n2a # format the first argument as a decimal integer string
-+ {
-+ sprintf("%d",$_[0]); # last evaluated expression, so this is the sub's return value
-+ }
-+
-+# now has a side effect of rotating $a by $shift
-+sub R_PERM_OP # emit: rotate $a left by $shift, then $tt = $a ^ m and $b ^= m, where m = ($a ^ $b) & $mask
-+ {
-+ local($a,$b,$tt,$shift,$mask,$last)=@_;
-+
-+ &rotl( $a, $shift ) if ($shift != 0);
-+ &mov( $tt, $a );
-+ &xor( $a, $b );
-+ &and( $a, $mask ); # $a now holds m and no longer the rotated input
-+ if (!$last eq $b) # NOTE(review): parses as (!$last) eq $b, which is false for register names, so the else branch is always emitted
-+ {
-+ &xor( $b, $a );
-+ &xor( $tt, $a );
-+ }
-+ else # same two XORs as above, only the emission order differs
-+ {
-+ &xor( $tt, $a );
-+ &xor( $b, $a );
-+ }
-+ &comment("");
-+ }
-+
-+sub IP_new # emit five R_PERM_OP steps, then rotate the results according to $lr
-+ {
-+ local($l,$r,$tt,$lr)=@_; # callers visible in this patch pass $lr=3 (des_encrypt) or $lr=0 (des_encrypt3)
-+
-+ &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
-+ &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
-+ &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
-+ &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
-+ &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);
-+
-+ if ($lr != 3) # rotate $tt by ($lr-3): left when positive, right when negative
-+ {
-+ if (($lr-3) < 0)
-+ { &rotr($tt, 3-$lr); }
-+ else { &rotl($tt, $lr-3); }
-+ }
-+ if ($lr != 2) # rotate $r by ($lr-2): left when positive, right when negative
-+ {
-+ if (($lr-2) < 0)
-+ { &rotr($r, 2-$lr); }
-+ else { &rotl($r, $lr-2); }
-+ }
-+ }
-+
-+sub FP_new # emit the $lr-dependent rotations first, then five R_PERM_OP steps and a final rotate of $tt
-+ {
-+ local($l,$r,$tt,$lr)=@_;
-+
-+ if ($lr != 2) # rotate $r by ($lr-2) in the opposite direction to IP_new: right when positive, left when negative
-+ {
-+ if (($lr-2) < 0)
-+ { &rotl($r, 2-$lr); }
-+ else { &rotr($r, $lr-2); }
-+ }
-+ if ($lr != 3) # rotate $l by ($lr-3): right when positive, left when negative
-+ {
-+ if (($lr-3) < 0)
-+ { &rotl($l, 3-$lr); }
-+ else { &rotr($l, $lr-3); }
-+ }
-+
-+ &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); # masks appear in the reverse order of IP_new
-+ &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
-+ &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
-+ &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
-+ &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
-+ &rotr($tt , 4);
-+ }
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/asm/des686.pl Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,230 @@
-+#!/usr/local/bin/perl
-+
-+$prog="des686.pl"; # interpolated into the generated-file banner below
-+
-+# base code is in Microsoft
-+# op dest, source
-+# format.
-+#
-+
-+# WILL NOT WORK ANYMORE WITH desboth.pl
-+require "desboth.pl";
-+
-+if ( ($ARGV[0] eq "elf")) # the first command-line argument selects which backend script is loaded
-+ { require "x86unix.pl"; }
-+elsif ( ($ARGV[0] eq "a.out"))
-+ { $aout=1; require "x86unix.pl"; }
-+elsif ( ($ARGV[0] eq "sol"))
-+ { $sol=1; require "x86unix.pl"; }
-+elsif ( ($ARGV[0] eq "cpp"))
-+ { $cpp=1; require "x86unix.pl"; }
-+elsif ( ($ARGV[0] eq "win32"))
-+ { require "x86ms.pl"; }
-+else
-+ {
-+ print STDERR <<"EOF";
-+Pick one target type from
-+ elf - linux, FreeBSD etc
-+ a.out - old linux
-+ sol - x86 solaris
-+ cpp - format so x86unix.cpp can be used
-+ win32 - Windows 95/Windows NT
-+EOF
-+ exit(1); # unknown or missing target type
-+ }
-+
-+&comment("Don't even think of reading this code");
-+&comment("It was automatically generated by $prog");
-+&comment("Which is a perl program used to generate the x86 assember for");
-+&comment("any of elf, a.out, Win32, or Solaris");
-+&comment("It can be found in SSLeay 0.6.5+ or in libdes 3.26+");
-+&comment("eric <eay\@cryptsoft.com>");
-+&comment("");
-+
-+&file("dx86xxxx");
-+
-+$L="edi"; # register holding the left half
-+$R="esi"; # register holding the right half
-+
-+&des_encrypt("des_encrypt",1); # $do_ip=1: with IP/FP
-+&des_encrypt("des_encrypt2",0); # $do_ip=0: without IP/FP
-+
-+&des_encrypt3("des_encrypt3",1); # not defined in this file; presumably from desboth.pl
-+&des_encrypt3("des_decrypt3",0);
-+
-+&file_end();
-+
-+sub des_encrypt # emit function $name: 16 D_ENCRYPT rounds, with IP before and FP after when $do_ip is true
-+ {
-+ local($name,$do_ip)=@_;
-+
-+ &function_begin($name,"EXTRN _des_SPtrans:DWORD");
-+
-+ &comment("");
-+ &comment("Load the 2 words");
-+ &mov("eax",&wparam(0)); # data pointer
-+ &mov($L,&DWP(0,"eax","",0));
-+ &mov($R,&DWP(4,"eax","",0));
-+
-+ $ksp=&wparam(1); # global; not referenced again within this sub
-+
-+ if ($do_ip)
-+ {
-+ &comment("");
-+ &comment("IP");
-+ &IP_new($L,$R,"eax");
-+ }
-+
-+ &comment("");
-+ &comment("fixup rotate");
-+ &rotl($R,3); # undone by the rotr pair at "Fixup" below
-+ &rotl($L,3);
-+ &exch($L,$R);
-+
-+ &comment("");
-+ &comment("load counter, key_schedule and enc flag");
-+ &mov("eax",&wparam(2)); # get encrypt flag
-+ &mov("ebp",&wparam(1)); # get ks
-+ &cmp("eax","0");
-+ &je(&label("start_decrypt")); # encrypt flag == 0 selects the decrypt round order
-+
-+ # encrypting part
-+
-+ for ($i=0; $i<16; $i+=2) # key schedule entries 0..15 in ascending order, two rounds per iteration
-+ {
-+ &comment("");
-+ &comment("Round $i");
-+ &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
-+
-+ &comment("");
-+ &comment("Round ".sprintf("%d",$i+1));
-+ &D_ENCRYPT($R,$L,($i+1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
-+ }
-+ &jmp(&label("end"));
-+
-+ &set_label("start_decrypt");
-+
-+ for ($i=15; $i>0; $i-=2) # key schedule entries 15..0 in descending order
-+ {
-+ &comment("");
-+ &comment("Round $i");
-+ &D_ENCRYPT($L,$R,$i*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
-+ &comment("");
-+ &comment("Round ".sprintf("%d",$i-1));
-+ &D_ENCRYPT($R,$L,($i-1)*2,"ebp","des_SPtrans","ecx","edx","eax","ebx");
-+ }
-+
-+ &set_label("end");
-+
-+ &comment("");
-+ &comment("Fixup");
-+ &rotr($L,3); # r
-+ &rotr($R,3); # l
-+
-+ if ($do_ip)
-+ {
-+ &comment("");
-+ &comment("FP");
-+ &FP_new($R,$L,"eax");
-+ }
-+
-+ &mov("eax",&wparam(0)); # reload the data pointer for the store
-+ &mov(&DWP(0,"eax","",0),$L);
-+ &mov(&DWP(4,"eax","",0),$R);
-+
-+ &function_end($name);
-+ }
-+
-+
-+# The logic is to load R into 2 registers and operate on both at the same time.
-+# We also load the 2 R's into 2 more registers so we can do the 'move word down a byte'
-+# while also masking the other copy and doing a lookup. We then also accumulate the
-+# L value in 2 registers then combine them at the end.
-+sub D_ENCRYPT # emit one round: XORs eight $desSP table lookups into $L, using whole-register mov/shr/and only
-+ {
-+ local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_; # callers in this file pass nine arguments; $tmp3 is never set or used
-+
-+ &mov( $u, &DWP(&n2a($S*4),$ks,"",0)); # first key word of this round
-+ &mov( $t, &DWP(&n2a(($S+1)*4),$ks,"",0)); # second key word of this round
-+ &xor( $u, $R );
-+ &xor( $t, $R );
-+ &rotr( $t, 4 );
-+
-+ # the numbers at the end of the line are original instruction order
-+ &mov( $tmp2, $u ); # 1 2
-+ &mov( $tmp1, $t ); # 1 1
-+ &and( $tmp2, "0xfc" ); # 1 4
-+ &and( $tmp1, "0xfc" ); # 1 3
-+ &shr( $t, 8 ); # 1 5
-+ &xor( $L, &DWP("0x100+$desSP",$tmp1,"",0)); # 1 7
-+ &shr( $u, 8 ); # 1 6
-+ &mov( $tmp1, &DWP(" $desSP",$tmp2,"",0)); # 1 8
-+
-+ &mov( $tmp2, $u ); # 2 2
-+ &xor( $L, $tmp1 ); # 1 9
-+ &and( $tmp2, "0xfc" ); # 2 4
-+ &mov( $tmp1, $t ); # 2 1
-+ &and( $tmp1, "0xfc" ); # 2 3
-+ &shr( $t, 8 ); # 2 5
-+ &xor( $L, &DWP("0x300+$desSP",$tmp1,"",0)); # 2 7
-+ &shr( $u, 8 ); # 2 6
-+ &mov( $tmp1, &DWP("0x200+$desSP",$tmp2,"",0)); # 2 8
-+ &mov( $tmp2, $u ); # 3 2
-+
-+ &xor( $L, $tmp1 ); # 2 9
-+ &and( $tmp2, "0xfc" ); # 3 4
-+
-+ &mov( $tmp1, $t ); # 3 1
-+ &shr( $u, 8 ); # 3 6
-+ &and( $tmp1, "0xfc" ); # 3 3
-+ &shr( $t, 8 ); # 3 5
-+ &xor( $L, &DWP("0x500+$desSP",$tmp1,"",0)); # 3 7
-+ &mov( $tmp1, &DWP("0x400+$desSP",$tmp2,"",0)); # 3 8
-+
-+ &and( $t, "0xfc" ); # 4 1
-+ &xor( $L, $tmp1 ); # 3 9
-+
-+ &and( $u, "0xfc" ); # 4 2
-+ &xor( $L, &DWP("0x700+$desSP",$t,"",0)); # 4 3
-+ &xor( $L, &DWP("0x600+$desSP",$u,"",0)); # 4 4
-+ }
-+
-+sub PERM_OP # emit: t = (($a >> $shift) ^ $b) & $mask; $b ^= t; $a ^= t << $shift
-+ {
-+ local($a,$b,$tt,$shift,$mask)=@_; # $tt is the scratch register holding t
-+
-+ &mov( $tt, $a );
-+ &shr( $tt, $shift );
-+ &xor( $tt, $b );
-+ &and( $tt, $mask ); # t: the masked bits in which ($a >> $shift) and $b differ
-+ &xor( $b, $tt );
-+ &shl( $tt, $shift );
-+ &xor( $a, $tt );
-+ }
-+
-+sub IP_new # emit five PERM_OP steps on $l/$r, alternating the argument order, using $tt as scratch
-+ {
-+ local($l,$r,$tt)=@_;
-+
-+ &PERM_OP($r,$l,$tt, 4,"0x0f0f0f0f");
-+ &PERM_OP($l,$r,$tt,16,"0x0000ffff");
-+ &PERM_OP($r,$l,$tt, 2,"0x33333333");
-+ &PERM_OP($l,$r,$tt, 8,"0x00ff00ff");
-+ &PERM_OP($r,$l,$tt, 1,"0x55555555");
-+ }
-+
-+sub FP_new # emit the same five shift/mask PERM_OP steps as IP_new, in reverse order
-+ {
-+ local($l,$r,$tt)=@_;
-+
-+ &PERM_OP($l,$r,$tt, 1,"0x55555555");
-+ &PERM_OP($r,$l,$tt, 8,"0x00ff00ff");
-+ &PERM_OP($l,$r,$tt, 2,"0x33333333");
-+ &PERM_OP($r,$l,$tt,16,"0x0000ffff");
-+ &PERM_OP($l,$r,$tt, 4,"0x0f0f0f0f");
-+ }
-+
-+sub n2a # format the first argument as a decimal integer string
-+ {
-+ sprintf("%d",$_[0]); # last evaluated expression, so this is the sub's return value
-+ }
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/asm/desboth.pl Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,79 @@
-+#!/usr/local/bin/perl
-+
-+$L="edi"; # register holding the left half
-+$R="esi"; # register holding the right half
-+
-+sub des_encrypt3 # emit function $name: IP, three des_encrypt2 calls with alternating flags, then FP
-+ {
-+ local($name,$enc)=@_; # $enc true: keys used in order 1,2,3 with flags 1,0,1; false: order 3,2,1 with flags 0,1,0
-+
-+ &function_begin_B($name,"");
-+ &push("ebx");
-+ &mov("ebx",&wparam(0)); # ebx = data pointer, reused for every call below
-+
-+ &push("ebp");
-+ &push("esi");
-+
-+ &push("edi");
-+
-+ &comment("");
-+ &comment("Load the data words");
-+ &mov($L,&DWP(0,"ebx","",0));
-+ &mov($R,&DWP(4,"ebx","",0));
-+ &stack_push(3); # presumably reserves the three &swtmp slots written below (helper not shown here)
-+
-+ &comment("");
-+ &comment("IP");
-+ &IP_new($L,$R,"edx",0);
-+
-+ # put them back
-+
-+ if ($enc)
-+ {
-+ &mov(&DWP(4,"ebx","",0),$R);
-+ &mov("eax",&wparam(1)); # eax = key for the first call
-+ &mov(&DWP(0,"ebx","",0),"edx");
-+ &mov("edi",&wparam(2)); # edi = key for the second call
-+ &mov("esi",&wparam(3)); # esi = key for the third call
-+ }
-+ else
-+ {
-+ &mov(&DWP(4,"ebx","",0),$R);
-+ &mov("esi",&wparam(1)); # decrypt: key parameters are loaded so they are used in reverse order
-+ &mov(&DWP(0,"ebx","",0),"edx");
-+ &mov("edi",&wparam(2));
-+ &mov("eax",&wparam(3));
-+ }
-+ &mov(&swtmp(2), (($enc)?"1":"0")); # slot 2: encrypt flag for this call
-+ &mov(&swtmp(1), "eax"); # slot 1: key schedule
-+ &mov(&swtmp(0), "ebx"); # slot 0: data pointer
-+ &call("des_encrypt2");
-+ &mov(&swtmp(2), (($enc)?"0":"1")); # middle call uses the opposite flag
-+ &mov(&swtmp(1), "edi");
-+ &mov(&swtmp(0), "ebx");
-+ &call("des_encrypt2");
-+ &mov(&swtmp(2), (($enc)?"1":"0"));
-+ &mov(&swtmp(1), "esi");
-+ &mov(&swtmp(0), "ebx");
-+ &call("des_encrypt2");
-+
-+ &stack_pop(3);
-+ &mov($L,&DWP(0,"ebx","",0)); # reload the result words written through the data pointer
-+ &mov($R,&DWP(4,"ebx","",0));
-+
-+ &comment("");
-+ &comment("FP");
-+ &FP_new($L,$R,"eax",0);
-+
-+ &mov(&DWP(0,"ebx","",0),"eax");
-+ &mov(&DWP(4,"ebx","",0),$R);
-+
-+ &pop("edi"); # pops mirror the pushes above in reverse order
-+ &pop("esi");
-+ &pop("ebp");
-+ &pop("ebx");
-+ &ret();
-+ &function_end_B($name);
-+ }
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/asm/readme Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,131 @@
-+First up, let me say I don't like writing in assembler. It is not portable,
-+dependent on the particular CPU architecture release and is generally a pig
-+to debug and get right. Having said that, the x86 architecture is probably
-+the most important for speed due to number of boxes and since
-+it appears to be the worst architecture to get
-+good C compilers for. So due to this, I have lowered myself to do
-+assembler for the inner DES routines in libdes :-).
-+
-+The file to implement in assembler is des_enc.c. Replace the following
-+4 functions
-+des_encrypt(DES_LONG data[2],des_key_schedule ks, int encrypt);
-+des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt);
-+des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-+des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3);
-+
-+They encrypt/decrypt the 64 bits held in 'data' using
-+the 'ks' key schedules. The only difference between the 4 functions is that
-+des_encrypt2() does not perform IP() or FP() on the data (this is an
-+optimization for when doing triple DES) and des_encrypt3() and des_decrypt3()
-+perform triple des. The triple DES routines are in here because it does
-+make a big difference to have them located near the des_encrypt2 function
-+at link time..
-+
-+Now as we all know, there are lots of different operating systems running on
-+x86 boxes, and unfortunately they normally try to make sure their assembler
-+formatting is not the same as the other peoples.
-+The 4 main formats I know of are
-+Microsoft Windows 95/Windows NT
-+Elf Includes Linux and FreeBSD(?).
-+a.out The older Linux.
-+Solaris Same as Elf but different comments :-(.
-+
-+Now I was not overly keen to write 4 different copies of the same code,
-+so I wrote a few perl routines to output the correct assembler, given
-+a target assembler type. This code is ugly and is just a hack.
-+The libraries are x86unix.pl and x86ms.pl.
-+des586.pl, des686.pl and des-som[23].pl are the programs to actually
-+generate the assembler.
-+
-+So to generate elf assembler
-+perl des-som3.pl elf >dx86-elf.s
-+For Windows 95/NT
-+perl des-som2.pl win32 >win32.asm
-+
-+[ update 4 Jan 1996 ]
-+I have added another way to do things.
-+perl des-som3.pl cpp >dx86-cpp.s
-+generates a file that will be included by dx86unix.cpp when it is compiled.
-+To build for elf, a.out, solaris, bsdi etc,
-+cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o
-+cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o
-+cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o
-+cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o
-+This was done to cut down the number of files in the distribution.
-+
-+Now the ugly part. I acquired my copy of Intels
-+"Optimization's For Intel's 32-Bit Processors" and found a few interesting
-+things. First, the aim of the exercise is to 'extract' one byte at a time
-+from a word and do an array lookup. This involves getting the byte from
-+the 4 locations in the word and moving it to a new word and doing the lookup.
-+The most obvious way to do this is
-+xor eax, eax # clear word
-+movb al, cl # get low byte
-+xor edi DWORD PTR 0x100+des_SP[eax] # xor in word
-+movb al, ch # get next byte
-+xor edi DWORD PTR 0x300+des_SP[eax] # xor in word
-+shr ecx 16
-+which seems ok. For the pentium, this system appears to be the best.
-+One has to do instruction interleaving to keep both functional units
-+operating, but it is basically very efficient.
-+
-+Now the crunch. When a full register is used after a partial write, eg.
-+mov al, cl
-+xor edi, DWORD PTR 0x100+des_SP[eax]
-+386 - 1 cycle stall
-+486 - 1 cycle stall
-+586 - 0 cycle stall
-+686 - at least 7 cycle stall (page 22 of the above mentioned document).
-+
-+So the technique that produces the best results on a pentium, according to
-+the documentation, will produce hideous results on a pentium pro.
-+
-+To get around this, des686.pl will generate code that is not as fast on
-+a pentium, should be very good on a pentium pro.
-+mov eax, ecx # copy word
-+shr ecx, 8 # line up next byte
-+and eax, 0fch # mask byte
-+xor edi DWORD PTR 0x100+des_SP[eax] # xor in array lookup
-+mov eax, ecx # get word
-+shr ecx 8 # line up next byte
-+and eax, 0fch # mask byte
-+xor edi DWORD PTR 0x300+des_SP[eax] # xor in array lookup
-+
-+Due to the execution units in the pentium, this actually works quite well.
-+For a pentium pro it should be very good. This is the type of output
-+Visual C++ generates.
-+
-+There is a third option. instead of using
-+mov al, ch
-+which is bad on the pentium pro, one may be able to use
-+movzx eax, ch
-+which may not incur the partial write penalty. On the pentium,
-+this instruction takes 4 cycles so is not worth using but on the
-+pentium pro it appears it may be worth while. I need access to one to
-+experiment :-).
-+
-+eric (20 Oct 1996)
-+
-+22 Nov 1996 - I have asked people to run the 2 different version on pentium
-+pros and it appears that the intel documentation is wrong. The
-+mov al,bh is still faster on a pentium pro, so just use des586.pl
-+instead of des686.pl
-+
-+3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these
-+functions into des_enc.c because it does make a massive performance
-+difference on some boxes to have the functions code located close to
-+the des_encrypt2() function.
-+
-+9 Jan 1997 - des-som2.pl is now the correct perl script to use for
-+pentiums. It contains an inner loop from
-+Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at
-+273,000 per second. He had a previous version at 250,000 and the best
-+I was able to get was 203,000. The content has not changed, this is all
-+due to instruction sequencing (and actual instructions choice) which is able
-+to keep both functional units of the pentium going.
-+We may have lost the ugly register usage restrictions when x86 went 32 bit
-+but for the pentium it has been replaced by evil instruction ordering tricks.
-+
-+13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf.
-+raw DES at 281,000 per second on a pentium 100.
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/cbc_enc.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,142 @@
-+/* crypto/des/cbc_enc.c */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+#include "des/des_locl.h"
-+/* DES in CBC mode over `length` bytes from input to output; enc picks the direction. */
-+void des_cbc_encrypt(input, output, length, schedule, ivec, enc)
-+des_cblock (*input); /* source bytes */
-+des_cblock (*output); /* destination bytes */
-+long length; /* byte count; a 1..7 byte remainder is handled after each loop */
-+des_key_schedule schedule; /* passed unchanged to des_encrypt() */
-+des_cblock (*ivec); /* initial chaining value; no store through it appears below */
-+int enc; /* non-zero: encrypt, zero: decrypt */
-+ {
-+ register DES_LONG tin0,tin1;
-+ register DES_LONG tout0,tout1,xor0,xor1;
-+ register unsigned char *in,*out;
-+ register long l=length;
-+ DES_LONG tin[2]; /* two-word work block handed to des_encrypt() */
-+ unsigned char *iv;
-+ /* Optional offload: if ocf_des_assist() reports OCF_PROVIDES_DES_3DES, delegate the whole call. */
-+#ifdef OCF_ASSIST
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES) {
-+ ocf_des_cbc_encrypt(input, output, length, schedule, ivec, enc);
-+ return;
-+ }
-+#endif
-+ /* Byte-pointer views; presumably c2l/l2c advance the pointer they are given - confirm in des_locl.h */
-+ in=(unsigned char *)input;
-+ out=(unsigned char *)output;
-+ iv=(unsigned char *)ivec;
-+ /* Encrypt path: tout0/tout1 carry the previous ciphertext block, starting as the IV. */
-+ if (enc)
-+ {
-+ c2l(iv,tout0);
-+ c2l(iv,tout1);
-+ for (l-=8; l>=0; l-=8) /* one pass per complete 8-byte block */
-+ {
-+ c2l(in,tin0);
-+ c2l(in,tin1);
-+ tin0^=tout0; tin[0]=tin0; /* chain: XOR input with previous output */
-+ tin1^=tout1; tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
-+ tout0=tin[0]; l2c(tout0,out);
-+ tout1=tin[1]; l2c(tout1,out);
-+ }
-+ if (l != -8) /* for length >= 0: l+8 bytes (1..7) are left over */
-+ {
-+ c2ln(in,tin0,tin1,l+8); /* presumably loads only the l+8 remaining bytes - confirm c2ln */
-+ tin0^=tout0; tin[0]=tin0;
-+ tin1^=tout1; tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
-+ tout0=tin[0]; l2c(tout0,out); /* same two l2c stores as for a complete block */
-+ tout1=tin[1]; l2c(tout1,out);
-+ }
-+ }
-+ else
-+ {
-+ c2l(iv,xor0); /* xor0/xor1: previous ciphertext block, starting as the IV */
-+ c2l(iv,xor1);
-+ for (l-=8; l>=0; l-=8) /* one pass per complete 8-byte block */
-+ {
-+ c2l(in,tin0); tin[0]=tin0;
-+ c2l(in,tin1); tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT);
-+ tout0=tin[0]^xor0;
-+ tout1=tin[1]^xor1;
-+ l2c(tout0,out);
-+ l2c(tout1,out);
-+ xor0=tin0; /* tin0/tin1 still hold this block's input words for the next pass */
-+ xor1=tin1;
-+ }
-+ if (l != -8) /* for length >= 0: l+8 bytes (1..7) are left over */
-+ {
-+ c2l(in,tin0); tin[0]=tin0; /* note: input is still loaded with two c2l calls, as for a complete block */
-+ c2l(in,tin1); tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT);
-+ tout0=tin[0]^xor0;
-+ tout1=tin[1]^xor1;
-+ l2cn(tout0,tout1,out,l+8); /* presumably stores only l+8 bytes - confirm l2cn */
-+ /* xor0=tin0;
-+ xor1=tin1; */
-+ }
-+ }
-+ tin0=tin1=tout0=tout1=xor0=xor1=0; /* clear the working values before returning */
-+ tin[0]=tin[1]=0;
-+ }
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/des.doc Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,505 @@
-+The DES library.
-+
-+Please note that this library was originally written to operate with
-+eBones, a version of Kerberos that had had encryption removed when it left
-+the USA and then put back in. As such there are some routines that I will
-+advise not using but they are still in the library for historical reasons.
-+For all calls that have 'input' and 'output' variables, the two can be the
-+same.
-+
-+This library requires the inclusion of 'des.h'.
-+
-+All of the encryption functions take what is called a des_key_schedule as an
-+argument. A des_key_schedule is an expanded form of the des key.
-+A des_key is 8 bytes of odd parity, the type used to hold the key is a
-+des_cblock. A des_cblock is an array of 8 bytes, often in this library
-+description I will refer to input bytes when the function specifies
-+des_cblock's as input or output, this just means that the variable should
-+be a multiple of 8 bytes.
-+
-+The define DES_ENCRYPT is passed to specify encryption, DES_DECRYPT to
-+specify decryption. The functions and global variable are as follows:
-+
-+int des_check_key;
-+ DES keys are supposed to be odd parity. If this variable is set to
-+ a non-zero value, des_set_key() will check that the key has odd
-+ parity and is not one of the known weak DES keys. By default this
-+ variable is turned off;
-+
-+void des_set_odd_parity(
-+des_cblock *key );
-+ This function takes a DES key (8 bytes) and sets the parity to odd.
-+
-+int des_is_weak_key(
-+des_cblock *key );
-+ This function returns a non-zero value if the DES key passed is a
-+ weak DES key. If it is a weak key, don't use it, try a different
-+ one. If you are using 'random' keys, the chances of hitting a weak
-+ key are 1/2^52 so it is probably not worth checking for them.
-+
-+int des_set_key(
-+des_cblock *key,
-+des_key_schedule schedule);
-+ Des_set_key converts an 8 byte DES key into a des_key_schedule.
-+ A des_key_schedule is an expanded form of the key which is used to
-+ perform actual encryption. It can be regenerated from the DES key
-+ so it only needs to be kept when encryption or decryption is about
-+ to occur. Don't save or pass around des_key_schedule's since they
-+ are CPU architecture dependent, DES keys are not. If des_check_key
-+ is non zero, zero is returned if the key has the wrong parity or
-+ the key is a weak key, else 1 is returned.
-+
-+int des_key_sched(
-+des_cblock *key,
-+des_key_schedule schedule);
-+ An alternative name for des_set_key().
-+
-+int des_rw_mode; /* defaults to DES_PCBC_MODE */
-+ This flag holds either DES_CBC_MODE or DES_PCBC_MODE (default).
-+ This specifies the function to use in the enc_read() and enc_write()
-+ functions.
-+
-+void des_encrypt(
-+unsigned long *data,
-+des_key_schedule ks,
-+int enc);
-+ This is the DES encryption function that gets called by just about
-+ every other DES routine in the library. You should not use this
-+ function except to implement 'modes' of DES. I say this because the
-+ functions that call this routine do the conversion from 'char *' to
-+ long, and this needs to be done to make sure 'non-aligned' memory
-+ access do not occur. The characters are loaded 'little endian',
-+ have a look at my source code for more details on how I use this
-+ function.
-+ Data is a pointer to 2 unsigned long's and ks is the
-+ des_key_schedule to use. enc, if non zero, specifies encryption;
-+ zero specifies decryption.
-+
-+void des_encrypt2(
-+unsigned long *data,
-+des_key_schedule ks,
-+int enc);
-+ This function is the same as des_encrypt() except that the DES
-+ initial permutation (IP) and final permutation (FP) have been left
-+ out. As for des_encrypt(), you should not use this function.
-+ It is used by the routines in my library that implement triple DES.
-+ IP() des_encrypt2() des_encrypt2() des_encrypt2() FP() is the same
-+ as des_encrypt() des_encrypt() des_encrypt() except faster :-).
-+
-+void des_ecb_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+des_key_schedule ks,
-+int enc);
-+ This is the basic Electronic Code Book form of DES, the most basic
-+ form. Input is encrypted into output using the key represented by
-+ ks. If enc is non zero (DES_ENCRYPT), encryption occurs, otherwise
-+ decryption occurs. Input is 8 bytes long and output is 8 bytes.
-+ (the des_cblock structure is 8 chars).
-+
-+void des_ecb3_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+des_key_schedule ks1,
-+des_key_schedule ks2,
-+des_key_schedule ks3,
-+int enc);
-+ This is the 3 key EDE mode of ECB DES. What this means is that
-+ the 8 bytes of input is encrypted with ks1, decrypted with ks2 and
-+ then encrypted again with ks3, before being put into output;
-+ C=E(ks3,D(ks2,E(ks1,M))). There is a macro, des_ecb2_encrypt()
-+ that only takes 2 des_key_schedules that implements,
-+ C=E(ks1,D(ks2,E(ks1,M))) in that the final encrypt is done with ks1.
-+
-+void des_cbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec,
-+int enc);
-+ This routine implements DES in Cipher Block Chaining mode.
-+ Input, which should be a multiple of 8 bytes is encrypted
-+ (or decrypted) to output which will also be a multiple of 8 bytes.
-+ The number of bytes is in length (and from what I've said above,
-+ should be a multiple of 8). If length is not a multiple of 8, I'm
-+ not being held responsible :-). ivec is the initialisation vector.
-+ This function does not modify this variable. To correctly implement
-+ cbc mode, you need to do one of 2 things; copy the last 8 bytes of
-+ cipher text for use as the next ivec in your application,
-+ or use des_ncbc_encrypt().
-+ Only this routine has this problem with updating the ivec, all
-+ other routines that are implementing cbc mode update ivec.
-+
-+void des_ncbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule sk,
-+des_cblock *ivec,
-+int enc);
-+ For historical reasons, des_cbc_encrypt() did not update the
-+ ivec with the value required so that subsequent calls to
-+ des_cbc_encrypt() would 'chain'. This was needed so that the same
-+ 'length' values would not need to be used when decrypting.
-+ des_ncbc_encrypt() does the right thing. It is the same as
-+ des_cbc_encrypt() except that ivec is updated with the correct value
-+ to pass in subsequent calls to des_ncbc_encrypt(). I advise using
-+ des_ncbc_encrypt() instead of des_cbc_encrypt().
-+
-+void des_xcbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule sk,
-+des_cblock *ivec,
-+des_cblock *inw,
-+des_cblock *outw,
-+int enc);
-+ This is RSA's DESX mode of DES. It uses inw and outw to
-+ 'whiten' the encryption. inw and outw are secret (unlike the iv)
-+ and are as such, part of the key. So the key is sort of 24 bytes.
-+ This is much better than cbc des.
-+
-+void des_3cbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule sk1,
-+des_key_schedule sk2,
-+des_cblock *ivec1,
-+des_cblock *ivec2,
-+int enc);
-+ This function is flawed, do not use it. I have left it in the
-+ library because it is used in my des(1) program and will function
-+ correctly when used by des(1). If I removed the function, people
-+ could end up unable to decrypt files.
-+ This routine implements outer triple cbc encryption using 2 ks and
-+ 2 ivec's. Use des_ede2_cbc_encrypt() instead.
-+
-+void des_ede3_cbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule ks1,
-+des_key_schedule ks2,
-+des_key_schedule ks3,
-+des_cblock *ivec,
-+int enc);
-+ This function implements inner triple CBC DES encryption with 3
-+ keys. What this means is that each 'DES' operation
-+ inside the cbc mode is really a C=E(ks3,D(ks2,E(ks1,M))).
-+ Again, this is cbc mode so an ivec is required.
-+ This mode is used by SSL.
-+ There is also a des_ede2_cbc_encrypt() that only uses 2
-+ des_key_schedule's, the first being reused for the final
-+ encryption. C=E(ks1,D(ks2,E(ks1,M))). This form of triple DES
-+ is used by the RSAref library.
-+
-+void des_pcbc_encrypt(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec,
-+int enc);
-+ This is Propagating Cipher Block Chaining mode of DES. It is used
-+ by Kerberos v4. Its parameters are the same as des_ncbc_encrypt().
-+
-+void des_cfb_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+int numbits,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec,
-+int enc);
-+ Cipher Feed Back mode of DES. This implementation 'feeds back'
-+ in numbits-bit blocks. The input (and output) is in multiples of numbits
-+ bits. numbits should be a multiple of 8 bits. Length is the
-+ number of bytes input. If numbits is not a multiple of 8 bits,
-+ the extra bits in the bytes will be considered padding. So if
-+ numbits is 12, for each 2 input bytes, the 4 high bits of the
-+ second byte will be ignored. So to encode 72 bits when using
-+ a numbits of 12 will take 12 bytes. To encode 72 bits when using
-+ numbits of 9 will take 16 bytes. To encode 80 bits when using
-+ numbits of 16 will take 10 bytes. etc, etc. This padding will
-+ apply to both input and output.
-+
-+
-+void des_cfb64_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec,
-+int *num,
-+int enc);
-+ This is one of the more useful functions in this DES library, it
-+ implements CFB mode of DES with 64bit feedback. Why is this
-+ useful you ask? Because this routine will allow you to encrypt an
-+ arbitrary number of bytes, no 8 byte padding. Each call to this
-+ routine will encrypt the input bytes to output and then update ivec
-+ and num. num contains 'how far' we are through ivec. If this does
-+ not make much sense, read more about cfb mode of DES :-).
-+
-+void des_ede3_cfb64_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+long length,
-+des_key_schedule ks1,
-+des_key_schedule ks2,
-+des_key_schedule ks3,
-+des_cblock *ivec,
-+int *num,
-+int enc);
-+ Same as des_cfb64_encrypt() except that the DES operation is
-+ triple DES. As usual, there is a macro for
-+ des_ede2_cfb64_encrypt() which reuses ks1.
-+
-+void des_ofb_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+int numbits,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec);
-+ This is an implementation of Output Feed Back mode of DES. It is
-+ the same as des_cfb_encrypt() in that numbits is the size of the
-+ units dealt with during input and output (in bits).
-+
-+void des_ofb64_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec,
-+int *num);
-+ The same as des_cfb64_encrypt() except that it is Output Feed Back
-+ mode.
-+
-+void des_ede3_ofb64_encrypt(
-+unsigned char *in,
-+unsigned char *out,
-+long length,
-+des_key_schedule ks1,
-+des_key_schedule ks2,
-+des_key_schedule ks3,
-+des_cblock *ivec,
-+int *num);
-+ Same as des_ofb64_encrypt() except that the DES operation is
-+ triple DES. As usual, there is a macro for
-+ des_ede2_ofb64_encrypt() which reuses ks1.
-+
-+int des_read_pw_string(
-+char *buf,
-+int length,
-+char *prompt,
-+int verify);
-+ This routine is used to get a password from the terminal with echo
-+ turned off. Buf is where the string will end up and length is the
-+ size of buf. Prompt is a string presented to the 'user' and if
-+ verify is set, the key is asked for twice and unless the 2 copies
-+ match, an error is returned. A return code of -1 indicates a
-+ system error, 1 failure due to user interaction, and 0 is success.
-+
-+unsigned long des_cbc_cksum(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+des_key_schedule ks,
-+des_cblock *ivec);
-+ This function produces an 8 byte checksum from input that it puts in
-+ output and returns the last 4 bytes as a long. The checksum is
-+ generated via cbc mode of DES in which only the last 8 bytes are
-+ kept. I would recommend not using this function but instead using
-+ the EVP_Digest routines, or at least using MD5 or SHA. This
-+ function is used by Kerberos v4 so that is why it stays in the
-+ library.
-+
-+char *des_fcrypt(
-+const char *buf,
-+const char *salt,
-+char *ret);
-+ This is my fast version of the unix crypt(3) function. This version
-+ takes only a small amount of space relative to other fast
-+ crypt() implementations. This is different to the normal crypt
-+ in that the third parameter is the buffer that the return value
-+ is written into. It needs to be at least 14 bytes long. This
-+ function is thread safe, unlike the normal crypt.
-+
-+char *crypt(
-+const char *buf,
-+const char *salt);
-+ This function calls des_fcrypt() with a static array passed as the
-+ third parameter. This emulates the normal non-thread safe semantics
-+ of crypt(3).
-+
-+void des_string_to_key(
-+char *str,
-+des_cblock *key);
-+ This function takes str and converts it into a DES key. I would
-+ recommend using MD5 instead and use the first 8 bytes of output.
-+ When I wrote the first version of these routines back in 1990, MD5
-+ did not exist but I feel these routines are still sound. This
-+ routine is compatible with the one in MIT's libdes.
-+
-+void des_string_to_2keys(
-+char *str,
-+des_cblock *key1,
-+des_cblock *key2);
-+ This function takes str and converts it into 2 DES keys.
-+ I would recommend using MD5 and using the 16 bytes as the 2 keys.
-+ I have nothing against these 2 'string_to_key' routines, it's just
-+ that if you say that your encryption key is generated by using the
-+ 16 bytes of an MD5 hash, every-one knows how you generated your
-+ keys.
-+
-+int des_read_password(
-+des_cblock *key,
-+char *prompt,
-+int verify);
-+ This routine combines des_read_pw_string() with des_string_to_key().
-+
-+int des_read_2passwords(
-+des_cblock *key1,
-+des_cblock *key2,
-+char *prompt,
-+int verify);
-+ This routine combines des_read_pw_string() with des_string_to_2keys().
-+
-+void des_random_seed(
-+des_cblock key);
-+ This routine sets a starting point for des_random_key().
-+
-+void des_random_key(
-+des_cblock ret);
-+ This function returns a random key. Make sure to 'seed' the random
-+ number generator (with des_random_seed()) before using this function.
-+ I personally now use a MD5 based random number system.
-+
-+int des_enc_write(
-+int fd,
-+char *buf,
-+int len,
-+des_key_schedule ks,
-+des_cblock *iv);
-+ This function will write to a file descriptor the encrypted data
-+ from buf. This data will be preceded by a 4 byte 'byte count' and
-+ will be padded out to 8 bytes. The encryption is either CBC or
-+ PCBC depending on the value of des_rw_mode. If it is DES_PCBC_MODE,
-+ pcbc is used, if DES_CBC_MODE, cbc is used. The default is to use
-+ DES_PCBC_MODE.
-+
-+int des_enc_read(
-+int fd,
-+char *buf,
-+int len,
-+des_key_schedule ks,
-+des_cblock *iv);
-+ This routine reads data written by des_enc_write() and decrypts it.
-+ I have used these routines quite a lot but I don't believe they are
-+ suitable for non-blocking io. If you are after a full
-+ authentication/encryption over networks, have a look at SSL instead.
-+
-+unsigned long des_quad_cksum(
-+des_cblock *input,
-+des_cblock *output,
-+long length,
-+int out_count,
-+des_cblock *seed);
-+ This is a function from Kerberos v4 that is not anything to do with
-+ DES but was needed. It is a cksum that is quicker to generate than
-+ des_cbc_cksum(); I personally would use MD5 routines now.
-+=====
-+Modes of DES
-+Quite a bit of the following information has been taken from
-+ AS 2805.5.2
-+ Australian Standard
-+ Electronic funds transfer - Requirements for interfaces,
-+ Part 5.2: Modes of operation for an n-bit block cipher algorithm
-+ Appendix A
-+
-+There are several different modes in which DES can be used, they are
-+as follows.
-+
-+Electronic Codebook Mode (ECB) (des_ecb_encrypt())
-+- 64 bits are enciphered at a time.
-+- The order of the blocks can be rearranged without detection.
-+- The same plaintext block always produces the same ciphertext block
-+ (for the same key) making it vulnerable to a 'dictionary attack'.
-+- An error will only affect one ciphertext block.
-+
-+Cipher Block Chaining Mode (CBC) (des_cbc_encrypt())
-+- a multiple of 64 bits are enciphered at a time.
-+- The CBC mode produces the same ciphertext whenever the same
-+ plaintext is encrypted using the same key and starting variable.
-+- The chaining operation makes the ciphertext blocks dependent on the
-+ current and all preceding plaintext blocks and therefore blocks can not
-+ be rearranged.
-+- The use of different starting variables prevents the same plaintext
-+ enciphering to the same ciphertext.
-+- An error will affect the current and the following ciphertext blocks.
-+
-+Cipher Feedback Mode (CFB) (des_cfb_encrypt())
-+- a number of bits (j) <= 64 are enciphered at a time.
-+- The CFB mode produces the same ciphertext whenever the same
-+ plaintext is encrypted using the same key and starting variable.
-+- The chaining operation makes the ciphertext variables dependent on the
-+ current and all preceding variables and therefore j-bit variables are
-+ chained together and can not be rearranged.
-+- The use of different starting variables prevents the same plaintext
-+ enciphering to the same ciphertext.
-+- The strength of the CFB mode depends on the size of k (maximal if
-+ j == k). In my implementation this is always the case.
-+- Selection of a small value for j will require more cycles through
-+ the encipherment algorithm per unit of plaintext and thus cause
-+ greater processing overheads.
-+- Only multiples of j bits can be enciphered.
-+- An error will affect the current and the following ciphertext variables.
-+
-+Output Feedback Mode (OFB) (des_ofb_encrypt())
-+- a number of bits (j) <= 64 are enciphered at a time.
-+- The OFB mode produces the same ciphertext whenever the same
-+ plaintext is enciphered using the same key and starting variable.
-+ Moreover, in the OFB mode the same key stream is produced when the same
-+ key and start variable are used. Consequently, for security reasons
-+ a specific start variable should be used only once for a given key.
-+- The absence of chaining makes the OFB more vulnerable to specific attacks.
-+- The use of different start variable values prevents the same
-+ plaintext enciphering to the same ciphertext, by producing different
-+ key streams.
-+- Selection of a small value for j will require more cycles through
-+ the encipherment algorithm per unit of plaintext and thus cause
-+ greater processing overheads.
-+- Only multiples of j bits can be enciphered.
-+- OFB mode of operation does not extend ciphertext errors in the
-+ resultant plaintext output. Every bit error in the ciphertext causes
-+ only one bit to be in error in the deciphered plaintext.
-+- OFB mode is not self-synchronising. If the two operations of
-+ encipherment and decipherment get out of synchronism, the system needs
-+ to be re-initialised.
-+- Each re-initialisation should use a value of the start variable
-+ different from the start variable values used before with the same
-+ key. The reason for this is that an identical bit stream would be
-+ produced each time from the same parameters. This would be
-+ susceptible to a 'known plaintext' attack.
-+
-+Triple ECB Mode (des_ecb3_encrypt())
-+- Encrypt with key1, decrypt with key2 and encrypt with key3 again.
-+- As for ECB encryption but increases the key length to 168 bits.
-+ There are theoretic attacks that can be used that make the effective
-+ key length 112 bits, but this attack also requires 2^56 blocks of
-+ memory, not very likely, even for the NSA.
-+- If both keys are the same it is equivalent to encrypting once with
-+ just one key.
-+- If the first and last key are the same, the key length is 112 bits.
-+ There are attacks that could reduce the key space to 55 bits but it
-+ requires 2^56 blocks of memory.
-+- If all 3 keys are the same, this is effectively the same as normal
-+ ecb mode.
-+
-+Triple CBC Mode (des_ede3_cbc_encrypt())
-+- Encrypt with key1, decrypt with key2 and then encrypt with key3.
-+- As for CBC encryption but increases the key length to 168 bits with
-+ the same restrictions as for triple ecb mode.
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/des_enc.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,524 @@
-+/* crypto/des/des_enc.c */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+#include "des/des_locl.h"
-+/* One DES operation, in place, on the two DES_LONG words at data: IP, 16 D_ENCRYPT rounds, FP. */
-+void des_encrypt(data, ks, enc)
-+DES_LONG *data; /* data[0], data[1]: block in, result out */
-+des_key_schedule ks; /* key schedule; aliased as s below */
-+int enc; /* non-zero: round indices 0..30 ascending; zero: 30..0 descending */
-+ {
-+ register DES_LONG l,r,t,u; /* t,u are not used directly here; presumably D_ENCRYPT scratch - confirm */
-+#ifdef DES_PTR
-+ register unsigned char *des_SP=(unsigned char *)des_SPtrans;
-+#endif
-+#ifndef DES_UNROLL
-+ register int i;
-+#endif
-+ register DES_LONG *s;
-+ /* Optional offload: if ocf_des_assist() reports OCF_PROVIDES_DES_3DES, delegate the whole call. */
-+#ifdef OCF_ASSIST
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES) {
-+ ocf_des_encrypt(data, ks, enc);
-+ return;
-+ }
-+#endif
-+
-+ r=data[0]; /* note: r takes word 0 and l takes word 1 */
-+ l=data[1];
-+
-+ IP(r,l);
-+ /* Things have been modified so that the initial rotate is
-+ * done outside the loop. This required the
-+ * des_SPtrans values in sp.h to be rotated 1 bit to the right.
-+ * One perl script later and things have a 5% speed up on a sparc2.
-+ * Thanks to Richard Outerbridge <71755.204@CompuServe.COM>
-+ * for pointing this out. */
-+ /* clear the top bits on machines with 8byte longs */
-+ /* shift left by 2 */
-+ r=ROTATE(r,29)&0xffffffffL;
-+ l=ROTATE(l,29)&0xffffffffL;
-+ /* NOTE(review): ROTATE by 29 here and by 3 before FP total 32 - presumably a net no-op on 32-bit words; confirm ROTATE */
-+ s=(DES_LONG *)ks; /* not referenced directly below; presumably read by D_ENCRYPT - confirm */
-+ /* I don't know if it is worth the effort of loop unrolling the
-+ * inner loop */
-+ if (enc)
-+ {
-+#ifdef DES_UNROLL
-+ D_ENCRYPT(l,r, 0); /* 1 */
-+ D_ENCRYPT(r,l, 2); /* 2 */
-+ D_ENCRYPT(l,r, 4); /* 3 */
-+ D_ENCRYPT(r,l, 6); /* 4 */
-+ D_ENCRYPT(l,r, 8); /* 5 */
-+ D_ENCRYPT(r,l,10); /* 6 */
-+ D_ENCRYPT(l,r,12); /* 7 */
-+ D_ENCRYPT(r,l,14); /* 8 */
-+ D_ENCRYPT(l,r,16); /* 9 */
-+ D_ENCRYPT(r,l,18); /* 10 */
-+ D_ENCRYPT(l,r,20); /* 11 */
-+ D_ENCRYPT(r,l,22); /* 12 */
-+ D_ENCRYPT(l,r,24); /* 13 */
-+ D_ENCRYPT(r,l,26); /* 14 */
-+ D_ENCRYPT(l,r,28); /* 15 */
-+ D_ENCRYPT(r,l,30); /* 16 */
-+#else
-+ for (i=0; i<32; i+=8) /* 4 passes of 4 rounds: indices 0, 2, ..., 30 */
-+ {
-+ D_ENCRYPT(l,r,i+0); /* 1 */
-+ D_ENCRYPT(r,l,i+2); /* 2 */
-+ D_ENCRYPT(l,r,i+4); /* 3 */
-+ D_ENCRYPT(r,l,i+6); /* 4 */
-+ }
-+#endif
-+ }
-+ else
-+ {
-+#ifdef DES_UNROLL
-+ D_ENCRYPT(l,r,30); /* 16 */
-+ D_ENCRYPT(r,l,28); /* 15 */
-+ D_ENCRYPT(l,r,26); /* 14 */
-+ D_ENCRYPT(r,l,24); /* 13 */
-+ D_ENCRYPT(l,r,22); /* 12 */
-+ D_ENCRYPT(r,l,20); /* 11 */
-+ D_ENCRYPT(l,r,18); /* 10 */
-+ D_ENCRYPT(r,l,16); /* 9 */
-+ D_ENCRYPT(l,r,14); /* 8 */
-+ D_ENCRYPT(r,l,12); /* 7 */
-+ D_ENCRYPT(l,r,10); /* 6 */
-+ D_ENCRYPT(r,l, 8); /* 5 */
-+ D_ENCRYPT(l,r, 6); /* 4 */
-+ D_ENCRYPT(r,l, 4); /* 3 */
-+ D_ENCRYPT(l,r, 2); /* 2 */
-+ D_ENCRYPT(r,l, 0); /* 1 */
-+#else
-+ for (i=30; i>0; i-=8) /* i = 30, 22, 14, 6: indices 30 down to 0 */
-+ {
-+ D_ENCRYPT(l,r,i-0); /* 16 */
-+ D_ENCRYPT(r,l,i-2); /* 15 */
-+ D_ENCRYPT(l,r,i-4); /* 14 */
-+ D_ENCRYPT(r,l,i-6); /* 13 */
-+ }
-+#endif
-+ }
-+
-+ /* rotate and clear the top bits on machines with 8byte longs */
-+ l=ROTATE(l,3)&0xffffffffL;
-+ r=ROTATE(r,3)&0xffffffffL;
-+
-+ FP(r,l);
-+ data[0]=l; /* stored the other way round from the load above (r=data[0]) */
-+ data[1]=r;
-+ l=r=t=u=0; /* clear the working values before returning */
-+ }
-+
-+void des_encrypt2(data, ks, enc)
-+DES_LONG *data;
-+des_key_schedule ks;
-+int enc;
-+ {
-+ register DES_LONG l,r,t,u;
-+#ifdef DES_PTR
-+ register unsigned char *des_SP=(unsigned char *)des_SPtrans;
-+#endif
-+#ifndef DES_UNROLL
-+ register int i;
-+#endif
-+ register DES_LONG *s;
-+
-+ r=data[0];
-+ l=data[1];
-+
-+ /* Things have been modified so that the initial rotate is
-+ * done outside the loop. This required the
-+ * des_SPtrans values in sp.h to be rotated 1 bit to the right.
-+ * One perl script later and things have a 5% speed up on a sparc2.
-+ * Thanks to Richard Outerbridge <71755.204@CompuServe.COM>
-+ * for pointing this out. */
-+ /* clear the top bits on machines with 8byte longs */
-+ r=ROTATE(r,29)&0xffffffffL;
-+ l=ROTATE(l,29)&0xffffffffL;
-+
-+ s=(DES_LONG *)ks;
-+ /* I don't know if it is worth the effort of loop unrolling the
-+ * inner loop */
-+ if (enc)
-+ {
-+#ifdef DES_UNROLL
-+ D_ENCRYPT(l,r, 0); /* 1 */
-+ D_ENCRYPT(r,l, 2); /* 2 */
-+ D_ENCRYPT(l,r, 4); /* 3 */
-+ D_ENCRYPT(r,l, 6); /* 4 */
-+ D_ENCRYPT(l,r, 8); /* 5 */
-+ D_ENCRYPT(r,l,10); /* 6 */
-+ D_ENCRYPT(l,r,12); /* 7 */
-+ D_ENCRYPT(r,l,14); /* 8 */
-+ D_ENCRYPT(l,r,16); /* 9 */
-+ D_ENCRYPT(r,l,18); /* 10 */
-+ D_ENCRYPT(l,r,20); /* 11 */
-+ D_ENCRYPT(r,l,22); /* 12 */
-+ D_ENCRYPT(l,r,24); /* 13 */
-+ D_ENCRYPT(r,l,26); /* 14 */
-+ D_ENCRYPT(l,r,28); /* 15 */
-+ D_ENCRYPT(r,l,30); /* 16 */
-+#else
-+ for (i=0; i<32; i+=8)
-+ {
-+ D_ENCRYPT(l,r,i+0); /* 1 */
-+ D_ENCRYPT(r,l,i+2); /* 2 */
-+ D_ENCRYPT(l,r,i+4); /* 3 */
-+ D_ENCRYPT(r,l,i+6); /* 4 */
-+ }
-+#endif
-+ }
-+ else
-+ {
-+#ifdef DES_UNROLL
-+ D_ENCRYPT(l,r,30); /* 16 */
-+ D_ENCRYPT(r,l,28); /* 15 */
-+ D_ENCRYPT(l,r,26); /* 14 */
-+ D_ENCRYPT(r,l,24); /* 13 */
-+ D_ENCRYPT(l,r,22); /* 12 */
-+ D_ENCRYPT(r,l,20); /* 11 */
-+ D_ENCRYPT(l,r,18); /* 10 */
-+ D_ENCRYPT(r,l,16); /* 9 */
-+ D_ENCRYPT(l,r,14); /* 8 */
-+ D_ENCRYPT(r,l,12); /* 7 */
-+ D_ENCRYPT(l,r,10); /* 6 */
-+ D_ENCRYPT(r,l, 8); /* 5 */
-+ D_ENCRYPT(l,r, 6); /* 4 */
-+ D_ENCRYPT(r,l, 4); /* 3 */
-+ D_ENCRYPT(l,r, 2); /* 2 */
-+ D_ENCRYPT(r,l, 0); /* 1 */
-+#else
-+ for (i=30; i>0; i-=8)
-+ {
-+ D_ENCRYPT(l,r,i-0); /* 16 */
-+ D_ENCRYPT(r,l,i-2); /* 15 */
-+ D_ENCRYPT(l,r,i-4); /* 14 */
-+ D_ENCRYPT(r,l,i-6); /* 13 */
-+ }
-+#endif
-+ }
-+ /* rotate and clear the top bits on machines with 8byte longs */
-+ data[0]=ROTATE(l,3)&0xffffffffL;
-+ data[1]=ROTATE(r,3)&0xffffffffL;
-+ l=r=t=u=0;
-+ }
-+
-+void des_encrypt3(data,ks1,ks2,ks3)
-+DES_LONG *data;
-+des_key_schedule ks1;
-+des_key_schedule ks2;
-+des_key_schedule ks3;
-+ {
-+ register DES_LONG l,r;
-+
-+ l=data[0];
-+ r=data[1];
-+ IP(l,r);
-+ data[0]=l;
-+ data[1]=r;
-+ des_encrypt2((DES_LONG *)data,ks1,DES_ENCRYPT);
-+ des_encrypt2((DES_LONG *)data,ks2,DES_DECRYPT);
-+ des_encrypt2((DES_LONG *)data,ks3,DES_ENCRYPT);
-+ l=data[0];
-+ r=data[1];
-+ FP(r,l);
-+ data[0]=l;
-+ data[1]=r;
-+ }
-+
-+void des_decrypt3(data,ks1,ks2,ks3)
-+DES_LONG *data;
-+des_key_schedule ks1;
-+des_key_schedule ks2;
-+des_key_schedule ks3;
-+ {
-+ register DES_LONG l,r;
-+
-+ l=data[0];
-+ r=data[1];
-+ IP(l,r);
-+ data[0]=l;
-+ data[1]=r;
-+ des_encrypt2((DES_LONG *)data,ks3,DES_DECRYPT);
-+ des_encrypt2((DES_LONG *)data,ks2,DES_ENCRYPT);
-+ des_encrypt2((DES_LONG *)data,ks1,DES_DECRYPT);
-+ l=data[0];
-+ r=data[1];
-+ FP(r,l);
-+ data[0]=l;
-+ data[1]=r;
-+ }
-+
-+#ifndef DES_DEFAULT_OPTIONS
-+
-+void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
-+des_cblock (*input);
-+des_cblock (*output);
-+long length;
-+des_key_schedule schedule;
-+des_cblock (*ivec);
-+int enc;
-+ {
-+ register DES_LONG tin0,tin1;
-+ register DES_LONG tout0,tout1,xor0,xor1;
-+ register unsigned char *in,*out;
-+ register long l=length;
-+ DES_LONG tin[2];
-+ unsigned char *iv;
-+
-+#ifdef OCF_ASSIST
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES) {
-+ ocf_des_ncbc_encrypt(input, output, length, schedule, ivec, enc);
-+ return;
-+ }
-+#endif
-+
-+ in=(unsigned char *)input;
-+ out=(unsigned char *)output;
-+ iv=(unsigned char *)ivec;
-+
-+ if (enc)
-+ {
-+ c2l(iv,tout0);
-+ c2l(iv,tout1);
-+ for (l-=8; l>=0; l-=8)
-+ {
-+ c2l(in,tin0);
-+ c2l(in,tin1);
-+ tin0^=tout0; tin[0]=tin0;
-+ tin1^=tout1; tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
-+ tout0=tin[0]; l2c(tout0,out);
-+ tout1=tin[1]; l2c(tout1,out);
-+ }
-+ if (l != -8)
-+ {
-+ c2ln(in,tin0,tin1,l+8);
-+ tin0^=tout0; tin[0]=tin0;
-+ tin1^=tout1; tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
-+ tout0=tin[0]; l2c(tout0,out);
-+ tout1=tin[1]; l2c(tout1,out);
-+ }
-+ iv=(unsigned char *)ivec;
-+ l2c(tout0,iv);
-+ l2c(tout1,iv);
-+ }
-+ else
-+ {
-+ c2l(iv,xor0);
-+ c2l(iv,xor1);
-+ for (l-=8; l>=0; l-=8)
-+ {
-+ c2l(in,tin0); tin[0]=tin0;
-+ c2l(in,tin1); tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT);
-+ tout0=tin[0]^xor0;
-+ tout1=tin[1]^xor1;
-+ l2c(tout0,out);
-+ l2c(tout1,out);
-+ xor0=tin0;
-+ xor1=tin1;
-+ }
-+ if (l != -8)
-+ {
-+ c2l(in,tin0); tin[0]=tin0;
-+ c2l(in,tin1); tin[1]=tin1;
-+ des_encrypt((DES_LONG *)tin,schedule,DES_DECRYPT);
-+ tout0=tin[0]^xor0;
-+ tout1=tin[1]^xor1;
-+ l2cn(tout0,tout1,out,l+8);
-+ xor0=tin0;
-+ xor1=tin1;
-+ }
-+
-+ iv=(unsigned char *)ivec;
-+ l2c(xor0,iv);
-+ l2c(xor1,iv);
-+ }
-+ tin0=tin1=tout0=tout1=xor0=xor1=0;
-+ tin[0]=tin[1]=0;
-+ }
-+
-+void des_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
-+des_cblock (*input);
-+des_cblock (*output);
-+long length;
-+des_key_schedule ks1;
-+des_key_schedule ks2;
-+des_key_schedule ks3;
-+des_cblock (*ivec);
-+int enc;
-+ {
-+ register DES_LONG tin0,tin1;
-+ register DES_LONG tout0,tout1,xor0,xor1;
-+ register unsigned char *in,*out;
-+ register long l=length;
-+ DES_LONG tin[2];
-+ unsigned char *iv;
-+
-+#ifdef OCF_ASSIST
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES) {
-+ ocf_des_ede3_cbc_encrypt(input,output,length,ks1,ks2,ks3,ivec,enc);
-+ return;
-+ }
-+#endif
-+
-+
-+ in=(unsigned char *)input;
-+ out=(unsigned char *)output;
-+ iv=(unsigned char *)ivec;
-+
-+ if (enc)
-+ {
-+ c2l(iv,tout0);
-+ c2l(iv,tout1);
-+ for (l-=8; l>=0; l-=8)
-+ {
-+ c2l(in,tin0);
-+ c2l(in,tin1);
-+ tin0^=tout0;
-+ tin1^=tout1;
-+
-+ tin[0]=tin0;
-+ tin[1]=tin1;
-+ des_encrypt3((DES_LONG *)tin,ks1,ks2,ks3);
-+ tout0=tin[0];
-+ tout1=tin[1];
-+
-+ l2c(tout0,out);
-+ l2c(tout1,out);
-+ }
-+ if (l != -8)
-+ {
-+ c2ln(in,tin0,tin1,l+8);
-+ tin0^=tout0;
-+ tin1^=tout1;
-+
-+ tin[0]=tin0;
-+ tin[1]=tin1;
-+ des_encrypt3((DES_LONG *)tin,ks1,ks2,ks3);
-+ tout0=tin[0];
-+ tout1=tin[1];
-+
-+ l2c(tout0,out);
-+ l2c(tout1,out);
-+ }
-+ iv=(unsigned char *)ivec;
-+ l2c(tout0,iv);
-+ l2c(tout1,iv);
-+ }
-+ else
-+ {
-+ register DES_LONG t0,t1;
-+
-+ c2l(iv,xor0);
-+ c2l(iv,xor1);
-+ for (l-=8; l>=0; l-=8)
-+ {
-+ c2l(in,tin0);
-+ c2l(in,tin1);
-+
-+ t0=tin0;
-+ t1=tin1;
-+
-+ tin[0]=tin0;
-+ tin[1]=tin1;
-+ des_decrypt3((DES_LONG *)tin,ks1,ks2,ks3);
-+ tout0=tin[0];
-+ tout1=tin[1];
-+
-+ tout0^=xor0;
-+ tout1^=xor1;
-+ l2c(tout0,out);
-+ l2c(tout1,out);
-+ xor0=t0;
-+ xor1=t1;
-+ }
-+ if (l != -8)
-+ {
-+ c2l(in,tin0);
-+ c2l(in,tin1);
-+
-+ t0=tin0;
-+ t1=tin1;
-+
-+ tin[0]=tin0;
-+ tin[1]=tin1;
-+ des_decrypt3((DES_LONG *)tin,ks1,ks2,ks3);
-+ tout0=tin[0];
-+ tout1=tin[1];
-+
-+ tout0^=xor0;
-+ tout1^=xor1;
-+ l2cn(tout0,tout1,out,l+8);
-+ xor0=t0;
-+ xor1=t1;
-+ }
-+
-+ iv=(unsigned char *)ivec;
-+ l2c(xor0,iv);
-+ l2c(xor1,iv);
-+ }
-+ tin0=tin1=tout0=tout1=xor0=xor1=0;
-+ tin[0]=tin[1]=0;
-+ }
-+
-+#endif /* DES_DEFAULT_OPTIONS */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/des_opts.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,620 @@
-+/* crypto/des/des_opts.c */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+/* define PART1, PART2, PART3 or PART4 to build only with a few of the options.
-+ * This is for machines with 64k code segment size restrictions. */
-+
-+#ifndef MSDOS
-+#define TIMES
-+#endif
-+
-+#include <stdio.h>
-+#ifndef MSDOS
-+#include <unistd.h>
-+#else
-+#include <io.h>
-+extern void exit();
-+#endif
-+#include <signal.h>
-+#ifndef VMS
-+#ifndef _IRIX
-+#include <time.h>
-+#endif
-+#ifdef TIMES
-+#include <sys/types.h>
-+#include <sys/times.h>
-+#endif
-+#else /* VMS */
-+#include <types.h>
-+struct tms {
-+ time_t tms_utime;
-+ time_t tms_stime;
-+ time_t tms_uchild; /* I dunno... */
-+ time_t tms_uchildsys; /* so these names are a guess :-) */
-+ }
-+#endif
-+#ifndef TIMES
-+#include <sys/timeb.h>
-+#endif
-+
-+#ifdef sun
-+#include <limits.h>
-+#include <sys/param.h>
-+#endif
-+
-+#include "des/des_locl.h"
-+#include "des/spr.h"
-+
-+#define DES_DEFAULT_OPTIONS
-+
-+#if !defined(PART1) && !defined(PART2) && !defined(PART3) && !defined(PART4)
-+#define PART1
-+#define PART2
-+#define PART3
-+#define PART4
-+#endif
-+
-+#ifdef PART1
-+
-+#undef DES_UNROLL
-+#undef DES_RISC1
-+#undef DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#define des_encrypt des_encrypt_u4_cisc_idx
-+#define des_encrypt2 des_encrypt2_u4_cisc_idx
-+#define des_encrypt3 des_encrypt3_u4_cisc_idx
-+#define des_decrypt3 des_decrypt3_u4_cisc_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#undef DES_RISC1
-+#undef DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_cisc_idx
-+#define des_encrypt2 des_encrypt2_u16_cisc_idx
-+#define des_encrypt3 des_encrypt3_u16_cisc_idx
-+#define des_decrypt3 des_decrypt3_u16_cisc_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#undef DES_UNROLL
-+#define DES_RISC1
-+#undef DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u4_risc1_idx
-+#define des_encrypt2 des_encrypt2_u4_risc1_idx
-+#define des_encrypt3 des_encrypt3_u4_risc1_idx
-+#define des_decrypt3 des_decrypt3_u4_risc1_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#endif
-+
-+#ifdef PART2
-+
-+#undef DES_UNROLL
-+#undef DES_RISC1
-+#define DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u4_risc2_idx
-+#define des_encrypt2 des_encrypt2_u4_risc2_idx
-+#define des_encrypt3 des_encrypt3_u4_risc2_idx
-+#define des_decrypt3 des_decrypt3_u4_risc2_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#define DES_RISC1
-+#undef DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_risc1_idx
-+#define des_encrypt2 des_encrypt2_u16_risc1_idx
-+#define des_encrypt3 des_encrypt3_u16_risc1_idx
-+#define des_decrypt3 des_decrypt3_u16_risc1_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#undef DES_RISC1
-+#define DES_RISC2
-+#undef DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_risc2_idx
-+#define des_encrypt2 des_encrypt2_u16_risc2_idx
-+#define des_encrypt3 des_encrypt3_u16_risc2_idx
-+#define des_decrypt3 des_decrypt3_u16_risc2_idx
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#endif
-+
-+#ifdef PART3
-+
-+#undef DES_UNROLL
-+#undef DES_RISC1
-+#undef DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u4_cisc_ptr
-+#define des_encrypt2 des_encrypt2_u4_cisc_ptr
-+#define des_encrypt3 des_encrypt3_u4_cisc_ptr
-+#define des_decrypt3 des_decrypt3_u4_cisc_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#undef DES_RISC1
-+#undef DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_cisc_ptr
-+#define des_encrypt2 des_encrypt2_u16_cisc_ptr
-+#define des_encrypt3 des_encrypt3_u16_cisc_ptr
-+#define des_decrypt3 des_decrypt3_u16_cisc_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#undef DES_UNROLL
-+#define DES_RISC1
-+#undef DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u4_risc1_ptr
-+#define des_encrypt2 des_encrypt2_u4_risc1_ptr
-+#define des_encrypt3 des_encrypt3_u4_risc1_ptr
-+#define des_decrypt3 des_decrypt3_u4_risc1_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#endif
-+
-+#ifdef PART4
-+
-+#undef DES_UNROLL
-+#undef DES_RISC1
-+#define DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u4_risc2_ptr
-+#define des_encrypt2 des_encrypt2_u4_risc2_ptr
-+#define des_encrypt3 des_encrypt3_u4_risc2_ptr
-+#define des_decrypt3 des_decrypt3_u4_risc2_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#define DES_RISC1
-+#undef DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_risc1_ptr
-+#define des_encrypt2 des_encrypt2_u16_risc1_ptr
-+#define des_encrypt3 des_encrypt3_u16_risc1_ptr
-+#define des_decrypt3 des_decrypt3_u16_risc1_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#define DES_UNROLL
-+#undef DES_RISC1
-+#define DES_RISC2
-+#define DES_PTR
-+#undef D_ENCRYPT
-+#undef des_encrypt
-+#undef des_encrypt2
-+#undef des_encrypt3
-+#undef des_decrypt3
-+#define des_encrypt des_encrypt_u16_risc2_ptr
-+#define des_encrypt2 des_encrypt2_u16_risc2_ptr
-+#define des_encrypt3 des_encrypt3_u16_risc2_ptr
-+#define des_decrypt3 des_decrypt3_u16_risc2_ptr
-+#undef HEADER_DES_LOCL_H
-+#include "des_enc.c"
-+
-+#endif
-+
-+/* The following if from times(3) man page. It may need to be changed */
-+#ifndef HZ
-+# ifndef CLK_TCK
-+# ifndef _BSD_CLK_TCK_ /* FreeBSD fix */
-+# ifndef VMS
-+# define HZ 100.0
-+# else /* VMS */
-+# define HZ 100.0
-+# endif
-+# else /* _BSD_CLK_TCK_ */
-+# define HZ ((double)_BSD_CLK_TCK_)
-+# endif
-+# else /* CLK_TCK */
-+# define HZ ((double)CLK_TCK)
-+# endif
-+#endif
-+
-+#define BUFSIZE ((long)1024)
-+long run=0;
-+
-+#ifndef NOPROTO
-+double Time_F(int s);
-+#else
-+double Time_F();
-+#endif
-+
-+#ifdef SIGALRM
-+#if defined(__STDC__) || defined(sgi)
-+#define SIGRETTYPE void
-+#else
-+#define SIGRETTYPE int
-+#endif
-+
-+#ifndef NOPROTO
-+SIGRETTYPE sig_done(int sig);
-+#else
-+SIGRETTYPE sig_done();
-+#endif
-+
-+SIGRETTYPE sig_done(sig)
-+int sig;
-+ {
-+ signal(SIGALRM,sig_done);
-+ run=0;
-+#ifdef LINT
-+ sig=sig;
-+#endif
-+ }
-+#endif
-+
-+#define START 0
-+#define STOP 1
-+
-+double Time_F(s)
-+int s;
-+ {
-+ double ret;
-+#ifdef TIMES
-+ static struct tms tstart,tend;
-+
-+ if (s == START)
-+ {
-+ times(&tstart);
-+ return(0);
-+ }
-+ else
-+ {
-+ times(&tend);
-+ ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
-+ return((ret == 0.0)?1e-6:ret);
-+ }
-+#else /* !times() */
-+ static struct timeb tstart,tend;
-+ long i;
-+
-+ if (s == START)
-+ {
-+ ftime(&tstart);
-+ return(0);
-+ }
-+ else
-+ {
-+ ftime(&tend);
-+ i=(long)tend.millitm-(long)tstart.millitm;
-+ ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
-+ return((ret == 0.0)?1e-6:ret);
-+ }
-+#endif
-+ }
-+
-+#ifdef SIGALRM
-+#define print_name(name) fprintf(stderr,"Doing %s's for 10 seconds\n",name); alarm(10);
-+#else
-+#define print_name(name) fprintf(stderr,"Doing %s %ld times\n",name,cb);
-+#endif
-+
-+#define time_it(func,name,index) \
-+ print_name(name); \
-+ Time_F(START); \
-+ for (count=0,run=1; COND(cb); count++) \
-+ { \
-+ unsigned long d[2]; \
-+ func(d,&(sch[0]),DES_ENCRYPT); \
-+ } \
-+ tm[index]=Time_F(STOP); \
-+ fprintf(stderr,"%ld %s's in %.2f second\n",count,name,tm[index]); \
-+ tm[index]=((double)COUNT(cb))/tm[index];
-+
-+#define print_it(name,index) \
-+ fprintf(stderr,"%s bytes per sec = %12.2f (%5.1fuS)\n",name, \
-+ tm[index]*8,1.0e6/tm[index]);
-+
-+int main(argc,argv)
-+int argc;
-+char **argv;
-+ {
-+ long count;
-+ static unsigned char buf[BUFSIZE];
-+ static des_cblock key ={0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0};
-+ static des_cblock key2={0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0x12};
-+ static des_cblock key3={0x56,0x78,0x9a,0xbc,0xde,0xf0,0x12,0x34};
-+ des_key_schedule sch,sch2,sch3;
-+ double d,tm[16],max=0;
-+ int rank[16];
-+ char *str[16];
-+ int max_idx=0,i,num=0,j;
-+#ifndef SIGALARM
-+ long ca,cb,cc,cd,ce;
-+#endif
-+
-+ for (i=0; i<12; i++)
-+ {
-+ tm[i]=0.0;
-+ rank[i]=0;
-+ }
-+
-+#ifndef TIMES
-+ fprintf(stderr,"To get the most acurate results, try to run this\n");
-+ fprintf(stderr,"program when this computer is idle.\n");
-+#endif
-+
-+ des_set_key((C_Block *)key,sch);
-+ des_set_key((C_Block *)key2,sch2);
-+ des_set_key((C_Block *)key3,sch3);
-+
-+#ifndef SIGALRM
-+ fprintf(stderr,"First we calculate the approximate speed ...\n");
-+ des_set_key((C_Block *)key,sch);
-+ count=10;
-+ do {
-+ long i;
-+ unsigned long data[2];
-+
-+ count*=2;
-+ Time_F(START);
-+ for (i=count; i; i--)
-+ des_encrypt(data,&(sch[0]),DES_ENCRYPT);
-+ d=Time_F(STOP);
-+ } while (d < 3.0);
-+ ca=count;
-+ cb=count*3;
-+ cc=count*3*8/BUFSIZE+1;
-+ cd=count*8/BUFSIZE+1;
-+
-+ ce=count/20+1;
-+#define COND(d) (count != (d))
-+#define COUNT(d) (d)
-+#else
-+#define COND(c) (run)
-+#define COUNT(d) (count)
-+ signal(SIGALRM,sig_done);
-+ alarm(10);
-+#endif
-+
-+#ifdef PART1
-+ time_it(des_encrypt_u4_cisc_idx, "des_encrypt_u4_cisc_idx ", 0);
-+ time_it(des_encrypt_u16_cisc_idx, "des_encrypt_u16_cisc_idx ", 1);
-+ time_it(des_encrypt_u4_risc1_idx, "des_encrypt_u4_risc1_idx ", 2);
-+ num+=3;
-+#endif
-+#ifdef PART2
-+ time_it(des_encrypt_u16_risc1_idx,"des_encrypt_u16_risc1_idx", 3);
-+ time_it(des_encrypt_u4_risc2_idx, "des_encrypt_u4_risc2_idx ", 4);
-+ time_it(des_encrypt_u16_risc2_idx,"des_encrypt_u16_risc2_idx", 5);
-+ num+=3;
-+#endif
-+#ifdef PART3
-+ time_it(des_encrypt_u4_cisc_ptr, "des_encrypt_u4_cisc_ptr ", 6);
-+ time_it(des_encrypt_u16_cisc_ptr, "des_encrypt_u16_cisc_ptr ", 7);
-+ time_it(des_encrypt_u4_risc1_ptr, "des_encrypt_u4_risc1_ptr ", 8);
-+ num+=3;
-+#endif
-+#ifdef PART4
-+ time_it(des_encrypt_u16_risc1_ptr,"des_encrypt_u16_risc1_ptr", 9);
-+ time_it(des_encrypt_u4_risc2_ptr, "des_encrypt_u4_risc2_ptr ",10);
-+ time_it(des_encrypt_u16_risc2_ptr,"des_encrypt_u16_risc2_ptr",11);
-+ num+=3;
-+#endif
-+
-+#ifdef PART1
-+ str[0]=" 4 c i";
-+ print_it("des_encrypt_u4_cisc_idx ",0);
-+ max=tm[0];
-+ max_idx=0;
-+ str[1]="16 c i";
-+ print_it("des_encrypt_u16_cisc_idx ",1);
-+ if (max < tm[1]) { max=tm[1]; max_idx=1; }
-+ str[2]=" 4 r1 i";
-+ print_it("des_encrypt_u4_risc1_idx ",2);
-+ if (max < tm[2]) { max=tm[2]; max_idx=2; }
-+#endif
-+#ifdef PART2
-+ str[3]="16 r1 i";
-+ print_it("des_encrypt_u16_risc1_idx",3);
-+ if (max < tm[3]) { max=tm[3]; max_idx=3; }
-+ str[4]=" 4 r2 i";
-+ print_it("des_encrypt_u4_risc2_idx ",4);
-+ if (max < tm[4]) { max=tm[4]; max_idx=4; }
-+ str[5]="16 r2 i";
-+ print_it("des_encrypt_u16_risc2_idx",5);
-+ if (max < tm[5]) { max=tm[5]; max_idx=5; }
-+#endif
-+#ifdef PART3
-+ str[6]=" 4 c p";
-+ print_it("des_encrypt_u4_cisc_ptr ",6);
-+ if (max < tm[6]) { max=tm[6]; max_idx=6; }
-+ str[7]="16 c p";
-+ print_it("des_encrypt_u16_cisc_ptr ",7);
-+ if (max < tm[7]) { max=tm[7]; max_idx=7; }
-+ str[8]=" 4 r1 p";
-+ print_it("des_encrypt_u4_risc1_ptr ",8);
-+ if (max < tm[8]) { max=tm[8]; max_idx=8; }
-+#endif
-+#ifdef PART4
-+ str[9]="16 r1 p";
-+ print_it("des_encrypt_u16_risc1_ptr",9);
-+ if (max < tm[9]) { max=tm[9]; max_idx=9; }
-+ str[10]=" 4 r2 p";
-+ print_it("des_encrypt_u4_risc2_ptr ",10);
-+ if (max < tm[10]) { max=tm[10]; max_idx=10; }
-+ str[11]="16 r2 p";
-+ print_it("des_encrypt_u16_risc2_ptr",11);
-+ if (max < tm[11]) { max=tm[11]; max_idx=11; }
-+#endif
-+ printf("options des ecb/s\n");
-+ printf("%s %12.2f 100.0%%\n",str[max_idx],tm[max_idx]);
-+ d=tm[max_idx];
-+ tm[max_idx]= -2.0;
-+ max= -1.0;
-+ for (;;)
-+ {
-+ for (i=0; i<12; i++)
-+ {
-+ if (max < tm[i]) { max=tm[i]; j=i; }
-+ }
-+ if (max < 0.0) break;
-+ printf("%s %12.2f %4.1f%%\n",str[j],tm[j],tm[j]/d*100.0);
-+ tm[j]= -2.0;
-+ max= -1.0;
-+ }
-+
-+ switch (max_idx)
-+ {
-+ case 0:
-+ printf("-DDES_DEFAULT_OPTIONS\n");
-+ break;
-+ case 1:
-+ printf("-DDES_UNROLL\n");
-+ break;
-+ case 2:
-+ printf("-DDES_RISC1\n");
-+ break;
-+ case 3:
-+ printf("-DDES_UNROLL -DDES_RISC1\n");
-+ break;
-+ case 4:
-+ printf("-DDES_RISC2\n");
-+ break;
-+ case 5:
-+ printf("-DDES_UNROLL -DDES_RISC2\n");
-+ break;
-+ case 6:
-+ printf("-DDES_PTR\n");
-+ break;
-+ case 7:
-+ printf("-DDES_UNROLL -DDES_PTR\n");
-+ break;
-+ case 8:
-+ printf("-DDES_RISC1 -DDES_PTR\n");
-+ break;
-+ case 9:
-+ printf("-DDES_UNROLL -DDES_RISC1 -DDES_PTR\n");
-+ break;
-+ case 10:
-+ printf("-DDES_RISC2 -DDES_PTR\n");
-+ break;
-+ case 11:
-+ printf("-DDES_UNROLL -DDES_RISC2 -DDES_PTR\n");
-+ break;
-+ }
-+ exit(0);
-+#if defined(LINT) || defined(MSDOS)
-+ return(0);
-+#endif
-+ }
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/dx86unix.S Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,3160 @@
-+/*
-+ * This file was originally generated by Michael Richardson <mcr@freeswan.org>
-+ * via the perl scripts found in the ASM subdir. It remains copyright of
-+ * Eric Young, see the file COPYRIGHT.
-+ *
-+ * This was last done on October 9, 2002.
-+ *
-+ * While this file does not need to go through cpp, we pass it through
-+ * CPP by naming it dx86unix.S instead of dx86unix.s because there is
-+ * a bug in Rules.make for .s builds - specifically it references EXTRA_CFLAGS
-+ * which may contain stuff that AS doesn't understand instead of
-+ * referencing EXTRA_AFLAGS.
-+ */
-+
-+ .file "dx86unix.S"
-+ .version "01.01"
-+.text
-+ .align 16
-+.globl des_encrypt
-+ .type des_encrypt , @function
-+des_encrypt:
-+ pushl %esi
-+ pushl %edi
-+
-+
-+ movl 12(%esp), %esi
-+ xorl %ecx, %ecx
-+ pushl %ebx
-+ pushl %ebp
-+ movl (%esi), %eax
-+ movl 28(%esp), %ebx
-+ movl 4(%esi), %edi
-+
-+
-+ roll $4, %eax
-+ movl %eax, %esi
-+ xorl %edi, %eax
-+ andl $0xf0f0f0f0, %eax
-+ xorl %eax, %esi
-+ xorl %eax, %edi
-+
-+ roll $20, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0xfff0000f, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $14, %eax
-+ movl %eax, %edi
-+ xorl %esi, %eax
-+ andl $0x33333333, %eax
-+ xorl %eax, %edi
-+ xorl %eax, %esi
-+
-+ roll $22, %esi
-+ movl %esi, %eax
-+ xorl %edi, %esi
-+ andl $0x03fc03fc, %esi
-+ xorl %esi, %eax
-+ xorl %esi, %edi
-+
-+ roll $9, %eax
-+ movl %eax, %esi
-+ xorl %edi, %eax
-+ andl $0xaaaaaaaa, %eax
-+ xorl %eax, %esi
-+ xorl %eax, %edi
-+
-+.byte 209
-+.byte 199
-+ movl 24(%esp), %ebp
-+ cmpl $0, %ebx
-+ je .L000start_decrypt
-+
-+
-+ movl (%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 4(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 8(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 12(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 16(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 20(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 24(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 28(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 32(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 36(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 40(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 44(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 48(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 52(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 56(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 60(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 64(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 68(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 72(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 76(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 80(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 84(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 88(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 92(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 96(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 100(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 104(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 108(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 112(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 116(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 120(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 124(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+ jmp .L001end
-+.L000start_decrypt:
-+
-+
-+ movl 120(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 124(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 112(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 116(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 104(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 108(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 96(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 100(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 88(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 92(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 80(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 84(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 72(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 76(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 64(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 68(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 56(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 60(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 48(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 52(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 40(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 44(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 32(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 36(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 24(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 28(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 16(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 20(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 8(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 12(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl (%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 4(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+.L001end:
-+
-+
-+ movl 20(%esp), %edx
-+.byte 209
-+.byte 206
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0xaaaaaaaa, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $23, %eax
-+ movl %eax, %edi
-+ xorl %esi, %eax
-+ andl $0x03fc03fc, %eax
-+ xorl %eax, %edi
-+ xorl %eax, %esi
-+
-+ roll $10, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0x33333333, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $18, %esi
-+ movl %esi, %edi
-+ xorl %eax, %esi
-+ andl $0xfff0000f, %esi
-+ xorl %esi, %edi
-+ xorl %esi, %eax
-+
-+ roll $12, %edi
-+ movl %edi, %esi
-+ xorl %eax, %edi
-+ andl $0xf0f0f0f0, %edi
-+ xorl %edi, %esi
-+ xorl %edi, %eax
-+
-+ rorl $4, %eax
-+ movl %eax, (%edx)
-+ movl %esi, 4(%edx)
-+ popl %ebp
-+ popl %ebx
-+ popl %edi
-+ popl %esi
-+ ret
-+.des_encrypt_end:
-+ .size des_encrypt , .des_encrypt_end-des_encrypt
-+.ident "desasm.pl"
-+.text
-+ .align 16
-+.globl des_encrypt2
-+ .type des_encrypt2 , @function
-+des_encrypt2:
-+ pushl %esi
-+ pushl %edi
-+
-+
-+ movl 12(%esp), %eax
-+ xorl %ecx, %ecx
-+ pushl %ebx
-+ pushl %ebp
-+ movl (%eax), %esi
-+ movl 28(%esp), %ebx
-+ roll $3, %esi
-+ movl 4(%eax), %edi
-+ roll $3, %edi
-+ movl 24(%esp), %ebp
-+ cmpl $0, %ebx
-+ je .L002start_decrypt
-+
-+
-+ movl (%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 4(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 8(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 12(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 16(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 20(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 24(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 28(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 32(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 36(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 40(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 44(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 48(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 52(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 56(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 60(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 64(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 68(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 72(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 76(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 80(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 84(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 88(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 92(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 96(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 100(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 104(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 108(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 112(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 116(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 120(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 124(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+ jmp .L003end
-+.L002start_decrypt:
-+
-+
-+ movl 120(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 124(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 112(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 116(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 104(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 108(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 96(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 100(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 88(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 92(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 80(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 84(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 72(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 76(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 64(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 68(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 56(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 60(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 48(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 52(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 40(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 44(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 32(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 36(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 24(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 28(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl 16(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 20(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+
-+
-+ movl 8(%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 12(%ebp), %edx
-+ xorl %esi, %eax
-+ xorl %esi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %edi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %edi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %edi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %edi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %edi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %edi
-+
-+
-+ movl (%ebp), %eax
-+ xorl %ebx, %ebx
-+ movl 4(%ebp), %edx
-+ xorl %edi, %eax
-+ xorl %edi, %edx
-+ andl $0xfcfcfcfc, %eax
-+ andl $0xcfcfcfcf, %edx
-+ movb %al, %bl
-+ movb %ah, %cl
-+ rorl $4, %edx
-+ movl des_SPtrans(%ebx),%ebp
-+ movb %dl, %bl
-+ xorl %ebp, %esi
-+ movl 0x200+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movb %dh, %cl
-+ shrl $16, %eax
-+ movl 0x100+des_SPtrans(%ebx),%ebp
-+ xorl %ebp, %esi
-+ movb %ah, %bl
-+ shrl $16, %edx
-+ movl 0x300+des_SPtrans(%ecx),%ebp
-+ xorl %ebp, %esi
-+ movl 24(%esp), %ebp
-+ movb %dh, %cl
-+ andl $0xff, %eax
-+ andl $0xff, %edx
-+ movl 0x600+des_SPtrans(%ebx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x700+des_SPtrans(%ecx),%ebx
-+ xorl %ebx, %esi
-+ movl 0x400+des_SPtrans(%eax),%ebx
-+ xorl %ebx, %esi
-+ movl 0x500+des_SPtrans(%edx),%ebx
-+ xorl %ebx, %esi
-+.L003end:
-+
-+
-+ rorl $3, %edi
-+ movl 20(%esp), %eax
-+ rorl $3, %esi
-+ movl %edi, (%eax)
-+ movl %esi, 4(%eax)
-+ popl %ebp
-+ popl %ebx
-+ popl %edi
-+ popl %esi
-+ ret
-+.des_encrypt2_end:
-+ .size des_encrypt2 , .des_encrypt2_end-des_encrypt2
-+.ident "desasm.pl"
-+.text
-+ .align 16
-+.globl des_encrypt3
-+ .type des_encrypt3 , @function
-+des_encrypt3:
-+ pushl %ebx
-+ movl 8(%esp), %ebx
-+ pushl %ebp
-+ pushl %esi
-+ pushl %edi
-+
-+
-+ movl (%ebx), %edi
-+ movl 4(%ebx), %esi
-+ subl $12, %esp
-+
-+
-+ roll $4, %edi
-+ movl %edi, %edx
-+ xorl %esi, %edi
-+ andl $0xf0f0f0f0, %edi
-+ xorl %edi, %edx
-+ xorl %edi, %esi
-+
-+ roll $20, %esi
-+ movl %esi, %edi
-+ xorl %edx, %esi
-+ andl $0xfff0000f, %esi
-+ xorl %esi, %edi
-+ xorl %esi, %edx
-+
-+ roll $14, %edi
-+ movl %edi, %esi
-+ xorl %edx, %edi
-+ andl $0x33333333, %edi
-+ xorl %edi, %esi
-+ xorl %edi, %edx
-+
-+ roll $22, %edx
-+ movl %edx, %edi
-+ xorl %esi, %edx
-+ andl $0x03fc03fc, %edx
-+ xorl %edx, %edi
-+ xorl %edx, %esi
-+
-+ roll $9, %edi
-+ movl %edi, %edx
-+ xorl %esi, %edi
-+ andl $0xaaaaaaaa, %edi
-+ xorl %edi, %edx
-+ xorl %edi, %esi
-+
-+ rorl $3, %edx
-+ rorl $2, %esi
-+ movl %esi, 4(%ebx)
-+ movl 36(%esp), %eax
-+ movl %edx, (%ebx)
-+ movl 40(%esp), %edi
-+ movl 44(%esp), %esi
-+ movl $1, 8(%esp)
-+ movl %eax, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ movl $0, 8(%esp)
-+ movl %edi, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ movl $1, 8(%esp)
-+ movl %esi, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ addl $12, %esp
-+ movl (%ebx), %edi
-+ movl 4(%ebx), %esi
-+
-+
-+ roll $2, %esi
-+ roll $3, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0xaaaaaaaa, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $23, %eax
-+ movl %eax, %edi
-+ xorl %esi, %eax
-+ andl $0x03fc03fc, %eax
-+ xorl %eax, %edi
-+ xorl %eax, %esi
-+
-+ roll $10, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0x33333333, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $18, %esi
-+ movl %esi, %edi
-+ xorl %eax, %esi
-+ andl $0xfff0000f, %esi
-+ xorl %esi, %edi
-+ xorl %esi, %eax
-+
-+ roll $12, %edi
-+ movl %edi, %esi
-+ xorl %eax, %edi
-+ andl $0xf0f0f0f0, %edi
-+ xorl %edi, %esi
-+ xorl %edi, %eax
-+
-+ rorl $4, %eax
-+ movl %eax, (%ebx)
-+ movl %esi, 4(%ebx)
-+ popl %edi
-+ popl %esi
-+ popl %ebp
-+ popl %ebx
-+ ret
-+.des_encrypt3_end:
-+ .size des_encrypt3 , .des_encrypt3_end-des_encrypt3
-+.ident "desasm.pl"
-+.text
-+ .align 16
-+.globl des_decrypt3
-+ .type des_decrypt3 , @function
-+des_decrypt3:
-+ pushl %ebx
-+ movl 8(%esp), %ebx
-+ pushl %ebp
-+ pushl %esi
-+ pushl %edi
-+
-+
-+ movl (%ebx), %edi
-+ movl 4(%ebx), %esi
-+ subl $12, %esp
-+
-+
-+ roll $4, %edi
-+ movl %edi, %edx
-+ xorl %esi, %edi
-+ andl $0xf0f0f0f0, %edi
-+ xorl %edi, %edx
-+ xorl %edi, %esi
-+
-+ roll $20, %esi
-+ movl %esi, %edi
-+ xorl %edx, %esi
-+ andl $0xfff0000f, %esi
-+ xorl %esi, %edi
-+ xorl %esi, %edx
-+
-+ roll $14, %edi
-+ movl %edi, %esi
-+ xorl %edx, %edi
-+ andl $0x33333333, %edi
-+ xorl %edi, %esi
-+ xorl %edi, %edx
-+
-+ roll $22, %edx
-+ movl %edx, %edi
-+ xorl %esi, %edx
-+ andl $0x03fc03fc, %edx
-+ xorl %edx, %edi
-+ xorl %edx, %esi
-+
-+ roll $9, %edi
-+ movl %edi, %edx
-+ xorl %esi, %edi
-+ andl $0xaaaaaaaa, %edi
-+ xorl %edi, %edx
-+ xorl %edi, %esi
-+
-+ rorl $3, %edx
-+ rorl $2, %esi
-+ movl %esi, 4(%ebx)
-+ movl 36(%esp), %esi
-+ movl %edx, (%ebx)
-+ movl 40(%esp), %edi
-+ movl 44(%esp), %eax
-+ movl $0, 8(%esp)
-+ movl %eax, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ movl $1, 8(%esp)
-+ movl %edi, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ movl $0, 8(%esp)
-+ movl %esi, 4(%esp)
-+ movl %ebx, (%esp)
-+ call des_encrypt2
-+ addl $12, %esp
-+ movl (%ebx), %edi
-+ movl 4(%ebx), %esi
-+
-+
-+ roll $2, %esi
-+ roll $3, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0xaaaaaaaa, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $23, %eax
-+ movl %eax, %edi
-+ xorl %esi, %eax
-+ andl $0x03fc03fc, %eax
-+ xorl %eax, %edi
-+ xorl %eax, %esi
-+
-+ roll $10, %edi
-+ movl %edi, %eax
-+ xorl %esi, %edi
-+ andl $0x33333333, %edi
-+ xorl %edi, %eax
-+ xorl %edi, %esi
-+
-+ roll $18, %esi
-+ movl %esi, %edi
-+ xorl %eax, %esi
-+ andl $0xfff0000f, %esi
-+ xorl %esi, %edi
-+ xorl %esi, %eax
-+
-+ roll $12, %edi
-+ movl %edi, %esi
-+ xorl %eax, %edi
-+ andl $0xf0f0f0f0, %edi
-+ xorl %edi, %esi
-+ xorl %edi, %eax
-+
-+ rorl $4, %eax
-+ movl %eax, (%ebx)
-+ movl %esi, 4(%ebx)
-+ popl %edi
-+ popl %esi
-+ popl %ebp
-+ popl %ebx
-+ ret
-+.des_decrypt3_end:
-+ .size des_decrypt3 , .des_decrypt3_end-des_decrypt3
-+.ident "desasm.pl"
-+.text
-+ .align 16
-+.globl des_ncbc_encrypt
-+ .type des_ncbc_encrypt , @function
-+des_ncbc_encrypt:
-+
-+ pushl %ebp
-+ pushl %ebx
-+ pushl %esi
-+ pushl %edi
-+ movl 28(%esp), %ebp
-+
-+ movl 36(%esp), %ebx
-+ movl (%ebx), %esi
-+ movl 4(%ebx), %edi
-+ pushl %edi
-+ pushl %esi
-+ pushl %edi
-+ pushl %esi
-+ movl %esp, %ebx
-+ movl 36(%esp), %esi
-+ movl 40(%esp), %edi
-+
-+ movl 56(%esp), %ecx
-+
-+ pushl %ecx
-+
-+ movl 52(%esp), %eax
-+ pushl %eax
-+ pushl %ebx
-+ cmpl $0, %ecx
-+ jz .L004decrypt
-+ andl $4294967288, %ebp
-+ movl 12(%esp), %eax
-+ movl 16(%esp), %ebx
-+ jz .L005encrypt_finish
-+.L006encrypt_loop:
-+ movl (%esi), %ecx
-+ movl 4(%esi), %edx
-+ xorl %ecx, %eax
-+ xorl %edx, %ebx
-+ movl %eax, 12(%esp)
-+ movl %ebx, 16(%esp)
-+ call des_encrypt
-+ movl 12(%esp), %eax
-+ movl 16(%esp), %ebx
-+ movl %eax, (%edi)
-+ movl %ebx, 4(%edi)
-+ addl $8, %esi
-+ addl $8, %edi
-+ subl $8, %ebp
-+ jnz .L006encrypt_loop
-+.L005encrypt_finish:
-+ movl 56(%esp), %ebp
-+ andl $7, %ebp
-+ jz .L007finish
-+ xorl %ecx, %ecx
-+ xorl %edx, %edx
-+ movl .L008cbc_enc_jmp_table(,%ebp,4),%ebp
-+ jmp *%ebp
-+.L009ej7:
-+ movb 6(%esi), %dh
-+ sall $8, %edx
-+.L010ej6:
-+ movb 5(%esi), %dh
-+.L011ej5:
-+ movb 4(%esi), %dl
-+.L012ej4:
-+ movl (%esi), %ecx
-+ jmp .L013ejend
-+.L014ej3:
-+ movb 2(%esi), %ch
-+ sall $8, %ecx
-+.L015ej2:
-+ movb 1(%esi), %ch
-+.L016ej1:
-+ movb (%esi), %cl
-+.L013ejend:
-+ xorl %ecx, %eax
-+ xorl %edx, %ebx
-+ movl %eax, 12(%esp)
-+ movl %ebx, 16(%esp)
-+ call des_encrypt
-+ movl 12(%esp), %eax
-+ movl 16(%esp), %ebx
-+ movl %eax, (%edi)
-+ movl %ebx, 4(%edi)
-+ jmp .L007finish
-+.align 16
-+.L004decrypt:
-+ andl $4294967288, %ebp
-+ movl 20(%esp), %eax
-+ movl 24(%esp), %ebx
-+ jz .L017decrypt_finish
-+.L018decrypt_loop:
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %eax, 12(%esp)
-+ movl %ebx, 16(%esp)
-+ call des_encrypt
-+ movl 12(%esp), %eax
-+ movl 16(%esp), %ebx
-+ movl 20(%esp), %ecx
-+ movl 24(%esp), %edx
-+ xorl %eax, %ecx
-+ xorl %ebx, %edx
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %ecx, (%edi)
-+ movl %edx, 4(%edi)
-+ movl %eax, 20(%esp)
-+ movl %ebx, 24(%esp)
-+ addl $8, %esi
-+ addl $8, %edi
-+ subl $8, %ebp
-+ jnz .L018decrypt_loop
-+.L017decrypt_finish:
-+ movl 56(%esp), %ebp
-+ andl $7, %ebp
-+ jz .L007finish
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %eax, 12(%esp)
-+ movl %ebx, 16(%esp)
-+ call des_encrypt
-+ movl 12(%esp), %eax
-+ movl 16(%esp), %ebx
-+ movl 20(%esp), %ecx
-+ movl 24(%esp), %edx
-+ xorl %eax, %ecx
-+ xorl %ebx, %edx
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+.L019dj7:
-+ rorl $16, %edx
-+ movb %dl, 6(%edi)
-+ shrl $16, %edx
-+.L020dj6:
-+ movb %dh, 5(%edi)
-+.L021dj5:
-+ movb %dl, 4(%edi)
-+.L022dj4:
-+ movl %ecx, (%edi)
-+ jmp .L023djend
-+.L024dj3:
-+ rorl $16, %ecx
-+ movb %cl, 2(%edi)
-+ sall $16, %ecx
-+.L025dj2:
-+ movb %ch, 1(%esi)
-+.L026dj1:
-+ movb %cl, (%esi)
-+.L023djend:
-+ jmp .L007finish
-+.align 16
-+.L007finish:
-+ movl 64(%esp), %ecx
-+ addl $28, %esp
-+ movl %eax, (%ecx)
-+ movl %ebx, 4(%ecx)
-+ popl %edi
-+ popl %esi
-+ popl %ebx
-+ popl %ebp
-+ ret
-+.align 16
-+.L008cbc_enc_jmp_table:
-+ .long 0
-+ .long .L016ej1
-+ .long .L015ej2
-+ .long .L014ej3
-+ .long .L012ej4
-+ .long .L011ej5
-+ .long .L010ej6
-+ .long .L009ej7
-+.align 16
-+.L027cbc_dec_jmp_table:
-+ .long 0
-+ .long .L026dj1
-+ .long .L025dj2
-+ .long .L024dj3
-+ .long .L022dj4
-+ .long .L021dj5
-+ .long .L020dj6
-+ .long .L019dj7
-+.des_ncbc_encrypt_end:
-+ .size des_ncbc_encrypt , .des_ncbc_encrypt_end-des_ncbc_encrypt
-+.ident "desasm.pl"
-+.text
-+ .align 16
-+.globl des_ede3_cbc_encrypt
-+ .type des_ede3_cbc_encrypt , @function
-+des_ede3_cbc_encrypt:
-+
-+ pushl %ebp
-+ pushl %ebx
-+ pushl %esi
-+ pushl %edi
-+ movl 28(%esp), %ebp
-+
-+ movl 44(%esp), %ebx
-+ movl (%ebx), %esi
-+ movl 4(%ebx), %edi
-+ pushl %edi
-+ pushl %esi
-+ pushl %edi
-+ pushl %esi
-+ movl %esp, %ebx
-+ movl 36(%esp), %esi
-+ movl 40(%esp), %edi
-+
-+ movl 64(%esp), %ecx
-+
-+ movl 56(%esp), %eax
-+ pushl %eax
-+
-+ movl 56(%esp), %eax
-+ pushl %eax
-+
-+ movl 56(%esp), %eax
-+ pushl %eax
-+ pushl %ebx
-+ cmpl $0, %ecx
-+ jz .L028decrypt
-+ andl $4294967288, %ebp
-+ movl 16(%esp), %eax
-+ movl 20(%esp), %ebx
-+ jz .L029encrypt_finish
-+.L030encrypt_loop:
-+ movl (%esi), %ecx
-+ movl 4(%esi), %edx
-+ xorl %ecx, %eax
-+ xorl %edx, %ebx
-+ movl %eax, 16(%esp)
-+ movl %ebx, 20(%esp)
-+ call des_encrypt3
-+ movl 16(%esp), %eax
-+ movl 20(%esp), %ebx
-+ movl %eax, (%edi)
-+ movl %ebx, 4(%edi)
-+ addl $8, %esi
-+ addl $8, %edi
-+ subl $8, %ebp
-+ jnz .L030encrypt_loop
-+.L029encrypt_finish:
-+ movl 60(%esp), %ebp
-+ andl $7, %ebp
-+ jz .L031finish
-+ xorl %ecx, %ecx
-+ xorl %edx, %edx
-+ movl .L032cbc_enc_jmp_table(,%ebp,4),%ebp
-+ jmp *%ebp
-+.L033ej7:
-+ movb 6(%esi), %dh
-+ sall $8, %edx
-+.L034ej6:
-+ movb 5(%esi), %dh
-+.L035ej5:
-+ movb 4(%esi), %dl
-+.L036ej4:
-+ movl (%esi), %ecx
-+ jmp .L037ejend
-+.L038ej3:
-+ movb 2(%esi), %ch
-+ sall $8, %ecx
-+.L039ej2:
-+ movb 1(%esi), %ch
-+.L040ej1:
-+ movb (%esi), %cl
-+.L037ejend:
-+ xorl %ecx, %eax
-+ xorl %edx, %ebx
-+ movl %eax, 16(%esp)
-+ movl %ebx, 20(%esp)
-+ call des_encrypt3
-+ movl 16(%esp), %eax
-+ movl 20(%esp), %ebx
-+ movl %eax, (%edi)
-+ movl %ebx, 4(%edi)
-+ jmp .L031finish
-+.align 16
-+.L028decrypt:
-+ andl $4294967288, %ebp
-+ movl 24(%esp), %eax
-+ movl 28(%esp), %ebx
-+ jz .L041decrypt_finish
-+.L042decrypt_loop:
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %eax, 16(%esp)
-+ movl %ebx, 20(%esp)
-+ call des_decrypt3
-+ movl 16(%esp), %eax
-+ movl 20(%esp), %ebx
-+ movl 24(%esp), %ecx
-+ movl 28(%esp), %edx
-+ xorl %eax, %ecx
-+ xorl %ebx, %edx
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %ecx, (%edi)
-+ movl %edx, 4(%edi)
-+ movl %eax, 24(%esp)
-+ movl %ebx, 28(%esp)
-+ addl $8, %esi
-+ addl $8, %edi
-+ subl $8, %ebp
-+ jnz .L042decrypt_loop
-+.L041decrypt_finish:
-+ movl 60(%esp), %ebp
-+ andl $7, %ebp
-+ jz .L031finish
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+ movl %eax, 16(%esp)
-+ movl %ebx, 20(%esp)
-+ call des_decrypt3
-+ movl 16(%esp), %eax
-+ movl 20(%esp), %ebx
-+ movl 24(%esp), %ecx
-+ movl 28(%esp), %edx
-+ xorl %eax, %ecx
-+ xorl %ebx, %edx
-+ movl (%esi), %eax
-+ movl 4(%esi), %ebx
-+.L043dj7:
-+ rorl $16, %edx
-+ movb %dl, 6(%edi)
-+ shrl $16, %edx
-+.L044dj6:
-+ movb %dh, 5(%edi)
-+.L045dj5:
-+ movb %dl, 4(%edi)
-+.L046dj4:
-+ movl %ecx, (%edi)
-+ jmp .L047djend
-+.L048dj3:
-+ rorl $16, %ecx
-+ movb %cl, 2(%edi)
-+ sall $16, %ecx
-+.L049dj2:
-+ movb %ch, 1(%esi)
-+.L050dj1:
-+ movb %cl, (%esi)
-+.L047djend:
-+ jmp .L031finish
-+.align 16
-+.L031finish:
-+ movl 76(%esp), %ecx
-+ addl $32, %esp
-+ movl %eax, (%ecx)
-+ movl %ebx, 4(%ecx)
-+ popl %edi
-+ popl %esi
-+ popl %ebx
-+ popl %ebp
-+ ret
-+.align 16
-+.L032cbc_enc_jmp_table:
-+ .long 0
-+ .long .L040ej1
-+ .long .L039ej2
-+ .long .L038ej3
-+ .long .L036ej4
-+ .long .L035ej5
-+ .long .L034ej6
-+ .long .L033ej7
-+.align 16
-+.L051cbc_dec_jmp_table:
-+ .long 0
-+ .long .L050dj1
-+ .long .L049dj2
-+ .long .L048dj3
-+ .long .L046dj4
-+ .long .L045dj5
-+ .long .L044dj6
-+ .long .L043dj7
-+.des_ede3_cbc_encrypt_end:
-+ .size des_ede3_cbc_encrypt , .des_ede3_cbc_encrypt_end-des_ede3_cbc_encrypt
-+.ident "desasm.pl"
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/ecb_enc.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,135 @@
-+/* crypto/des/ecb_enc.c */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+#include "des/des_locl.h"
-+#include "des/spr.h"
-+
-+char *libdes_version="libdes v 3.24 - 20-Apr-1996 - eay";
-+char *DES_version="DES part of SSLeay 0.8.2b 08-Jan-1998";
-+
-+/* RCSID $Id: ecb_enc.c,v 1.8 2004/08/04 15:57:22 mcr Exp $ */
-+/* This function is #ifdef'ed out (guarded by notdef) for the FreeS/WAN project. */
-+#ifdef notdef
-+char *des_options()
-+ {
-+ static int init=1; /* non-zero until buf has been formatted once */
-+ static char buf[32]; /* cached description string handed back to every caller */
-+
-+ if (init)
-+ {
-+ char *ptr,*unroll,*risc,*size; /* one word per compile-time option reported */
-+
-+ init=0;
-+#ifdef DES_PTR
-+ ptr="ptr";
-+#else
-+ ptr="idx";
-+#endif
-+#if defined(DES_RISC1) || defined(DES_RISC2)
-+#ifdef DES_RISC1
-+ risc="risc1";
-+#endif
-+#ifdef DES_RISC2
-+ risc="risc2"; /* wins over "risc1" when both macros are defined */
-+#endif
-+#else
-+ risc="cisc";
-+#endif
-+#ifdef DES_UNROLL
-+ unroll="16";
-+#else
-+ unroll="4";
-+#endif
-+ if (sizeof(DES_LONG) != sizeof(long))
-+ size="int";
-+ else
-+ size="long";
-+ sprintf(buf,"des(%s,%s,%s,%s)",ptr,risc,unroll,size); /* longest result "des(ptr,risc1,16,long)" fits in buf[32] */
-+ }
-+ return(buf); /* pointer to static storage; the caller must not free it */
-+ }
-+#endif
-+
-+
-+void des_ecb_encrypt(input, output, ks, enc)
-+des_cblock (*input); /* source block, read through the two c2l() calls below */
-+des_cblock (*output); /* destination block, written through the two l2c() calls below */
-+des_key_schedule ks; /* key schedule, handed unchanged to des_encrypt() */
-+int enc; /* direction flag, handed unchanged to des_encrypt() */
-+ {
-+ register DES_LONG l;
-+ register unsigned char *in,*out;
-+ DES_LONG ll[2]; /* the block as two DES_LONG words, the form des_encrypt() takes */
-+
-+#ifdef OCF_ASSIST /* hand the whole operation to OCF when it reports DES/3DES support */
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES) {
-+ ocf_des_ecb_encrypt(input, output, ks, enc);
-+ return;
-+ }
-+#endif
-+
-+ in=(unsigned char *)input;
-+ out=(unsigned char *)output;
-+ c2l(in,l); ll[0]=l; /* c2l/l2c are defined outside this file; presumably 4 bytes <-> DES_LONG, advancing the pointer -- TODO confirm */
-+ c2l(in,l); ll[1]=l;
-+ des_encrypt(ll,ks,enc); /* transforms ll[] in place */
-+ l=ll[0]; l2c(l,out);
-+ l=ll[1]; l2c(l,out);
-+ l=ll[0]=ll[1]=0; /* wipe the local copies of the block before returning */
-+ }
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/ipsec_alg_3des.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,182 @@
-+/*
-+ * ipsec_alg 3DES cipher stubs
-+ *
-+ * Copyright (C) 2005 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * Adapted from ipsec_alg_aes.c by JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ *
-+ * ipsec_alg_aes.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+/*
-+ * special case: ipsec core modular with this static algo inside:
-+ * must avoid MODULE magic for this file
-+ */
-+#if defined(CONFIG_KLIPS_MODULE) && defined(CONFIG_KLIPS_ENC_3DES)
-+#undef MODULE
-+#endif
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+
-+#include <linux/kernel.h> /* printk() */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/string.h>
-+
-+/* Low freeswan header coupling */
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_alg.h"
-+#include "klips-crypto/des.h"
-+#include "openswan/ipsec_alg_3des.h"
-+
-+#define AES_CONTEXT_T aes_context
-+static int debug_3des=0; /* >0 enables the KERN_DEBUG traces in this file */
-+static int test_3des=0; /* non-zero: run ipsec_alg_test() after a successful registration */
-+static int excl_3des=0; /* non-zero: set IPSEC_ALG_ST_EXCL on the algorithm before registering */
-+
-+#if defined(CONFIG_KLIPS_ENC_3DES_MODULE)
-+MODULE_AUTHOR("Michael Richardson <mcr@xelerance.com>");
-+#ifdef module_param /* parameter names must match the variables declared above */
-+module_param(debug_3des, int, 0664);
-+module_param(test_3des, int, 0664);
-+module_param(excl_3des, int, 0664);
-+#else
-+MODULE_PARM(debug_3des, "i");
-+MODULE_PARM(test_3des, "i");
-+MODULE_PARM(excl_3des, "i");
-+#endif
-+#endif
-+
-+#define ESP_AES_MAC_KEY_SZ 16 /* 128 bit MAC key */
-+#define ESP_AES_MAC_BLK_LEN 16 /* 128 bit block */
-+
-+static int _3des_set_key(struct ipsec_alg_enc *alg, /* not used in this function */
-+ __u8 * key_e, const __u8 * key, /* key_e: TripleDES_context to fill; key: raw key bytes */
-+ size_t keysize) /* key length in bytes; must be 192/8 */
-+{
-+ int ret = 0;
-+ TripleDES_context *ctx = (TripleDES_context*)key_e;
-+
-+ if(keysize != 192/8) {
-+ return -EINVAL; /* negative errno, so a caller testing ret < 0 sees the failure */
-+ }
-+
-+ des_set_key((des_cblock *)(key + DES_KEY_SZ*0), ctx->s1); /* one schedule per DES_KEY_SZ-byte slice of the key */
-+ des_set_key((des_cblock *)(key + DES_KEY_SZ*1), ctx->s2);
-+ des_set_key((des_cblock *)(key + DES_KEY_SZ*2), ctx->s3); /* des_set_key() results are not checked here */
-+
-+ if (debug_3des > 0)
-+ printk(KERN_DEBUG "klips_debug:_3des_set_key:"
-+ "ret=%d key_e=%p key=%p keysize=%lu\n",
-+ ret, key_e, key, (unsigned long int) keysize);
-+ return ret; /* always 0 on this path */
-+}
-+
-+static int _3des_cbc_encrypt(struct ipsec_alg_enc *alg, /* not used in this function */
-+ __u8 * key_e, /* TripleDES_context holding the three key schedules */
-+ const __u8 * in, /* data buffer; passed below as both input and output */
-+ int ilen, const __u8 * iv, /* ilen: length handed to the cipher; iv: copied into miv before use */
-+ int encrypt) /* forwarded unchanged to des_ede3_cbc_encrypt() */
-+{
-+ TripleDES_context *ctx=(TripleDES_context*)key_e;
-+ des_cblock miv; /* local copy of the IV, so the const iv buffer itself is never handed to the cipher */
-+
-+ memcpy(&miv, iv, sizeof(miv));
-+
-+ if (debug_3des > 0)
-+ printk(KERN_DEBUG "klips_debug:_3des_cbc_encrypt:"
-+ "key_e=%p in=%p ilen=%d iv=%p encrypt=%d\n",
-+ key_e, in, ilen, iv, encrypt);
-+
-+ des_ede3_cbc_encrypt((des_cblock *)in, /* NOTE(review): casts away const; output pointer is the same buffer */
-+ (des_cblock *)in,
-+ ilen,
-+ ctx->s1,
-+ ctx->s2,
-+ ctx->s3,
-+ &miv, encrypt);
-+ return 1; /* constant result; NOTE(review): confirm what the ipsec_alg caller expects here */
-+}
-+
-+static struct ipsec_alg_enc ipsec_alg_3DES = { /* descriptor registered by ipsec_3des_init() */
-+ ixt_common: { ixt_version: IPSEC_ALG_VERSION,
-+ ixt_refcnt: ATOMIC_INIT(0),
-+ ixt_name: "3des",
-+ ixt_blocksize: ESP_3DES_CBC_BLK_LEN,
-+ ixt_support: {
-+ ias_exttype: IPSEC_ALG_TYPE_ENCRYPT,
-+ ias_id: ESP_3DES,
-+ //ias_ivlen: 64,
-+ ias_keyminbits: ESP_3DES_KEY_SZ*8, /* min == max: only one key size is advertised */
-+ ias_keymaxbits: ESP_3DES_KEY_SZ*8,
-+ },
-+ },
-+#if defined(MODULE_KLIPS_ENC_3DES_MODULE) /* NOTE(review): rest of file tests CONFIG_KLIPS_ENC_3DES_MODULE; confirm this name (and field placement) before changing */
-+ ixt_module: THIS_MODULE,
-+#endif
-+ ixt_e_keylen: ESP_3DES_KEY_SZ*8,
-+ ixt_e_ctx_size: sizeof(TripleDES_context), /* size of the key_e buffer the callbacks cast to TripleDES_context */
-+ ixt_e_set_key: _3des_set_key,
-+ ixt_e_cbc_encrypt:_3des_cbc_encrypt,
-+};
-+
-+#if defined(CONFIG_KLIPS_ENC_3DES_MODULE) /* pick the module or built-in flavour of the init wrapper */
-+IPSEC_ALG_MODULE_INIT_MOD( ipsec_3des_init )
-+#else
-+IPSEC_ALG_MODULE_INIT_STATIC( ipsec_3des_init )
-+#endif
-+{
-+ int ret, test_ret; /* ret: registration result (returned); test_ret: self-test result (only logged) */
-+
-+ if (excl_3des) ipsec_alg_3DES.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL;
-+ ret=register_ipsec_alg_enc(&ipsec_alg_3DES);
-+ printk("ipsec_3des_init(alg_type=%d alg_id=%d name=%s): ret=%d\n",
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype,
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_id,
-+ ipsec_alg_3DES.ixt_common.ixt_name,
-+ ret);
-+ if (ret==0 && test_3des) { /* optional self-test, only after a successful registration */
-+ test_ret=ipsec_alg_test(
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype,
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_id,
-+ test_3des);
-+ printk("ipsec_3des_init(alg_type=%d alg_id=%d): test_ret=%d\n",
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_exttype,
-+ ipsec_alg_3DES.ixt_common.ixt_support.ias_id,
-+ test_ret);
-+ }
-+ return ret; /* the self-test outcome does not change the return value */
-+}
-+
-+#if defined(CONFIG_KLIPS_ENC_3DES_MODULE) /* pick the module or built-in flavour of the exit wrapper */
-+IPSEC_ALG_MODULE_EXIT_MOD( ipsec_3des_fini )
-+#else
-+IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_3des_fini )
-+#endif
-+{
-+ unregister_ipsec_alg_enc(&ipsec_alg_3DES); /* undo the registration made in ipsec_3des_init(); result is ignored */
-+ return;
-+}
-+
-+/* Dual, because 3des code is 4-clause BSD licensed */
-+#ifdef MODULE_LICENSE
-+MODULE_LICENSE("Dual BSD/GPL");
-+#endif
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/des/set_key.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,251 @@
-+/* crypto/des/set_key.c */
-+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
-+ * All rights reserved.
-+ *
-+ * This package is an SSL implementation written
-+ * by Eric Young (eay@cryptsoft.com).
-+ * The implementation was written so as to conform with Netscapes SSL.
-+ *
-+ * This library is free for commercial and non-commercial use as long as
-+ * the following conditions are aheared to. The following conditions
-+ * apply to all code found in this distribution, be it the RC4, RSA,
-+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
-+ * included with this distribution is covered by the same copyright terms
-+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
-+ *
-+ * Copyright remains Eric Young's, and as such any Copyright notices in
-+ * the code are not to be removed.
-+ * If this package is used in a product, Eric Young should be given attribution
-+ * as the author of the parts of the library used.
-+ * This can be in the form of a textual message at program startup or
-+ * in documentation (online or textual) provided with the package.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * "This product includes cryptographic software written by
-+ * Eric Young (eay@cryptsoft.com)"
-+ * The word 'cryptographic' can be left out if the rouines from the library
-+ * being used are not cryptographic related :-).
-+ * 4. If you include any Windows specific code (or a derivative thereof) from
-+ * the apps directory (application code) you must include an acknowledgement:
-+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * The licence and distribution terms for any publically available version or
-+ * derivative of this code cannot be changed. i.e. this code cannot simply be
-+ * copied and put under another distribution licence
-+ * [including the GNU Public Licence.]
-+ */
-+
-+/* set_key.c v 1.4 eay 24/9/91
-+ * 1.4 Speed up by 400% :-)
-+ * 1.3 added register declarations.
-+ * 1.2 unrolled make_key_sched a bit more
-+ * 1.1 added norm_expand_bits
-+ * 1.0 First working version
-+ */
-+#include "des/des_locl.h"
-+#include "des/podd.h"
-+#include "des/sk.h"
-+
-+#ifndef NOPROTO
-+static int check_parity(des_cblock (*key));
-+#else
-+static int check_parity();
-+#endif
-+
-+int des_check_key=0;
-+
-+void des_set_odd_parity(key) /* rewrite every key byte through the odd_parity[] table, in place */
-+des_cblock (*key); /* key to adjust; modified by this call */
-+ {
-+ int i;
-+
-+ for (i=0; i<DES_KEY_SZ; i++)
-+ (*key)[i]=odd_parity[(*key)[i]]; /* odd_parity[] is defined outside this file (des/podd.h is included above) */
-+ }
-+
-+static int check_parity(key) /* returns 1 when every key byte already equals its odd_parity[] entry, else 0 */
-+des_cblock (*key); /* key to inspect; not modified */
-+ {
-+ int i;
-+
-+ for (i=0; i<DES_KEY_SZ; i++)
-+ {
-+ if ((*key)[i] != odd_parity[(*key)[i]])
-+ return(0); /* first mismatching byte ends the scan */
-+ }
-+ return(1);
-+ }
-+
-+/* Weak and semi-weak keys as taken from
-+ * %A D.W. Davies
-+ * %A W.L. Price
-+ * %T Security for Computer Networks
-+ * %I John Wiley & Sons
-+ * %D 1984
-+ * Many thanks to smb@ulysses.att.com (Steven Bellovin) for the reference
-+ * (and actual cblock values).
-+ */
-+#define NUM_WEAK_KEY 16
-+static des_cblock weak_keys[NUM_WEAK_KEY]={
-+ /* weak keys */
-+ {0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01},
-+ {0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE,0xFE},
-+ {0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F},
-+ {0xE0,0xE0,0xE0,0xE0,0xE0,0xE0,0xE0,0xE0},
-+ /* semi-weak keys */
-+ {0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE},
-+ {0xFE,0x01,0xFE,0x01,0xFE,0x01,0xFE,0x01},
-+ {0x1F,0xE0,0x1F,0xE0,0x0E,0xF1,0x0E,0xF1},
-+ {0xE0,0x1F,0xE0,0x1F,0xF1,0x0E,0xF1,0x0E},
-+ {0x01,0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1},
-+ {0xE0,0x01,0xE0,0x01,0xF1,0x01,0xF1,0x01},
-+ {0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E,0xFE},
-+ {0xFE,0x1F,0xFE,0x1F,0xFE,0x0E,0xFE,0x0E},
-+ {0x01,0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E},
-+ {0x1F,0x01,0x1F,0x01,0x0E,0x01,0x0E,0x01},
-+ {0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1,0xFE},
-+ {0xFE,0xE0,0xFE,0xE0,0xFE,0xF1,0xFE,0xF1}};
-+
-+int des_is_weak_key(key) /* returns 1 if key equals an entry of weak_keys[], else 0 */
-+des_cblock (*key); /* key to look up; not modified */
-+ {
-+ int i;
-+
-+ for (i=0; i<NUM_WEAK_KEY; i++)
-+ /* Added == 0 to comparison, I obviously don't run
-+ * this section very often :-(, thanks to
-+ * engineering@MorningStar.Com for the fix
-+ * eay 93/06/29
-+ * Another problem, I was comparing only the first 4
-+ * bytes, 97/03/18 */
-+ if (memcmp(weak_keys[i],key,sizeof(des_cblock)) == 0) return(1);
-+ return(0);
-+ }
-+
-+/* NOW DEFINED IN des_locl.h
-+ * See ecb_encrypt.c for a pseudo description of these macros.
-+ * #define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\
-+ * (b)^=(t),\
-+ * (a)=((a)^((t)<<(n))))
-+ */
-+
-+#define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\
-+ (a)=(a)^(t)^(t>>(16-(n))))
-+
-+/* Build the key schedule for key. Returns 0 on success; when des_check_key
-+ * is non-zero, returns -1 on a key parity error and -2 on a weak key
-+ * (both checks run before anything is written to schedule).
-+ */
-+int des_set_key(key, schedule)
-+des_cblock (*key); /* input key; only read */
-+des_key_schedule schedule; /* output; filled through the DES_LONG pointer k below */
-+ {
-+ static int shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; /* per round: non-zero = rotate by 2, zero = rotate by 1 */
-+ register DES_LONG c,d,t,s,t2;
-+ register unsigned char *in;
-+ register DES_LONG *k;
-+ register int i;
-+
-+ if (des_check_key) /* validation is optional; the global defaults to 0 in this file */
-+ {
-+ if (!check_parity(key))
-+ return(-1);
-+
-+ if (des_is_weak_key(key))
-+ return(-2);
-+ }
-+
-+#ifdef OCF_ASSIST /* hand key setup to OCF when it reports DES/3DES support; its result is returned as-is */
-+ if (ocf_des_assist() & OCF_PROVIDES_DES_3DES)
-+ return(ocf_des_set_key(key, schedule));
-+#endif
-+
-+ k=(DES_LONG *)schedule;
-+ in=(unsigned char *)key;
-+
-+ c2l(in,c);
-+ c2l(in,d);
-+
-+ /* do PC1 in 60 simple operations */
-+/* PERM_OP(d,c,t,4,0x0f0f0f0fL);
-+ HPERM_OP(c,t,-2, 0xcccc0000L);
-+ HPERM_OP(c,t,-1, 0xaaaa0000L);
-+ HPERM_OP(c,t, 8, 0x00ff0000L);
-+ HPERM_OP(c,t,-1, 0xaaaa0000L);
-+ HPERM_OP(d,t,-8, 0xff000000L);
-+ HPERM_OP(d,t, 8, 0x00ff0000L);
-+ HPERM_OP(d,t, 2, 0x33330000L);
-+ d=((d&0x00aa00aaL)<<7L)|((d&0x55005500L)>>7L)|(d&0xaa55aa55L);
-+ d=(d>>8)|((c&0xf0000000L)>>4);
-+ c&=0x0fffffffL; */
-+
-+ /* I now do it in 47 simple operations :-)
-+ * Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov)
-+ * for the inspiration. :-) */
-+ PERM_OP (d,c,t,4,0x0f0f0f0fL);
-+ HPERM_OP(c,t,-2,0xcccc0000L);
-+ HPERM_OP(d,t,-2,0xcccc0000L);
-+ PERM_OP (d,c,t,1,0x55555555L);
-+ PERM_OP (c,d,t,8,0x00ff00ffL);
-+ PERM_OP (d,c,t,1,0x55555555L);
-+ d= (((d&0x000000ffL)<<16L)| (d&0x0000ff00L) |
-+ ((d&0x00ff0000L)>>16L)|((c&0xf0000000L)>>4L));
-+ c&=0x0fffffffL; /* c and d are kept as 28-bit values from here on */
-+
-+ for (i=0; i<ITERATIONS; i++) /* each pass stores two DES_LONG words through k */
-+ {
-+ if (shifts2[i])
-+ { c=((c>>2L)|(c<<26L)); d=((d>>2L)|(d<<26L)); }
-+ else
-+ { c=((c>>1L)|(c<<27L)); d=((d>>1L)|(d<<27L)); }
-+ c&=0x0fffffffL; /* drop the bits that the 28-bit rotate pushed above bit 27 */
-+ d&=0x0fffffffL;
-+ /* could be a few less shifts but I am too lazy at this
-+ * point in time to investigate */
-+ s= des_skb[0][ (c )&0x3f ]|
-+ des_skb[1][((c>> 6)&0x03)|((c>> 7L)&0x3c)]|
-+ des_skb[2][((c>>13)&0x0f)|((c>>14L)&0x30)]|
-+ des_skb[3][((c>>20)&0x01)|((c>>21L)&0x06) |
-+ ((c>>22L)&0x38)];
-+ t= des_skb[4][ (d )&0x3f ]|
-+ des_skb[5][((d>> 7L)&0x03)|((d>> 8L)&0x3c)]|
-+ des_skb[6][ (d>>15L)&0x3f ]|
-+ des_skb[7][((d>>21L)&0x0f)|((d>>22L)&0x30)];
-+
-+ /* table contained 0213 4657 */
-+ t2=((t<<16L)|(s&0x0000ffffL))&0xffffffffL;
-+ *(k++)=ROTATE(t2,30)&0xffffffffL;
-+
-+ t2=((s>>16L)|(t&0xffff0000L));
-+ *(k++)=ROTATE(t2,26)&0xffffffffL;
-+ }
-+ return(0);
-+ }
-+
-+int des_key_sched(key, schedule) /* alternate name for des_set_key(): same arguments, same return value */
-+des_cblock (*key);
-+des_key_schedule schedule;
-+ {
-+ return(des_set_key(key,schedule));
-+ }
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/goodmask.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,150 @@
-+/*
-+ * minor utilities for subnet-mask manipulation
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: goodmask.c,v 1.12 2004/07/10 07:43:47 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+#ifndef ABITS
-+#define ABITS 32 /* bits in an IPv4 address */
-+#endif
-+
-+/* This file does not use sysdep.h, otherwise this should go into
-+ * ports/darwin/include/sysdep.h
-+ */
-+#ifndef s6_addr32
-+#define s6_addr32 __u6_addr.__u6_addr32
-+#endif
-+
-+/*
-+ - goodmask - is this a good (^1*0*$) subnet mask?
-+ * You are not expected to understand this. See Henry S. Warren Jr,
-+ * "Functions realizable with word-parallel logical and two's-complement
-+ * addition instructions", CACM 20.6 (June 1977), p.439.
-+ */
-+int /* predicate */
-+goodmask(mask)
-+struct in_addr mask; /* candidate mask; s_addr is converted with ntohl() before testing */
-+{
-+ unsigned long x = ntohl(mask.s_addr);
-+ /* clear rightmost contiguous string of 1-bits */
-+# define CRCS1B(x) (((x|(x-1))+1)&x)
-+# define TOPBIT (1UL << 31) /* bit 31, the leftmost bit of a 32-bit mask */
-+
-+ /* either zero, or has one string of 1-bits which is left-justified */
-+ if (x == 0 || (CRCS1B(x) == 0 && (x&TOPBIT)))
-+ return 1; /* acceptable mask */
-+ return 0; /* 1-bits are not a single run starting at bit 31 */
-+}
-+
-+/*
-+ - masktobits - how many bits in this mask?
-+ * The algorithm is essentially a binary search, but highly optimized
-+ * for this particular task.
-+ */
-+int /* -1 means !goodmask() */
-+masktobits(mask)
-+struct in_addr mask; /* mask to measure; s_addr is converted with ntohl() */
-+{
-+ unsigned long m = ntohl(mask.s_addr);
-+ int masklen; /* result, assembled one binary digit at a time (16, 8, 4, 2, 1) */
-+
-+ if (!goodmask(mask))
-+ return -1;
-+
-+ if (m&0x00000001UL) /* goodmask() passed, so a set lowest bit means all 32 bits are set */
-+ return 32;
-+ masklen = 0;
-+ if (m&(0x0000ffffUL<<1)) { /* <<1 for 1-origin numbering */
-+ masklen |= 0x10;
-+ m <<= 16; /* continue the search in what was the lower half */
-+ }
-+ if (m&(0x00ff0000UL<<1)) {
-+ masklen |= 0x08;
-+ m <<= 8;
-+ }
-+ if (m&(0x0f000000UL<<1)) {
-+ masklen |= 0x04;
-+ m <<= 4;
-+ }
-+ if (m&(0x30000000UL<<1)) {
-+ masklen |= 0x02;
-+ m <<= 2;
-+ }
-+ if (m&(0x40000000UL<<1))
-+ masklen |= 0x01;
-+
-+ return masklen;
-+}
-+
-+/*
-+ - bitstomask - return a mask with this many high bits on
-+ */
-+struct in_addr
-+bitstomask(n)
-+int n; /* number of leading 1-bits wanted, 0..ABITS */
-+{
-+ struct in_addr result;
-+
-+ if (n > 0 && n <= ABITS)
-+ result.s_addr = htonl(~((1UL << (ABITS - n)) - 1)); /* shift count is 0..ABITS-1 here */
-+ else if (n == 0)
-+ result.s_addr = 0;
-+ else
-+ result.s_addr = 0; /* best error report we can do */
-+ return result; /* note: out-of-range n is indistinguishable from n == 0 */
-+}
-+
-+/*
-+ - bitstomask6 - return a mask with this many high bits on
-+ */
-+struct in6_addr
-+bitstomask6(n)
-+int n; /* number of leading 1-bits wanted, 0..128; any other value yields the all-zero mask */
-+{
-+ struct in6_addr result; /* filled one 32-bit word at a time; every branch sets all four words */
-+
-+ if (n > 0 && n <= 32) {
-+ result.s6_addr32[0] = htonl(~((1UL << (32 - n)) - 1));
-+ result.s6_addr32[1]=0;
-+ result.s6_addr32[2]=0;
-+ result.s6_addr32[3]=0;
-+ }
-+ else if (n > 32 && n <= 64) {
-+ result.s6_addr32[0]=0xffffffffUL;
-+ result.s6_addr32[1] = htonl(~((1UL << (64 - n)) - 1));
-+ result.s6_addr32[2]=0;
-+ result.s6_addr32[3]=0;
-+ }
-+ else if (n > 64 && n <= 96) {
-+ result.s6_addr32[0]=0xffffffffUL;
-+ result.s6_addr32[1]=0xffffffffUL;
-+ result.s6_addr32[2] = htonl(~((1UL << (96 - n)) - 1));
-+ result.s6_addr32[3]=0;
-+ }
-+ else if (n > 96 && n <= 128) {
-+ result.s6_addr32[0]=0xffffffff;
-+ result.s6_addr32[1]=0xffffffff;
-+ result.s6_addr32[2]=0xffffffff;
-+ result.s6_addr32[3] = htonl(~((1UL << (128 - n)) - 1));
-+ }
-+ else { /* n == 0 or out of range: clear each of the four words */
-+ result.s6_addr32[0] = 0;
-+ result.s6_addr32[1] = 0;
-+ result.s6_addr32[2] = 0;
-+ result.s6_addr32[3] = 0;
-+ }
-+
-+ return result;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infblock.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,403 @@
-+/* infblock.c -- interpret and process block types to last block
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "infblock.h"
-+#include "inftrees.h"
-+#include "infcodes.h"
-+#include "infutil.h"
-+
-+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
-+
-+/* simplify the use of the inflate_huft type with some defines */
-+#define exop word.what.Exop
-+#define bits word.what.Bits
-+
-+/* Table for deflate from PKZIP's appnote.txt. */
-+local const uInt border[] = { /* Order of the bit length code lengths */
-+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-+
-+/*
-+ Notes beyond the 1.93a appnote.txt:
-+
-+ 1. Distance pointers never point before the beginning of the output
-+ stream.
-+ 2. Distance pointers can point back across blocks, up to 32k away.
-+ 3. There is an implied maximum of 7 bits for the bit length table and
-+ 15 bits for the actual data.
-+ 4. If only one code exists, then it is encoded using one bit. (Zero
-+ would be more efficient, but perhaps a little confusing.) If two
-+ codes exist, they are coded using one bit each (0 and 1).
-+ 5. There is no way of sending zero distance codes--a dummy must be
-+ sent if there are none. (History: a pre 2.0 version of PKZIP would
-+ store blocks with no distance codes, but this was discovered to be
-+ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
-+ zero distance codes, which is sent as one code of zero bits in
-+ length.
-+ 6. There are up to 286 literal/length codes. Code 256 represents the
-+ end-of-block. Note however that the static length tree defines
-+ 288 codes just to fill out the Huffman codes. Codes 286 and 287
-+ cannot be used though, since there is no length base or extra bits
-+ defined for them. Similarily, there are up to 30 distance codes.
-+ However, static trees define 32 codes (all 5 bits) to fill out the
-+ Huffman codes, but the last two had better not show up in the data.
-+ 7. Unzip can check dynamic Huffman blocks for complete code sets.
-+ The exception is that a single code would not be complete (see #4).
-+ 8. The five bits following the block type is really the number of
-+ literal codes sent minus 257.
-+ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
-+ (1+6+6). Therefore, to output three times the length, you output
-+ three codes (1+1+1), whereas to output four times the same length,
-+ you only need two codes (1+3). Hmm.
-+ 10. In the tree reconstruction algorithm, Code = Code + Increment
-+ only if BitLength(i) is not zero. (Pretty obvious.)
-+ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
-+ 12. Note: length code 284 can represent 227-258, but length code 285
-+ really is 258. The last length deserves its own, short code
-+ since it gets used a lot in very redundant files. The length
-+ 258 is special since 258 - 3 (the min match length) is 255.
-+ 13. The literal/length and distance code bit lengths are read as a
-+ single stream of lengths. It is possible (and advantageous) for
-+ a repeat code (16, 17, or 18) to go across the boundary between
-+ the two sets of lengths.
-+ */
-+
-+
-+void inflate_blocks_reset(s, z, c) /* return the block decoder s to its initial TYPE state */
-+inflate_blocks_statef *s; /* state to reset */
-+z_streamp z; /* stream whose allocator is used and whose adler field is refreshed */
-+uLongf *c; /* if not Z_NULL, receives the check value held before the reset */
-+{
-+ if (c != Z_NULL)
-+ *c = s->check;
-+ if (s->mode == BTREE || s->mode == DTREE)
-+ ZFREE(z, s->sub.trees.blens); /* release the bit-length table held in these two modes */
-+ if (s->mode == CODES)
-+ inflate_codes_free(s->sub.decode.codes, z); /* release the codes state held in this mode */
-+ s->mode = TYPE;
-+ s->bitk = 0;
-+ s->bitb = 0;
-+ s->read = s->write = s->window; /* both window pointers back to the start of the window */
-+ if (s->checkfn != Z_NULL)
-+ z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0); /* restart the running check value */
-+ Tracev((stderr, "inflate: blocks reset\n"));
-+}
-+
-+
-+inflate_blocks_statef *inflate_blocks_new(z, c, w) /* allocate a block decoder; returns Z_NULL if any allocation fails */
-+z_streamp z; /* stream supplying the allocator */
-+check_func c; /* check function stored in s->checkfn (may be Z_NULL, see inflate_blocks_reset) */
-+uInt w; /* window size in bytes */
-+{
-+ inflate_blocks_statef *s;
-+
-+ if ((s = (inflate_blocks_statef *)ZALLOC
-+ (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL)
-+ return s; /* s is Z_NULL here */
-+ if ((s->hufts =
-+ (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL)
-+ {
-+ ZFREE(z, s); /* undo the first allocation */
-+ return Z_NULL;
-+ }
-+ if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL)
-+ {
-+ ZFREE(z, s->hufts); /* undo both earlier allocations */
-+ ZFREE(z, s);
-+ return Z_NULL;
-+ }
-+ s->end = s->window + w; /* one past the last window byte */
-+ s->checkfn = c;
-+ s->mode = TYPE; /* set before the reset so it does not free anything based on an unset mode */
-+ Tracev((stderr, "inflate: blocks allocated\n"));
-+ inflate_blocks_reset(s, z, Z_NULL);
-+ return s;
-+}
-+
-+
-+int inflate_blocks(s, z, r)
-+inflate_blocks_statef *s;
-+z_streamp z;
-+int r;
-+{
-+ uInt t; /* temporary storage */
-+ uLong b; /* bit buffer */
-+ uInt k; /* bits in bit buffer */
-+ Bytef *p; /* input data pointer */
-+ uInt n; /* bytes available there */
-+ Bytef *q; /* output window write pointer */
-+ uInt m; /* bytes to end of window or read pointer */
-+
-+ /* copy input/output information to locals (UPDATE macro restores) */
-+ LOAD
-+
-+ /* process input based on current state */
-+ while (1) switch (s->mode) /* re-dispatch on s->mode after every break */
-+ {
-+ case TYPE:
-+ NEEDBITS(3)
-+ t = (uInt)b & 7;
-+ s->last = t & 1; /* lowest of the three header bits: last-block flag */
-+ switch (t >> 1) /* remaining two header bits: block type */
-+ {
-+ case 0: /* stored */
-+ Tracev((stderr, "inflate: stored block%s\n",
-+ s->last ? " (last)" : ""));
-+ DUMPBITS(3)
-+ t = k & 7; /* go to byte boundary */
-+ DUMPBITS(t)
-+ s->mode = LENS; /* get length of stored block */
-+ break;
-+ case 1: /* fixed */
-+ Tracev((stderr, "inflate: fixed codes block%s\n",
-+ s->last ? " (last)" : ""));
-+ {
-+ uInt bl, bd;
-+ inflate_huft *tl, *td;
-+
-+ inflate_trees_fixed(&bl, &bd, &tl, &td, z);
-+ s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
-+ if (s->sub.decode.codes == Z_NULL)
-+ {
-+ r = Z_MEM_ERROR;
-+ LEAVE
-+ }
-+ }
-+ DUMPBITS(3)
-+ s->mode = CODES;
-+ break;
-+ case 2: /* dynamic */
-+ Tracev((stderr, "inflate: dynamic codes block%s\n",
-+ s->last ? " (last)" : ""));
-+ DUMPBITS(3)
-+ s->mode = TABLE;
-+ break;
-+ case 3: /* illegal */
-+ DUMPBITS(3)
-+ s->mode = BAD;
-+ z->msg = (char*)"invalid block type";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ }
-+ break;
-+ case LENS:
-+ NEEDBITS(32)
-+ if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) /* upper 16 bits must be the one's complement of the lower 16 */
-+ {
-+ s->mode = BAD;
-+ z->msg = (char*)"invalid stored block lengths";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ }
-+ s->sub.left = (uInt)b & 0xffff;
-+ b = k = 0; /* dump bits */
-+ Tracev((stderr, "inflate: stored length %u\n", s->sub.left));
-+ s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); /* an empty stored block skips STORED entirely */
-+ break;
-+ case STORED:
-+ if (n == 0)
-+ LEAVE
-+ NEEDOUT
-+ t = s->sub.left;
-+ if (t > n) t = n; /* copy no more than the input available ... */
-+ if (t > m) t = m; /* ... and no more than the output space available */
-+ zmemcpy(q, p, t);
-+ p += t; n -= t;
-+ q += t; m -= t;
-+ if ((s->sub.left -= t) != 0)
-+ break;
-+ Tracev((stderr, "inflate: stored end, %lu total out\n",
-+ z->total_out + (q >= s->read ? q - s->read :
-+ (s->end - s->read) + (q - s->window))));
-+ s->mode = s->last ? DRY : TYPE;
-+ break;
-+ case TABLE:
-+ NEEDBITS(14)
-+ s->sub.trees.table = t = (uInt)b & 0x3fff;
-+#ifndef PKZIP_BUG_WORKAROUND
-+ if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
-+ {
-+ s->mode = BAD;
-+ z->msg = (char*)"too many length or distance symbols";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ }
-+#endif
-+ t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); /* number of code lengths to store in blens */
-+ if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
-+ {
-+ r = Z_MEM_ERROR;
-+ LEAVE
-+ }
-+ DUMPBITS(14)
-+ s->sub.trees.index = 0;
-+ Tracev((stderr, "inflate: table sizes ok\n"));
-+ s->mode = BTREE; /* fall through */
-+ case BTREE:
-+ while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
-+ {
-+ NEEDBITS(3)
-+ s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
-+ DUMPBITS(3)
-+ }
-+ while (s->sub.trees.index < 19) /* lengths not transmitted are zero */
-+ s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
-+ s->sub.trees.bb = 7;
-+ t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
-+ &s->sub.trees.tb, s->hufts, z);
-+ if (t != Z_OK)
-+ {
-+ r = t;
-+ if (r == Z_DATA_ERROR)
-+ {
-+ ZFREE(z, s->sub.trees.blens); /* mode becomes BAD, so a later reset will not free blens again */
-+ s->mode = BAD;
-+ }
-+ LEAVE
-+ }
-+ s->sub.trees.index = 0;
-+ Tracev((stderr, "inflate: bits tree ok\n"));
-+ s->mode = DTREE; /* fall through */
-+ case DTREE:
-+ while (t = s->sub.trees.table,
-+ s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
-+ {
-+ inflate_huft *h;
-+ uInt i, j, c;
-+
-+ t = s->sub.trees.bb;
-+ NEEDBITS(t)
-+ h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
-+ t = h->bits;
-+ c = h->base;
-+ if (c < 16) /* a literal code length */
-+ {
-+ DUMPBITS(t)
-+ s->sub.trees.blens[s->sub.trees.index++] = c;
-+ }
-+ else /* c == 16..18 */
-+ {
-+ i = c == 18 ? 7 : c - 14; /* extra bits to read: 2, 3 or 7 for c = 16, 17, 18 */
-+ j = c == 18 ? 11 : 3; /* base repeat count before the extra bits are added */
-+ NEEDBITS(t + i)
-+ DUMPBITS(t)
-+ j += (uInt)b & inflate_mask[i];
-+ DUMPBITS(i)
-+ i = s->sub.trees.index;
-+ t = s->sub.trees.table;
-+ if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
-+ (c == 16 && i < 1)) /* repeat would overrun blens, or "repeat previous" with no previous entry */
-+ {
-+ ZFREE(z, s->sub.trees.blens);
-+ s->mode = BAD;
-+ z->msg = (char*)"invalid bit length repeat";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ }
-+ c = c == 16 ? s->sub.trees.blens[i - 1] : 0; /* 16 repeats the previous length; 17 and 18 repeat zero */
-+ do {
-+ s->sub.trees.blens[i++] = c;
-+ } while (--j);
-+ s->sub.trees.index = i;
-+ }
-+ }
-+ s->sub.trees.tb = Z_NULL;
-+ {
-+ uInt bl, bd;
-+ inflate_huft *tl, *td;
-+ inflate_codes_statef *c;
-+
-+ bl = 9; /* must be <= 9 for lookahead assumptions */
-+ bd = 6; /* must be <= 9 for lookahead assumptions */
-+ t = s->sub.trees.table;
-+ t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
-+ s->sub.trees.blens, &bl, &bd, &tl, &td,
-+ s->hufts, z);
-+ if (t != Z_OK)
-+ {
-+ if (t == (uInt)Z_DATA_ERROR)
-+ {
-+ ZFREE(z, s->sub.trees.blens);
-+ s->mode = BAD;
-+ }
-+ r = t;
-+ LEAVE
-+ }
-+ Tracev((stderr, "inflate: trees ok\n"));
-+ if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
-+ {
-+ r = Z_MEM_ERROR;
-+ LEAVE
-+ }
-+ s->sub.decode.codes = c;
-+ }
-+ ZFREE(z, s->sub.trees.blens);
-+ s->mode = CODES; /* fall through */
-+ case CODES:
-+ UPDATE
-+ if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
-+ return inflate_flush(s, z, r);
-+ r = Z_OK;
-+ inflate_codes_free(s->sub.decode.codes, z);
-+ LOAD
-+ Tracev((stderr, "inflate: codes end, %lu total out\n",
-+ z->total_out + (q >= s->read ? q - s->read :
-+ (s->end - s->read) + (q - s->window))));
-+ if (!s->last)
-+ {
-+ s->mode = TYPE;
-+ break;
-+ }
-+ s->mode = DRY; /* fall through */
-+ case DRY:
-+ FLUSH
-+ if (s->read != s->write) /* window still holds output that has not been consumed */
-+ LEAVE
-+ s->mode = DONE; /* fall through */
-+ case DONE:
-+ r = Z_STREAM_END;
-+ LEAVE
-+ case BAD:
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ default:
-+ r = Z_STREAM_ERROR;
-+ LEAVE
-+ }
-+}
-+
-+
-+int inflate_blocks_free(s, z) /* release s and everything it owns; always returns Z_OK */
-+inflate_blocks_statef *s; /* state to destroy; must not be used afterwards */
-+z_streamp z; /* stream supplying the allocator */
-+{
-+ inflate_blocks_reset(s, z, Z_NULL); /* frees any mode-dependent sub-state first */
-+ ZFREE(z, s->window);
-+ ZFREE(z, s->hufts);
-+ ZFREE(z, s);
-+ Tracev((stderr, "inflate: blocks freed\n"));
-+ return Z_OK;
-+}
-+
-+
-+void inflate_set_dictionary(s, d, n) /* preload the start of the window with dictionary bytes */
-+inflate_blocks_statef *s;
-+const Bytef *d; /* dictionary bytes to copy */
-+uInt n; /* byte count; not checked against the window size here -- the caller presumably limits it, TODO confirm */
-+{
-+ zmemcpy(s->window, d, n);
-+ s->read = s->write = s->window + n; /* both window pointers now sit just past the dictionary */
-+}
-+
-+
-+/* Returns true if inflate is currently at the end of a block generated
-+ * by Z_SYNC_FLUSH or Z_FULL_FLUSH.
-+ * IN assertion: s != Z_NULL
-+ */
-+int inflate_blocks_sync_point(s)
-+inflate_blocks_statef *s;
-+{
-+ return s->mode == LENS; /* i.e. a stored-block header was read and its length field is expected next */
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infblock.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,39 @@
-+/* infblock.h -- header to use infblock.c
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+struct inflate_blocks_state;
-+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
-+
-+extern inflate_blocks_statef * inflate_blocks_new OF((
-+ z_streamp z,
-+ check_func c, /* check function */
-+ uInt w)); /* window size */
-+
-+extern int inflate_blocks OF((
-+ inflate_blocks_statef *,
-+ z_streamp ,
-+ int)); /* initial return code */
-+
-+extern void inflate_blocks_reset OF((
-+ inflate_blocks_statef *,
-+ z_streamp ,
-+ uLongf *)); /* check value on output */
-+
-+extern int inflate_blocks_free OF((
-+ inflate_blocks_statef *,
-+ z_streamp));
-+
-+extern void inflate_set_dictionary OF((
-+ inflate_blocks_statef *s,
-+ const Bytef *d, /* dictionary */
-+ uInt n)); /* dictionary length */
-+
-+extern int inflate_blocks_sync_point OF((
-+ inflate_blocks_statef *s));
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infcodes.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,251 @@
-+/* infcodes.c -- process literals and length/distance pairs
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "inftrees.h"
-+#include "infblock.h"
-+#include "infcodes.h"
-+#include "infutil.h"
-+#include "inffast.h"
-+
-+/* simplify the use of the inflate_huft type with some defines */
-+#define exop word.what.Exop
-+#define bits word.what.Bits
-+
-+typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
-+ START, /* x: set up for LEN */
-+ LEN, /* i: get length/literal/eob next */
-+ LENEXT, /* i: getting length extra (have base) */
-+ DIST, /* i: get distance next */
-+ DISTEXT, /* i: getting distance extra */
-+ COPY, /* o: copying bytes in window, waiting for space */
-+ LIT, /* o: got literal, waiting for output space */
-+ WASH, /* o: got eob, possibly still output waiting */
-+ END, /* x: got eob and all data flushed */
-+ BADCODE} /* x: got error */
-+inflate_codes_mode;
-+
-+/* inflate codes private state */
-+struct inflate_codes_state {
-+
-+ /* mode */
-+ inflate_codes_mode mode; /* current inflate_codes mode */
-+
-+ /* mode dependent information */
-+ uInt len;
-+ union {
-+ struct {
-+ inflate_huft *tree; /* pointer into tree */
-+ uInt need; /* bits needed */
-+ } code; /* if LEN or DIST, where in tree */
-+ uInt lit; /* if LIT, literal */
-+ struct {
-+ uInt get; /* bits to get for extra */
-+ uInt dist; /* distance back to copy from */
-+ } copy; /* if EXT or COPY, where and how much */
-+ } sub; /* submode */
-+
-+ /* mode independent information */
-+ Byte lbits; /* ltree bits decoded per branch */
-+ Byte dbits; /* dtree bits decoder per branch */
-+ inflate_huft *ltree; /* literal/length/eob tree */
-+ inflate_huft *dtree; /* distance tree */
-+
-+};
-+
-+
-+inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z)
-+uInt bl, bd;
-+inflate_huft *tl;
-+inflate_huft *td; /* need separate declaration for Borland C++ */
-+z_streamp z;
-+{
-+ inflate_codes_statef *c;
-+
-+ if ((c = (inflate_codes_statef *)
-+ ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL)
-+ {
-+ c->mode = START;
-+ c->lbits = (Byte)bl;
-+ c->dbits = (Byte)bd;
-+ c->ltree = tl;
-+ c->dtree = td;
-+ Tracev((stderr, "inflate: codes new\n"));
-+ }
-+ return c;
-+}
-+
-+
-+int inflate_codes(s, z, r)
-+inflate_blocks_statef *s;
-+z_streamp z;
-+int r;
-+{
-+ uInt j; /* temporary storage */
-+ inflate_huft *t; /* temporary pointer */
-+ uInt e; /* extra bits or operation */
-+ uLong b; /* bit buffer */
-+ uInt k; /* bits in bit buffer */
-+ Bytef *p; /* input data pointer */
-+ uInt n; /* bytes available there */
-+ Bytef *q; /* output window write pointer */
-+ uInt m; /* bytes to end of window or read pointer */
-+ Bytef *f; /* pointer to copy strings from */
-+ inflate_codes_statef *c = s->sub.decode.codes; /* codes state */
-+
-+ /* copy input/output information to locals (UPDATE macro restores) */
-+ LOAD
-+
-+ /* process input and output based on current state */
-+ while (1) switch (c->mode)
-+ { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
-+ case START: /* x: set up for LEN */
-+#ifndef SLOW
-+ if (m >= 258 && n >= 10)
-+ {
-+ UPDATE
-+ r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z);
-+ LOAD
-+ if (r != Z_OK)
-+ {
-+ c->mode = r == Z_STREAM_END ? WASH : BADCODE;
-+ break;
-+ }
-+ }
-+#endif /* !SLOW */
-+ c->sub.code.need = c->lbits;
-+ c->sub.code.tree = c->ltree;
-+ c->mode = LEN;
-+ case LEN: /* i: get length/literal/eob next */
-+ j = c->sub.code.need;
-+ NEEDBITS(j)
-+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
-+ DUMPBITS(t->bits)
-+ e = (uInt)(t->exop);
-+ if (e == 0) /* literal */
-+ {
-+ c->sub.lit = t->base;
-+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
-+ "inflate: literal '%c'\n" :
-+ "inflate: literal 0x%02x\n", t->base));
-+ c->mode = LIT;
-+ break;
-+ }
-+ if (e & 16) /* length */
-+ {
-+ c->sub.copy.get = e & 15;
-+ c->len = t->base;
-+ c->mode = LENEXT;
-+ break;
-+ }
-+ if ((e & 64) == 0) /* next table */
-+ {
-+ c->sub.code.need = e;
-+ c->sub.code.tree = t + t->base;
-+ break;
-+ }
-+ if (e & 32) /* end of block */
-+ {
-+ Tracevv((stderr, "inflate: end of block\n"));
-+ c->mode = WASH;
-+ break;
-+ }
-+ c->mode = BADCODE; /* invalid code */
-+ z->msg = (char*)"invalid literal/length code";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ case LENEXT: /* i: getting length extra (have base) */
-+ j = c->sub.copy.get;
-+ NEEDBITS(j)
-+ c->len += (uInt)b & inflate_mask[j];
-+ DUMPBITS(j)
-+ c->sub.code.need = c->dbits;
-+ c->sub.code.tree = c->dtree;
-+ Tracevv((stderr, "inflate: length %u\n", c->len));
-+ c->mode = DIST;
-+ case DIST: /* i: get distance next */
-+ j = c->sub.code.need;
-+ NEEDBITS(j)
-+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
-+ DUMPBITS(t->bits)
-+ e = (uInt)(t->exop);
-+ if (e & 16) /* distance */
-+ {
-+ c->sub.copy.get = e & 15;
-+ c->sub.copy.dist = t->base;
-+ c->mode = DISTEXT;
-+ break;
-+ }
-+ if ((e & 64) == 0) /* next table */
-+ {
-+ c->sub.code.need = e;
-+ c->sub.code.tree = t + t->base;
-+ break;
-+ }
-+ c->mode = BADCODE; /* invalid code */
-+ z->msg = (char*)"invalid distance code";
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ case DISTEXT: /* i: getting distance extra */
-+ j = c->sub.copy.get;
-+ NEEDBITS(j)
-+ c->sub.copy.dist += (uInt)b & inflate_mask[j];
-+ DUMPBITS(j)
-+ Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist));
-+ c->mode = COPY;
-+ case COPY: /* o: copying bytes in window, waiting for space */
-+ f = q - c->sub.copy.dist;
-+ while (f < s->window) /* modulo window size-"while" instead */
-+ f += s->end - s->window; /* of "if" handles invalid distances */
-+ while (c->len)
-+ {
-+ NEEDOUT
-+ OUTBYTE(*f++)
-+ if (f == s->end)
-+ f = s->window;
-+ c->len--;
-+ }
-+ c->mode = START;
-+ break;
-+ case LIT: /* o: got literal, waiting for output space */
-+ NEEDOUT
-+ OUTBYTE(c->sub.lit)
-+ c->mode = START;
-+ break;
-+ case WASH: /* o: got eob, possibly more output */
-+ if (k > 7) /* return unused byte, if any */
-+ {
-+ Assert(k < 16, "inflate_codes grabbed too many bytes")
-+ k -= 8;
-+ n++;
-+ p--; /* can always return one */
-+ }
-+ FLUSH
-+ if (s->read != s->write)
-+ LEAVE
-+ c->mode = END;
-+ case END:
-+ r = Z_STREAM_END;
-+ LEAVE
-+ case BADCODE: /* x: got error */
-+ r = Z_DATA_ERROR;
-+ LEAVE
-+ default:
-+ r = Z_STREAM_ERROR;
-+ LEAVE
-+ }
-+#ifdef NEED_DUMMY_RETURN
-+ return Z_STREAM_ERROR; /* Some dumb compilers complain without this */
-+#endif
-+}
-+
-+
-+void inflate_codes_free(c, z)
-+inflate_codes_statef *c;
-+z_streamp z;
-+{
-+ ZFREE(z, c);
-+ Tracev((stderr, "inflate: codes free\n"));
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infcodes.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,31 @@
-+/* infcodes.h -- header to use infcodes.c
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+#ifndef _INFCODES_H
-+#define _INFCODES_H
-+
-+struct inflate_codes_state;
-+typedef struct inflate_codes_state FAR inflate_codes_statef;
-+
-+extern inflate_codes_statef *inflate_codes_new OF((
-+ uInt, uInt,
-+ inflate_huft *, inflate_huft *,
-+ z_streamp ));
-+
-+extern int inflate_codes OF((
-+ inflate_blocks_statef *,
-+ z_streamp ,
-+ int));
-+
-+extern void inflate_codes_free OF((
-+ inflate_codes_statef *,
-+ z_streamp ));
-+
-+#endif /* _INFCODES_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inffast.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,183 @@
-+/* inffast.c -- process literals and length/distance pairs fast
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "inftrees.h"
-+#include "infblock.h"
-+#include "infcodes.h"
-+#include "infutil.h"
-+#include "inffast.h"
-+
-+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
-+
-+/* simplify the use of the inflate_huft type with some defines */
-+#define exop word.what.Exop
-+#define bits word.what.Bits
-+
-+/* macros for bit input with no checking and for returning unused bytes */
-+#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
-+#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;}
-+
-+/* Called with number of bytes left to write in window at least 258
-+ (the maximum string length) and number of input bytes available
-+ at least ten. The ten bytes are six bytes for the longest length/
-+ distance pair plus four bytes for overloading the bit buffer. */
-+
-+int inflate_fast(bl, bd, tl, td, s, z)
-+uInt bl, bd;
-+inflate_huft *tl;
-+inflate_huft *td; /* need separate declaration for Borland C++ */
-+inflate_blocks_statef *s;
-+z_streamp z;
-+{
-+ inflate_huft *t; /* temporary pointer */
-+ uInt e; /* extra bits or operation */
-+ uLong b; /* bit buffer */
-+ uInt k; /* bits in bit buffer */
-+ Bytef *p; /* input data pointer */
-+ uInt n; /* bytes available there */
-+ Bytef *q; /* output window write pointer */
-+ uInt m; /* bytes to end of window or read pointer */
-+ uInt ml; /* mask for literal/length tree */
-+ uInt md; /* mask for distance tree */
-+ uInt c; /* bytes to copy */
-+ uInt d; /* distance back to copy from */
-+ Bytef *r; /* copy source pointer */
-+
-+ /* load input, output, bit values */
-+ LOAD
-+
-+ /* initialize masks */
-+ ml = inflate_mask[bl];
-+ md = inflate_mask[bd];
-+
-+ /* do until not enough input or output space for fast loop */
-+ do { /* assume called with m >= 258 && n >= 10 */
-+ /* get literal/length code */
-+ GRABBITS(20) /* max bits for literal/length code */
-+ if ((e = (t = tl + ((uInt)b & ml))->exop) == 0)
-+ {
-+ DUMPBITS(t->bits)
-+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
-+ "inflate: * literal '%c'\n" :
-+ "inflate: * literal 0x%02x\n", t->base));
-+ *q++ = (Byte)t->base;
-+ m--;
-+ continue;
-+ }
-+ do {
-+ DUMPBITS(t->bits)
-+ if (e & 16)
-+ {
-+ /* get extra bits for length */
-+ e &= 15;
-+ c = t->base + ((uInt)b & inflate_mask[e]);
-+ DUMPBITS(e)
-+ Tracevv((stderr, "inflate: * length %u\n", c));
-+
-+ /* decode distance base of block to copy */
-+ GRABBITS(15); /* max bits for distance code */
-+ e = (t = td + ((uInt)b & md))->exop;
-+ do {
-+ DUMPBITS(t->bits)
-+ if (e & 16)
-+ {
-+ /* get extra bits to add to distance base */
-+ e &= 15;
-+ GRABBITS(e) /* get extra bits (up to 13) */
-+ d = t->base + ((uInt)b & inflate_mask[e]);
-+ DUMPBITS(e)
-+ Tracevv((stderr, "inflate: * distance %u\n", d));
-+
-+ /* do the copy */
-+ m -= c;
-+ r = q - d;
-+ if (r < s->window) /* wrap if needed */
-+ {
-+ do {
-+ r += s->end - s->window; /* force pointer in window */
-+ } while (r < s->window); /* covers invalid distances */
-+ e = s->end - r;
-+ if (c > e)
-+ {
-+ c -= e; /* wrapped copy */
-+ do {
-+ *q++ = *r++;
-+ } while (--e);
-+ r = s->window;
-+ do {
-+ *q++ = *r++;
-+ } while (--c);
-+ }
-+ else /* normal copy */
-+ {
-+ *q++ = *r++; c--;
-+ *q++ = *r++; c--;
-+ do {
-+ *q++ = *r++;
-+ } while (--c);
-+ }
-+ }
-+ else /* normal copy */
-+ {
-+ *q++ = *r++; c--;
-+ *q++ = *r++; c--;
-+ do {
-+ *q++ = *r++;
-+ } while (--c);
-+ }
-+ break;
-+ }
-+ else if ((e & 64) == 0)
-+ {
-+ t += t->base;
-+ e = (t += ((uInt)b & inflate_mask[e]))->exop;
-+ }
-+ else
-+ {
-+ z->msg = (char*)"invalid distance code";
-+ UNGRAB
-+ UPDATE
-+ return Z_DATA_ERROR;
-+ }
-+ } while (1);
-+ break;
-+ }
-+ if ((e & 64) == 0)
-+ {
-+ t += t->base;
-+ if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0)
-+ {
-+ DUMPBITS(t->bits)
-+ Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
-+ "inflate: * literal '%c'\n" :
-+ "inflate: * literal 0x%02x\n", t->base));
-+ *q++ = (Byte)t->base;
-+ m--;
-+ break;
-+ }
-+ }
-+ else if (e & 32)
-+ {
-+ Tracevv((stderr, "inflate: * end of block\n"));
-+ UNGRAB
-+ UPDATE
-+ return Z_STREAM_END;
-+ }
-+ else
-+ {
-+ z->msg = (char*)"invalid literal/length code";
-+ UNGRAB
-+ UPDATE
-+ return Z_DATA_ERROR;
-+ }
-+ } while (1);
-+ } while (m >= 258 && n >= 10);
-+
-+ /* not enough input or output--restore pointers and return */
-+ UNGRAB
-+ UPDATE
-+ return Z_OK;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inffast.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,22 @@
-+/* inffast.h -- header to use inffast.c
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+#ifndef _INFFAST_H
-+#define _INFFAST_H
-+
-+extern int inflate_fast OF((
-+ uInt,
-+ uInt,
-+ inflate_huft *,
-+ inflate_huft *,
-+ inflate_blocks_statef *,
-+ z_streamp ));
-+
-+#endif /* _INFFAST_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inffixed.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,151 @@
-+/* inffixed.h -- table for decoding fixed codes
-+ * Generated automatically by the maketree.c program
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+local uInt fixed_bl = 9;
-+local uInt fixed_bd = 5;
-+local inflate_huft fixed_tl[] = {
-+ {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
-+ {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192},
-+ {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160},
-+ {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224},
-+ {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144},
-+ {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208},
-+ {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176},
-+ {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240},
-+ {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
-+ {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200},
-+ {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168},
-+ {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232},
-+ {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152},
-+ {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216},
-+ {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184},
-+ {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248},
-+ {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
-+ {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196},
-+ {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164},
-+ {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228},
-+ {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148},
-+ {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212},
-+ {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180},
-+ {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244},
-+ {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
-+ {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204},
-+ {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172},
-+ {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236},
-+ {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156},
-+ {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220},
-+ {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188},
-+ {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},252},
-+ {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
-+ {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194},
-+ {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162},
-+ {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226},
-+ {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146},
-+ {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210},
-+ {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178},
-+ {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242},
-+ {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
-+ {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202},
-+ {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170},
-+ {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234},
-+ {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154},
-+ {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218},
-+ {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186},
-+ {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250},
-+ {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
-+ {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198},
-+ {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166},
-+ {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230},
-+ {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150},
-+ {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214},
-+ {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182},
-+ {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246},
-+ {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
-+ {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206},
-+ {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174},
-+ {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238},
-+ {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158},
-+ {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222},
-+ {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190},
-+ {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},254},
-+ {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
-+ {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193},
-+ {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161},
-+ {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225},
-+ {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145},
-+ {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209},
-+ {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177},
-+ {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241},
-+ {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
-+ {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201},
-+ {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169},
-+ {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233},
-+ {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153},
-+ {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217},
-+ {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185},
-+ {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249},
-+ {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
-+ {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197},
-+ {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165},
-+ {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229},
-+ {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149},
-+ {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213},
-+ {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181},
-+ {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245},
-+ {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
-+ {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205},
-+ {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173},
-+ {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237},
-+ {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157},
-+ {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221},
-+ {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189},
-+ {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253},
-+ {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
-+ {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},195},
-+ {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163},
-+ {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227},
-+ {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147},
-+ {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211},
-+ {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179},
-+ {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243},
-+ {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
-+ {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203},
-+ {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171},
-+ {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235},
-+ {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155},
-+ {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219},
-+ {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187},
-+ {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251},
-+ {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
-+ {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199},
-+ {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167},
-+ {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231},
-+ {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151},
-+ {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215},
-+ {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183},
-+ {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247},
-+ {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
-+ {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207},
-+ {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175},
-+ {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239},
-+ {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159},
-+ {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223},
-+ {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191},
-+ {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255}
-+ };
-+local inflate_huft fixed_td[] = {
-+ {{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097},
-+ {{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, {{{93,5}},16385},
-+ {{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193},
-+ {{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577},
-+ {{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145},
-+ {{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577},
-+ {{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289},
-+ {{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577}
-+ };
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inflate.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,368 @@
-+/* inflate.c -- zlib interface to inflate modules
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "infblock.h"
-+
-+struct inflate_blocks_state {int dummy;}; /* for buggy compilers */
-+
-+typedef enum {
-+ METHOD, /* waiting for method byte */
-+ FLAG, /* waiting for flag byte */
-+ DICT4, /* four dictionary check bytes to go */
-+ DICT3, /* three dictionary check bytes to go */
-+ DICT2, /* two dictionary check bytes to go */
-+ DICT1, /* one dictionary check byte to go */
-+ DICT0, /* waiting for inflateSetDictionary */
-+ BLOCKS, /* decompressing blocks */
-+ CHECK4, /* four check bytes to go */
-+ CHECK3, /* three check bytes to go */
-+ CHECK2, /* two check bytes to go */
-+ CHECK1, /* one check byte to go */
-+ DONE, /* finished check, done */
-+ BAD} /* got an error--stay here */
-+inflate_mode;
-+
-+/* inflate private state */
-+struct internal_state {
-+
-+ /* mode */
-+ inflate_mode mode; /* current inflate mode */
-+
-+ /* mode dependent information */
-+ union {
-+ uInt method; /* if FLAGS, method byte */
-+ struct {
-+ uLong was; /* computed check value */
-+ uLong need; /* stream check value */
-+ } check; /* if CHECK, check values to compare */
-+ uInt marker; /* if BAD, inflateSync's marker bytes count */
-+ } sub; /* submode */
-+
-+ /* mode independent information */
-+ int nowrap; /* flag for no wrapper */
-+ uInt wbits; /* log2(window size) (8..15, defaults to 15) */
-+ inflate_blocks_statef
-+ *blocks; /* current inflate_blocks state */
-+
-+};
-+
-+
-+int ZEXPORT inflateReset(z)
-+z_streamp z;
-+{
-+ if (z == Z_NULL || z->state == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ z->total_in = z->total_out = 0;
-+ z->msg = Z_NULL;
-+ z->state->mode = z->state->nowrap ? BLOCKS : METHOD;
-+ inflate_blocks_reset(z->state->blocks, z, Z_NULL);
-+ Tracev((stderr, "inflate: reset\n"));
-+ return Z_OK;
-+}
-+
-+
-+int ZEXPORT inflateEnd(z)
-+z_streamp z;
-+{
-+ if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ if (z->state->blocks != Z_NULL)
-+ inflate_blocks_free(z->state->blocks, z);
-+ ZFREE(z, z->state);
-+ z->state = Z_NULL;
-+ Tracev((stderr, "inflate: end\n"));
-+ return Z_OK;
-+}
-+
-+
-+int ZEXPORT inflateInit2_(z, w, version, stream_size)
-+z_streamp z;
-+int w;
-+const char *version;
-+int stream_size;
-+{
-+ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
-+ stream_size != sizeof(z_stream))
-+ return Z_VERSION_ERROR;
-+
-+ /* initialize state */
-+ if (z == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ z->msg = Z_NULL;
-+ if (z->zalloc == Z_NULL)
-+ {
-+ return Z_STREAM_ERROR;
-+/* z->zalloc = zcalloc;
-+ z->opaque = (voidpf)0;
-+*/
-+ }
-+ if (z->zfree == Z_NULL) return Z_STREAM_ERROR; /* z->zfree = zcfree; */
-+ if ((z->state = (struct internal_state FAR *)
-+ ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL)
-+ return Z_MEM_ERROR;
-+ z->state->blocks = Z_NULL;
-+
-+ /* handle undocumented nowrap option (no zlib header or check) */
-+ z->state->nowrap = 0;
-+ if (w < 0)
-+ {
-+ w = - w;
-+ z->state->nowrap = 1;
-+ }
-+
-+ /* set window size */
-+ if (w < 8 || w > 15)
-+ {
-+ inflateEnd(z);
-+ return Z_STREAM_ERROR;
-+ }
-+ z->state->wbits = (uInt)w;
-+
-+ /* create inflate_blocks state */
-+ if ((z->state->blocks =
-+ inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, (uInt)1 << w))
-+ == Z_NULL)
-+ {
-+ inflateEnd(z);
-+ return Z_MEM_ERROR;
-+ }
-+ Tracev((stderr, "inflate: allocated\n"));
-+
-+ /* reset state */
-+ inflateReset(z);
-+ return Z_OK;
-+}
-+
-+
-+int ZEXPORT inflateInit_(z, version, stream_size)
-+z_streamp z;
-+const char *version;
-+int stream_size;
-+{
-+ return inflateInit2_(z, DEF_WBITS, version, stream_size);
-+}
-+
-+
-+#define NEEDBYTE {if(z->avail_in==0)return r;r=f;}
-+#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)
-+
-+int ZEXPORT inflate(z, f)
-+z_streamp z;
-+int f;
-+{
-+ int r;
-+ uInt b;
-+
-+ if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK;
-+ r = Z_BUF_ERROR;
-+ while (1) switch (z->state->mode)
-+ {
-+ case METHOD:
-+ NEEDBYTE
-+ if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED)
-+ {
-+ z->state->mode = BAD;
-+ z->msg = (char*)"unknown compression method";
-+ z->state->sub.marker = 5; /* can't try inflateSync */
-+ break;
-+ }
-+ if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
-+ {
-+ z->state->mode = BAD;
-+ z->msg = (char*)"invalid window size";
-+ z->state->sub.marker = 5; /* can't try inflateSync */
-+ break;
-+ }
-+ z->state->mode = FLAG;
-+ case FLAG:
-+ NEEDBYTE
-+ b = NEXTBYTE;
-+ if (((z->state->sub.method << 8) + b) % 31)
-+ {
-+ z->state->mode = BAD;
-+ z->msg = (char*)"incorrect header check";
-+ z->state->sub.marker = 5; /* can't try inflateSync */
-+ break;
-+ }
-+ Tracev((stderr, "inflate: zlib header ok\n"));
-+ if (!(b & PRESET_DICT))
-+ {
-+ z->state->mode = BLOCKS;
-+ break;
-+ }
-+ z->state->mode = DICT4;
-+ case DICT4:
-+ NEEDBYTE
-+ z->state->sub.check.need = (uLong)NEXTBYTE << 24;
-+ z->state->mode = DICT3;
-+ case DICT3:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE << 16;
-+ z->state->mode = DICT2;
-+ case DICT2:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE << 8;
-+ z->state->mode = DICT1;
-+ case DICT1:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE;
-+ z->adler = z->state->sub.check.need;
-+ z->state->mode = DICT0;
-+ return Z_NEED_DICT;
-+ case DICT0:
-+ z->state->mode = BAD;
-+ z->msg = (char*)"need dictionary";
-+ z->state->sub.marker = 0; /* can try inflateSync */
-+ return Z_STREAM_ERROR;
-+ case BLOCKS:
-+ r = inflate_blocks(z->state->blocks, z, r);
-+ if (r == Z_DATA_ERROR)
-+ {
-+ z->state->mode = BAD;
-+ z->state->sub.marker = 0; /* can try inflateSync */
-+ break;
-+ }
-+ if (r == Z_OK)
-+ r = f;
-+ if (r != Z_STREAM_END)
-+ return r;
-+ r = f;
-+ inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);
-+ if (z->state->nowrap)
-+ {
-+ z->state->mode = DONE;
-+ break;
-+ }
-+ z->state->mode = CHECK4;
-+ case CHECK4:
-+ NEEDBYTE
-+ z->state->sub.check.need = (uLong)NEXTBYTE << 24;
-+ z->state->mode = CHECK3;
-+ case CHECK3:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE << 16;
-+ z->state->mode = CHECK2;
-+ case CHECK2:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE << 8;
-+ z->state->mode = CHECK1;
-+ case CHECK1:
-+ NEEDBYTE
-+ z->state->sub.check.need += (uLong)NEXTBYTE;
-+
-+ if (z->state->sub.check.was != z->state->sub.check.need)
-+ {
-+ z->state->mode = BAD;
-+ z->msg = (char*)"incorrect data check";
-+ z->state->sub.marker = 5; /* can't try inflateSync */
-+ break;
-+ }
-+ Tracev((stderr, "inflate: zlib check ok\n"));
-+ z->state->mode = DONE;
-+ case DONE:
-+ return Z_STREAM_END;
-+ case BAD:
-+ return Z_DATA_ERROR;
-+ default:
-+ return Z_STREAM_ERROR;
-+ }
-+#ifdef NEED_DUMMY_RETURN
-+ return Z_STREAM_ERROR; /* Some dumb compilers complain without this */
-+#endif
-+}
-+
-+
-+int ZEXPORT inflateSetDictionary(z, dictionary, dictLength)
-+z_streamp z;
-+const Bytef *dictionary;
-+uInt dictLength;
-+{
-+ uInt length = dictLength;
-+
-+ if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0)
-+ return Z_STREAM_ERROR;
-+
-+ if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR;
-+ z->adler = 1L;
-+
-+ if (length >= ((uInt)1<<z->state->wbits))
-+ {
-+ length = (1<<z->state->wbits)-1;
-+ dictionary += dictLength - length;
-+ }
-+ inflate_set_dictionary(z->state->blocks, dictionary, length);
-+ z->state->mode = BLOCKS;
-+ return Z_OK;
-+}
-+
-+
-+int ZEXPORT inflateSync(z)
-+z_streamp z;
-+{
-+ uInt n; /* number of bytes to look at */
-+ Bytef *p; /* pointer to bytes */
-+ uInt m; /* number of marker bytes found in a row */
-+ uLong r, w; /* temporaries to save total_in and total_out */
-+
-+ /* set up */
-+ if (z == Z_NULL || z->state == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ if (z->state->mode != BAD)
-+ {
-+ z->state->mode = BAD;
-+ z->state->sub.marker = 0;
-+ }
-+ if ((n = z->avail_in) == 0)
-+ return Z_BUF_ERROR;
-+ p = z->next_in;
-+ m = z->state->sub.marker;
-+
-+ /* search */
-+ while (n && m < 4)
-+ {
-+ static const Byte mark[4] = {0, 0, 0xff, 0xff};
-+ if (*p == mark[m])
-+ m++;
-+ else if (*p)
-+ m = 0;
-+ else
-+ m = 4 - m;
-+ p++, n--;
-+ }
-+
-+ /* restore */
-+ z->total_in += p - z->next_in;
-+ z->next_in = p;
-+ z->avail_in = n;
-+ z->state->sub.marker = m;
-+
-+ /* return no joy or set up to restart on a new block */
-+ if (m != 4)
-+ return Z_DATA_ERROR;
-+ r = z->total_in; w = z->total_out;
-+ inflateReset(z);
-+ z->total_in = r; z->total_out = w;
-+ z->state->mode = BLOCKS;
-+ return Z_OK;
-+}
-+
-+
-+/* Returns true if inflate is currently at the end of a block generated
-+ * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
-+ * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH
-+ * but removes the length bytes of the resulting empty stored block. When
-+ * decompressing, PPP checks that at the end of input packet, inflate is
-+ * waiting for these length bytes.
-+ */
-+int ZEXPORT inflateSyncPoint(z)
-+z_streamp z;
-+{
-+ if (z == Z_NULL || z->state == Z_NULL || z->state->blocks == Z_NULL)
-+ return Z_STREAM_ERROR;
-+ return inflate_blocks_sync_point(z->state->blocks);
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inftrees.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,454 @@
-+/* inftrees.c -- generate Huffman trees for efficient decoding
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "inftrees.h"
-+
-+#if !defined(BUILDFIXED) && !defined(STDC)
-+# define BUILDFIXED /* non ANSI compilers may not accept inffixed.h */
-+#endif
-+
-+local const char inflate_copyright[] =
-+ " inflate 1.1.4 Copyright 1995-2002 Mark Adler ";
-+/*
-+ If you use the zlib library in a product, an acknowledgment is welcome
-+ in the documentation of your product. If for some reason you cannot
-+ include such an acknowledgment, I would appreciate that you keep this
-+ copyright string in the executable of your product.
-+ */
-+struct internal_state {int dummy;}; /* for buggy compilers */
-+
-+/* simplify the use of the inflate_huft type with some defines */
-+#define exop word.what.Exop
-+#define bits word.what.Bits
-+
-+
-+local int huft_build OF((
-+ uIntf *, /* code lengths in bits */
-+ uInt, /* number of codes */
-+ uInt, /* number of "simple" codes */
-+ const uIntf *, /* list of base values for non-simple codes */
-+ const uIntf *, /* list of extra bits for non-simple codes */
-+ inflate_huft * FAR*,/* result: starting table */
-+ uIntf *, /* maximum lookup bits (returns actual) */
-+ inflate_huft *, /* space for trees */
-+ uInt *, /* hufts used in space */
-+ uIntf * )); /* space for values */
-+
-+/* Tables for deflate from PKZIP's appnote.txt. */
-+local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */
-+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
-+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
-+ /* see note #13 above about 258 */
-+local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */
-+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
-+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
-+local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */
-+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
-+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
-+ 8193, 12289, 16385, 24577};
-+local const uInt cpdext[30] = { /* Extra bits for distance codes */
-+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
-+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
-+ 12, 12, 13, 13};
-+
-+/*
-+ Huffman code decoding is performed using a multi-level table lookup.
-+ The fastest way to decode is to simply build a lookup table whose
-+ size is determined by the longest code. However, the time it takes
-+ to build this table can also be a factor if the data being decoded
-+ is not very long. The most common codes are necessarily the
-+ shortest codes, so those codes dominate the decoding time, and hence
-+ the speed. The idea is you can have a shorter table that decodes the
-+ shorter, more probable codes, and then point to subsidiary tables for
-+ the longer codes. The time it costs to decode the longer codes is
-+ then traded against the time it takes to make longer tables.
-+
-+ This results of this trade are in the variables lbits and dbits
-+ below. lbits is the number of bits the first level table for literal/
-+ length codes can decode in one step, and dbits is the same thing for
-+ the distance codes. Subsequent tables are also less than or equal to
-+ those sizes. These values may be adjusted either when all of the
-+ codes are shorter than that, in which case the longest code length in
-+ bits is used, or when the shortest code is *longer* than the requested
-+ table size, in which case the length of the shortest code in bits is
-+ used.
-+
-+ There are two different values for the two tables, since they code a
-+ different number of possibilities each. The literal/length table
-+ codes 286 possible values, or in a flat code, a little over eight
-+ bits. The distance table codes 30 possible values, or a little less
-+ than five bits, flat. The optimum values for speed end up being
-+ about one bit more than those, so lbits is 8+1 and dbits is 5+1.
-+ The optimum values may differ though from machine to machine, and
-+ possibly even between compilers. Your mileage may vary.
-+ */
-+
-+
-+/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */
-+#define BMAX 15 /* maximum bit length of any code */
-+
-+local int huft_build(b, n, s, d, e, t, m, hp, hn, v)
-+uIntf *b; /* code lengths in bits (all assumed <= BMAX) */
-+uInt n; /* number of codes (assumed <= 288) */
-+uInt s; /* number of simple-valued codes (0..s-1) */
-+const uIntf *d; /* list of base values for non-simple codes */
-+const uIntf *e; /* list of extra bits for non-simple codes */
-+inflate_huft * FAR *t; /* result: starting table */
-+uIntf *m; /* maximum lookup bits, returns actual */
-+inflate_huft *hp; /* space for trees */
-+uInt *hn; /* hufts used in space */
-+uIntf *v; /* working area: values in order of bit length */
-+/* Given a list of code lengths and a maximum table size, make a set of
-+ tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR
-+ if the given code set is incomplete (the tables are still built in this
-+ case), or Z_DATA_ERROR if the input is invalid. */
-+{
-+
-+ uInt a; /* counter for codes of length k */
-+ uInt c[BMAX+1]; /* bit length count table */
-+ uInt f; /* i repeats in table every f entries */
-+ int g; /* maximum code length */
-+ int h; /* table level */
-+ register uInt i; /* counter, current code */
-+ register uInt j; /* counter */
-+ register int k; /* number of bits in current code */
-+ int l; /* bits per table (returned in m) */
-+ uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */
-+ register uIntf *p; /* pointer into c[], b[], or v[] */
-+ inflate_huft *q; /* points to current table */
-+ struct inflate_huft_s r; /* table entry for structure assignment */
-+ inflate_huft *u[BMAX]; /* table stack */
-+ register int w; /* bits before this table == (l * h) */
-+ uInt x[BMAX+1]; /* bit offsets, then code stack */
-+ uIntf *xp; /* pointer into x */
-+ int y; /* number of dummy codes added */
-+ uInt z; /* number of entries in current table */
-+
-+
-+ /* Generate counts for each bit length */
-+ p = c;
-+#define C0 *p++ = 0;
-+#define C2 C0 C0 C0 C0
-+#define C4 C2 C2 C2 C2
-+ C4 /* clear c[]--assume BMAX+1 is 16 */
-+ p = b; i = n;
-+ do {
-+ c[*p++]++; /* assume all entries <= BMAX */
-+ } while (--i);
-+ if (c[0] == n) /* null input--all zero length codes */
-+ {
-+ *t = (inflate_huft *)Z_NULL;
-+ *m = 0;
-+ return Z_OK;
-+ }
-+
-+
-+ /* Find minimum and maximum length, bound *m by those */
-+ l = *m;
-+ for (j = 1; j <= BMAX; j++)
-+ if (c[j])
-+ break;
-+ k = j; /* minimum code length */
-+ if ((uInt)l < j)
-+ l = j;
-+ for (i = BMAX; i; i--)
-+ if (c[i])
-+ break;
-+ g = i; /* maximum code length */
-+ if ((uInt)l > i)
-+ l = i;
-+ *m = l;
-+
-+
-+ /* Adjust last length count to fill out codes, if needed */
-+ for (y = 1 << j; j < i; j++, y <<= 1)
-+ if ((y -= c[j]) < 0)
-+ return Z_DATA_ERROR;
-+ if ((y -= c[i]) < 0)
-+ return Z_DATA_ERROR;
-+ c[i] += y;
-+
-+
-+ /* Generate starting offsets into the value table for each length */
-+ x[1] = j = 0;
-+ p = c + 1; xp = x + 2;
-+ while (--i) { /* note that i == g from above */
-+ *xp++ = (j += *p++);
-+ }
-+
-+
-+ /* Make a table of values in order of bit lengths */
-+ p = b; i = 0;
-+ do {
-+ if ((j = *p++) != 0)
-+ v[x[j]++] = i;
-+ } while (++i < n);
-+ n = x[g]; /* set n to length of v */
-+
-+
-+ /* Generate the Huffman codes and for each, make the table entries */
-+ x[0] = i = 0; /* first Huffman code is zero */
-+ p = v; /* grab values in bit order */
-+ h = -1; /* no tables yet--level -1 */
-+ w = -l; /* bits decoded == (l * h) */
-+ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */
-+ q = (inflate_huft *)Z_NULL; /* ditto */
-+ z = 0; /* ditto */
-+
-+ /* go through the bit lengths (k already is bits in shortest code) */
-+ for (; k <= g; k++)
-+ {
-+ a = c[k];
-+ while (a--)
-+ {
-+ /* here i is the Huffman code of length k bits for value *p */
-+ /* make tables up to required level */
-+ while (k > w + l)
-+ {
-+ h++;
-+ w += l; /* previous table always l bits */
-+
-+ /* compute minimum size table less than or equal to l bits */
-+ z = g - w;
-+ z = z > (uInt)l ? l : z; /* table size upper limit */
-+ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
-+ { /* too few codes for k-w bit table */
-+ f -= a + 1; /* deduct codes from patterns left */
-+ xp = c + k;
-+ if (j < z)
-+ while (++j < z) /* try smaller tables up to z bits */
-+ {
-+ if ((f <<= 1) <= *++xp)
-+ break; /* enough codes to use up j bits */
-+ f -= *xp; /* else deduct codes from patterns */
-+ }
-+ }
-+ z = 1 << j; /* table entries for j-bit table */
-+
-+ /* allocate new table */
-+ if (*hn + z > MANY) /* (note: doesn't matter for fixed) */
-+ return Z_DATA_ERROR; /* overflow of MANY */
-+ u[h] = q = hp + *hn;
-+ *hn += z;
-+
-+ /* connect to last table, if there is one */
-+ if (h)
-+ {
-+ x[h] = i; /* save pattern for backing up */
-+ r.bits = (Byte)l; /* bits to dump before this table */
-+ r.exop = (Byte)j; /* bits in this table */
-+ j = i >> (w - l);
-+ r.base = (uInt)(q - u[h-1] - j); /* offset to this table */
-+ u[h-1][j] = r; /* connect to last table */
-+ }
-+ else
-+ *t = q; /* first table is returned result */
-+ }
-+
-+ /* set up table entry in r */
-+ r.bits = (Byte)(k - w);
-+ if (p >= v + n)
-+ r.exop = 128 + 64; /* out of values--invalid code */
-+ else if (*p < s)
-+ {
-+ r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */
-+ r.base = *p++; /* simple code is just the value */
-+ }
-+ else
-+ {
-+ r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
-+ r.base = d[*p++ - s];
-+ }
-+
-+ /* fill code-like entries with r */
-+ f = 1 << (k - w);
-+ for (j = i >> w; j < z; j += f)
-+ q[j] = r;
-+
-+ /* backwards increment the k-bit code i */
-+ for (j = 1 << (k - 1); i & j; j >>= 1)
-+ i ^= j;
-+ i ^= j;
-+
-+ /* backup over finished tables */
-+ mask = (1 << w) - 1; /* needed on HP, cc -O bug */
-+ while ((i & mask) != x[h])
-+ {
-+ h--; /* don't need to update q */
-+ w -= l;
-+ mask = (1 << w) - 1;
-+ }
-+ }
-+ }
-+
-+
-+ /* Return Z_BUF_ERROR if we were given an incomplete table */
-+ return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK;
-+}
-+
-+
-+int inflate_trees_bits(c, bb, tb, hp, z)
-+uIntf *c; /* 19 code lengths */
-+uIntf *bb; /* bits tree desired/actual depth */
-+inflate_huft * FAR *tb; /* bits tree result */
-+inflate_huft *hp; /* space for trees */
-+z_streamp z; /* for messages */
-+{
-+ int r;
-+ uInt hn = 0; /* hufts used in space */
-+ uIntf *v; /* work area for huft_build */
-+
-+ if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL)
-+ return Z_MEM_ERROR;
-+ r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL,
-+ tb, bb, hp, &hn, v);
-+ if (r == Z_DATA_ERROR)
-+ z->msg = (char*)"oversubscribed dynamic bit lengths tree";
-+ else if (r == Z_BUF_ERROR || *bb == 0)
-+ {
-+ z->msg = (char*)"incomplete dynamic bit lengths tree";
-+ r = Z_DATA_ERROR;
-+ }
-+ ZFREE(z, v);
-+ return r;
-+}
-+
-+
-+int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z)
-+uInt nl; /* number of literal/length codes */
-+uInt nd; /* number of distance codes */
-+uIntf *c; /* that many (total) code lengths */
-+uIntf *bl; /* literal desired/actual bit depth */
-+uIntf *bd; /* distance desired/actual bit depth */
-+inflate_huft * FAR *tl; /* literal/length tree result */
-+inflate_huft * FAR *td; /* distance tree result */
-+inflate_huft *hp; /* space for trees */
-+z_streamp z; /* for messages */
-+{
-+ int r;
-+ uInt hn = 0; /* hufts used in space */
-+ uIntf *v; /* work area for huft_build */
-+
-+ /* allocate work area */
-+ if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
-+ return Z_MEM_ERROR;
-+
-+ /* build literal/length tree */
-+ r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v);
-+ if (r != Z_OK || *bl == 0)
-+ {
-+ if (r == Z_DATA_ERROR)
-+ z->msg = (char*)"oversubscribed literal/length tree";
-+ else if (r != Z_MEM_ERROR)
-+ {
-+ z->msg = (char*)"incomplete literal/length tree";
-+ r = Z_DATA_ERROR;
-+ }
-+ ZFREE(z, v);
-+ return r;
-+ }
-+
-+ /* build distance tree */
-+ r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v);
-+ if (r != Z_OK || (*bd == 0 && nl > 257))
-+ {
-+ if (r == Z_DATA_ERROR)
-+ z->msg = (char*)"oversubscribed distance tree";
-+ else if (r == Z_BUF_ERROR) {
-+#ifdef PKZIP_BUG_WORKAROUND
-+ r = Z_OK;
-+ }
-+#else
-+ z->msg = (char*)"incomplete distance tree";
-+ r = Z_DATA_ERROR;
-+ }
-+ else if (r != Z_MEM_ERROR)
-+ {
-+ z->msg = (char*)"empty distance tree with lengths";
-+ r = Z_DATA_ERROR;
-+ }
-+ ZFREE(z, v);
-+ return r;
-+#endif
-+ }
-+
-+ /* done */
-+ ZFREE(z, v);
-+ return Z_OK;
-+}
-+
-+
-+/* build fixed tables only once--keep them here */
-+#ifdef BUILDFIXED
-+local int fixed_built = 0;
-+#define FIXEDH 544 /* number of hufts used by fixed tables */
-+local inflate_huft fixed_mem[FIXEDH];
-+local uInt fixed_bl;
-+local uInt fixed_bd;
-+local inflate_huft *fixed_tl;
-+local inflate_huft *fixed_td;
-+#else
-+#include "inffixed.h"
-+#endif
-+
-+
-+int inflate_trees_fixed(bl, bd, tl, td, z)
-+uIntf *bl; /* literal desired/actual bit depth */
-+uIntf *bd; /* distance desired/actual bit depth */
-+inflate_huft * FAR *tl; /* literal/length tree result */
-+inflate_huft * FAR *td; /* distance tree result */
-+z_streamp z; /* for memory allocation */
-+{
-+#ifdef BUILDFIXED
-+ /* build fixed tables if not already */
-+ if (!fixed_built)
-+ {
-+ int k; /* temporary variable */
-+ uInt f = 0; /* number of hufts used in fixed_mem */
-+ uIntf *c; /* length list for huft_build */
-+ uIntf *v; /* work area for huft_build */
-+
-+ /* allocate memory */
-+ if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
-+ return Z_MEM_ERROR;
-+ if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
-+ {
-+ ZFREE(z, c);
-+ return Z_MEM_ERROR;
-+ }
-+
-+ /* literal table */
-+ for (k = 0; k < 144; k++)
-+ c[k] = 8;
-+ for (; k < 256; k++)
-+ c[k] = 9;
-+ for (; k < 280; k++)
-+ c[k] = 7;
-+ for (; k < 288; k++)
-+ c[k] = 8;
-+ fixed_bl = 9;
-+ huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl,
-+ fixed_mem, &f, v);
-+
-+ /* distance table */
-+ for (k = 0; k < 30; k++)
-+ c[k] = 5;
-+ fixed_bd = 5;
-+ huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd,
-+ fixed_mem, &f, v);
-+
-+ /* done */
-+ ZFREE(z, v);
-+ ZFREE(z, c);
-+ fixed_built = 1;
-+ }
-+#endif
-+ *bl = fixed_bl;
-+ *bd = fixed_bd;
-+ *tl = fixed_tl;
-+ *td = fixed_td;
-+ return Z_OK;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/inftrees.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,63 @@
-+/* inftrees.h -- header to use inftrees.c
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+/* Huffman code lookup table entry--this entry is four bytes for machines
-+ that have 16-bit pointers (e.g. PC's in the small or medium model). */
-+
-+#ifndef _INFTREES_H
-+#define _INFTREES_H
-+
-+typedef struct inflate_huft_s FAR inflate_huft;
-+
-+struct inflate_huft_s {
-+ union {
-+ struct {
-+ Byte Exop; /* number of extra bits or operation */
-+ Byte Bits; /* number of bits in this code or subcode */
-+ } what;
-+ uInt pad; /* pad structure to a power of 2 (4 bytes for */
-+ } word; /* 16-bit, 8 bytes for 32-bit int's) */
-+ uInt base; /* literal, length base, distance base,
-+ or table offset */
-+};
-+
-+/* Maximum size of dynamic tree. The maximum found in a long but non-
-+ exhaustive search was 1004 huft structures (850 for length/literals
-+ and 154 for distances, the latter actually the result of an
-+ exhaustive search). The actual maximum is not known, but the
-+ value below is more than safe. */
-+#define MANY 1440
-+
-+extern int inflate_trees_bits OF((
-+ uIntf *, /* 19 code lengths */
-+ uIntf *, /* bits tree desired/actual depth */
-+ inflate_huft * FAR *, /* bits tree result */
-+ inflate_huft *, /* space for trees */
-+ z_streamp)); /* for messages */
-+
-+extern int inflate_trees_dynamic OF((
-+ uInt, /* number of literal/length codes */
-+ uInt, /* number of distance codes */
-+ uIntf *, /* that many (total) code lengths */
-+ uIntf *, /* literal desired/actual bit depth */
-+ uIntf *, /* distance desired/actual bit depth */
-+ inflate_huft * FAR *, /* literal/length tree result */
-+ inflate_huft * FAR *, /* distance tree result */
-+ inflate_huft *, /* space for trees */
-+ z_streamp)); /* for messages */
-+
-+extern int inflate_trees_fixed OF((
-+ uIntf *, /* literal desired/actual bit depth */
-+ uIntf *, /* distance desired/actual bit depth */
-+ inflate_huft * FAR *, /* literal/length tree result */
-+ inflate_huft * FAR *, /* distance tree result */
-+ z_streamp)); /* for memory allocation */
-+
-+#endif /* _INFTREES_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infutil.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,87 @@
-+/* inflate_util.c -- data and routines common to blocks and codes
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+#include <zlib/zutil.h>
-+#include "infblock.h"
-+#include "inftrees.h"
-+#include "infcodes.h"
-+#include "infutil.h"
-+
-+struct inflate_codes_state {int dummy;}; /* for buggy compilers */
-+
-+/* And'ing with mask[n] masks the lower n bits */
-+uInt inflate_mask[17] = {
-+ 0x0000,
-+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
-+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
-+};
-+
-+
-+/* copy as much as possible from the sliding window to the output area */
-+int inflate_flush(s, z, r)
-+inflate_blocks_statef *s;
-+z_streamp z;
-+int r;
-+{
-+ uInt n;
-+ Bytef *p;
-+ Bytef *q;
-+
-+ /* local copies of source and destination pointers */
-+ p = z->next_out;
-+ q = s->read;
-+
-+ /* compute number of bytes to copy as far as end of window */
-+ n = (uInt)((q <= s->write ? s->write : s->end) - q);
-+ if (n > z->avail_out) n = z->avail_out;
-+ if (n && r == Z_BUF_ERROR) r = Z_OK;
-+
-+ /* update counters */
-+ z->avail_out -= n;
-+ z->total_out += n;
-+
-+ /* update check information */
-+ if (s->checkfn != Z_NULL)
-+ z->adler = s->check = (*s->checkfn)(s->check, q, n);
-+
-+ /* copy as far as end of window */
-+ zmemcpy(p, q, n);
-+ p += n;
-+ q += n;
-+
-+ /* see if more to copy at beginning of window */
-+ if (q == s->end)
-+ {
-+ /* wrap pointers */
-+ q = s->window;
-+ if (s->write == s->end)
-+ s->write = s->window;
-+
-+ /* compute bytes to copy */
-+ n = (uInt)(s->write - q);
-+ if (n > z->avail_out) n = z->avail_out;
-+ if (n && r == Z_BUF_ERROR) r = Z_OK;
-+
-+ /* update counters */
-+ z->avail_out -= n;
-+ z->total_out += n;
-+
-+ /* update check information */
-+ if (s->checkfn != Z_NULL)
-+ z->adler = s->check = (*s->checkfn)(s->check, q, n);
-+
-+ /* copy */
-+ zmemcpy(p, q, n);
-+ p += n;
-+ q += n;
-+ }
-+
-+ /* update pointers */
-+ z->next_out = p;
-+ s->read = q;
-+
-+ /* done */
-+ return r;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/infutil.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,98 @@
-+/* infutil.h -- types and macros common to blocks and codes
-+ * Copyright (C) 1995-2002 Mark Adler
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* WARNING: this file should *not* be used by applications. It is
-+ part of the implementation of the compression library and is
-+ subject to change. Applications should only use zlib.h.
-+ */
-+
-+#ifndef _INFUTIL_H
-+#define _INFUTIL_H
-+
-+typedef enum {
-+ TYPE, /* get type bits (3, including end bit) */
-+ LENS, /* get lengths for stored */
-+ STORED, /* processing stored block */
-+ TABLE, /* get table lengths */
-+ BTREE, /* get bit lengths tree for a dynamic block */
-+ DTREE, /* get length, distance trees for a dynamic block */
-+ CODES, /* processing fixed or dynamic block */
-+ DRY, /* output remaining window bytes */
-+ DONE, /* finished last block, done */
-+ BAD} /* got a data error--stuck here */
-+inflate_block_mode;
-+
-+/* inflate blocks semi-private state */
-+struct inflate_blocks_state {
-+
-+ /* mode */
-+ inflate_block_mode mode; /* current inflate_block mode */
-+
-+ /* mode dependent information */
-+ union {
-+ uInt left; /* if STORED, bytes left to copy */
-+ struct {
-+ uInt table; /* table lengths (14 bits) */
-+ uInt index; /* index into blens (or border) */
-+ uIntf *blens; /* bit lengths of codes */
-+ uInt bb; /* bit length tree depth */
-+ inflate_huft *tb; /* bit length decoding tree */
-+ } trees; /* if DTREE, decoding info for trees */
-+ struct {
-+ inflate_codes_statef
-+ *codes;
-+ } decode; /* if CODES, current state */
-+ } sub; /* submode */
-+ uInt last; /* true if this block is the last block */
-+
-+ /* mode independent information */
-+ uInt bitk; /* bits in bit buffer */
-+ uLong bitb; /* bit buffer */
-+ inflate_huft *hufts; /* single malloc for tree space */
-+ Bytef *window; /* sliding window */
-+ Bytef *end; /* one byte after sliding window */
-+ Bytef *read; /* window read pointer */
-+ Bytef *write; /* window write pointer */
-+ check_func checkfn; /* check function */
-+ uLong check; /* check on output */
-+
-+};
-+
-+
-+/* defines for inflate input/output */
-+/* update pointers and return */
-+#define UPDBITS {s->bitb=b;s->bitk=k;}
-+#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;}
-+#define UPDOUT {s->write=q;}
-+#define UPDATE {UPDBITS UPDIN UPDOUT}
-+#define LEAVE {UPDATE return inflate_flush(s,z,r);}
-+/* get bytes and bits */
-+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
-+#define NEEDBYTE {if(n)r=Z_OK;else LEAVE}
-+#define NEXTBYTE (n--,*p++)
-+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
-+#define DUMPBITS(j) {b>>=(j);k-=(j);}
-+/* output bytes */
-+#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
-+#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
-+#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
-+#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT}
-+#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;}
-+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
-+/* load local pointers */
-+#define LOAD {LOADIN LOADOUT}
-+
-+/* masks for lower bits (size given to avoid silly warnings with Visual C++) */
-+extern uInt inflate_mask[17];
-+
-+/* copy as much as possible from the sliding window to the output area */
-+extern int inflate_flush OF((
-+ inflate_blocks_statef *,
-+ z_streamp ,
-+ int));
-+
-+struct internal_state {int dummy;}; /* for buggy compilers */
-+
-+#endif /* _INFUTIL_H */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/initaddr.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,85 @@
-+/*
-+ * initialize address structure
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: initaddr.c,v 1.6 2004/07/10 07:43:47 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+err_t
-+add_port(af, addr, port)
-+int af;
-+ip_address *addr;
-+unsigned short port;
-+{
-+ switch (af) {
-+ case AF_INET:
-+ addr->u.v4.sin_port = port;
-+ break;
-+ case AF_INET6:
-+ addr->u.v6.sin6_port = port;
-+ break;
-+ default:
-+ return "unknown address family in add_port";
-+ break;
-+ }
-+ return NULL;
-+}
-+
-+/*
-+ - initaddr - initialize ip_address from bytes
-+ */
-+err_t /* NULL for success, else string literal */
-+initaddr(src, srclen, af, dst)
-+const unsigned char *src;
-+size_t srclen;
-+int af; /* address family */
-+ip_address *dst;
-+{
-+ switch (af) {
-+ case AF_INET:
-+ if (srclen != 4)
-+ return "IPv4 address must be exactly 4 bytes";
-+#if !defined(__KERNEL__)
-+ /* On BSD, the kernel compares the entire struct sockaddr when
-+ * using bind(). However, this is as large as the largest
-+ * address family, so the 'remainder' has to be 0. Linux
-+ * compares interface addresses with the length of sa_len,
-+ * instead of sizeof(struct sockaddr), so in that case padding
-+ * is not needed.
-+ *
-+ * Patch by Stefan Arentz <stefan@soze.com>
-+ */
-+ bzero(&dst->u.v4, sizeof(dst->u.v4));
-+#endif
-+ dst->u.v4.sin_family = af;
-+ dst->u.v4.sin_port = 0;
-+ memcpy((char *)&dst->u.v4.sin_addr.s_addr, src, srclen);
-+ break;
-+ case AF_INET6:
-+ if (srclen != 16)
-+ return "IPv6 address must be exactly 16 bytes";
-+#if !defined(__KERNEL__)
-+ bzero(&dst->u.v6, sizeof(dst->u.v6));
-+#endif
-+ dst->u.v6.sin6_family = af;
-+ dst->u.v6.sin6_flowinfo = 0; /* unused */
-+ dst->u.v6.sin6_port = 0;
-+ memcpy((char *)&dst->u.v6.sin6_addr, src, srclen);
-+ break;
-+ default:
-+ return "unknown address family in initaddr";
-+ break;
-+ }
-+ return NULL;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipcomp.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,694 @@
-+/*
-+ * IPCOMP zlib interface code.
-+ * implementation of RFC 3173.
-+ *
-+ * Copyright (C) 2000 Svenning Soerensen <svenning@post5.tele.dk>
-+ * Copyright (C) 2000, 2001 Richard Guy Briggs <rgb@conscoop.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+/* SSS */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h>
-+#include <linux/netdevice.h>
-+#include <linux/ip.h>
-+#include <linux/skbuff.h>
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <asm/uaccess.h>
-+#include <asm/checksum.h>
-+
-+#include <openswan.h>
-+
-+#include <net/ip.h>
-+
-+#include "openswan/ipsec_kern24.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h" /* sysctl_ipsec_inbound_policy_check */
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipcomp.h"
-+#include "zlib/zlib.h"
-+#include "zlib/zutil.h"
-+
-+#include <openswan/pfkeyv2.h> /* SADB_X_CALG_DEFLATE */
-+
-+static
-+struct sk_buff *skb_copy_ipcomp(struct sk_buff *skb, int data_growth, int gfp_mask);
-+
-+static
-+voidpf my_zcalloc(voidpf opaque, uInt items, uInt size)
-+{
-+ return (voidpf) kmalloc(items*size, GFP_ATOMIC);
-+}
-+
-+static
-+void my_zfree(voidpf opaque, voidpf address)
-+{
-+ kfree(address);
-+}
-+
-+/*
-+ * We use this function because sometimes we want to pass a negative offset
-+ * into skb_put(), this does not work on 64bit platforms because long to
-+ * unsigned int casting.
-+ */
-+static inline unsigned char *
-+safe_skb_put(struct sk_buff *skb, int extend)
-+{
-+ unsigned char *ptr;
-+
-+ if (extend>0) {
-+ // increase the size of the packet
-+ ptr = skb_put(skb, extend);
-+ } else {
-+ // shrink the size of the packet
-+ ptr = skb_tail_pointer(skb);
-+ skb_trim (skb, skb->len + extend);
-+ }
-+
-+ return ptr;
-+}
-+
-+struct sk_buff *skb_compress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags)
-+{
-+ struct iphdr *iph;
-+ unsigned int iphlen, pyldsz, cpyldsz;
-+ unsigned char *buffer;
-+ z_stream zs;
-+ int zresult;
-+
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: .\n");
-+
-+ if(skb == NULL) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "passed in NULL skb, returning ERROR.\n");
-+ if(flags != NULL) {
-+ *flags |= IPCOMP_PARMERROR;
-+ }
-+ return skb;
-+ }
-+
-+ if(ips == NULL) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "passed in NULL ipsec_sa needed for cpi, returning ERROR.\n");
-+ if(flags) {
-+ *flags |= IPCOMP_PARMERROR;
-+ }
-+ return skb;
-+ }
-+
-+ if (flags == NULL) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "passed in NULL flags, returning ERROR.\n");
-+ ipsec_kfree_skb(skb);
-+ return NULL;
-+ }
-+
-+#ifdef NET_21
-+ iph = ip_hdr(skb);
-+#else /* NET_21 */
-+ iph = skb->ip_hdr;
-+#endif /* NET_21 */
-+
-+ switch (iph->protocol) {
-+ case IPPROTO_COMP:
-+ case IPPROTO_AH:
-+ case IPPROTO_ESP:
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "skipping compression of packet with ip protocol %d.\n",
-+ iph->protocol);
-+ *flags |= IPCOMP_UNCOMPRESSABLE;
-+ return skb;
-+ }
-+
-+ /* Don't compress packets already fragmented */
-+ if (iph->frag_off & __constant_htons(IP_MF | IP_OFFSET)) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "skipping compression of fragmented packet.\n");
-+ *flags |= IPCOMP_UNCOMPRESSABLE;
-+ return skb;
-+ }
-+
-+ iphlen = iph->ihl << 2;
-+ pyldsz = ntohs(iph->tot_len) - iphlen;
-+
-+ /* Don't compress less than 90 bytes (rfc 2394) */
-+ if (pyldsz < 90) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "skipping compression of tiny packet, len=%d.\n",
-+ pyldsz);
-+ *flags |= IPCOMP_UNCOMPRESSABLE;
-+ return skb;
-+ }
-+
-+ /* Adaptive decision */
-+ if (ips->ips_comp_adapt_skip) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "skipping compression: ips_comp_adapt_skip=%d.\n",
-+ ips->ips_comp_adapt_skip);
-+ ips->ips_comp_adapt_skip--;
-+ *flags |= IPCOMP_UNCOMPRESSABLE;
-+ return skb;
-+ }
-+
-+ zs.zalloc = my_zcalloc;
-+ zs.zfree = my_zfree;
-+ zs.opaque = 0;
-+
-+ /* We want to use deflateInit2 because we don't want the adler
-+ header. */
-+ zresult = deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -11,
-+ DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
-+ if (zresult != Z_OK) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_compress: "
-+ "deflateInit2() returned error %d (%s), "
-+ "skipping compression.\n",
-+ zresult,
-+ zs.msg ? zs.msg : zError(zresult));
-+ *flags |= IPCOMP_COMPRESSIONERROR;
-+ return skb;
-+ }
-+
-+
-+ /* Max output size. Result should be max this size.
-+ * Implementation specific tweak:
-+ * If it's not at least 32 bytes and 6.25% smaller than
-+ * the original packet, it's probably not worth wasting
-+ * the receiver's CPU cycles decompressing it.
-+ * Your mileage may vary.
-+ */
-+ cpyldsz = pyldsz - sizeof(struct ipcomphdr) - (pyldsz <= 512 ? 32 : pyldsz >> 4);
-+
-+ buffer = kmalloc(cpyldsz, GFP_ATOMIC);
-+ if (!buffer) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_compress: "
-+ "unable to kmalloc(%d, GFP_ATOMIC), "
-+ "skipping compression.\n",
-+ cpyldsz);
-+ *flags |= IPCOMP_COMPRESSIONERROR;
-+ deflateEnd(&zs);
-+ return skb;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) {
-+ __u8 *c;
-+
-+ c = (__u8*)iph + iphlen;
-+ ipsec_dmp_block("compress before", c, pyldsz);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ zs.next_in = (char *) iph + iphlen; /* start of payload */
-+ zs.avail_in = pyldsz;
-+ zs.next_out = buffer; /* start of compressed payload */
-+ zs.avail_out = cpyldsz;
-+
-+ /* Finish compression in one step */
-+ zresult = deflate(&zs, Z_FINISH);
-+
-+ /* Free all dynamically allocated buffers */
-+ deflateEnd(&zs);
-+ if (zresult != Z_STREAM_END) {
-+ *flags |= IPCOMP_UNCOMPRESSABLE;
-+ kfree(buffer);
-+
-+ /* Adjust adaptive counters */
-+ if (++(ips->ips_comp_adapt_tries) == IPCOMP_ADAPT_INITIAL_TRIES) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "first %d packets didn't compress, "
-+ "skipping next %d\n",
-+ IPCOMP_ADAPT_INITIAL_TRIES,
-+ IPCOMP_ADAPT_INITIAL_SKIP);
-+ ips->ips_comp_adapt_skip = IPCOMP_ADAPT_INITIAL_SKIP;
-+ }
-+ else if (ips->ips_comp_adapt_tries == IPCOMP_ADAPT_INITIAL_TRIES + IPCOMP_ADAPT_SUBSEQ_TRIES) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "next %d packets didn't compress, "
-+ "skipping next %d\n",
-+ IPCOMP_ADAPT_SUBSEQ_TRIES,
-+ IPCOMP_ADAPT_SUBSEQ_SKIP);
-+ ips->ips_comp_adapt_skip = IPCOMP_ADAPT_SUBSEQ_SKIP;
-+ ips->ips_comp_adapt_tries = IPCOMP_ADAPT_INITIAL_TRIES;
-+ }
-+
-+ return skb;
-+ }
-+
-+ /* resulting compressed size */
-+ cpyldsz -= zs.avail_out;
-+
-+ /* Insert IPCOMP header */
-+ ((struct ipcomphdr*) ((char*) iph + iphlen))->ipcomp_nh = iph->protocol;
-+ ((struct ipcomphdr*) ((char*) iph + iphlen))->ipcomp_flags = 0;
-+ /* use the bottom 16 bits of the spi for the cpi. The top 16 bits are
-+ for internal reference only. */
-+ ((struct ipcomphdr*) (((char*)iph) + iphlen))->ipcomp_cpi = htons((__u16)(ntohl(ips->ips_said.spi) & 0x0000ffff));
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_compress: "
-+ "spi=%08x, spi&0xffff=%04x, cpi=%04x, payload size: raw=%d, comp=%d.\n",
-+ ntohl(ips->ips_said.spi),
-+ ntohl(ips->ips_said.spi) & 0x0000ffff,
-+ ntohs(((struct ipcomphdr*)(((char*)iph)+iphlen))->ipcomp_cpi),
-+ pyldsz,
-+ cpyldsz);
-+
-+ /* Update IP header */
-+ iph->protocol = IPPROTO_COMP;
-+ iph->tot_len = htons(iphlen + sizeof(struct ipcomphdr) + cpyldsz);
-+#if 1 /* XXX checksum is done by ipsec_tunnel ? */
-+ iph->check = 0;
-+ iph->check = ip_fast_csum((char *) iph, iph->ihl);
-+#endif
-+
-+ /* Copy compressed payload */
-+ memcpy((char *) iph + iphlen + sizeof(struct ipcomphdr),
-+ buffer,
-+ cpyldsz);
-+ kfree(buffer);
-+
-+ /* Update skb length/tail by "unputting" the shrinkage */
-+ safe_skb_put (skb, cpyldsz + sizeof(struct ipcomphdr) - pyldsz);
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) {
-+ __u8 *c;
-+
-+ c = (__u8*)iph + iphlen + sizeof(struct ipcomphdr);
-+ ipsec_dmp_block("compress result", c, cpyldsz);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ ips->ips_comp_adapt_skip = 0;
-+ ips->ips_comp_adapt_tries = 0;
-+
-+ return skb;
-+}
-+
-+struct sk_buff *skb_decompress(struct sk_buff *skb, struct ipsec_sa *ips, unsigned int *flags)
-+{
-+ struct sk_buff *nskb = NULL;
-+
-+ /* original ip header */
-+ struct iphdr *oiph, *iph;
-+ unsigned int iphlen, pyldsz, cpyldsz;
-+ z_stream zs;
-+ int zresult;
-+
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_decompress: .\n");
-+
-+ if(!skb) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "passed in NULL skb, returning ERROR.\n");
-+ if (flags) *flags |= IPCOMP_PARMERROR;
-+ return skb;
-+ }
-+
-+ if(!ips && sysctl_ipsec_inbound_policy_check) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "passed in NULL ipsec_sa needed for comp alg, returning ERROR.\n");
-+ if (flags) *flags |= IPCOMP_PARMERROR;
-+ return skb;
-+ }
-+
-+ if (!flags) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "passed in NULL flags, returning ERROR.\n");
-+ ipsec_kfree_skb(skb);
-+ return NULL;
-+ }
-+
-+#ifdef NET_21
-+ oiph = ip_hdr(skb);
-+#else /* NET_21 */
-+ oiph = skb->ip_hdr;
-+#endif /* NET_21 */
-+
-+ iphlen = oiph->ihl << 2;
-+
-+ if (oiph->protocol != IPPROTO_COMP) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "called with non-IPCOMP packet (protocol=%d),"
-+ "skipping decompression.\n",
-+ oiph->protocol);
-+ *flags |= IPCOMP_PARMERROR;
-+ return skb;
-+ }
-+
-+ if ( (((struct ipcomphdr*)((char*) oiph + iphlen))->ipcomp_flags != 0)
-+ || ((((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_cpi
-+ != htons(SADB_X_CALG_DEFLATE))
-+ && sysctl_ipsec_inbound_policy_check
-+ && (!ips || (ips && (ips->ips_encalg != SADB_X_CALG_DEFLATE)))) ) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "called with incompatible IPCOMP packet (flags=%d, "
-+ "cpi=%d), ips-compalg=%d, skipping decompression.\n",
-+ ntohs(((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_flags),
-+ ntohs(((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_cpi),
-+ ips ? ips->ips_encalg : 0);
-+ *flags |= IPCOMP_PARMERROR;
-+
-+ return skb;
-+ }
-+
-+ if (ntohs(oiph->frag_off) & ~0x4000) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "called with fragmented IPCOMP packet, "
-+ "skipping decompression.\n");
-+ *flags |= IPCOMP_PARMERROR;
-+ return skb;
-+ }
-+
-+ /* original compressed payload size */
-+ cpyldsz = ntohs(oiph->tot_len) - iphlen - sizeof(struct ipcomphdr);
-+
-+ zs.zalloc = my_zcalloc;
-+ zs.zfree = my_zfree;
-+ zs.opaque = 0;
-+
-+ zs.next_in = (char *) oiph + iphlen + sizeof(struct ipcomphdr);
-+ zs.avail_in = cpyldsz;
-+
-+ /* Maybe we should be a bit conservative about memory
-+ requirements and use inflateInit2 */
-+ /* Beware, that this might make us unable to decompress packets
-+ from other implementations - HINT: check PGPnet source code */
-+ /* We want to use inflateInit2 because we don't want the adler
-+ header. */
-+ zresult = inflateInit2(&zs, -15);
-+ if (zresult != Z_OK) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "inflateInit2() returned error %d (%s), "
-+ "skipping decompression.\n",
-+ zresult,
-+ zs.msg ? zs.msg : zError(zresult));
-+ *flags |= IPCOMP_DECOMPRESSIONERROR;
-+
-+ return skb;
-+ }
-+
-+ /* We have no way of knowing the exact length of the resulting
-+ decompressed output before we have actually done the decompression.
-+ For now, we guess that the packet will not be bigger than the
-+ attached ipsec device's mtu or 16260, whichever is biggest.
-+ This may be wrong, since the sender's mtu may be bigger yet.
-+ XXX This must be dealt with later XXX
-+ */
-+
-+ /* max payload size */
-+ pyldsz = skb->dev ? (skb->dev->mtu < 16260 ? 16260 : skb->dev->mtu)
-+ : (65520 - iphlen);
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_decompress: "
-+ "max payload size: %d\n", pyldsz);
-+
-+ while (pyldsz > (cpyldsz + sizeof(struct ipcomphdr)) &&
-+ (nskb = skb_copy_ipcomp(skb,
-+ pyldsz - cpyldsz - sizeof(struct ipcomphdr),
-+ GFP_ATOMIC)) == NULL) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "unable to skb_copy_ipcomp(skb, %d, GFP_ATOMIC), "
-+ "trying with less payload size.\n",
-+ (int)(pyldsz - cpyldsz - sizeof(struct ipcomphdr)));
-+ pyldsz >>=1;
-+ }
-+
-+ if (!nskb) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "unable to allocate memory, dropping packet.\n");
-+ *flags |= IPCOMP_DECOMPRESSIONERROR;
-+ inflateEnd(&zs);
-+
-+ return skb;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) {
-+ __u8 *c;
-+
-+ c = (__u8*)oiph + iphlen + sizeof(struct ipcomphdr);
-+ ipsec_dmp_block("decompress before", c, cpyldsz);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+#ifdef NET_21
-+ iph = ip_hdr(nskb);
-+#else /* NET_21 */
-+ iph = nskb->ip_hdr;
-+#endif /* NET_21 */
-+ zs.next_out = (char *)iph + iphlen;
-+ zs.avail_out = pyldsz;
-+
-+ zresult = inflate(&zs, Z_SYNC_FLUSH);
-+
-+ /* work around a bug in zlib, which sometimes wants to taste an extra
-+ * byte when being used in the (undocumented) raw deflate mode.
-+ */
-+ if (zresult == Z_OK && !zs.avail_in && zs.avail_out) {
-+ __u8 zerostuff = 0;
-+
-+ zs.next_in = &zerostuff;
-+ zs.avail_in = 1;
-+ zresult = inflate(&zs, Z_FINISH);
-+ }
-+
-+ inflateEnd(&zs);
-+ if (zresult != Z_STREAM_END) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_error:skb_decompress: "
-+ "inflate() returned error %d (%s), "
-+ "skipping decompression.\n",
-+ zresult,
-+ zs.msg ? zs.msg : zError(zresult));
-+ *flags |= IPCOMP_DECOMPRESSIONERROR;
-+ ipsec_kfree_skb(nskb);
-+
-+ return skb;
-+ }
-+
-+ /* Update IP header */
-+ /* resulting decompressed size */
-+ pyldsz -= zs.avail_out;
-+ iph->tot_len = htons(iphlen + pyldsz);
-+ iph->protocol = ((struct ipcomphdr*) ((char*) oiph + iphlen))->ipcomp_nh;
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_decompress: "
-+ "spi=%08x, spi&0xffff=%04x, cpi=%04x, payload size: comp=%d, raw=%d, nh=%d.\n",
-+ ips ? ntohl(ips->ips_said.spi) : 0,
-+ ips ? ntohl(ips->ips_said.spi) & 0x0000ffff : 0,
-+ ntohs(((struct ipcomphdr*)(((char*)oiph)+iphlen))->ipcomp_cpi),
-+ cpyldsz,
-+ pyldsz,
-+ iph->protocol);
-+
-+#if 1 /* XXX checksum is done by ipsec_rcv ? */
-+ iph->check = 0;
-+ iph->check = ip_fast_csum((char*) iph, iph->ihl);
-+#endif
-+
-+ /* Update skb length/tail by "unputting" the unused data area */
-+ safe_skb_put(nskb, -zs.avail_out);
-+
-+ ipsec_kfree_skb(skb);
-+
-+ if (iph->protocol == IPPROTO_COMP)
-+ {
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(sysctl_ipsec_debug_ipcomp)
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_decompress: "
-+ "Eh? inner packet is also compressed, dropping.\n");
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ ipsec_kfree_skb(nskb);
-+ return NULL;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(sysctl_ipsec_debug_ipcomp && sysctl_ipsec_debug_verbose) {
-+ __u8 *c;
-+
-+ c = (__u8*)iph + iphlen;
-+ ipsec_dmp_block("decompress result", c, pyldsz);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ return nskb;
-+}
-+
-+
-+/* this is derived from skb_copy() in linux 2.2.14 */
-+/* May be incompatible with other kernel versions!! */
-+static
-+struct sk_buff *skb_copy_ipcomp(struct sk_buff *skb, int data_growth, int gfp_mask)
-+{
-+ struct sk_buff *n;
-+ struct iphdr *iph;
-+ unsigned long offset;
-+ unsigned int iphlen;
-+
-+ if(!skb) {
-+ KLIPS_PRINT(sysctl_ipsec_debug_ipcomp,
-+ "klips_debug:skb_copy_ipcomp: "
-+ "passed in NULL skb, returning NULL.\n");
-+ return NULL;
-+ }
-+
-+ /*
-+ * Allocate the copy buffer
-+ */
-+
-+#ifdef NET_21
-+ iph = ip_hdr(skb);
-+#else /* NET_21 */
-+ iph = skb->ip_hdr;
-+#endif /* NET_21 */
-+ if (!iph) return NULL;
-+ iphlen = iph->ihl << 2;
-+
-+ n=alloc_skb(skb_end_pointer(skb) - skb->head + data_growth, gfp_mask);
-+ if(n==NULL)
-+ return NULL;
-+
-+ /*
-+ * Shift between the two data areas in bytes
-+ */
-+
-+ offset=n->head-skb->head;
-+
-+ /* Set the data pointer */
-+ skb_reserve(n,skb->data-skb->head);
-+ /* Set the tail pointer and length */
-+ safe_skb_put(n,skb->len+data_growth);
-+ /* Copy the bytes up to and including the ip header */
-+ memcpy(n->head,
-+ skb->head,
-+ ((char *)iph - (char *)skb->head) + iphlen);
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
-+ n->list=NULL;
-+#endif
-+ n->next=NULL;
-+ n->prev=NULL;
-+ n->sk=NULL;
-+ n->dev=skb->dev;
-+ if (skb_transport_header(skb))
-+ skb_set_transport_header(n, offset);
-+ n->protocol=skb->protocol;
-+#ifdef NET_21
-+ n->csum = 0;
-+ n->priority=skb->priority;
-+ n->dst=dst_clone(skb->dst);
-+ skb_set_network_header(n, offset);
-+#ifndef NETDEV_23
-+ n->is_clone=0;
-+#endif /* NETDEV_23 */
-+ atomic_set(&n->users, 1);
-+ n->destructor = NULL;
-+#ifdef HAVE_SOCK_SECURITY
-+ n->security=skb->security;
-+#endif
-+ memcpy(n->cb, skb->cb, sizeof(skb->cb));
-+#ifdef CONFIG_IP_FIREWALL
-+ n->fwmark = skb->fwmark;
-+#endif
-+#else /* NET_21 */
-+ n->link3=NULL;
-+ n->when=skb->when;
-+ n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset);
-+ n->saddr=skb->saddr;
-+ n->daddr=skb->daddr;
-+ n->raddr=skb->raddr;
-+ n->seq=skb->seq;
-+ n->end_seq=skb->end_seq;
-+ n->ack_seq=skb->ack_seq;
-+ n->acked=skb->acked;
-+ n->free=1;
-+ n->arp=skb->arp;
-+ n->tries=0;
-+ n->lock=0;
-+ n->users=0;
-+ memcpy(n->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
-+#endif /* NET_21 */
-+ if (skb_mac_header(skb))
-+ skb_set_mac_header(n, offset);
-+#ifndef NETDEV_23
-+ n->used=skb->used;
-+#endif /* !NETDEV_23 */
-+ n->pkt_type=skb->pkt_type;
-+#ifndef NETDEV_23
-+ n->pkt_bridged=skb->pkt_bridged;
-+#endif /* NETDEV_23 */
-+ n->ip_summed=0;
-+#ifdef HAVE_TSTAMP
-+ n->tstamp = skb->tstamp;
-+#else
-+ n->stamp=skb->stamp;
-+#endif
-+#ifndef NETDEV_23 /* this seems to have been removed in 2.4 */
-+#if defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE)
-+ n->shapelatency=skb->shapelatency; /* Latency on frame */
-+ n->shapeclock=skb->shapeclock; /* Time it should go out */
-+ n->shapelen=skb->shapelen; /* Frame length in clocks */
-+ n->shapestamp=skb->shapestamp; /* Stamp for shaper */
-+ n->shapepend=skb->shapepend; /* Pending */
-+#endif /* defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE) */
-+#endif /* NETDEV_23 */
-+
-+ return n;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_ah.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,369 @@
-+/*
-+ * processing code for AH
-+ * Copyright (C) 2003-2004 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+#include <net/protocol.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_auth.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_proto.h"
-+
-+#include "ipsec_ocf.h"
-+
-+__u32 zeroes[AH_AMAX];
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_ah_checks(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb)
-+{
-+ int ahminlen;
-+
-+ ahminlen = irs->hard_header_len + sizeof(struct iphdr);
-+
-+ /* take care not to deref this pointer until we check the minlen though */
-+ irs->protostuff.ahstuff.ahp = (struct ahhdr *)skb_transport_header(skb);
-+
-+ if((skb->len < ahminlen+sizeof(struct ahhdr)) ||
-+ (skb->len < ahminlen+(irs->protostuff.ahstuff.ahp->ah_hl << 2))) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_INAU,
-+ "klips_debug:ipsec_rcv: "
-+ "runt ah packet of skb->len=%d received from %s, dropped.\n",
-+ skb->len,
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ irs->said.spi = irs->protostuff.ahstuff.ahp->ah_spi;
-+
-+ /* XXX we only support the one 12-byte authenticator for now */
-+ if(irs->protostuff.ahstuff.ahp->ah_hl != ((AHHMAC_HASHLEN+AHHMAC_RPLLEN) >> 2)) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_INAU,
-+ "klips_debug:ipsec_rcv: "
-+ "bad authenticator length %ld, expected %lu from %s.\n",
-+ (long)(irs->protostuff.ahstuff.ahp->ah_hl << 2),
-+ (unsigned long) sizeof(struct ahhdr),
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_ah_setup_auth(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb,
-+ __u32 *replay,
-+ unsigned char **authenticator)
-+{
-+ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp;
-+
-+ *replay = ntohl(ahp->ah_rpl);
-+ *authenticator = ahp->ah_data;
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_ah_authcalc(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb)
-+{
-+ struct auth_alg *aa;
-+ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp;
-+ union {
-+ MD5_CTX md5;
-+ SHA1_CTX sha1;
-+ } tctx;
-+ struct iphdr ipo;
-+ int ahhlen;
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (irs->ipsp->ocf_in_use)
-+ return(ipsec_ocf_rcv(irs));
-+#endif
-+
-+ aa = irs->authfuncs;
-+
-+ /* copy the initialized keying material */
-+ memcpy(&tctx, irs->ictx, irs->ictx_len);
-+
-+ ipo = *irs->ipp;
-+ ipo.tos = 0; /* mutable RFC 2402 3.3.3.1.1.1 */
-+ ipo.frag_off = 0;
-+ ipo.ttl = 0;
-+ ipo.check = 0;
-+
-+
-+ /* do the sanitized header */
-+ (*aa->update)((void*)&tctx, (caddr_t)&ipo, sizeof(struct iphdr));
-+
-+ /* XXX we didn't do the options here! */
-+
-+ /* now do the AH header itself */
-+ ahhlen = AH_BASIC_LEN + (ahp->ah_hl << 2);
-+ (*aa->update)((void*)&tctx, (caddr_t)ahp, ahhlen - AHHMAC_HASHLEN);
-+
-+ /* now, do some zeroes */
-+ (*aa->update)((void*)&tctx, (caddr_t)zeroes, AHHMAC_HASHLEN);
-+
-+ /* finally, do the packet contents themselves */
-+ (*aa->update)((void*)&tctx,
-+ (caddr_t)skb_transport_header(skb) + ahhlen,
-+ skb->len - ahhlen);
-+
-+ (*aa->final)(irs->hash, (void *)&tctx);
-+
-+ memcpy(&tctx, irs->octx, irs->octx_len);
-+
-+ (*aa->update)((void *)&tctx, irs->hash, aa->hashlen);
-+ (*aa->final)(irs->hash, (void *)&tctx);
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_ah_decap(struct ipsec_rcv_state *irs)
-+{
-+ struct ahhdr *ahp = irs->protostuff.ahstuff.ahp;
-+ struct sk_buff *skb;
-+ int ahhlen;
-+
-+ skb=irs->skb;
-+
-+ ahhlen = AH_BASIC_LEN + (ahp->ah_hl << 2);
-+
-+ irs->ipp->tot_len = htons(ntohs(irs->ipp->tot_len) - ahhlen);
-+ irs->next_header = ahp->ah_nh;
-+
-+ /*
-+ * move the IP header forward by the size of the AH header, which
-+ * will remove the the AH header from the packet.
-+ */
-+ memmove((void *)(skb_network_header(skb) + ahhlen),
-+ (void *)(skb_network_header(skb)), irs->iphlen);
-+
-+ ipsec_rcv_dmp("ah postmove", skb->data, skb->len);
-+
-+ /* skb_pull below, will move up by ahhlen */
-+
-+ /* XXX not clear how this can happen, as the message indicates */
-+ if(skb->len < ahhlen) {
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_rcv: "
-+ "tried to skb_pull ahhlen=%d, %d available. This should never happen, please report.\n",
-+ ahhlen,
-+ (int)(skb->len));
-+ return IPSEC_RCV_DECAPFAIL;
-+ }
-+ skb_pull(skb, ahhlen);
-+
-+ skb_set_network_header(skb, ahhlen);
-+ irs->ipp = ip_hdr(skb);
-+
-+ ipsec_rcv_dmp("ah postpull", (void *)ip_hdr(skb), skb->len);
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_ah_setup(struct ipsec_xmit_state *ixs)
-+{
-+ struct iphdr ipo;
-+ struct ahhdr *ahp;
-+#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+ __u8 hash[AH_AMAX];
-+ union {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ MD5_CTX md5;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ SHA1_CTX sha1;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ } tctx;
-+#endif
-+ unsigned char *dat = (unsigned char *)ixs->iph;
-+
-+ ahp = (struct ahhdr *)(dat + ixs->iphlen);
-+ ahp->ah_spi = ixs->ipsp->ips_said.spi;
-+ ahp->ah_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq));
-+ ahp->ah_rv = 0;
-+ ahp->ah_nh = ixs->iph->protocol;
-+ ahp->ah_hl = (sizeof(struct ahhdr) >> 2) - sizeof(__u64)/sizeof(__u32);
-+ ixs->iph->protocol = IPPROTO_AH;
-+ ipsec_xmit_dmp("ahp", (char*)ahp, sizeof(*ahp));
-+
-+ ipo = *ixs->iph;
-+ ipo.tos = 0;
-+ ipo.frag_off = 0;
-+ ipo.ttl = 0;
-+ ipo.check = 0;
-+ ipsec_xmit_dmp("ipo", (char*)&ipo, sizeof(ipo));
-+
-+ switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5:
-+ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+ ipsec_xmit_dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, (unsigned char *)&ipo, sizeof (struct iphdr));
-+ ipsec_xmit_dmp("ictx+ipo", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, (unsigned char *)ahp,
-+ sizeof(struct ahhdr) - sizeof(ahp->ah_data));
-+ ipsec_xmit_dmp("ictx+ahp", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, (unsigned char *)zeroes, AHHMAC_HASHLEN);
-+ ipsec_xmit_dmp("ictx+zeroes", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, dat + ixs->iphlen + sizeof(struct ahhdr),
-+ ixs->skb->len - ixs->iphlen - sizeof(struct ahhdr));
-+ ipsec_xmit_dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Final(hash, &tctx.md5);
-+ ipsec_xmit_dmp("ictx hash", (char*)&hash, sizeof(hash));
-+ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+ ipsec_xmit_dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, hash, AHMD596_ALEN);
-+ ipsec_xmit_dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Final(hash, &tctx.md5);
-+ ipsec_xmit_dmp("octx hash", (char*)&hash, sizeof(hash));
-+
-+ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN);
-+
-+ /* paranoid */
-+ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5));
-+ memset((caddr_t)hash, 0, sizeof(*hash));
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA:
-+ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+ SHA1Update(&tctx.sha1, (unsigned char *)&ipo, sizeof (struct iphdr));
-+ SHA1Update(&tctx.sha1, (unsigned char *)ahp, sizeof(struct ahhdr) - sizeof(ahp->ah_data));
-+ SHA1Update(&tctx.sha1, (unsigned char *)zeroes, AHHMAC_HASHLEN);
-+ SHA1Update(&tctx.sha1, dat + ixs->iphlen + sizeof(struct ahhdr),
-+ ixs->skb->len - ixs->iphlen - sizeof(struct ahhdr));
-+ SHA1Final(hash, &tctx.sha1);
-+ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN);
-+ SHA1Final(hash, &tctx.sha1);
-+
-+ memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN);
-+
-+ /* paranoid */
-+ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1));
-+ memset((caddr_t)hash, 0, sizeof(*hash));
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_AH_BADALG;
-+ }
-+#ifdef NET_21
-+ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ahp));
-+#endif /* NET_21 */
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+struct xform_functions ah_xform_funcs[]={
-+ {
-+ protocol: IPPROTO_AH,
-+ rcv_checks: ipsec_rcv_ah_checks,
-+ rcv_setup_auth: ipsec_rcv_ah_setup_auth,
-+ rcv_calc_auth: ipsec_rcv_ah_authcalc,
-+ rcv_decrypt: ipsec_rcv_ah_decap,
-+
-+ xmit_setup: ipsec_xmit_ah_setup,
-+ xmit_headroom: sizeof(struct ahhdr),
-+ xmit_needtailroom: 0,
-+ },
-+};
-+
-+
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+#ifdef NET_26
-+struct inet_protocol ah_protocol = {
-+ .handler = ipsec_rcv,
-+ .no_policy = 1,
-+};
-+#else
-+struct inet_protocol ah_protocol =
-+{
-+ ipsec_rcv, /* AH handler */
-+ NULL, /* TUNNEL error control */
-+#ifdef NETDEV_25
-+ 1, /* no policy */
-+#else
-+ 0, /* next */
-+ IPPROTO_AH, /* protocol ID */
-+ 0, /* copy */
-+ NULL, /* data */
-+ "AH" /* name */
-+#endif
-+};
-+#endif /* NET_26 */
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_alg.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1045 @@
-+/*
-+ * Modular extensions service and registration functions
-+ *
-+ * Author: JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ *
-+ * Version: 0.8.1
-+ *
-+ * ipsec_alg.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <linux/socket.h>
-+#include <linux/in.h>
-+#include <linux/types.h>
-+#include <linux/string.h> /* memcmp() */
-+#include <linux/random.h> /* get_random_bytes() */
-+#include <linux/errno.h> /* error codes */
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include "openswan/ipsec_param.h"
-+#include <openswan.h>
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#if defined(CONFIG_KLIPS_ESP) || defined(CONFIG_KLIPS_AH)
-+# include "openswan/ipsec_ah.h"
-+#endif /* defined(CONFIG_KLIPS_ESP) || defined(CONFIG_KLIPS_AH) */
-+#ifdef CONFIG_KLIPS_ESP
-+# include "openswan/ipsec_esp.h"
-+#endif /* !CONFIG_KLIPS_ESP */
-+#ifdef CONFIG_KLIPS_IPCOMP
-+# include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_COMP */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_alg.h"
-+#include "openswan/ipsec_proto.h"
-+
-+#if K_SADB_EALG_MAX < 255
-+#warning Compiling with limited ESP support ( K_SADB_EALG_MAX < 256 )
-+#endif
-+
-+static rwlock_t ipsec_alg_lock = RW_LOCK_UNLOCKED;
-+#define IPSEC_ALG_HASHSZ 16 /* must be power of 2, even 2^0=1 */
-+static struct list_head ipsec_alg_hash_table[IPSEC_ALG_HASHSZ];
-+
-+/* Old gcc's will fail here */
-+#define barf_out(fmt, args...) do { struct ipsec_alg *ixtc = (struct ipsec_alg *)ixt; printk(KERN_ERR "%s: (%s) " fmt, __FUNCTION__, ixtc->ixt_name , ## args) \
-+ ; goto out; } while(0)
-+
-+#ifdef NET_26
-+/*
-+ * Must be already protected by lock
-+ */
-+static void __ipsec_alg_usage_inc(struct ipsec_alg *ixt)
-+{
-+#ifdef MODULE
-+ if (ixt->ixt_module)
-+ try_module_get(ixt->ixt_module);
-+#endif
-+ atomic_inc(&ixt->ixt_refcnt);
-+}
-+static void __ipsec_alg_usage_dec(struct ipsec_alg *ixt) {
-+ atomic_dec(&ixt->ixt_refcnt);
-+#ifdef MODULE
-+ if (ixt->ixt_module)
-+ module_put(ixt->ixt_module);
-+#endif
-+}
-+
-+#else
-+
-+/*
-+ * Must be already protected by lock
-+ */
-+static void __ipsec_alg_usage_inc(struct ipsec_alg *ixt) {
-+#ifdef MODULE
-+ if (ixt->ixt_module) {
-+ __MOD_INC_USE_COUNT(ixt->ixt_module);
-+ }
-+#endif
-+ atomic_inc(&ixt->ixt_refcnt);
-+}
-+static void __ipsec_alg_usage_dec(struct ipsec_alg *ixt) {
-+ atomic_dec(&ixt->ixt_refcnt);
-+#ifdef MODULE
-+ if (ixt->ixt_module)
-+ __MOD_DEC_USE_COUNT(ixt->ixt_module);
-+#endif
-+}
-+#endif
-+
-+/*
-+ * simple hash function, optimized for 0-hash (1 list) special
-+ * case
-+ */
-+#if IPSEC_ALG_HASHSZ > 1
-+static inline unsigned ipsec_alg_hashfn(int alg_type, int alg_id) {
-+ return ((alg_type^alg_id)&(IPSEC_ALG_HASHSZ-1));
-+}
-+#else
-+#define ipsec_alg_hashfn(x,y) (0)
-+#endif
-+
-+/*****************************************************************
-+ *
-+ * INTERNAL table handling: insert, delete, find
-+ *
-+ *****************************************************************/
-+
-+/*
-+ * hash table initialization, called from ipsec_alg_init()
-+ */
-+static void ipsec_alg_hash_init(void) {
-+ struct list_head *head = ipsec_alg_hash_table;
-+ int i = IPSEC_ALG_HASHSZ;
-+ do {
-+ INIT_LIST_HEAD(head);
-+ head++;
-+ i--;
-+ } while (i);
-+}
-+/*
-+ * hash list lookup by {alg_type, alg_id} and table head,
-+ * must be already protected by lock
-+ */
-+static struct ipsec_alg *__ipsec_alg_find(unsigned alg_type, unsigned alg_id, struct list_head * head) {
-+ struct list_head *p;
-+ struct ipsec_alg *ixt=NULL;
-+ for (p=head->next; p!=head; p=p->next) {
-+ ixt = list_entry(p, struct ipsec_alg, ixt_list);
-+ if (ixt->ixt_alg_type == alg_type && ixt->ixt_alg_id==alg_id) {
-+ goto out;
-+ }
-+ }
-+ ixt=NULL;
-+out:
-+ return ixt;
-+}
-+/*
-+ * inserts (in front) a new entry in hash table,
-+ * called from ipsec_alg_register() when new algorithm is registered.
-+ */
-+static int ipsec_alg_insert(struct ipsec_alg *ixt) {
-+ int ret=-EINVAL;
-+ unsigned hashval=ipsec_alg_hashfn(ixt->ixt_alg_type, ixt->ixt_alg_id);
-+ struct list_head *head= ipsec_alg_hash_table + hashval;
-+ struct ipsec_alg *ixt_cur;
-+
-+ /* new element must be virgin ... */
-+ if (ixt->ixt_list.next != &ixt->ixt_list ||
-+ ixt->ixt_list.prev != &ixt->ixt_list) {
-+ printk(KERN_ERR "ipsec_alg_insert: ixt object \"%s\" "
-+ "list head not initialized\n",
-+ ixt->ixt_name);
-+ return ret;
-+ }
-+ write_lock_bh(&ipsec_alg_lock);
-+
-+ ixt_cur = __ipsec_alg_find(ixt->ixt_alg_type, ixt->ixt_alg_id, head);
-+
-+ /* if previous (current) ipsec_alg found check excl flag of _anyone_ */
-+ if (ixt_cur
-+ && ((ixt->ixt_state|ixt_cur->ixt_state) & IPSEC_ALG_ST_EXCL)) {
-+ barf_out("ipsec_alg for alg_type=%d, alg_id=%d already exist. "
-+ "Not loaded (ret=%d).\n",
-+ ixt->ixt_alg_type,
-+ ixt->ixt_alg_id, ret=-EEXIST);
-+ }
-+ list_add(&ixt->ixt_list, head);
-+ ixt->ixt_state |= IPSEC_ALG_ST_REGISTERED;
-+ ret=0;
-+out:
-+ write_unlock_bh(&ipsec_alg_lock);
-+ return ret;
-+}
-+
-+/*
-+ * deletes an existing entry in hash table,
-+ * called from ipsec_alg_unregister() when algorithm is unregistered.
-+ */
-+static int ipsec_alg_delete(struct ipsec_alg *ixt) {
-+ write_lock_bh(&ipsec_alg_lock);
-+ list_del(&ixt->ixt_list);
-+ write_unlock_bh(&ipsec_alg_lock);
-+ return 0;
-+}
-+
-+/*
-+ * here @user context (read-only when @kernel bh context)
-+ * -> no bh disabling
-+ *
-+ * called from ipsec_sa_init() -> ipsec_alg_sa_init()
-+ */
-+static struct ipsec_alg *ipsec_alg_get(int alg_type, int alg_id)
-+{
-+ unsigned hashval=ipsec_alg_hashfn(alg_type, alg_id);
-+ struct list_head *head= ipsec_alg_hash_table + hashval;
-+ struct ipsec_alg *ixt;
-+
-+ read_lock(&ipsec_alg_lock);
-+ ixt=__ipsec_alg_find(alg_type, alg_id, head);
-+ if (ixt) __ipsec_alg_usage_inc(ixt);
-+ read_unlock(&ipsec_alg_lock);
-+
-+ return ixt;
-+}
-+
-+static void ipsec_alg_put(struct ipsec_alg *ixt) {
-+ __ipsec_alg_usage_dec((struct ipsec_alg *)ixt);
-+}
-+
-+/*****************************************************************
-+ *
-+ * INTERFACE for ENC services: key creation, encrypt function
-+ *
-+ *****************************************************************/
-+
-+/*
-+ * main encrypt service entry point
-+ * called from ipsec_rcv() with encrypt=IPSEC_ALG_DECRYPT and
-+ * ipsec_tunnel_start_xmit with encrypt=IPSEC_ALG_ENCRYPT
-+ */
-+int ipsec_alg_esp_encrypt(struct ipsec_sa *sa_p, __u8 * idat,
-+ int ilen, __u8 * iv, int encrypt)
-+{
-+ int ret;
-+ struct ipsec_alg_enc *ixt_e=sa_p->ips_alg_enc;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ int debug_flag = (encrypt==IPSEC_ALG_ENCRYPT ?
-+ debug_tunnel : debug_rcv);
-+#endif
-+
-+ KLIPS_PRINT(debug_flag,
-+ "klips_debug:ipsec_alg_esp_encrypt: "
-+ "entering with encalg=%d, ixt_e=%p\n",
-+ sa_p->ips_encalg, ixt_e);
-+ if (ixt_e == NULL) {
-+ KLIPS_ERROR(debug_flag,
-+ "klips_debug:ipsec_alg_esp_encrypt: "
-+ "NULL ipsec_alg_enc object\n");
-+ return -1;
-+ }
-+ KLIPS_PRINT(debug_flag,
-+ "klips_debug:ipsec_alg_esp_encrypt: "
-+ "calling cbc_encrypt encalg=%d "
-+ "ips_key_e=%p idat=%p ilen=%d iv=%p, encrypt=%d\n",
-+ sa_p->ips_encalg,
-+ sa_p->ips_key_e, idat, ilen, iv, encrypt);
-+ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, sa_p->ips_key_e, idat,
-+ ilen, iv, encrypt);
-+ KLIPS_PRINT(debug_flag,
-+ "klips_debug:ipsec_alg_esp_encrypt: "
-+ "returned ret=%d\n",
-+ ret);
-+ return ret;
-+}
-+
-+/*
-+ * encryption key context creation function
-+ * called from pfkey_v2_parser.c:pfkey_ips_init()
-+ */
-+int ipsec_alg_enc_key_create(struct ipsec_sa *sa_p) {
-+ int ret=-EINVAL;
-+ int keyminbits, keymaxbits;
-+ caddr_t ekp;
-+ struct ipsec_alg_enc *ixt_e=sa_p->ips_alg_enc;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_enc_key_create: "
-+ "entering with encalg=%d ixt_e=%p\n",
-+ sa_p->ips_encalg, ixt_e);
-+ if (!ixt_e) {
-+ KLIPS_ERROR(debug_pfkey,
-+ "klips_debug:ipsec_alg_enc_key_create: "
-+ "NULL ipsec_alg_enc object\n");
-+ return -EPROTO;
-+ }
-+
-+ /*
-+ * grRRR... DES 7bits jurassic stuff ... f*ckk --jjo
-+ */
-+ switch(ixt_e->ixt_common.ixt_support.ias_id) {
-+ case ESP_3DES:
-+ keyminbits=keymaxbits=192;break;
-+ case ESP_DES:
-+ keyminbits=keymaxbits=64;break;
-+ default:
-+ keyminbits=ixt_e->ixt_common.ixt_support.ias_keyminbits;
-+ keymaxbits=ixt_e->ixt_common.ixt_support.ias_keymaxbits;
-+ }
-+ if(sa_p->ips_key_bits_e<keyminbits ||
-+ sa_p->ips_key_bits_e>keymaxbits) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_enc_key_create: "
-+ "incorrect encryption key size for id=%d: %d bits -- "
-+ "must be between %d,%d bits\n" /*octets (bytes)\n"*/,
-+ ixt_e->ixt_common.ixt_support.ias_id,
-+ sa_p->ips_key_bits_e, keyminbits, keymaxbits);
-+ ret=-EINVAL;
-+ goto ixt_out;
-+ }
-+ /* save encryption key pointer */
-+ ekp = sa_p->ips_key_e;
-+
-+
-+ if (ixt_e->ixt_e_new_key) {
-+ sa_p->ips_key_e = ixt_e->ixt_e_new_key(ixt_e,
-+ ekp, sa_p->ips_key_bits_e/8);
-+ ret = (sa_p->ips_key_e)? 0 : -EINVAL;
-+ } else {
-+ if((sa_p->ips_key_e = (caddr_t)
-+ kmalloc((sa_p->ips_key_e_size = ixt_e->ixt_e_ctx_size),
-+ GFP_ATOMIC)) == NULL) {
-+ ret=-ENOMEM;
-+ goto ixt_out;
-+ }
-+ /* zero-out key_e */
-+ memset(sa_p->ips_key_e, 0, sa_p->ips_key_e_size);
-+
-+ /* I cast here to allow more decoupling in alg module */
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_enc_key_create: about to call:"
-+ "set_key(key_e=%p, ekp=%p, key_size=%d)\n",
-+ (caddr_t)sa_p->ips_key_e, ekp, sa_p->ips_key_bits_e/8);
-+ ret = ixt_e->ixt_e_set_key(ixt_e, (caddr_t)sa_p->ips_key_e, ekp, sa_p->ips_key_bits_e/8);
-+ }
-+ /* paranoid */
-+ memset(ekp, 0, sa_p->ips_key_bits_e/8);
-+ kfree(ekp);
-+ixt_out:
-+ return ret;
-+}
-+
-+/***************************************************************
-+ *
-+ * INTERFACE for AUTH services: key creation, hash functions
-+ *
-+ ***************************************************************/
-+
-+/*
-+ * auth key context creation function
-+ * called from pfkey_v2_parser.c:pfkey_ips_init()
-+ */
-+int ipsec_alg_auth_key_create(struct ipsec_sa *sa_p) {
-+ int ret=-EINVAL;
-+ struct ipsec_alg_auth *ixt_a=sa_p->ips_alg_auth;
-+ int keyminbits, keymaxbits;
-+ unsigned char *akp;
-+ unsigned int aks;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_auth_key_create: "
-+ "entering with authalg=%d ixt_a=%p\n",
-+ sa_p->ips_authalg, ixt_a);
-+ if (!ixt_a) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_auth_key_create: "
-+ "NULL ipsec_alg_auth object\n");
-+ return -EPROTO;
-+ }
-+ keyminbits=ixt_a->ixt_common.ixt_support.ias_keyminbits;
-+ keymaxbits=ixt_a->ixt_common.ixt_support.ias_keymaxbits;
-+ if(sa_p->ips_key_bits_a<keyminbits || sa_p->ips_key_bits_a>keymaxbits) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_alg_auth_key_create: incorrect auth"
-+ "key size: %d bits -- must be between %d,%d bits\n"/*octets (bytes)\n"*/,
-+ sa_p->ips_key_bits_a, keyminbits, keymaxbits);
-+ ret=-EINVAL;
-+ goto ixt_out;
-+ }
-+ /* save auth key pointer */
-+ sa_p->ips_auth_bits = ixt_a->ixt_a_keylen * 8; /* XXX XXX */
-+ akp = sa_p->ips_key_a;
-+ aks = sa_p->ips_key_a_size;
-+
-+ /* will hold: 2 ctx and a blocksize buffer: kb */
-+ sa_p->ips_key_a_size = ixt_a->ixt_a_ctx_size;
-+ if((sa_p->ips_key_a =
-+ (caddr_t) kmalloc(sa_p->ips_key_a_size, GFP_ATOMIC)) == NULL) {
-+ ret=-ENOMEM;
-+ goto ixt_out;
-+ }
-+ ixt_a->ixt_a_hmac_set_key(ixt_a, sa_p->ips_key_a, akp, sa_p->ips_key_bits_a/8); /* XXX XXX */
-+ ret=0;
-+ memset(akp, 0, aks);
-+ kfree(akp);
-+
-+ixt_out:
-+ return ret;
-+}
-+
-+
-+int ipsec_alg_sa_esp_hash(const struct ipsec_sa *sa_p, const __u8 *espp,
-+ int len, __u8 *hash, int hashlen)
-+{
-+ struct ipsec_alg_auth *ixt_a=sa_p->ips_alg_auth;
-+ if (!ixt_a) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_sa_esp_hash: "
-+ "NULL ipsec_alg_auth object\n");
-+ return -EPROTO;
-+ }
-+ KLIPS_PRINT(debug_tunnel|debug_rcv,
-+ "klips_debug:ipsec_sa_esp_hash: "
-+ "hashing %p (%d bytes) to %p (%d bytes)\n",
-+ espp, len,
-+ hash, hashlen);
-+ ixt_a->ixt_a_hmac_hash(ixt_a,
-+ sa_p->ips_key_a,
-+ espp, len,
-+ hash, hashlen);
-+ return 0;
-+}
-+
-+/***************************************************************
-+ *
-+ * INTERFACE for module loading,testing, and unloading
-+ *
-+ ***************************************************************/
-+
-+/* validation for registering (enc) module */
-+static int check_enc(struct ipsec_alg_enc *ixt)
-+{
-+ int ret=-EINVAL;
-+ if (ixt->ixt_common.ixt_blocksize==0) /* || ixt->ixt_common.ixt_blocksize%2) need for ESP_NULL */
-+ barf_out(KERN_ERR "invalid blocksize=%d\n", ixt->ixt_common.ixt_blocksize);
-+ if (ixt->ixt_common.ixt_support.ias_keyminbits==0
-+ && ixt->ixt_common.ixt_support.ias_keymaxbits==0
-+ && ixt->ixt_e_keylen==0)
-+ goto zero_key_ok;
-+
-+ if (ixt->ixt_common.ixt_support.ias_keyminbits==0)
-+ barf_out(KERN_ERR "invalid keyminbits=%d\n", ixt->ixt_common.ixt_support.ias_keyminbits);
-+
-+ if (ixt->ixt_common.ixt_support.ias_keymaxbits==0)
-+ barf_out(KERN_ERR "invalid keymaxbits=%d\n", ixt->ixt_common.ixt_support.ias_keymaxbits);
-+
-+ if (ixt->ixt_e_keylen==0)
-+ barf_out(KERN_ERR "invalid keysize=%d\n", ixt->ixt_e_keylen);
-+
-+zero_key_ok:
-+ if (ixt->ixt_e_ctx_size==0 && ixt->ixt_e_new_key == NULL)
-+ barf_out(KERN_ERR "invalid key_e_size=%d and ixt_e_new_key=NULL\n", ixt->ixt_e_ctx_size);
-+ if (ixt->ixt_e_cbc_encrypt==NULL)
-+ barf_out(KERN_ERR "e_cbc_encrypt() must be not NULL\n");
-+ ret=0;
-+out:
-+ return ret;
-+}
-+
-+/* validation for registering (auth) module */
-+static int check_auth(struct ipsec_alg_auth *ixt)
-+{
-+ int ret=-EINVAL;
-+ if (ixt->ixt_common.ixt_support.ias_id==0 || ixt->ixt_common.ixt_support.ias_id > K_SADB_AALG_MAX)
-+ barf_out("invalid alg_id=%d > %d (K_SADB_AALG_MAX)\n",
-+ ixt->ixt_common.ixt_support.ias_id, K_SADB_AALG_MAX);
-+
-+ if (ixt->ixt_common.ixt_blocksize==0
-+ || ixt->ixt_common.ixt_blocksize%2)
-+ barf_out(KERN_ERR "invalid blocksize=%d\n",
-+ ixt->ixt_common.ixt_blocksize);
-+
-+ if (ixt->ixt_common.ixt_blocksize>AH_BLKLEN_MAX)
-+ barf_out(KERN_ERR "sorry blocksize=%d > %d. "
-+ "Please increase AH_BLKLEN_MAX and recompile\n",
-+ ixt->ixt_common.ixt_blocksize,
-+ AH_BLKLEN_MAX);
-+ if (ixt->ixt_common.ixt_support.ias_keyminbits==0 && ixt->ixt_common.ixt_support.ias_keymaxbits==0 && ixt->ixt_a_keylen==0)
-+ goto zero_key_ok;
-+ if (ixt->ixt_common.ixt_support.ias_keyminbits==0)
-+ barf_out(KERN_ERR "invalid keyminbits=%d\n", ixt->ixt_common.ixt_support.ias_keyminbits);
-+ if (ixt->ixt_common.ixt_support.ias_keymaxbits==0)
-+ barf_out(KERN_ERR "invalid keymaxbits=%d\n", ixt->ixt_common.ixt_support.ias_keymaxbits);
-+ if (ixt->ixt_common.ixt_support.ias_keymaxbits!=ixt->ixt_common.ixt_support.ias_keyminbits)
-+ barf_out(KERN_ERR "keymaxbits must equal keyminbits (not sure).\n");
-+ if (ixt->ixt_a_keylen==0)
-+ barf_out(KERN_ERR "invalid keysize=%d\n", ixt->ixt_a_keylen);
-+zero_key_ok:
-+ if (ixt->ixt_a_ctx_size==0)
-+ barf_out(KERN_ERR "invalid a_ctx_size=%d\n", ixt->ixt_a_ctx_size);
-+ if (ixt->ixt_a_hmac_set_key==NULL)
-+ barf_out(KERN_ERR "a_hmac_set_key() must be not NULL\n");
-+ if (ixt->ixt_a_hmac_hash==NULL)
-+ barf_out(KERN_ERR "a_hmac_hash() must be not NULL\n");
-+ ret=0;
-+out:
-+ return ret;
-+}
-+
-+/*
-+ * Generic (enc, auth) registration entry point
-+ */
-+int register_ipsec_alg(struct ipsec_alg *ixt)
-+{
-+ int ret=-EINVAL;
-+ /* Validation */
-+ if (ixt==NULL)
-+ barf_out("NULL ipsec_alg object passed\n");
-+ if ((ixt->ixt_version&0xffffff00) != (IPSEC_ALG_VERSION&0xffffff00))
-+ barf_out("incorrect version: %d.%d.%d-%d, "
-+ "must be %d.%d.%d[-%d]\n",
-+ IPSEC_ALG_VERSION_QUAD(ixt->ixt_version),
-+ IPSEC_ALG_VERSION_QUAD(IPSEC_ALG_VERSION));
-+
-+ switch(ixt->ixt_alg_type) {
-+ case IPSEC_ALG_TYPE_AUTH:
-+ if ((ret=check_auth((struct ipsec_alg_auth *)ixt)<0))
-+ goto out;
-+ break;
-+ case IPSEC_ALG_TYPE_ENCRYPT:
-+ if ((ret=check_enc((struct ipsec_alg_enc *)ixt)<0))
-+ goto out;
-+ /*
-+ * Adapted two lines below:
-+ * ivlen == 0 is possible (NULL enc has blocksize==1)
-+ *
-+ * fixed NULL support by David De Reu <DeReu@tComLabs.com>
-+ */
-+ if (ixt->ixt_support.ias_ivlen == 0
-+ && ixt->ixt_blocksize > 1) {
-+ ixt->ixt_support.ias_ivlen = ixt->ixt_blocksize*8;
-+ }
-+ break;
-+ default:
-+ barf_out("alg_type=%d not supported\n", ixt->ixt_alg_type);
-+ }
-+ INIT_LIST_HEAD(&ixt->ixt_list);
-+ ret = ipsec_alg_insert(ixt);
-+ if (ret<0)
-+ barf_out(KERN_WARNING "ipsec_alg for alg_id=%d failed."
-+ "Not loaded (ret=%d).\n",
-+ ixt->ixt_support.ias_id, ret);
-+
-+
-+ ret = pfkey_list_insert_supported((struct ipsec_alg_supported *)&ixt->ixt_support
-+ , &(pfkey_supported_list[K_SADB_SATYPE_ESP]));
-+
-+ if (ret==0) {
-+ ixt->ixt_state |= IPSEC_ALG_ST_SUPP;
-+ /* send register event to userspace */
-+ pfkey_register_reply(K_SADB_SATYPE_ESP, NULL);
-+ } else
-+ printk(KERN_ERR "pfkey_list_insert_supported returned %d. "
-+ "Loading anyway.\n", ret);
-+ ret=0;
-+out:
-+ return ret;
-+}
-+
-+/*
-+ * unregister ipsec_alg object from own tables, if
-+ * success => calls pfkey_list_remove_supported()
-+ */
-+int unregister_ipsec_alg(struct ipsec_alg *ixt) {
-+ int ret= -EINVAL;
-+ switch(ixt->ixt_alg_type) {
-+ case IPSEC_ALG_TYPE_AUTH:
-+ case IPSEC_ALG_TYPE_ENCRYPT:
-+ break;
-+ default:
-+ /* this is not a typo :) */
-+ barf_out("frog found in list (\"%s\"): ixt_p=NULL\n",
-+ ixt->ixt_name);
-+ }
-+
-+ ret=ipsec_alg_delete(ixt);
-+ if (ixt->ixt_state&IPSEC_ALG_ST_SUPP) {
-+ ixt->ixt_state &= ~IPSEC_ALG_ST_SUPP;
-+ pfkey_list_remove_supported((struct ipsec_alg_supported *)&ixt->ixt_support
-+ , &(pfkey_supported_list[K_SADB_SATYPE_ESP]));
-+
-+ /* send register event to userspace */
-+ pfkey_register_reply(K_SADB_SATYPE_ESP, NULL);
-+ }
-+
-+out:
-+ return ret;
-+}
-+
-+/*
-+ * Must be called from user context
-+ * used at module load type for testing algo implementation
-+ */
-+static int ipsec_alg_test_encrypt(int enc_alg, int test) {
-+ int ret;
-+ caddr_t buf = NULL;
-+ int iv_size, keysize, key_e_size;
-+ struct ipsec_alg_enc *ixt_e;
-+ void *tmp_key_e = NULL;
-+ #define BUFSZ 1024
-+ #define MARGIN 0
-+ #define test_enc (buf+MARGIN)
-+ #define test_dec (test_enc+BUFSZ+MARGIN)
-+ #define test_tmp (test_dec+BUFSZ+MARGIN)
-+ #define test_key_e (test_tmp+BUFSZ+MARGIN)
-+ #define test_iv (test_key_e+key_e_size+MARGIN)
-+ #define test_key (test_iv+iv_size+MARGIN)
-+ #define test_size (BUFSZ*3+key_e_size+iv_size+keysize+MARGIN*7)
-+ ixt_e=(struct ipsec_alg_enc *)ipsec_alg_get(IPSEC_ALG_TYPE_ENCRYPT, enc_alg);
-+ if (ixt_e==NULL) {
-+ KLIPS_PRINT(1,
-+ "klips_debug: ipsec_alg_test_encrypt: "
-+ "encalg=%d object not found\n",
-+ enc_alg);
-+ ret=-EINVAL;
-+ goto out;
-+ }
-+ iv_size=ixt_e->ixt_common.ixt_support.ias_ivlen / 8;
-+ key_e_size=ixt_e->ixt_e_ctx_size;
-+ keysize=ixt_e->ixt_e_keylen;
-+ KLIPS_PRINT(1,
-+ "klips_debug: ipsec_alg_test_encrypt: "
-+ "enc_alg=%d blocksize=%d key_e_size=%d keysize=%d\n",
-+ enc_alg, iv_size, key_e_size, keysize);
-+ if ((buf=kmalloc (test_size, GFP_KERNEL)) == NULL) {
-+ ret= -ENOMEM;
-+ goto out;
-+ }
-+ get_random_bytes(test_key, keysize);
-+ get_random_bytes(test_iv, iv_size);
-+ if (ixt_e->ixt_e_new_key) {
-+ tmp_key_e = ixt_e->ixt_e_new_key(ixt_e, test_key, keysize);
-+ ret = tmp_key_e ? 0 : -EINVAL;
-+ } else {
-+ tmp_key_e = test_key_e;
-+ ret = ixt_e->ixt_e_set_key(ixt_e, test_key_e, test_key, keysize);
-+ }
-+ if (ret < 0)
-+ goto out;
-+ get_random_bytes(test_enc, BUFSZ);
-+ memcpy(test_tmp, test_enc, BUFSZ);
-+ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, tmp_key_e, test_enc, BUFSZ, test_iv, 1);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_encrypt: "
-+ "cbc_encrypt=1 ret=%d\n",
-+ ret);
-+ ret=memcmp(test_enc, test_tmp, BUFSZ);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_encrypt: "
-+ "memcmp(enc, tmp) ret=%d: %s\n", ret,
-+ ret!=0? "OK. (encr->DIFFers)" : "FAIL! (encr->SAME)" );
-+ memcpy(test_dec, test_enc, BUFSZ);
-+ ret=ixt_e->ixt_e_cbc_encrypt(ixt_e, tmp_key_e, test_dec, BUFSZ, test_iv, 0);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_encrypt: "
-+ "cbc_encrypt=0 ret=%d\n", ret);
-+ ret=memcmp(test_dec, test_tmp, BUFSZ);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_encrypt: "
-+ "memcmp(dec,tmp) ret=%d: %s\n", ret,
-+ ret==0? "OK. (encr->decr->SAME)" : "FAIL! (encr->decr->DIFFers)" );
-+ {
-+ /* Shamelessly taken from drivers/md sources O:) */
-+ unsigned long now;
-+ int i, count, max=0;
-+ int encrypt, speed;
-+ for (encrypt=0; encrypt <2;encrypt ++) {
-+ for (i = 0; i < 5; i++) {
-+ now = jiffies;
-+ count = 0;
-+ while (jiffies == now) {
-+ mb();
-+ ixt_e->ixt_e_cbc_encrypt(ixt_e,
-+ tmp_key_e, test_tmp,
-+ BUFSZ, test_iv, encrypt);
-+ mb();
-+ count++;
-+ mb();
-+ }
-+ if (count > max)
-+ max = count;
-+ }
-+ speed = max * (HZ * BUFSZ / 1024);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_encrypt: "
-+ "%s %s speed=%d KB/s\n",
-+ ixt_e->ixt_common.ixt_name,
-+ encrypt? "encrypt": "decrypt", speed);
-+ }
-+ }
-+out:
-+ if (tmp_key_e && ixt_e->ixt_e_destroy_key) ixt_e->ixt_e_destroy_key(ixt_e, tmp_key_e);
-+ if (buf) kfree(buf);
-+ if (ixt_e) ipsec_alg_put((struct ipsec_alg *)ixt_e);
-+ return ret;
-+ #undef test_enc
-+ #undef test_dec
-+ #undef test_tmp
-+ #undef test_key_e
-+ #undef test_iv
-+ #undef test_key
-+ #undef test_size
-+}
-+
-+/*
-+ * Must be called from user context
-+ * used at module load type for testing algo implementation
-+ */
-+static int ipsec_alg_test_auth(int auth_alg, int test) {
-+ int ret;
-+ caddr_t buf = NULL;
-+ int blocksize, keysize, key_a_size;
-+ struct ipsec_alg_auth *ixt_a;
-+ #define BUFSZ 1024
-+ #define MARGIN 0
-+ #define test_auth (buf+MARGIN)
-+ #define test_key_a (test_auth+BUFSZ+MARGIN)
-+ #define test_key (test_key_a+key_a_size+MARGIN)
-+ #define test_hash (test_key+keysize+MARGIN)
-+ #define test_size (BUFSZ+key_a_size+keysize+AHHMAC_HASHLEN+MARGIN*4)
-+ ixt_a=(struct ipsec_alg_auth *)ipsec_alg_get(IPSEC_ALG_TYPE_AUTH, auth_alg);
-+ if (ixt_a==NULL) {
-+ KLIPS_PRINT(1,
-+ "klips_debug: ipsec_alg_test_auth: "
-+ "encalg=%d object not found\n",
-+ auth_alg);
-+ ret=-EINVAL;
-+ goto out;
-+ }
-+ blocksize=ixt_a->ixt_common.ixt_blocksize;
-+ key_a_size=ixt_a->ixt_a_ctx_size;
-+ keysize=ixt_a->ixt_a_keylen;
-+ KLIPS_PRINT(1,
-+ "klips_debug: ipsec_alg_test_auth: "
-+ "auth_alg=%d blocksize=%d key_a_size=%d keysize=%d\n",
-+ auth_alg, blocksize, key_a_size, keysize);
-+ if ((buf=kmalloc (test_size, GFP_KERNEL)) == NULL) {
-+ ret= -ENOMEM;
-+ goto out;
-+ }
-+ get_random_bytes(test_key, keysize);
-+ ret = ixt_a->ixt_a_hmac_set_key(ixt_a, test_key_a, test_key, keysize);
-+ if (ret < 0 )
-+ goto out;
-+ get_random_bytes(test_auth, BUFSZ);
-+ ret=ixt_a->ixt_a_hmac_hash(ixt_a, test_key_a, test_auth, BUFSZ, test_hash, AHHMAC_HASHLEN);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_auth: "
-+ "ret=%d\n", ret);
-+ {
-+ /* Shamelessly taken from drivers/md sources O:) */
-+ unsigned long now;
-+ int i, count, max=0;
-+ int speed;
-+ for (i = 0; i < 5; i++) {
-+ now = jiffies;
-+ count = 0;
-+ while (jiffies == now) {
-+ mb();
-+ ixt_a->ixt_a_hmac_hash(ixt_a, test_key_a, test_auth, BUFSZ, test_hash, AHHMAC_HASHLEN);
-+ mb();
-+ count++;
-+ mb();
-+ }
-+ if (count > max)
-+ max = count;
-+ }
-+ speed = max * (HZ * BUFSZ / 1024);
-+ printk(KERN_INFO
-+ "klips_info: ipsec_alg_test_auth: "
-+ "%s hash speed=%d KB/s\n",
-+ ixt_a->ixt_common.ixt_name,
-+ speed);
-+ }
-+out:
-+ if (buf) kfree(buf);
-+ if (ixt_a) ipsec_alg_put((struct ipsec_alg *)ixt_a);
-+ return ret;
-+ #undef test_auth
-+ #undef test_key_a
-+ #undef test_key
-+ #undef test_hash
-+ #undef test_size
-+}
-+
-+int ipsec_alg_test(unsigned alg_type, unsigned alg_id, int test) {
-+ switch(alg_type) {
-+ case IPSEC_ALG_TYPE_ENCRYPT:
-+ return ipsec_alg_test_encrypt(alg_id, test);
-+ break;
-+ case IPSEC_ALG_TYPE_AUTH:
-+ return ipsec_alg_test_auth(alg_id, test);
-+ break;
-+ }
-+ printk(KERN_ERR "klips_info: ipsec_alg_test() called incorrectly: "
-+ "alg_type=%d alg_id=%d\n",
-+ alg_type, alg_id);
-+ return -EINVAL;
-+}
-+
-+int ipsec_alg_init(void) {
-+ KLIPS_PRINT(1, "klips_info:ipsec_alg_init: "
-+ "KLIPS alg v=%d.%d.%d-%d (EALG_MAX=%d, AALG_MAX=%d)\n",
-+ IPSEC_ALG_VERSION_QUAD(IPSEC_ALG_VERSION),
-+ K_SADB_EALG_MAX, K_SADB_AALG_MAX);
-+ /* Initialize tables */
-+ write_lock_bh(&ipsec_alg_lock);
-+ ipsec_alg_hash_init();
-+ write_unlock_bh(&ipsec_alg_lock);
-+
-+ /* Initialize static algos */
-+ KLIPS_PRINT(1, "klips_info:ipsec_alg_init: "
-+ "calling ipsec_alg_static_init()\n");
-+
-+ /* If we are suppose to use our AES, and don't have
-+ * CryptoAPI enabled...
-+ */
-+#if defined(CONFIG_KLIPS_ENC_AES) && CONFIG_KLIPS_ENC_AES && !defined(CONFIG_KLIPS_ENC_AES_MODULE)
-+#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI
-+#warning "Using built-in AES rather than CryptoAPI AES"
-+#endif
-+ {
-+ extern int ipsec_aes_init(void);
-+ ipsec_aes_init();
-+ }
-+#endif
-+
-+#if defined(CONFIG_KLIPS_ENC_3DES) && !defined(CONFIG_KLIPS_ENC_3DES_MODULE)
-+#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI
-+#warning "Using built-in 3des rather than CryptoAPI 3des"
-+#endif
-+ {
-+ extern int ipsec_3des_init(void);
-+ ipsec_3des_init();
-+ }
-+#endif
-+
-+ /* If we are doing CryptoAPI, then init */
-+#if defined(CONFIG_KLIPS_ENC_CRYPTOAPI) && CONFIG_KLIPS_ENC_CRYPTOAPI && !defined(CONFIG_KLIPS_ENC_CRYPTOAPI_MODULE)
-+ {
-+ extern int ipsec_cryptoapi_init(void);
-+ ipsec_cryptoapi_init();
-+ }
-+#endif
-+
-+
-+ return 0;
-+}
-+
-+/**********************************************
-+ *
-+ * INTERFACE for ipsec_sa init and wipe
-+ *
-+ **********************************************/
-+
-+/*
-+ * Called from pluto -> pfkey_v2_parser.c:pfkey_ipsec_sa_init()
-+ */
-+int ipsec_alg_sa_init(struct ipsec_sa *sa_p) {
-+ struct ipsec_alg_enc *ixt_e;
-+ struct ipsec_alg_auth *ixt_a;
-+
-+ /* Only ESP for now ... */
-+ if (sa_p->ips_said.proto != IPPROTO_ESP)
-+ return -EPROTONOSUPPORT;
-+
-+ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_init() :"
-+ "entering for encalg=%d, authalg=%d\n",
-+ sa_p->ips_encalg, sa_p->ips_authalg);
-+
-+ if ((ixt_e=(struct ipsec_alg_enc *)
-+ ipsec_alg_get(IPSEC_ALG_TYPE_ENCRYPT, sa_p->ips_encalg))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug: ipsec_alg_sa_init() :"
-+ "found ipsec_alg (ixt_e=%p) for encalg=%d\n",
-+ ixt_e, sa_p->ips_encalg);
-+ sa_p->ips_alg_enc=ixt_e;
-+ }
-+
-+ if ((ixt_a=(struct ipsec_alg_auth *)
-+ ipsec_alg_get(IPSEC_ALG_TYPE_AUTH, sa_p->ips_authalg))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug: ipsec_alg_sa_init() :"
-+ "found ipsec_alg (ixt_a=%p) for auth=%d\n",
-+ ixt_a, sa_p->ips_authalg);
-+ sa_p->ips_alg_auth=ixt_a;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Called from pluto -> ipsec_sa.c:ipsec_sa_delchain()
-+ */
-+int ipsec_alg_sa_wipe(struct ipsec_sa *sa_p) {
-+ struct ipsec_alg *ixt;
-+ if ((ixt=(struct ipsec_alg *)sa_p->ips_alg_enc)) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_wipe() :"
-+ "unlinking for encalg=%d\n",
-+ ixt->ixt_support.ias_id);
-+ ipsec_alg_put(ixt);
-+ }
-+ if ((ixt=(struct ipsec_alg *)sa_p->ips_alg_auth)) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug: ipsec_alg_sa_wipe() :"
-+ "unlinking for authalg=%d\n",
-+ ixt->ixt_support.ias_id);
-+ ipsec_alg_put(ixt);
-+ }
-+ return 0;
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_xform_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{
-+ int len = 0;
-+ off_t begin = 0;
-+ int i;
-+ struct list_head *head;
-+ struct ipsec_alg *ixt;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_tncfg_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ for(i = 0, head = ipsec_alg_hash_table;
-+ i<IPSEC_ALG_HASHSZ;
-+ i++, head++)
-+ {
-+ struct list_head *p;
-+ for (p=head->next; p!=head; p=p->next)
-+ {
-+ ixt = list_entry(p, struct ipsec_alg, ixt_list);
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "VERSION=%d TYPE=%d ID=%d NAME=%s REFCNT=%d ",
-+ ixt->ixt_version, ixt->ixt_alg_type, ixt->ixt_support.ias_id,
-+ ixt->ixt_name, ixt->ixt_refcnt);
-+
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "STATE=%08x BLOCKSIZE=%d IVLEN=%d KEYMINBITS=%d KEYMAXBITS=%d ",
-+ ixt->ixt_state, ixt->ixt_blocksize,
-+ ixt->ixt_support.ias_ivlen, ixt->ixt_support.ias_keyminbits, ixt->ixt_support.ias_keymaxbits);
-+
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "IVLEN=%d KEYMINBITS=%d KEYMAXBITS=%d ",
-+ ixt->ixt_support.ias_ivlen, ixt->ixt_support.ias_keyminbits, ixt->ixt_support.ias_keymaxbits);
-+
-+ switch(ixt->ixt_alg_type)
-+ {
-+ case IPSEC_ALG_TYPE_AUTH:
-+ {
-+ struct ipsec_alg_auth *auth = (struct ipsec_alg_auth *)ixt;
-+
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "KEYLEN=%d CTXSIZE=%d AUTHLEN=%d ",
-+ auth->ixt_a_keylen, auth->ixt_a_ctx_size,
-+ auth->ixt_a_authlen);
-+ break;
-+ }
-+ case IPSEC_ALG_TYPE_ENCRYPT:
-+ {
-+ struct ipsec_alg_enc *enc = (struct ipsec_alg_enc *)ixt;
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "KEYLEN=%d CTXSIZE=%d ",
-+ enc->ixt_e_keylen, enc->ixt_e_ctx_size);
-+
-+ break;
-+ }
-+ }
-+
-+ len += ipsec_snprintf(buffer+len, length-len, "\n");
-+ }
-+ }
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ len -= (offset - begin); /* Start slop */
-+ if (len > length)
-+ len = length;
-+ return len;
-+}
-+
-+
-+/*
-+ * As the author of this module, I ONLY ALLOW using it from
-+ * GPL (or same LICENSE TERMS as kernel source) modules.
-+ *
-+ * In respect to hardware crypto engines this means:
-+ * * Closed-source device drivers ARE NOT ALLOWED to use
-+ * this interface.
-+ * * Closed-source VHDL/Verilog firmware running on
-+ * the crypto hardware device IS ALLOWED to use this interface
-+ * via a GPL (or same LICENSE TERMS as kernel source) device driver.
-+ * --Juan Jose Ciarlante 20/03/2002 (thanks RGB for the correct wording)
-+ */
-+
-+/*
-+ * These symbols can only be used from GPL modules
-+ * for now, I'm disabling this because it creates false
-+ * symbol problems for old modutils.
-+ */
-+
-+#ifdef CONFIG_MODULES
-+#ifndef NET_26
-+#if 0
-+#ifndef EXPORT_SYMBOL_GPL
-+#undef EXPORT_SYMBOL_GPL
-+#define EXPORT_SYMBOL_GPL EXPORT_SYMBOL
-+#endif
-+#endif
-+EXPORT_SYMBOL(register_ipsec_alg);
-+EXPORT_SYMBOL(unregister_ipsec_alg);
-+EXPORT_SYMBOL(ipsec_alg_test);
-+#endif
-+#endif
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_alg_cryptoapi.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,515 @@
-+/*
-+ * ipsec_alg to linux cryptoapi GLUE
-+ *
-+ * Authors: CODE.ar TEAM
-+ * Harpo MAxx <harpo@linuxmendoza.org.ar>
-+ * JuanJo Ciarlante <jjo-ipsec@mendoza.gov.ar>
-+ * Luciano Ruete <docemeses@softhome.net>
-+ *
-+ * ipsec_alg_cryptoapi.c,v 1.1.2.1 2003/11/21 18:12:23 jjo Exp
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * Example usage:
-+ * modinfo -p ipsec_cryptoapi (quite useful info, including supported algos)
-+ * modprobe ipsec_cryptoapi
-+ * modprobe ipsec_cryptoapi test=1
-+ * modprobe ipsec_cryptoapi excl=1 (exclusive cipher/algo)
-+ * modprobe ipsec_cryptoapi noauto=1 aes=1 twofish=1 (only these ciphers)
-+ * modprobe ipsec_cryptoapi aes=128,128 (force these keylens)
-+ * modprobe ipsec_cryptoapi des_ede3=0 (everything but 3DES)
-+ */
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+/*
-+ * special case: ipsec core modular with this static algo inside:
-+ * must avoid MODULE magic for this file
-+ */
-+#if CONFIG_KLIPS_MODULE && CONFIG_KLIPS_ENC_CRYPTOAPI
-+#undef MODULE
-+#endif
-+
-+#include <linux/module.h>
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-+# include <linux/moduleparam.h>
-+#endif
-+#include <linux/init.h>
-+
-+#include <linux/kernel.h> /* printk() */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/string.h>
-+
-+/* Check if __exit is defined, if not null it */
-+#ifndef __exit
-+#define __exit
-+#endif
-+
-+/* warn the innocent */
-+#if !defined (CONFIG_CRYPTO) && !defined (CONFIG_CRYPTO_MODULE)
-+#warning "No linux CryptoAPI configured, install 2.4.22+ or 2.6.x or enable CryptoAPI"
-+#define NO_CRYPTOAPI_SUPPORT
-+#endif
-+
-+#include "openswan.h"
-+#include "openswan/ipsec_alg.h"
-+#include "openswan/ipsec_policy.h"
-+
-+#include <linux/crypto.h>
-+#ifdef CRYPTO_API_VERSION_CODE
-+#warning "Old CryptoAPI is not supported. Only linux-2.4.22+ or linux-2.6.x are supported"
-+#define NO_CRYPTOAPI_SUPPORT
-+#endif
-+
-+#ifdef NO_CRYPTOAPI_SUPPORT
-+#warning "Building an unusable module :P"
-+/* Catch old CryptoAPI by not allowing module to load */
-+IPSEC_ALG_MODULE_INIT_STATIC( ipsec_cryptoapi_init )
-+{
-+ printk(KERN_WARNING "ipsec_cryptoapi.o was not built on stock Linux CryptoAPI (2.4.22+ or 2.6.x), not loading.\n");
-+ return -EINVAL;
-+}
-+#else
-+#include <asm/scatterlist.h>
-+#include <asm/pgtable.h>
-+#include <linux/mm.h>
-+
-+/*
-+ * CryptoAPI compat code - we use the current API and macro back to
-+ * the older ones.
-+ */
-+
-+#ifndef CRYPTO_TFM_MODE_CBC
-+/*
-+ * As of linux-2.6.21 this is no longer defined, and presumably no longer
-+ * needed to be passed into the crypto core code.
-+ */
-+#define CRYPTO_TFM_MODE_CBC 0
-+#define CRYPTO_TFM_MODE_ECB 0
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-+ /*
-+ * Linux 2.6.19 introduced a new Crypto API, setup macro's to convert new
-+ * API into old API.
-+ */
-+
-+ /* Symmetric/Block Cipher */
-+ struct blkcipher_desc
-+ {
-+ struct crypto_tfm *tfm;
-+ void *info;
-+ };
-+ #define ecb(X) #X
-+ #define cbc(X) #X
-+ #define crypto_has_blkcipher(X, Y, Z) crypto_alg_available(X, 0)
-+ #define crypto_blkcipher_cast(X) X
-+ #define crypto_blkcipher_tfm(X) X
-+ #define crypto_alloc_blkcipher(X, Y, Z) crypto_alloc_tfm(X, CRYPTO_TFM_MODE_CBC)
-+ #define crypto_blkcipher_ivsize(X) crypto_tfm_alg_ivsize(X)
-+ #define crypto_blkcipher_blocksize(X) crypto_tfm_alg_blocksize(X)
-+ #define crypto_blkcipher_setkey(X, Y, Z) crypto_cipher_setkey(X, Y, Z)
-+ #define crypto_blkcipher_encrypt_iv(W, X, Y, Z) \
-+ crypto_cipher_encrypt_iv((W)->tfm, X, Y, Z, (u8 *)((W)->info))
-+ #define crypto_blkcipher_decrypt_iv(W, X, Y, Z) \
-+ crypto_cipher_decrypt_iv((W)->tfm, X, Y, Z, (u8 *)((W)->info))
-+
-+ /* Hash/HMAC/Digest */
-+ struct hash_desc
-+ {
-+ struct crypto_tfm *tfm;
-+ };
-+ #define hmac(X) #X
-+ #define crypto_has_hash(X, Y, Z) crypto_alg_available(X, 0)
-+ #define crypto_hash_cast(X) X
-+ #define crypto_hash_tfm(X) X
-+ #define crypto_alloc_hash(X, Y, Z) crypto_alloc_tfm(X, 0)
-+ #define crypto_hash_digestsize(X) crypto_tfm_alg_digestsize(X)
-+ #define crypto_hash_digest(W, X, Y, Z) \
-+ crypto_digest_digest((W)->tfm, X, sg_num, Z)
-+
-+ /* Asymmetric Cipher */
-+ #define crypto_has_cipher(X, Y, Z) crypto_alg_available(X, 0)
-+
-+ /* Compression */
-+ #define crypto_has_comp(X, Y, Z) crypto_alg_available(X, 0)
-+ #define crypto_comp_tfm(X) X
-+ #define crypto_comp_cast(X) X
-+ #define crypto_alloc_comp(X, Y, Z) crypto_alloc_tfm(X, 0)
-+#else
-+ #define ecb(X) "ecb(" #X ")"
-+ #define cbc(X) "cbc(" #X ")"
-+ #define hmac(X) "hmac(" #X ")"
-+#endif /* if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) */
-+
-+#define CIPHERNAME_AES cbc(aes)
-+#define CIPHERNAME_1DES cbc(des)
-+#define CIPHERNAME_3DES cbc(des3_ede)
-+#define CIPHERNAME_BLOWFISH cbc(blowfish)
-+#define CIPHERNAME_CAST cbc(cast5)
-+#define CIPHERNAME_SERPENT cbc(serpent)
-+#define CIPHERNAME_TWOFISH cbc(twofish)
-+
-+#define DIGESTNAME_MD5 "md5"
-+#define DIGESTNAME_SHA1 "sha1"
-+
-+#define ESP_SERPENT 252 /* from ipsec drafts */
-+#define ESP_TWOFISH 253 /* from ipsec drafts */
-+
-+MODULE_AUTHOR("Juanjo Ciarlante, Harpo MAxx, Luciano Ruete");
-+static int debug_crypto=0;
-+static int test_crypto=0;
-+static int excl_crypto=0;
-+static int noauto = 0;
-+module_param(debug_crypto,int,0644);
-+module_param(test_crypto,int,0644);
-+module_param(excl_crypto,int,0644);
-+module_param(noauto,int,0644);
-+
-+MODULE_PARM_DESC(noauto, "Dont try all known algos, just setup enabled ones");
-+
-+#ifdef CONFIG_KLIPS_ENC_1DES
-+static int des_ede1[] = {-1, -1};
-+#endif
-+static int des_ede3[] = {-1, -1};
-+static int aes[] = {-1, -1};
-+static int blowfish[] = {-1, -1};
-+static int cast[] = {-1, -1};
-+static int serpent[] = {-1, -1};
-+static int twofish[] = {-1, -1};
-+
-+#ifdef CONFIG_KLIPS_ENC_1DES
-+module_param_array(des_ede1,int,NULL,0444);
-+#endif
-+module_param_array(des_ede3,int,NULL,0444);
-+module_param_array(aes,int,NULL,0444);
-+module_param_array(blowfish,int,NULL,0444);
-+module_param_array(cast,int,NULL,0444);
-+module_param_array(serpent,int,NULL,0444);
-+module_param_array(twofish,int,NULL,0444);
-+
-+MODULE_PARM_DESC(des_ede1, "0: disable | 1: force_enable | min,max: dontuse");
-+MODULE_PARM_DESC(des_ede3, "0: disable | 1: force_enable | min,max: dontuse");
-+MODULE_PARM_DESC(aes, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(blowfish, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(cast, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(serpent, "0: disable | 1: force_enable | min,max: keybitlens");
-+MODULE_PARM_DESC(twofish, "0: disable | 1: force_enable | min,max: keybitlens");
-+
-+struct ipsec_alg_capi_cipher {
-+ const char *ciphername; /* cryptoapi's ciphername */
-+ unsigned blocksize;
-+ unsigned short minbits;
-+ unsigned short maxbits;
-+ int *parm; /* lkm param for this cipher */
-+ struct ipsec_alg_enc alg; /* note it's not a pointer */
-+};
-+
-+static struct ipsec_alg_capi_cipher alg_capi_carray[] = {
-+ { CIPHERNAME_AES, 16, 128, 256, aes, { ixt_common:{ ixt_support:{ ias_id: ESP_AES}}}},
-+ { CIPHERNAME_TWOFISH, 16, 128, 256, twofish, { ixt_common:{ ixt_support:{ ias_id: ESP_TWOFISH,}}}},
-+ { CIPHERNAME_SERPENT, 16, 128, 256, serpent, { ixt_common:{ ixt_support:{ ias_id: ESP_SERPENT,}}}},
-+ { CIPHERNAME_CAST, 8, 128, 128, cast , { ixt_common:{ ixt_support:{ ias_id: ESP_CAST,}}}},
-+ { CIPHERNAME_BLOWFISH, 8, 96, 448, blowfish, { ixt_common:{ ixt_support:{ ias_id: ESP_BLOWFISH,}}}},
-+ { CIPHERNAME_3DES, 8, 192, 192, des_ede3, { ixt_common:{ ixt_support:{ ias_id: ESP_3DES,}}}},
-+#ifdef CONFIG_KLIPS_ENC_1DES
-+ { CIPHERNAME_1DES, 8, 64, 64, des_ede1, { ixt_common:{ ixt_support:{ ias_id: ESP_DES,}}}},
-+#endif
-+ { NULL, 0, 0, 0, NULL, {} }
-+};
-+
-+#ifdef NOT_YET
-+struct ipsec_alg_capi_digest {
-+ const char *digestname; /* cryptoapi's digestname */
-+ struct digest_implementation *di;
-+ struct ipsec_alg_auth alg; /* note it's not a pointer */
-+};
-+static struct ipsec_alg_capi_cipher alg_capi_darray[] = {
-+ { DIGESTNAME_MD5, NULL, { ixt_alg_id: AH_MD5, }},
-+ { DIGESTNAME_SHA1, NULL, { ixt_alg_id: AH_SHA, }},
-+ { NULL, NULL, {} }
-+};
-+#endif
-+/*
-+ * "generic" linux cryptoapi setup_cipher() function
-+ */
-+int setup_cipher(const char *ciphername)
-+{
-+ return crypto_has_blkcipher(ciphername, 0, CRYPTO_ALG_ASYNC);
-+}
-+
-+/*
-+ * setups ipsec_alg_capi_cipher "hyper" struct components, calling
-+ * register_ipsec_alg for cointaned ipsec_alg object
-+ */
-+static void _capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e);
-+static __u8 * _capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen);
-+static int _capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, __u8 * iv, int encrypt);
-+
-+static int
-+setup_ipsec_alg_capi_cipher(struct ipsec_alg_capi_cipher *cptr)
-+{
-+ int ret;
-+ cptr->alg.ixt_common.ixt_version = IPSEC_ALG_VERSION;
-+ cptr->alg.ixt_common.ixt_module = THIS_MODULE;
-+ atomic_set (& cptr->alg.ixt_common.ixt_refcnt, 0);
-+ strncpy (cptr->alg.ixt_common.ixt_name , cptr->ciphername, sizeof (cptr->alg.ixt_common.ixt_name));
-+
-+ cptr->alg.ixt_common.ixt_blocksize=cptr->blocksize;
-+ cptr->alg.ixt_common.ixt_support.ias_keyminbits=cptr->minbits;
-+ cptr->alg.ixt_common.ixt_support.ias_keymaxbits=cptr->maxbits;
-+ cptr->alg.ixt_common.ixt_state = 0;
-+ if (excl_crypto) cptr->alg.ixt_common.ixt_state |= IPSEC_ALG_ST_EXCL;
-+ cptr->alg.ixt_e_keylen=cptr->alg.ixt_common.ixt_support.ias_keymaxbits/8;
-+ cptr->alg.ixt_e_ctx_size = 0;
-+ cptr->alg.ixt_common.ixt_support.ias_exttype = IPSEC_ALG_TYPE_ENCRYPT;
-+ cptr->alg.ixt_e_new_key = _capi_new_key;
-+ cptr->alg.ixt_e_destroy_key = _capi_destroy_key;
-+ cptr->alg.ixt_e_cbc_encrypt = _capi_cbc_encrypt;
-+ cptr->alg.ixt_common.ixt_data = cptr;
-+
-+ ret=register_ipsec_alg_enc(&cptr->alg);
-+ printk(KERN_INFO "KLIPS cryptoapi interface: "
-+ "alg_type=%d alg_id=%d name=%s "
-+ "keyminbits=%d keymaxbits=%d, %s(%d)\n",
-+ cptr->alg.ixt_common.ixt_support.ias_exttype,
-+ cptr->alg.ixt_common.ixt_support.ias_id,
-+ cptr->alg.ixt_common.ixt_name,
-+ cptr->alg.ixt_common.ixt_support.ias_keyminbits,
-+ cptr->alg.ixt_common.ixt_support.ias_keymaxbits,
-+ ret ? "not found" : "found", ret);
-+ return ret;
-+}
-+/*
-+ * called in ipsec_sa_wipe() time, will destroy key contexts
-+ * and do 1 unbind()
-+ */
-+static void
-+_capi_destroy_key (struct ipsec_alg_enc *alg, __u8 *key_e)
-+{
-+ struct crypto_tfm *tfm=(struct crypto_tfm*)key_e;
-+
-+ if (debug_crypto > 0)
-+ printk(KERN_DEBUG "klips_debug: _capi_destroy_key:"
-+ "name=%s key_e=%p \n",
-+ alg->ixt_common.ixt_name, key_e);
-+ if (!key_e) {
-+ printk(KERN_ERR "klips_debug: _capi_destroy_key:"
-+ "name=%s NULL key_e!\n",
-+ alg->ixt_common.ixt_name);
-+ return;
-+ }
-+ crypto_free_tfm(tfm);
-+}
-+
-+/*
-+ * create new key context, need alg->ixt_data to know which
-+ * (of many) cipher inside this module is the target
-+ */
-+static __u8 *
-+_capi_new_key (struct ipsec_alg_enc *alg, const __u8 *key, size_t keylen)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ struct crypto_tfm *tfm=NULL;
-+
-+ cptr = alg->ixt_common.ixt_data;
-+ if (!cptr) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "NULL ixt_data (?!) for \"%s\" algo\n"
-+ , alg->ixt_common.ixt_name);
-+ goto err;
-+ }
-+ if (debug_crypto > 0)
-+ printk(KERN_DEBUG "klips_debug:_capi_new_key:"
-+ "name=%s cptr=%p key=%p keysize=%d\n",
-+ alg->ixt_common.ixt_name, cptr, key, keylen);
-+
-+ /*
-+ * alloc tfm
-+ */
-+ tfm = crypto_blkcipher_tfm(crypto_alloc_blkcipher(cptr->ciphername, 0, CRYPTO_ALG_ASYNC));
-+ if (!tfm) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "NULL tfm for \"%s\" cryptoapi (\"%s\") algo\n"
-+ , alg->ixt_common.ixt_name, cptr->ciphername);
-+ goto err;
-+ }
-+ if (crypto_blkcipher_setkey(crypto_blkcipher_cast(tfm), key, keylen) < 0) {
-+ printk(KERN_ERR "_capi_new_key(): "
-+ "failed new_key() for \"%s\" cryptoapi algo (keylen=%d)\n"
-+ , alg->ixt_common.ixt_name, keylen);
-+ crypto_free_tfm(tfm);
-+ tfm=NULL;
-+ }
-+err:
-+ if (debug_crypto > 0)
-+ printk(KERN_DEBUG "klips_debug:_capi_new_key:"
-+ "name=%s key=%p keylen=%d tfm=%p\n",
-+ alg->ixt_common.ixt_name, key, keylen, tfm);
-+ return (__u8 *) tfm;
-+}
-+/*
-+ * core encryption function: will use cx->ci to call actual cipher's
-+ * cbc function
-+ */
-+static int
-+_capi_cbc_encrypt(struct ipsec_alg_enc *alg, __u8 * key_e, __u8 * in, int ilen, __u8 * iv, int encrypt) {
-+ int error =0;
-+ struct crypto_tfm *tfm=(struct crypto_tfm *)key_e;
-+ struct scatterlist sg;
-+ struct blkcipher_desc desc;
-+ if (debug_crypto > 1)
-+ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:"
-+ "key_e=%p "
-+ "in=%p out=%p ilen=%d iv=%p encrypt=%d\n"
-+ , key_e
-+ , in, in, ilen, iv, encrypt);
-+
-+ memset(&sg, 0, sizeof(sg));
-+ sg_set_page(&sg, virt_to_page(in), ilen, offset_in_page(in));
-+
-+ memset(&desc, 0, sizeof(desc));
-+ desc.tfm = crypto_blkcipher_cast(tfm);
-+ desc.info = (void *) iv;
-+
-+ if (encrypt)
-+ error = crypto_blkcipher_encrypt_iv (&desc, &sg, &sg, ilen);
-+ else
-+ error = crypto_blkcipher_decrypt_iv (&desc, &sg, &sg, ilen);
-+ if (debug_crypto > 1)
-+ printk(KERN_DEBUG "klips_debug:_capi_cbc_encrypt:"
-+ "error=%d\n"
-+ , error);
-+ return (error<0)? error : ilen;
-+}
-+/*
-+ * main initialization loop: for each cipher in list, do
-+ * 1) setup cryptoapi cipher else continue
-+ * 2) register ipsec_alg object
-+ */
-+static int
-+setup_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ /*
-+ * see if cipher has been disabled (0) or
-+ * if noauto set and not enabled (1)
-+ */
-+ if (cptr->parm[0] == 0 || (noauto && cptr->parm[0] < 0)) {
-+ if (debug_crypto>0)
-+ printk(KERN_INFO "setup_cipher_list(): "
-+ "ciphername=%s skipped at user request: "
-+ "noauto=%d parm[0]=%d parm[1]=%d\n"
-+ , cptr->ciphername
-+ , noauto
-+ , cptr->parm[0]
-+ , cptr->parm[1]);
-+ continue;
-+ } else {
-+ if (debug_crypto>0)
-+ printk(KERN_INFO "setup_cipher_list(): going to init ciphername=%s: noauto=%d parm[0]=%d parm[1]=%d\n",
-+ , cptr->ciphername
-+ , noauto
-+ , cptr->parm[0]
-+ , cptr->parm[1]);
-+ }
-+ /*
-+ * use a local ci to avoid touching cptr->ci,
-+ * if register ipsec_alg success then bind cipher
-+ */
-+ if(cptr->alg.ixt_common.ixt_support.ias_name == NULL) {
-+ cptr->alg.ixt_common.ixt_support.ias_name = cptr->ciphername;
-+ }
-+
-+ if( setup_cipher(cptr->ciphername) ) {
-+ if (debug_crypto > 0)
-+ printk(KERN_DEBUG "klips_debug:"
-+ "setup_cipher_list():"
-+ "ciphername=%s found\n"
-+ , cptr->ciphername);
-+
-+ if (setup_ipsec_alg_capi_cipher(cptr) != 0) {
-+ printk(KERN_ERR "klips_debug:"
-+ "setup_cipher_list():"
-+ "ciphername=%s failed ipsec_alg_register\n"
-+ , cptr->ciphername);
-+ }
-+ } else {
-+ printk(KERN_INFO "KLIPS: lookup for ciphername=%s: not found \n",
-+ cptr->ciphername);
-+ }
-+ }
-+ return 0;
-+}
-+/*
-+ * deregister ipsec_alg objects and unbind ciphers
-+ */
-+static int
-+unsetup_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ if (cptr->alg.ixt_common.ixt_state & IPSEC_ALG_ST_REGISTERED) {
-+ unregister_ipsec_alg_enc(&cptr->alg);
-+ }
-+ }
-+ return 0;
-+}
-+/*
-+ * test loop for registered algos
-+ */
-+static int
-+test_cipher_list (struct ipsec_alg_capi_cipher* clist)
-+{
-+ int test_ret;
-+ struct ipsec_alg_capi_cipher *cptr;
-+ /* foreach cipher in list ... */
-+ for (cptr=clist;cptr->ciphername;cptr++) {
-+ if (cptr->alg.ixt_common.ixt_state & IPSEC_ALG_ST_REGISTERED) {
-+ test_ret=ipsec_alg_test(
-+ cptr->alg.ixt_common.ixt_support.ias_exttype,
-+ cptr->alg.ixt_common.ixt_support.ias_id,
-+ test_crypto);
-+ printk("test_cipher_list(alg_type=%d alg_id=%d): test_ret=%d\n",
-+ cptr->alg.ixt_common.ixt_support.ias_exttype,
-+ cptr->alg.ixt_common.ixt_support.ias_id,
-+ test_ret);
-+ }
-+ }
-+ return 0;
-+}
-+
-+IPSEC_ALG_MODULE_INIT_STATIC( ipsec_cryptoapi_init )
-+{
-+ int ret, test_ret;
-+ if ((ret=setup_cipher_list(alg_capi_carray)) < 0)
-+ return -EPROTONOSUPPORT;
-+ if (ret==0 && test_crypto) {
-+ test_ret=test_cipher_list(alg_capi_carray);
-+ }
-+ return ret;
-+}
-+IPSEC_ALG_MODULE_EXIT_STATIC( ipsec_cryptoapi_fini )
-+{
-+ unsetup_cipher_list(alg_capi_carray);
-+ return;
-+}
-+#ifdef MODULE_LICENSE
-+MODULE_LICENSE("GPL");
-+#endif
-+
-+#endif /* NO_CRYPTOAPI_SUPPORT */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_esp.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,584 @@
-+/*
-+ * processing code for ESP
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+#include <net/protocol.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_auth.h"
-+
-+#ifdef CONFIG_KLIPS_ESP
-+#include "openswan/ipsec_esp.h"
-+#endif /* CONFIG_KLIPS_ESP */
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+#include "ipsec_ocf.h"
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+#define ESP_DMP(_x,_y,_z) if(debug_rcv && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z)
-+#else
-+#define ESP_DMP(_x,_y,_z)
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ESP
-+enum ipsec_rcv_value
-+ipsec_rcv_esp_checks(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb)
-+{
-+ __u8 proto;
-+ int len; /* packet length */
-+
-+ len = skb->len;
-+ proto = irs->ipp->protocol;
-+
-+ /* XXX this will need to be 8 for IPv6 */
-+ if ((proto == IPPROTO_ESP) && ((len - irs->iphlen) % 4)) {
-+ printk("klips_error:ipsec_rcv: "
-+ "got packet with content length = %d from %s -- should be on 4 octet boundary, packet dropped\n",
-+ len - irs->iphlen,
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ if(skb->len < (irs->hard_header_len + sizeof(struct iphdr) + sizeof(struct esphdr))) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_INAU,
-+ "klips_debug:ipsec_rcv: "
-+ "runt esp packet of skb->len=%d received from %s, dropped.\n",
-+ skb->len,
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ irs->protostuff.espstuff.espp = (struct esphdr *)skb_transport_header(skb);
-+ irs->said.spi = irs->protostuff.espstuff.espp->esp_spi;
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_esp_decrypt_setup(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb,
-+ __u32 *replay,
-+ unsigned char **authenticator)
-+{
-+ struct esphdr *espp = irs->protostuff.espstuff.espp;
-+ //unsigned char *idat = (unsigned char *)espp;
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "packet from %s received with seq=%d (iv)=0x%08x%08x iplen=%d esplen=%d sa=%s\n",
-+ irs->ipsaddr_txt,
-+ (__u32)ntohl(espp->esp_rpl),
-+ (__u32)ntohl(*((__u32 *)(espp->esp_iv) )),
-+ (__u32)ntohl(*((__u32 *)(espp->esp_iv) + 1)),
-+ irs->len,
-+ irs->ilen,
-+ irs->sa_len ? irs->sa : " (error)");
-+
-+ *replay = ntohl(espp->esp_rpl);
-+ *authenticator = &(skb_transport_header(skb)[irs->ilen]);
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_esp_authcalc(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb)
-+{
-+ struct auth_alg *aa;
-+ struct esphdr *espp = irs->protostuff.espstuff.espp;
-+ union {
-+ MD5_CTX md5;
-+ SHA1_CTX sha1;
-+ } tctx;
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (irs->ipsp->ocf_in_use)
-+ return(ipsec_ocf_rcv(irs));
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ if (irs->ipsp->ips_alg_auth) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "ipsec_alg hashing proto=%d... ",
-+ irs->said.proto);
-+ if(irs->said.proto == IPPROTO_ESP) {
-+ ipsec_alg_sa_esp_hash(irs->ipsp,
-+ (caddr_t)espp, irs->ilen,
-+ irs->hash, AHHMAC_HASHLEN);
-+ return IPSEC_RCV_OK;
-+ }
-+ return IPSEC_RCV_BADPROTO;
-+ }
-+#endif
-+ aa = irs->authfuncs;
-+
-+ /* copy the initialized keying material */
-+ memcpy(&tctx, irs->ictx, irs->ictx_len);
-+
-+#ifdef HASH_DEBUG
-+ ESP_DMP("ictx", irs->ictx, irs->ictx_len);
-+
-+ ESP_DMP("mac_esp", (caddr_t)espp, irs->ilen);
-+#endif
-+ (*aa->update)((void *)&tctx, (caddr_t)espp, irs->ilen);
-+
-+ (*aa->final)(irs->hash, (void *)&tctx);
-+
-+#ifdef HASH_DEBUG
-+ ESP_DMP("hash1", irs->hash, aa->hashlen);
-+#endif
-+
-+ memcpy(&tctx, irs->octx, irs->octx_len);
-+
-+#ifdef HASH_DEBUG
-+ ESP_DMP("octx", irs->octx, irs->octx_len);
-+#endif
-+
-+ (*aa->update)((void *)&tctx, irs->hash, aa->hashlen);
-+ (*aa->final)(irs->hash, (void *)&tctx);
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_esp_decrypt(struct ipsec_rcv_state *irs)
-+{
-+ struct ipsec_sa *ipsp = irs->ipsp;
-+#ifdef CONFIG_KLIPS_ALG
-+ struct esphdr *espp = irs->protostuff.espstuff.espp;
-+ __u8 *idat; /* pointer to content to be decrypted/authenticated */
-+ int encaplen = 0;
-+ struct sk_buff *skb;
-+ struct ipsec_alg_enc *ixt_e=NULL;
-+#endif
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ipsp->ocf_in_use)
-+ return(ipsec_ocf_rcv(irs));
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ skb=irs->skb;
-+
-+ idat = skb_transport_header(skb);
-+
-+ /* encaplen is the distance between the end of the IP
-+ * header and the beginning of the ESP header.
-+ * on ESP headers it is zero, but on UDP-encap ESP
-+ * it includes the space for the UDP header.
-+ *
-+ * Note: UDP-encap code has already moved the
-+ * skb->data forward to accomodate this.
-+ */
-+ encaplen = skb_transport_header(skb) - (skb_network_header(skb) + irs->iphlen);
-+
-+ ixt_e=ipsp->ips_alg_enc;
-+ irs->esphlen = ESP_HEADER_LEN + ixt_e->ixt_common.ixt_support.ias_ivlen/8;
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "encalg=%d esphlen=%d\n",
-+ ipsp->ips_encalg, irs->esphlen);
-+
-+ idat += irs->esphlen;
-+ irs->ilen -= irs->esphlen;
-+
-+ if (ipsec_alg_esp_encrypt(ipsp,
-+ idat, irs->ilen, espp->esp_iv,
-+ IPSEC_ALG_DECRYPT) <= 0) {
-+#ifdef CONFIG_KLIPS_DEBUG
-+ KLIPS_ERROR(debug_rcv, "klips_error:ipsec_rcv: "
-+ "got packet with esplen = %d "
-+ "from %s -- should be on "
-+ "ENC(%d) octet boundary, "
-+ "packet dropped\n",
-+ irs->ilen,
-+ irs->ipsaddr_txt,
-+ ipsp->ips_encalg);
-+#endif
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BAD_DECRYPT;
-+ }
-+
-+ return ipsec_rcv_esp_post_decrypt(irs);
-+#else
-+ return IPSEC_RCV_BAD_DECRYPT;
-+#endif /* CONFIG_KLIPS_ALG */
-+}
-+
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_esp_post_decrypt(struct ipsec_rcv_state *irs)
-+{
-+ struct sk_buff *skb;
-+ __u8 *idat; /* pointer to content to be decrypted/authenticated */
-+ struct ipsec_sa *ipsp = irs->ipsp;
-+ int pad = 0, padlen;
-+ int badpad = 0;
-+ int i;
-+
-+ skb = irs->skb;
-+
-+ idat = skb_transport_header(skb) + irs->esphlen;
-+
-+ ESP_DMP("postdecrypt", idat, irs->ilen);
-+
-+ irs->next_header = idat[irs->ilen - 1];
-+ padlen = idat[irs->ilen - 2];
-+ pad = padlen + 2 + irs->authlen;
-+
-+ KLIPS_PRINT(debug_rcv & DB_RX_IPAD,
-+ "klips_debug:ipsec_rcv_esp_post_decrypt: "
-+ "padlen=%d, contents: 0x<offset>: 0x<value> 0x<value> ...\n",
-+ padlen);
-+
-+ for (i = 1; i <= padlen; i++) {
-+ if((i % 16) == 1) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_IPAD,
-+ "klips_debug: %02x:",
-+ i - 1);
-+ }
-+ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD,
-+ " %02x",
-+ idat[irs->ilen - 2 - padlen + i - 1]);
-+ if(i != idat[irs->ilen - 2 - padlen + i - 1]) {
-+ badpad = 1;
-+ }
-+ if((i % 16) == 0) {
-+ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD,
-+ "\n");
-+ }
-+ }
-+ if((i % 16) != 1) {
-+ KLIPS_PRINTMORE(debug_rcv & DB_RX_IPAD,
-+ "\n");
-+ }
-+ if(badpad) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_IPAD,
-+ "klips_debug:ipsec_rcv_esp_post_decrypt: "
-+ "warning, decrypted packet from %s has bad padding\n",
-+ irs->ipsaddr_txt);
-+ KLIPS_PRINT(debug_rcv & DB_RX_IPAD,
-+ "klips_debug:ipsec_rcv_esp_post_decrypt: "
-+ "...may be bad decryption -- not dropped\n");
-+ ipsp->ips_errs.ips_encpad_errs += 1;
-+ }
-+
-+ KLIPS_PRINT(debug_rcv & DB_RX_IPAD,
-+ "klips_debug:ipsec_rcv_esp_post_decrypt: "
-+ "packet decrypted from %s: next_header = %d, padding = %d\n",
-+ irs->ipsaddr_txt,
-+ irs->next_header,
-+ pad - 2 - irs->authlen);
-+
-+ irs->ipp->tot_len = htons(ntohs(irs->ipp->tot_len) - (irs->esphlen + pad));
-+
-+ /*
-+ * move the IP header forward by the size of the ESP header, which
-+ * will remove the the ESP header from the packet.
-+ *
-+ * XXX this is really unnecessary, since odds we are in tunnel
-+ * mode, and we will be *removing* this IP header.
-+ *
-+ */
-+ memmove((void *)(idat - irs->iphlen),
-+ (void *)(skb_network_header(skb)), irs->iphlen);
-+
-+ ESP_DMP("esp postmove", (idat - irs->iphlen),
-+ irs->iphlen + irs->ilen);
-+
-+ /* skb_pull below, will move up by esphlen */
-+
-+ /* XXX not clear how this can happen, as the message indicates */
-+ if(skb->len < irs->esphlen) {
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_rcv_esp_post_decrypt: "
-+ "tried to skb_pull esphlen=%d, %d available. This should never happen, please report.\n",
-+ irs->esphlen, (int)(skb->len));
-+ return IPSEC_RCV_ESP_DECAPFAIL;
-+ }
-+ skb_pull(skb, irs->esphlen);
-+ skb_set_network_header(skb, ipsec_skb_offset(skb, idat - irs->iphlen));
-+ irs->ipp = ip_hdr(skb);
-+
-+ ESP_DMP("esp postpull", skb->data, skb->len);
-+
-+ /* now, trip off the padding from the end */
-+ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+ "klips_debug:ipsec_rcv: "
-+ "trimming to %d.\n",
-+ irs->len - irs->esphlen - pad);
-+ if(pad + irs->esphlen <= irs->len) {
-+ skb_trim(skb, irs->len - irs->esphlen - pad);
-+ } else {
-+ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+ "klips_debug:ipsec_rcv: "
-+ "bogus packet, size is zero or negative, dropping.\n");
-+ return IPSEC_RCV_DECAPFAIL;
-+ }
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+/*
-+ *
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_esp_setup(struct ipsec_xmit_state *ixs)
-+{
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ __u32 iv[2];
-+#endif
-+ struct esphdr *espp;
-+ int ilen = 0;
-+ int padlen = 0, i;
-+ unsigned char *dat;
-+ unsigned char *idat, *pad;
-+#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+ __u8 hash[AH_AMAX];
-+ union {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ MD5_CTX md5;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ SHA1_CTX sha1;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ } tctx;
-+#endif
-+
-+ dat = (unsigned char *)ixs->iph;
-+
-+ espp = (struct esphdr *)(dat + ixs->iphlen);
-+ espp->esp_spi = ixs->ipsp->ips_said.spi;
-+ espp->esp_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq));
-+
-+ switch(ixs->ipsp->ips_encalg) {
-+#if defined(CONFIG_KLIPS_ENC_3DES)
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ case ESP_3DES:
-+#endif /* CONFIG_KLIPS_ENC_3DES */
-+ iv[0] = *((__u32*)&(espp->esp_iv) ) =
-+ ((__u32*)(ixs->ipsp->ips_iv))[0];
-+ iv[1] = *((__u32*)&(espp->esp_iv) + 1) =
-+ ((__u32*)(ixs->ipsp->ips_iv))[1];
-+ break;
-+#endif /* defined(CONFIG_KLIPS_ENC_3DES) */
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+
-+ idat = dat + ixs->iphlen + sizeof(struct esphdr);
-+ ilen = ixs->skb->len - (ixs->iphlen + sizeof(struct esphdr) + ixs->authlen);
-+
-+ /* Self-describing padding */
-+ pad = &dat[ixs->skb->len - ixs->tailroom];
-+ padlen = ixs->tailroom - 2 - ixs->authlen;
-+ for (i = 0; i < padlen; i++) {
-+ pad[i] = i + 1;
-+ }
-+ dat[ixs->skb->len - ixs->authlen - 2] = padlen;
-+
-+ dat[ixs->skb->len - ixs->authlen - 1] = ixs->iph->protocol;
-+ ixs->iph->protocol = IPPROTO_ESP;
-+
-+ switch(ixs->ipsp->ips_encalg) {
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ case ESP_3DES:
-+ des_ede3_cbc_encrypt((des_cblock *)idat,
-+ (des_cblock *)idat,
-+ ilen,
-+ ((struct des_eks *)(ixs->ipsp->ips_key_e))[0].ks,
-+ ((struct des_eks *)(ixs->ipsp->ips_key_e))[1].ks,
-+ ((struct des_eks *)(ixs->ipsp->ips_key_e))[2].ks,
-+ (des_cblock *)iv, 1);
-+ break;
-+#endif /* CONFIG_KLIPS_ENC_3DES */
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+
-+ switch(ixs->ipsp->ips_encalg) {
-+#if defined(CONFIG_KLIPS_ENC_3DES)
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ case ESP_3DES:
-+#endif /* CONFIG_KLIPS_ENC_3DES */
-+ /* XXX update IV with the last 8 octets of the encryption */
-+#if KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK
-+ ((__u32*)(ixs->ipsp->ips_iv))[0] =
-+ ((__u32 *)(idat))[(ilen >> 2) - 2];
-+ ((__u32*)(ixs->ipsp->ips_iv))[1] =
-+ ((__u32 *)(idat))[(ilen >> 2) - 1];
-+#else /* KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK */
-+ prng_bytes(&ipsec_prng, (char *)ixs->ipsp->ips_iv, EMT_ESPDES_IV_SZ);
-+#endif /* KLIPS_IMPAIRMENT_ESPIV_CBC_ATTACK */
-+ break;
-+#endif /* defined(CONFIG_KLIPS_ENC_3DES) */
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+
-+ switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5:
-+ ipsec_xmit_dmp("espp", (char*)espp, ixs->skb->len - ixs->iphlen - ixs->authlen);
-+ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+ ipsec_xmit_dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, (caddr_t)espp, ixs->skb->len - ixs->iphlen - ixs->authlen);
-+ ipsec_xmit_dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Final(hash, &tctx.md5);
-+ ipsec_xmit_dmp("ictx hash", (char*)&hash, sizeof(hash));
-+ tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+ ipsec_xmit_dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Update(&tctx.md5, hash, AHMD596_ALEN);
-+ ipsec_xmit_dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5));
-+ osMD5Final(hash, &tctx.md5);
-+ ipsec_xmit_dmp("octx hash", (char*)&hash, sizeof(hash));
-+ memcpy(&(dat[ixs->skb->len - ixs->authlen]), hash, ixs->authlen);
-+
-+ /* paranoid */
-+ memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5));
-+ memset((caddr_t)hash, 0, sizeof(*hash));
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA:
-+ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+ SHA1Update(&tctx.sha1, (caddr_t)espp, ixs->skb->len - ixs->iphlen - ixs->authlen);
-+ SHA1Final(hash, &tctx.sha1);
-+ tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+ SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN);
-+ SHA1Final(hash, &tctx.sha1);
-+ memcpy(&(dat[ixs->skb->len - ixs->authlen]), hash, ixs->authlen);
-+
-+ /* paranoid */
-+ memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1));
-+ memset((caddr_t)hash, 0, sizeof(*hash));
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ case AH_NONE:
-+ break;
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_AH_BADALG;
-+ }
-+
-+ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, espp));
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+struct xform_functions esp_xform_funcs[]={
-+ {
-+ protocol: IPPROTO_ESP,
-+ rcv_checks: ipsec_rcv_esp_checks,
-+ rcv_setup_auth: ipsec_rcv_esp_decrypt_setup,
-+ rcv_calc_auth: ipsec_rcv_esp_authcalc,
-+ rcv_decrypt: ipsec_rcv_esp_decrypt,
-+
-+ xmit_setup: ipsec_xmit_esp_setup,
-+ xmit_headroom: sizeof(struct esphdr),
-+ xmit_needtailroom: 1,
-+ },
-+};
-+
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+#ifdef NET_26
-+struct inet_protocol esp_protocol = {
-+ .handler = ipsec_rcv,
-+ .no_policy = 1,
-+};
-+#else
-+struct inet_protocol esp_protocol =
-+{
-+ ipsec_rcv, /* ESP handler */
-+ NULL, /* TUNNEL error control */
-+#ifdef NETDEV_25
-+ 1, /* no policy */
-+#else
-+ 0, /* next */
-+ IPPROTO_ESP, /* protocol ID */
-+ 0, /* copy */
-+ NULL, /* data */
-+ "ESP" /* name */
-+#endif
-+};
-+#endif /* NET_26 */
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+
-+#endif /* !CONFIG_KLIPS_ESP */
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_init.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,460 @@
-+/*
-+ * @(#) Initialization code.
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998 - 2002 Richard Guy Briggs <rgb@freeswan.org>
-+ * 2001 - 2004 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * /proc system code was split out into ipsec_proc.c after rev. 1.70.
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/in.h> /* struct sockaddr_in */
-+#include <linux/skbuff.h>
-+#include <linux/random.h> /* get_random_bytes() */
-+#include <net/protocol.h>
-+
-+#include <openswan.h>
-+
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* 23_SPINLOCK */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* 23_SPINLOCK */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#ifdef CONFIG_PROC_FS
-+# include <linux/proc_fs.h>
-+#endif /* CONFIG_PROC_FS */
-+
-+#ifdef NETLINK_SOCK
-+# include <linux/netlink.h>
-+#else
-+# include <net/netlink.h>
-+#endif
-+
-+#include "openswan/radij.h"
-+
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_stats.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_mast.h"
-+
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+# include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+
-+#ifdef CONFIG_KLIPS_OCF
-+#include "ipsec_ocf.h"
-+#endif
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+#include <net/xfrmudp.h>
-+#endif
-+
-+#ifndef HAVE_UDP_ENCAP_CONVERT
-+# if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(HAVE_XFRM4_UDP_REGISTER)
-+# warning "You are trying to build KLIPS2.6 with NAT-T support, but you did not"
-+# error "properly apply the NAT-T patch to your 2.6 kernel source tree."
-+# endif
-+#endif
-+
-+#if !defined(CONFIG_KLIPS_ESP) && !defined(CONFIG_KLIPS_AH)
-+#error "kernel configuration must include ESP or AH"
-+#endif
-+
-+/*
-+ * seems to be present in 2.4.10 (Linus), but also in some RH and other
-+ * distro kernels of a lower number.
-+ */
-+#ifdef MODULE_LICENSE
-+MODULE_LICENSE("GPL");
-+#endif
-+
-+struct prng ipsec_prng;
-+
-+#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+xfrm4_rcv_encap_t klips_old_encap = NULL;
-+#endif
-+
-+extern int ipsec_device_event(struct notifier_block *dnot, unsigned long event, void *ptr);
-+/*
-+ * the following structure is required so that we receive
-+ * event notifications when network devices are enabled and
-+ * disabled (ifconfig up and down).
-+ */
-+static struct notifier_block ipsec_dev_notifier={
-+ .notifier_call = ipsec_device_event
-+};
-+
-+#ifdef CONFIG_SYSCTL
-+extern int ipsec_sysctl_register(void);
-+extern void ipsec_sysctl_unregister(void);
-+#endif
-+
-+/*
-+ * inet_*_protocol returns void on 2.4.x, int on 2.6.x
-+ * So we need our own wrapper
-+*/
-+#ifdef NET_26
-+static inline int
-+openswan_inet_add_protocol(struct inet_protocol *prot, unsigned protocol, char *protstr)
-+{
-+ int err = inet_add_protocol(prot, protocol);
-+ if (err)
-+ printk(KERN_ERR "KLIPS: can not register %s protocol - recompile with CONFIG_INET_%s disabled or as module\n", protstr,protstr);
-+ return err;
-+}
-+
-+static inline int
-+openswan_inet_del_protocol(struct inet_protocol *prot, unsigned protocol)
-+{
-+ return inet_del_protocol(prot, protocol);
-+}
-+
-+#else
-+static inline int
-+openswan_inet_add_protocol(struct inet_protocol *prot, unsigned protocol)
-+{
-+#ifdef IPSKB_XFRM_TUNNEL_SIZE
-+ inet_add_protocol(prot, protocol);
-+#else
-+ inet_add_protocol(prot);
-+#endif
-+ return 0;
-+}
-+
-+static inline int
-+openswan_inet_del_protocol(struct inet_protocol *prot, unsigned protocol)
-+{
-+#ifdef IPSKB_XFRM_TUNNEL_SIZE
-+ inet_del_protocol(prot, protocol);
-+#else
-+ inet_del_protocol(prot);
-+#endif
-+ return 0;
-+}
-+
-+#endif
-+
-+/* void */
-+int
-+ipsec_klips_init(void)
-+{
-+ int error = 0;
-+ unsigned char seed[256];
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ extern int des_check_key;
-+
-+ /* turn off checking of keys */
-+ des_check_key=0;
-+#endif /* CONFIG_KLIPS_ENC_3DES */
-+
-+ KLIPS_PRINT(1, "klips_info:ipsec_init: "
-+ "KLIPS startup, Openswan KLIPS IPsec stack version: %s\n",
-+ ipsec_version_code());
-+
-+ error = ipsec_xmit_state_cache_init ();
-+ if (error)
-+ goto error_xmit_state_cache;
-+
-+ error = ipsec_rcv_state_cache_init ();
-+ if (error)
-+ goto error_rcv_state_cache;
-+
-+ error |= ipsec_proc_init();
-+ if (error)
-+ goto error_proc_init;
-+
-+#ifdef SPINLOCK
-+ ipsec_sadb.sadb_lock = SPIN_LOCK_UNLOCKED;
-+#else /* SPINLOCK */
-+ ipsec_sadb.sadb_lock = 0;
-+#endif /* SPINLOCK */
-+
-+#ifndef SPINLOCK
-+ tdb_lock.lock = 0;
-+ eroute_lock.lock = 0;
-+#endif /* !SPINLOCK */
-+
-+ error |= ipsec_sadb_init();
-+ if (error)
-+ goto error_sadb_init;
-+
-+ error |= ipsec_radijinit();
-+ if (error)
-+ goto error_radijinit;
-+
-+ error |= pfkey_init();
-+ if (error)
-+ goto error_pfkey_init;
-+
-+ error |= register_netdevice_notifier(&ipsec_dev_notifier);
-+ if (error)
-+ goto error_netdev_notifier;
-+
-+#ifdef CONFIG_XFRM_ALTERNATE_STACK
-+ error = xfrm_register_alternate_rcv (ipsec_rcv);
-+ if (error)
-+ goto error_xfrm_register;
-+
-+#else // CONFIG_XFRM_ALTERNATE_STACK
-+
-+#ifdef CONFIG_KLIPS_ESP
-+ error |= openswan_inet_add_protocol(&esp_protocol, IPPROTO_ESP,"ESP");
-+ if (error)
-+ goto error_openswan_inet_add_protocol_esp;
-+
-+#endif /* CONFIG_KLIPS_ESP */
-+
-+#ifdef CONFIG_KLIPS_AH
-+ error |= openswan_inet_add_protocol(&ah_protocol, IPPROTO_AH,"AH");
-+ if (error)
-+ goto error_openswan_inet_add_protocol_ah;
-+#endif /* CONFIG_KLIPS_AH */
-+
-+/* we never actually link IPCOMP to the stack */
-+#ifdef IPCOMP_USED_ALONE
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ error |= openswan_inet_add_protocol(&comp_protocol, IPPROTO_COMP,"IPCOMP");
-+ if (error)
-+ goto error_openswan_inet_add_protocol_comp;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+#endif
-+
-+#endif // CONFIG_XFRM_ALTERNATE_STACK
-+
-+ error |= ipsec_tunnel_init_devices();
-+ if (error)
-+ goto error_tunnel_init_devices;
-+
-+ error |= ipsec_mast_init_devices();
-+
-+#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+ /* register our ESP-UDP handler */
-+ if(udp4_register_esp_rcvencap(klips26_rcv_encap
-+ , &klips_old_encap)!=0) {
-+ printk(KERN_ERR "KLIPS: can not register klips_rcv_encap function\n");
-+ }
-+#endif
-+
-+#ifdef CONFIG_SYSCTL
-+ error |= ipsec_sysctl_register();
-+ if (error)
-+ goto error_sysctl_register;
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ ipsec_alg_init();
-+#endif
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ ipsec_ocf_init();
-+#endif
-+
-+ get_random_bytes((void *)seed, sizeof(seed));
-+ prng_init(&ipsec_prng, seed, sizeof(seed));
-+ return error;
-+
-+ // undo ipsec_sysctl_register
-+error_sysctl_register:
-+ ipsec_tunnel_cleanup_devices();
-+error_tunnel_init_devices:
-+#ifdef CONFIG_XFRM_ALTERNATE_STACK
-+ xfrm_deregister_alternate_rcv(ipsec_rcv);
-+error_xfrm_register:
-+#else // CONFIG_XFRM_ALTERNATE_STACK
-+#ifdef IPCOMP_USED_ALONE
-+#ifdef CONFIG_KLIPS_IPCOMP
-+error_openswan_inet_add_protocol_comp:
-+ openswan_inet_del_protocol(&comp_protocol, IPPROTO_COMP);
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+#endif
-+error_openswan_inet_add_protocol_ah:
-+ openswan_inet_del_protocol(&ah_protocol, IPPROTO_AH);
-+error_openswan_inet_add_protocol_esp:
-+ openswan_inet_del_protocol(&esp_protocol, IPPROTO_ESP);
-+#endif
-+ unregister_netdevice_notifier(&ipsec_dev_notifier);
-+error_netdev_notifier:
-+ pfkey_cleanup();
-+error_pfkey_init:
-+ ipsec_radijcleanup();
-+error_radijinit:
-+ ipsec_sadb_cleanup(0);
-+ ipsec_sadb_free();
-+error_sadb_init:
-+error_proc_init:
-+ // ipsec_proc_init() does not cleanup after itself, so we have to do it here
-+ // TODO: ipsec_proc_init() should roll back what it chaned on failure
-+ ipsec_proc_cleanup();
-+ ipsec_rcv_state_cache_cleanup ();
-+error_rcv_state_cache:
-+ ipsec_xmit_state_cache_cleanup ();
-+error_xmit_state_cache:
-+ return error;
-+}
-+
-+
-+/* void */
-+int
-+ipsec_cleanup(void)
-+{
-+ int error = 0;
-+
-+#ifdef CONFIG_SYSCTL
-+ ipsec_sysctl_unregister();
-+#endif
-+#if defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+ if(udp4_unregister_esp_rcvencap(klips_old_encap) < 0) {
-+ printk(KERN_ERR "KLIPS: can not unregister klips_rcv_encap function\n");
-+ }
-+#endif
-+
-+ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */
-+ "klips_debug:ipsec_cleanup: "
-+ "calling ipsec_tunnel_cleanup_devices.\n");
-+ error |= ipsec_tunnel_cleanup_devices();
-+
-+ KLIPS_PRINT(debug_netlink, "called ipsec_tunnel_cleanup_devices");
-+
-+#ifdef CONFIG_XFRM_ALTERNATE_STACK
-+
-+ xfrm_deregister_alternate_rcv(ipsec_rcv);
-+
-+#else // CONFIG_XFRM_ALTERNATE_STACK
-+
-+/* we never actually link IPCOMP to the stack */
-+#ifdef IPCOMP_USED_ALONE
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ if (openswan_inet_del_protocol(&comp_protocol, IPPROTO_COMP) < 0)
-+ printk(KERN_INFO "klips_debug:ipsec_cleanup: "
-+ "comp close: can't remove protocol\n");
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+#endif /* IPCOMP_USED_ALONE */
-+
-+#ifdef CONFIG_KLIPS_AH
-+ if (openswan_inet_del_protocol(&ah_protocol, IPPROTO_AH) < 0)
-+ printk(KERN_INFO "klips_debug:ipsec_cleanup: "
-+ "ah close: can't remove protocol\n");
-+#endif /* CONFIG_KLIPS_AH */
-+
-+#ifdef CONFIG_KLIPS_ESP
-+ if (openswan_inet_del_protocol(&esp_protocol, IPPROTO_ESP) < 0)
-+ printk(KERN_INFO "klips_debug:ipsec_cleanup: "
-+ "esp close: can't remove protocol\n");
-+#endif /* CONFIG_KLIPS_ESP */
-+
-+#endif // CONFIG_XFRM_ALTERNATE_STACK
-+
-+ error |= unregister_netdevice_notifier(&ipsec_dev_notifier);
-+
-+ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */
-+ "klips_debug:ipsec_cleanup: "
-+ "calling ipsec_sadb_cleanup.\n");
-+ error |= ipsec_sadb_cleanup(0);
-+ error |= ipsec_sadb_free();
-+
-+ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */
-+ "klips_debug:ipsec_cleanup: "
-+ "calling ipsec_radijcleanup.\n");
-+ error |= ipsec_radijcleanup();
-+
-+ KLIPS_PRINT(debug_pfkey, /* debug_tunnel & DB_TN_INIT, */
-+ "klips_debug:ipsec_cleanup: "
-+ "calling pfkey_cleanup.\n");
-+ error |= pfkey_cleanup();
-+
-+ ipsec_rcv_state_cache_cleanup ();
-+ ipsec_xmit_state_cache_cleanup ();
-+
-+ ipsec_proc_cleanup();
-+
-+ prng_final(&ipsec_prng);
-+
-+ return error;
-+}
-+
-+#ifdef MODULE
-+int
-+init_module(void)
-+{
-+ int error = 0;
-+
-+ error |= ipsec_klips_init();
-+
-+ return error;
-+}
-+
-+#ifndef NET_26
-+void
-+cleanup_module(void)
-+{
-+ KLIPS_PRINT(debug_netlink, /* debug_tunnel & DB_TN_INIT, */
-+ "klips_debug:cleanup_module: "
-+ "calling ipsec_cleanup.\n");
-+
-+ ipsec_cleanup();
-+
-+ KLIPS_PRINT(1, "klips_info:cleanup_module: "
-+ "ipsec module unloaded.\n");
-+}
-+#endif
-+#endif /* MODULE */
-+
-+/*
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_ipcomp.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,253 @@
-+/*
-+ * processing code for IPCOMP
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_auth.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipsec_ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include "openswan/ipsec_proto.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+enum ipsec_rcv_value
-+ipsec_rcv_ipcomp_checks(struct ipsec_rcv_state *irs,
-+ struct sk_buff *skb)
-+{
-+ int ipcompminlen;
-+
-+ ipcompminlen = sizeof(struct iphdr);
-+
-+ if(skb->len < (ipcompminlen + sizeof(struct ipcomphdr))) {
-+ KLIPS_PRINT(debug_rcv & DB_RX_INAU,
-+ "klips_debug:ipsec_rcv: "
-+ "runt comp packet of skb->len=%d received from %s, dropped.\n",
-+ skb->len,
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ irs->protostuff.ipcompstuff.compp = (struct ipcomphdr *)skb_transport_header(skb);
-+ irs->said.spi = htonl((__u32)ntohs(irs->protostuff.ipcompstuff.compp->ipcomp_cpi));
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_rcv_ipcomp_decomp(struct ipsec_rcv_state *irs)
-+{
-+ unsigned int flags = 0;
-+ struct ipsec_sa *ipsp = irs->ipsp;
-+ struct sk_buff *skb;
-+
-+ skb=irs->skb;
-+
-+ ipsec_xmit_dmp("ipcomp", skb_transport_header(skb), skb->len);
-+
-+ if(ipsp == NULL) {
-+ return IPSEC_RCV_SAIDNOTFOUND;
-+ }
-+
-+ if(sysctl_ipsec_inbound_policy_check &&
-+ ((((ntohl(ipsp->ips_said.spi) & 0x0000ffff) != (ntohl(irs->said.spi) & 0x0000ffff)) &&
-+ (ipsp->ips_encalg != ntohl(irs->said.spi)) /* this is a workaround for peer non-compliance with rfc2393 */
-+ ))) {
-+ char sa2[SATOT_BUF];
-+ size_t sa_len2 = 0;
-+
-+ sa_len2 = KLIPS_SATOT(debug_rcv, &ipsp->ips_said, 0, sa2, sizeof(sa2));
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "Incoming packet with SA(IPCA):%s does not match policy SA(IPCA):%s cpi=%04x cpi->spi=%08x spi=%08x, spi->cpi=%04x for SA grouping, dropped.\n",
-+ irs->sa_len ? irs->sa : " (error)",
-+ ipsp != NULL ? (sa_len2 ? sa2 : " (error)") : "NULL",
-+ ntohs(irs->protostuff.ipcompstuff.compp->ipcomp_cpi),
-+ (__u32)ntohl(irs->said.spi),
-+ ipsp != NULL ? (__u32)ntohl((ipsp->ips_said.spi)) : 0,
-+ ipsp != NULL ? (__u16)(ntohl(ipsp->ips_said.spi) & 0x0000ffff) : 0);
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ return IPSEC_RCV_SAIDNOTFOUND;
-+ }
-+
-+ ipsp->ips_comp_ratio_cbytes += ntohs(irs->ipp->tot_len);
-+ irs->next_header = irs->protostuff.ipcompstuff.compp->ipcomp_nh;
-+
-+ skb = skb_decompress(skb, ipsp, &flags);
-+ if (!skb || flags) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "skb_decompress() returned error flags=%x, dropped.\n",
-+ flags);
-+ if (irs->stats) {
-+ if (flags)
-+ irs->stats->rx_errors++;
-+ else
-+ irs->stats->rx_dropped++;
-+ }
-+ return IPSEC_RCV_IPCOMPFAILED;
-+ }
-+
-+ /* make sure we update the pointer */
-+ irs->skb = skb;
-+
-+#ifdef NET_21
-+ irs->ipp = ip_hdr(skb);
-+#else /* NET_21 */
-+ irs->ipp = skb->ip_hdr;
-+#endif /* NET_21 */
-+
-+ ipsp->ips_comp_ratio_dbytes += ntohs(irs->ipp->tot_len);
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "packet decompressed SA(IPCA):%s cpi->spi=%08x spi=%08x, spi->cpi=%04x, nh=%d.\n",
-+ irs->sa_len ? irs->sa : " (error)",
-+ (__u32)ntohl(irs->said.spi),
-+ ipsp != NULL ? (__u32)ntohl((ipsp->ips_said.spi)) : 0,
-+ ipsp != NULL ? (__u16)(ntohl(ipsp->ips_said.spi) & 0x0000ffff) : 0,
-+ irs->next_header);
-+ KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, irs->ipp);
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_ipcomp_setup(struct ipsec_xmit_state *ixs)
-+{
-+ unsigned int flags = 0;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ unsigned int old_tot_len = ntohs(ixs->iph->tot_len);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ ixs->ipsp->ips_comp_ratio_dbytes += ntohs(ixs->iph->tot_len);
-+
-+ ixs->skb = skb_compress(ixs->skb, ixs->ipsp, &flags);
-+
-+#ifdef NET_21
-+ ixs->iph = ip_hdr(ixs->skb);
-+#else /* NET_21 */
-+ ixs->iph = ixs->skb->ip_hdr;
-+#endif /* NET_21 */
-+
-+ ixs->ipsp->ips_comp_ratio_cbytes += ntohs(ixs->iph->tot_len);
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_tunnel & DB_TN_CROUT)
-+ {
-+ if (old_tot_len > ntohs(ixs->iph->tot_len))
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "packet shrunk from %d to %d bytes after compression, cpi=%04x (should be from spi=%08x, spi&0xffff=%04x.\n",
-+ old_tot_len, ntohs(ixs->iph->tot_len),
-+ ntohs(((struct ipcomphdr*)(((char*)ixs->iph) + ((ixs->iph->ihl) << 2)))->ipcomp_cpi),
-+ ntohl(ixs->ipsp->ips_said.spi),
-+ (__u16)(ntohl(ixs->ipsp->ips_said.spi) & 0x0000ffff));
-+ else
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "packet did not compress (flags = %d).\n",
-+ flags);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+struct xform_functions ipcomp_xform_funcs[]={
-+ {
-+ protocol: IPPROTO_COMP,
-+ rcv_checks: ipsec_rcv_ipcomp_checks,
-+ rcv_decrypt: ipsec_rcv_ipcomp_decomp,
-+ xmit_setup: ipsec_xmit_ipcomp_setup,
-+ xmit_headroom: 0,
-+ xmit_needtailroom: 0,
-+ },
-+};
-+
-+#if 0
-+/* We probably don't want to install a pure IPCOMP protocol handler, but
-+ only want to handle IPCOMP if it is encapsulated inside an ESP payload
-+ (which is already handled) */
-+#ifndef CONFIG_XFRM_ALTERNATE_STACK
-+#ifdef CONFIG_KLIPS_IPCOMP
-+struct inet_protocol comp_protocol =
-+{
-+ ipsec_rcv, /* COMP handler */
-+ NULL, /* COMP error control */
-+#ifdef NETDEV_25
-+ 1, /* no policy */
-+#else
-+ 0, /* next */
-+ IPPROTO_COMP, /* protocol ID */
-+ 0, /* copy */
-+ NULL, /* data */
-+ "COMP" /* name */
-+#endif
-+};
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+#endif /* CONFIG_XFRM_ALTERNATE_STACK */
-+#endif
-+
-+#endif /* CONFIG_KLIPS_IPCOMP */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_ipip.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,123 @@
-+/*
-+ * processing code for IPIP
-+ * Copyright (C) 2003 Michael Richardson <mcr@sandelman.ottawa.on.ca>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_auth.h"
-+#include "openswan/ipsec_ipip.h"
-+#include "openswan/ipsec_param.h"
-+
-+#include "openswan/ipsec_proto.h"
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_ipip_setup(struct ipsec_xmit_state *ixs)
-+{
-+ ixs->iph->version = 4;
-+
-+ switch(sysctl_ipsec_tos) {
-+ case 0:
-+#ifdef NET_21
-+ ixs->iph->tos = ip_hdr(ixs->skb)->tos;
-+#else /* NET_21 */
-+ ixs->iph->tos = ixs->skb->ip_hdr->tos;
-+#endif /* NET_21 */
-+ break;
-+ case 1:
-+ ixs->iph->tos = 0;
-+ break;
-+ default:
-+ break;
-+ }
-+ ixs->iph->ttl = SYSCTL_IPSEC_DEFAULT_TTL;
-+ ixs->iph->frag_off = 0;
-+ ixs->iph->saddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_s))->sin_addr.s_addr;
-+ ixs->iph->daddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_d))->sin_addr.s_addr;
-+ ixs->iph->protocol = IPPROTO_IPIP;
-+ ixs->iph->ihl = sizeof(struct iphdr) >> 2;
-+
-+ KLIPS_IP_SELECT_IDENT(ixs->iph, ixs->skb);
-+
-+ ixs->newdst = (__u32)ixs->iph->daddr;
-+ ixs->newsrc = (__u32)ixs->iph->saddr;
-+
-+#ifdef NET_21
-+ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ip_hdr(ixs->skb)));
-+#endif /* NET_21 */
-+ return IPSEC_XMIT_OK;
-+}
-+
-+struct xform_functions ipip_xform_funcs[]={
-+ {
-+ protocol: IPPROTO_IPIP,
-+ rcv_checks: NULL,
-+ rcv_setup_auth: NULL,
-+ rcv_calc_auth: NULL,
-+ rcv_decrypt: NULL,
-+
-+ xmit_setup: ipsec_xmit_ipip_setup,
-+ xmit_headroom: sizeof(struct iphdr),
-+ xmit_needtailroom: 0,
-+ },
-+};
-+
-+
-+
-+
-+
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_kern24.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,74 @@
-+/*
-+ * Copyright 2005 (C) Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This is a file of functions which are present in 2.6 kernels,
-+ * but are not available by default in the 2.4 series.
-+ *
-+ * As such this code is usually from the Linux kernel, and is covered by
-+ * GPL.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * $Id: ipsec_kern24.c,v 1.2 2005/05/20 03:19:18 mcr Exp $
-+ *
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/spinlock.h>
-+
-+/*
-+ * printk rate limiting, lifted from the networking subsystem.
-+ *
-+ * This enforces a rate limit: not more than one kernel message
-+ * every printk_ratelimit_jiffies to make a denial-of-service
-+ * attack impossible.
-+ */
-+static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
-+
-+int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-+{
-+ static unsigned long toks = 10*5*HZ;
-+ static unsigned long last_msg;
-+ static int missed;
-+ unsigned long flags;
-+ unsigned long now = jiffies;
-+
-+ spin_lock_irqsave(&ratelimit_lock, flags);
-+ toks += now - last_msg;
-+ last_msg = now;
-+ if (toks > (ratelimit_burst * ratelimit_jiffies))
-+ toks = ratelimit_burst * ratelimit_jiffies;
-+ if (toks >= ratelimit_jiffies) {
-+ int lost = missed;
-+ missed = 0;
-+ toks -= ratelimit_jiffies;
-+ spin_unlock_irqrestore(&ratelimit_lock, flags);
-+ if (lost)
-+ printk(KERN_WARNING "printk: %d messages suppressed.\n", lost);
-+ return 1;
-+ }
-+ missed++;
-+ spin_unlock_irqrestore(&ratelimit_lock, flags);
-+ return 0;
-+}
-+
-+/* minimum time in jiffies between messages */
-+int printk_ratelimit_jiffies = 5*HZ;
-+
-+/* number of messages we send before ratelimiting */
-+int printk_ratelimit_burst = 10;
-+
-+int printk_ratelimit(void)
-+{
-+ return __printk_ratelimit(printk_ratelimit_jiffies,
-+ printk_ratelimit_burst);
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_life.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,221 @@
-+/*
-+ * @(#) lifetime structure utilities
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * and Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ *
-+ */
-+
-+/*
-+ * This provides series of utility functions for dealing with lifetime
-+ * structures.
-+ *
-+ * ipsec_check_lifetime - returns -1 hard lifetime exceeded
-+ * 0 soft lifetime exceeded
-+ * 1 everything is okay
-+ * based upon whether or not the count exceeds hard/soft
-+ *
-+ */
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif /* for CONFIG_IP_FORWARD */
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#include <linux/netdevice.h> /* struct device, struct net_device_stats and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/skbuff.h>
-+#include <linux/ip.h>
-+#include <openswan.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_eroute.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_ipe4.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+
-+
-+enum ipsec_life_alive
-+ipsec_lifetime_check(struct ipsec_lifetime64 *il64,
-+ const char *lifename,
-+ const char *saname,
-+ enum ipsec_life_type ilt,
-+ enum ipsec_direction idir,
-+ struct ipsec_sa *ips)
-+{
-+ __u64 count;
-+ const char *dir;
-+
-+ if(saname == NULL) {
-+ saname = "unknown-SA";
-+ }
-+
-+ if(idir == ipsec_incoming) {
-+ dir = "incoming";
-+ } else {
-+ dir = "outgoing";
-+ }
-+
-+
-+ if(ilt == ipsec_life_timebased) {
-+ count = jiffies/HZ - il64->ipl_count;
-+ } else {
-+ count = il64->ipl_count;
-+ }
-+
-+ if(il64->ipl_hard &&
-+ (count > il64->ipl_hard)) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_lifetime_check: "
-+ "hard %s lifetime of SA:<%s%s%s> %s has been reached, SA expired, "
-+ "%s packet dropped.\n",
-+ lifename,
-+ IPS_XFORM_NAME(ips),
-+ saname,
-+ dir);
-+
-+ pfkey_expire(ips, 1);
-+ return ipsec_life_harddied;
-+ }
-+
-+ if(il64->ipl_soft &&
-+ (count > il64->ipl_soft)) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_lifetime_check: "
-+ "soft %s lifetime of SA:<%s%s%s> %s has been reached, SA expiring, "
-+ "soft expire message sent up, %s packet still processed.\n",
-+ lifename,
-+ IPS_XFORM_NAME(ips),
-+ saname,
-+ dir);
-+
-+ if(ips->ips_state != K_SADB_SASTATE_DYING) {
-+ pfkey_expire(ips, 0);
-+ }
-+ ips->ips_state = K_SADB_SASTATE_DYING;
-+
-+ return ipsec_life_softdied;
-+ }
-+ return ipsec_life_okay;
-+}
-+
-+
-+/*
-+ * This function takes a buffer (with length), a lifetime name and type,
-+ * and formats a string to represent the current values of the lifetime.
-+ *
-+ * It returns the number of bytes that the format took (or would take,
-+ * if the buffer were large enough: snprintf semantics).
-+ * This is used in /proc routines and in debug output.
-+ */
-+int
-+ipsec_lifetime_format(char *buffer,
-+ int buflen,
-+ char *lifename,
-+ enum ipsec_life_type timebaselife,
-+ struct ipsec_lifetime64 *lifetime)
-+{
-+ int len = 0;
-+ __u64 count;
-+
-+ if(timebaselife == ipsec_life_timebased) {
-+ count = jiffies/HZ - lifetime->ipl_count;
-+ } else {
-+ count = lifetime->ipl_count;
-+ }
-+
-+ if(lifetime->ipl_count > 1 ||
-+ lifetime->ipl_soft ||
-+ lifetime->ipl_hard) {
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0))
-+ len = ipsec_snprintf(buffer, buflen,
-+ "%s(%Lu,%Lu,%Lu)",
-+ lifename,
-+ count,
-+ lifetime->ipl_soft,
-+ lifetime->ipl_hard);
-+#else /* XXX high 32 bits are not displayed */
-+ len = ipsec_snprintf(buffer, buflen,
-+ "%s(%lu,%lu,%lu)",
-+ lifename,
-+ (unsigned long)count,
-+ (unsigned long)lifetime->ipl_soft,
-+ (unsigned long)lifetime->ipl_hard);
-+#endif
-+ }
-+
-+ return len;
-+}
-+
-+void
-+ipsec_lifetime_update_hard(struct ipsec_lifetime64 *lifetime,
-+ __u64 newvalue)
-+{
-+ if(newvalue &&
-+ (!lifetime->ipl_hard ||
-+ (newvalue < lifetime->ipl_hard))) {
-+ lifetime->ipl_hard = newvalue;
-+
-+ if(!lifetime->ipl_soft &&
-+ (lifetime->ipl_hard < lifetime->ipl_soft)) {
-+ lifetime->ipl_soft = lifetime->ipl_hard;
-+ }
-+ }
-+}
-+
-+void
-+ipsec_lifetime_update_soft(struct ipsec_lifetime64 *lifetime,
-+ __u64 newvalue)
-+{
-+ if(newvalue &&
-+ (!lifetime->ipl_soft ||
-+ (newvalue < lifetime->ipl_soft))) {
-+ lifetime->ipl_soft = newvalue;
-+
-+ if(lifetime->ipl_hard &&
-+ (lifetime->ipl_hard < lifetime->ipl_soft)) {
-+ lifetime->ipl_soft = lifetime->ipl_hard;
-+ }
-+ }
-+}
-+
-+
-+/*
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_mast.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,869 @@
-+/*
-+ * IPSEC MAST code.
-+ * Copyright (C) 2005 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+char ipsec_mast_c_version[] = "RCSID $Id: ipsec_mast.c,v 1.7 2005/04/29 05:10:22 mcr Exp $";
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif /* for CONFIG_IP_FORWARD */
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <net/arp.h>
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <linux/skbuff.h>
-+
-+#include <linux/netdevice.h> /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <net/xfrm.h>
-+
-+#include <openswan.h>
-+
-+#include <net/icmp.h> /* icmp_send() */
-+#include <net/ip.h>
-+#ifdef NETDEV_23
-+# include <linux/netfilter_ipv4.h>
-+#endif /* NETDEV_23 */
-+
-+#include <linux/if_arp.h>
-+
-+#include "openswan/ipsec_kversion.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_eroute.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_xmit.h"
-+#include "openswan/ipsec_mast.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_ipe4.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_kern24.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+#include <linux/udp.h>
-+#endif
-+
-+int ipsec_mastdevice_count = -1;
-+int debug_mast;
-+
-+static __u32 zeroes[64];
-+
-+DEBUG_NO_STATIC int
-+ipsec_mast_open(struct net_device *dev)
-+{
-+ struct mastpriv *prv = dev->priv;
-+
-+ prv = prv;
-+
-+ /*
-+ * Can't open until attached.
-+ */
-+
-+ KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+ "klips_debug:ipsec_mast_open: "
-+ "dev = %s\n",
-+ dev->name);
-+
-+ return 0;
-+}
-+
-+DEBUG_NO_STATIC int
-+ipsec_mast_close(struct net_device *dev)
-+{
-+ return 0;
-+}
-+
-+static inline int ipsec_mast_xmit2(struct sk_buff *skb)
-+{
-+ return dst_output(skb);
-+}
-+
-+#ifdef HAVE_IPSEC_SAREF
-+int ip_cmsg_send_ipsec(struct cmsghdr *cmsg, struct ipcm_cookie *ipc)
-+{
-+ struct ipsec_sa *sa1;
-+ xfrm_sec_unique_t *ref;
-+ struct sec_path *sp;
-+
-+ if(cmsg->cmsg_len != CMSG_LEN(sizeof(xfrm_sec_unique_t))) {
-+ return -EINVAL;
-+ }
-+
-+ ref = (xfrm_sec_unique_t *)CMSG_DATA(cmsg);
-+
-+ sp = secpath_dup(NULL);
-+ if(!sp) {
-+ return -EINVAL;
-+ }
-+
-+ sp->ref = *ref;
-+ KLIPS_PRINT(debug_mast, "sending with saref=%u\n", sp->ref);
-+
-+ sa1 = ipsec_sa_getbyref(sp->ref);
-+ if(sa1 && sa1->ips_out) {
-+ ipc->oif = sa1->ips_out->ifindex;
-+ KLIPS_PRINT(debug_mast, "setting oif: %d\n", ipc->oif);
-+ }
-+ ipsec_sa_put(sa1);
-+
-+ ipc->sp = sp;
-+
-+ return 0;
-+}
-+#endif
-+
-+#if 0
-+/* Paul: This seems to be unused dead code */
-+enum ipsec_xmit_value
-+ipsec_mast_send(struct ipsec_xmit_state*ixs)
-+{
-+ /* new route/dst cache code from James Morris */
-+ ixs->skb->dev = ixs->physdev;
-+ /*skb_orphan(ixs->skb);*/
-+ if((ixs->error = ip_route_output(&ixs->route,
-+ ixs->skb->nh.iph->daddr,
-+ ixs->pass ? 0 : ixs->skb->nh.iph->saddr,
-+ RT_TOS(ixs->skb->nh.iph->tos),
-+ ixs->physdev->ifindex /* rgb: should this be 0? */))) {
-+ ixs->stats->tx_errors++;
-+ KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "ip_route_output failed with error code %d, rt->u.dst.dev=%s, dropped\n",
-+ ixs->error,
-+ ixs->route->u.dst.dev->name);
-+ return IPSEC_XMIT_ROUTEERR;
-+ }
-+ if(ixs->dev == ixs->route->u.dst.dev) {
-+ ip_rt_put(ixs->route);
-+ /* This is recursion, drop it. */
-+ ixs->stats->tx_errors++;
-+ KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "suspect recursion, dev=rt->u.dst.dev=%s, dropped\n",
-+ ixs->dev->name);
-+ return IPSEC_XMIT_RECURSDETECT;
-+ }
-+ dst_release(ixs->skb->dst);
-+ ixs->skb->dst = &ixs->route->u.dst;
-+ ixs->stats->tx_bytes += ixs->skb->len;
-+ if(ixs->skb->len < ixs->skb->nh.raw - ixs->skb->data) {
-+ ixs->stats->tx_errors++;
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_xmit_send: "
-+ "tried to __skb_pull nh-data=%ld, %d available. This should never happen, please report.\n",
-+ (unsigned long)(ixs->skb->nh.raw - ixs->skb->data),
-+ ixs->skb->len);
-+ return IPSEC_XMIT_PUSHPULLERR;
-+ }
-+ __skb_pull(ixs->skb, ixs->skb->nh.raw - ixs->skb->data);
-+
-+ ipsec_nf_reset(ixs->skb);
-+
-+ KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "...done, calling ip_send() on device:%s\n",
-+ ixs->skb->dev ? ixs->skb->dev->name : "NULL");
-+ KLIPS_IP_PRINT(debug_mast & DB_MAST_XMIT, ixs->skb->nh.iph);
-+ {
-+ int err;
-+
-+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, ixs->skb, NULL, ixs->route->u.dst.dev,
-+ ipsec_mast_xmit2);
-+ if(err != NET_XMIT_SUCCESS && err != NET_XMIT_CN) {
-+ if(net_ratelimit())
-+ printk(KERN_ERR
-+ "klips_error:ipsec_xmit_send: "
-+ "ip_send() failed, err=%d\n",
-+ -err);
-+ ixs->stats->tx_errors++;
-+ ixs->stats->tx_aborted_errors++;
-+ ixs->skb = NULL;
-+ return IPSEC_XMIT_IPSENDFAILURE;
-+ }
-+ }
-+ ixs->stats->tx_packets++;
-+ ixs->skb = NULL;
-+
-+ return IPSEC_XMIT_OK;
-+}
-+#endif
-+
-+static void
-+ipsec_mast_xsm_complete(
-+ struct ipsec_xmit_state *ixs,
-+ enum ipsec_xmit_value stat)
-+{
-+ if (stat != IPSEC_XMIT_OK) {
-+ KLIPS_PRINT(debug_mast,
-+ "klips_debug:ipsec_mast_xsm_complete: ipsec_xsm failed: %d\n",
-+ stat);
-+ goto cleanup;
-+ }
-+
-+ /* do any final NAT-encapsulation */
-+ stat = ipsec_nat_encap(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+ /* now send the packet again */
-+ {
-+ struct flowi fl;
-+
-+ memset(&fl, 0, sizeof(fl));
-+ ipsec_xmit_send(ixs, &fl);
-+ }
-+
-+cleanup:
-+ ipsec_xmit_cleanup(ixs);
-+
-+ if(ixs->ipsp) {
-+ ipsec_sa_put(ixs->ipsp);
-+ ixs->ipsp=NULL;
-+ }
-+ if(ixs->skb) {
-+ ipsec_kfree_skb(ixs->skb);
-+ ixs->skb=NULL;
-+ }
-+ ipsec_xmit_state_delete(ixs);
-+}
-+
-+/*
-+ * Transmit entry point of a mast device (installed as hard_start_xmit by
-+ * ipsec_mast_probe()).
-+ *
-+ * This function assumes it is being called from dev_queue_xmit()
-+ * and that skb is filled properly by that function.
-+ *
-+ * The SA to use is taken from the top-bit-tagged nfmark and, when
-+ * HAVE_IPSEC_SAREF is defined, overridden by the sec_path reference.
-+ * The packet is then pushed through the transmit state machine starting
-+ * at IPSEC_XSM_INIT2; ipsec_mast_xsm_complete() finishes the job.
-+ *
-+ * Always returns 0, so every early exit has to release what it owns:
-+ * the skb and, once allocated, the transmit state.
-+ */
-+int
-+ipsec_mast_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	struct ipsec_xmit_state *ixs;
-+	IPsecSAref_t SAref;
-+
-+	if(skb == NULL) {
-+		printk("mast start_xmit passed NULL\n");
-+		return 0;
-+	}
-+
-+	ixs = ipsec_xmit_state_new();
-+	if(ixs == NULL) {
-+		printk("mast failed to allocate IXS\n");
-+		/* we report the packet as handled, so it must not be leaked */
-+		ipsec_kfree_skb(skb);
-+		return 0;
-+	}
-+
-+	ixs->skb = skb;
-+	SAref = 0;
-+	if(skb->nfmark & 0x80000000) {
-+		SAref = NFmark2IPsecSAref(skb->nfmark);
-+		KLIPS_PRINT(debug_mast, "getting SAref=%d from nfmark\n",
-+			    SAref);
-+	}
-+
-+#ifdef HAVE_IPSEC_SAREF
-+	if(skb->sp && skb->sp->ref != IPSEC_SAREF_NULL) {
-+		SAref = skb->sp->ref;
-+		KLIPS_PRINT(debug_mast, "getting SAref=%d from sec_path\n",
-+			    SAref);
-+	}
-+#endif
-+	KLIPS_PRINT(debug_mast, "skb=%p\n", skb);
-+
-+	ipsec_xmit_sanity_check_skb(ixs);
-+
-+	ixs->ipsp = ipsec_sa_getbyref(SAref);
-+	if(ixs->ipsp == NULL) {
-+		KLIPS_ERROR(debug_mast, "%s: no SA for saref=%d (sp=%p)\n",
-+			    dev->name, SAref, skb->sp);
-+		ipsec_kfree_skb(skb);
-+		/* the skb is gone; detach it and release the state as well */
-+		ixs->skb = NULL;
-+		ipsec_xmit_state_delete(ixs);
-+		return 0;
-+	}
-+
-+	/*
-+	 * we should be calculating the MTU by looking up a route
-+	 * based upon the destination in the SA, and then cache
-+	 * it into the SA, but we don't do that right now.
-+	 */
-+	ixs->cur_mtu = 1460;
-+	ixs->physmtu = 1460;
-+
-+	ixs->xsm_complete = ipsec_mast_xsm_complete;
-+	ixs->state = IPSEC_XSM_INIT2;	/* we start later in the process */
-+
-+	ipsec_xsm(ixs);
-+	return 0;
-+
-+}
-+
-+/* Return the statistics block kept in the device's private mastpriv area. */
-+DEBUG_NO_STATIC struct net_device_stats *
-+ipsec_mast_get_stats(struct net_device *dev)
-+{
-+	struct mastpriv *mprv = (struct mastpriv *)(dev->priv);
-+
-+	return &mprv->mystats;
-+}
-+
-+#if 0
-+/*
-+ * Revectored calls.
-+ * For each of these calls, a field exists in our private structure.
-+ *
-+ * ipsec_mast_hard_header() sits inside an "#if 0" region, so it is not
-+ * compiled.  As written it builds no header at all; it only validates
-+ * its arguments and logs:
-+ *
-+ *   -ENODATA  skb is NULL
-+ *   -ENODEV   dev is NULL, dev has no private area, or (for anything
-+ *             other than ETH_P_IPV6) prv->hard_header is not set; the
-+ *             last case also bumps tx_dropped
-+ *   0         otherwise (ret is never changed from its initial value)
-+ *
-+ * NOTE(review): prv is initialised from dev->priv in its declaration,
-+ * i.e. before the dev == NULL test below can take effect.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_hard_header(struct sk_buff *skb, struct net_device *dev,
-+	unsigned short type, void *daddr, void *saddr, unsigned len)
-+{
-+	struct mastpriv *prv = dev->priv;
-+	struct net_device_stats *stats;	/* This device's statistics */
-+	int ret = 0;
-+
-+	if(skb == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+			    "klips_debug:ipsec_mast_hard_header: "
-+			    "no skb...\n");
-+		return -ENODATA;
-+	}
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+			    "klips_debug:ipsec_mast_hard_header: "
-+			    "no device...\n");
-+		return -ENODEV;
-+	}
-+
-+	KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+		    "klips_debug:ipsec_mast_hard_header: "
-+		    "skb->dev=%s\n",
-+		    dev->name);
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+			    "klips_debug:ipsec_mast_hard_header: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return -ENODEV;
-+	}
-+
-+	stats = (struct net_device_stats *) &(prv->mystats);
-+
-+	/* check if we have to send a IPv6 packet. It might be a Router
-+	   Solicitation, where the building of the packet happens in
-+	   reverse order:
-+	   1. ll hdr,
-+	   2. IPv6 hdr,
-+	   3. ICMPv6 hdr
-+	   -> skb->nh.raw is still uninitialized when this function is
-+	   called!! If this is no IPv6 packet, we can print debugging
-+	   messages, otherwise we skip all debugging messages and just
-+	   build the ll header */
-+	if(type != ETH_P_IPV6) {
-+		/* execute this only, if we don't have to build the
-+		   header for a IPv6 packet */
-+		if(!prv->hard_header) {
-+			KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+				    "klips_debug:ipsec_mast_hard_header: "
-+				    "physical device has been detached, packet dropped 0p%p->0p%p len=%d type=%d dev=%s->NULL ",
-+				    saddr,
-+				    daddr,
-+				    len,
-+				    type,
-+				    dev->name);
-+			KLIPS_PRINTMORE(debug_mast & DB_MAST_REVEC,
-+					"ip=%08x->%08x\n",
-+					(__u32)ntohl(skb->nh.iph->saddr),
-+					(__u32)ntohl(skb->nh.iph->daddr) );
-+			stats->tx_dropped++;
-+			return -ENODEV;
-+		}
-+	} else {
-+		KLIPS_PRINT(debug_mast,
-+			    "klips_debug:ipsec_mast_hard_header: "
-+			    "is IPv6 packet, skip debugging messages, only revector and build linklocal header.\n");
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Placeholder rebuild_header handler: fetches the private area of the
-+ * skb's device, does nothing with it, and reports success.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_rebuild_header(struct sk_buff *skb)
-+{
-+	struct mastpriv *prv;
-+
-+	prv = skb->dev->priv;
-+	prv = prv;	/* self-assignment only marks the variable as used */
-+
-+	return 0;
-+}
-+
-+/*
-+ * Placeholder set_mac_address handler: the address argument is ignored
-+ * and success is always reported.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_set_mac_address(struct net_device *dev, void *addr)
-+{
-+	struct mastpriv *prv;
-+
-+	prv = dev->priv;
-+	prv = prv;	/* self-assignment only marks the variable as used */
-+
-+	return 0;
-+}
-+
-+/*
-+ * Placeholder header_cache_update handler: validates the device and its
-+ * private area, logs the call and returns.  hh and haddr are not used.
-+ *
-+ * dev->priv is only read after dev has been checked for NULL.
-+ */
-+DEBUG_NO_STATIC void
-+ipsec_mast_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
-+{
-+	struct mastpriv *prv;
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+			    "klips_debug:ipsec_mast_cache_update: "
-+			    "no device...");
-+		return;
-+	}
-+
-+	prv = dev->priv;
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+			    "klips_debug:ipsec_mast_cache_update: "
-+			    "no private space associated with dev=%s",
-+			    dev->name ? dev->name : "NULL");
-+		return;
-+	}
-+
-+	KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+		    "klips_debug:ipsec_mast: "
-+		    "Revectored cache_update\n");
-+	return;
-+}
-+#endif
-+
-+/*
-+ * Per-neighbour setup hook.  A neighbour still in state NUD_NONE is
-+ * switched to arp_broken_ops and its output routine is taken from there;
-+ * neighbours in any other state are left untouched.  Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_neigh_setup(struct neighbour *n)
-+{
-+	KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+		    "klips_debug:ipsec_mast_neigh_setup:\n");
-+
-+	if (n->nud_state != NUD_NONE) {
-+		return 0;
-+	}
-+
-+	n->ops = &arp_broken_ops;
-+	n->output = n->ops->output;
-+	return 0;
-+}
-+
-+/*
-+ * Per-device neighbour-parameter hook (installed as dev->neigh_setup).
-+ * For the AF_INET table it registers ipsec_mast_neigh_setup() and sets
-+ * both probe counters to zero; other families are ignored.
-+ * Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
-+{
-+	KLIPS_PRINT(debug_mast & DB_MAST_REVEC,
-+		    "klips_debug:ipsec_mast_neigh_setup_dev: "
-+		    "setting up %s\n",
-+		    dev ? dev->name : "NULL");
-+
-+	if (p->tbl->family != AF_INET) {
-+		return 0;
-+	}
-+
-+	p->neigh_setup = ipsec_mast_neigh_setup;
-+	p->ucast_probes = 0;
-+	p->mcast_probes = 0;
-+	return 0;
-+}
-+
-+/*
-+ * ioctl handler of a mast device (installed as dev->do_ioctl).
-+ *
-+ * No command is implemented: every request is logged and rejected.
-+ *
-+ * Returns -ENODEV when dev is NULL, -EOPNOTSUPP otherwise.
-+ *
-+ * The former locals initialised from ifr->ifr_data and dev->priv were
-+ * never used, and the latter dereferenced dev ahead of the NULL check
-+ * below, so they have been dropped.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_mast_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-+{
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_ioctl: "
-+			    "device not supplied.\n");
-+		return -ENODEV;
-+	}
-+
-+	KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+		    "klips_debug:ipsec_mast_ioctl: "
-+		    "tncfg service call #%d for dev=%s\n",
-+		    cmd,
-+		    dev->name ? dev->name : "NULL");
-+
-+	switch (cmd) {
-+	default:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_ioctl: "
-+			    "unknown command %d.\n",
-+			    cmd);
-+		return -EOPNOTSUPP;
-+
-+	}
-+}
-+
-+/*
-+ * Netdevice notifier callback.
-+ *
-+ * Purely informational: each known event type is logged (NETDEV_DOWN on
-+ * a device whose name starts with "ipsec" additionally raises a KERN_CRIT
-+ * message) and NOTIFY_DONE is returned in every case.  Events for a NULL
-+ * device or a loopback device are ignored.
-+ *
-+ * ptr is the struct net_device the event refers to.  The device is no
-+ * longer dereferenced before the NULL check (the old, unused "priv"
-+ * local read dev->priv in its initialiser).
-+ */
-+int
-+ipsec_mast_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-+{
-+	struct net_device *dev = ptr;
-+
-+	if (dev == NULL) {
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "dev=NULL for event type %ld.\n",
-+			    event);
-+		return(NOTIFY_DONE);
-+	}
-+
-+	/* check for loopback devices */
-+	if (dev->flags & IFF_LOOPBACK) {
-+		return(NOTIFY_DONE);
-+	}
-+
-+	switch (event) {
-+	case NETDEV_DOWN:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_DOWN dev=%s flags=%x\n",
-+			    dev->name,
-+			    dev->flags);
-+		if(strncmp(dev->name, "ipsec", strlen("ipsec")) == 0) {
-+			printk(KERN_CRIT "IPSEC EVENT: KLIPS device %s shut down.\n",
-+			       dev->name);
-+		}
-+		break;
-+
-+	case NETDEV_UNREGISTER:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_UNREGISTER dev=%s flags=%x\n",
-+			    dev->name,
-+			    dev->flags);
-+		break;
-+
-+	case NETDEV_UP:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_UP dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	case NETDEV_REBOOT:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_REBOOT dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	case NETDEV_CHANGE:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_CHANGE dev=%s flags=%x\n",
-+			    dev->name,
-+			    dev->flags);
-+		break;
-+
-+	case NETDEV_REGISTER:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_REGISTER dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	case NETDEV_CHANGEMTU:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_CHANGEMTU dev=%s to mtu=%d\n",
-+			    dev->name,
-+			    dev->mtu);
-+		break;
-+
-+	case NETDEV_CHANGEADDR:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_CHANGEADDR dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	case NETDEV_GOING_DOWN:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_GOING_DOWN dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	case NETDEV_CHANGENAME:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "NETDEV_CHANGENAME dev=%s\n",
-+			    dev->name);
-+		break;
-+
-+	default:
-+		KLIPS_PRINT(debug_mast & DB_MAST_INIT,
-+			    "klips_debug:ipsec_mast_device_event: "
-+			    "event type %ld unrecognised for dev=%s\n",
-+			    event,
-+			    dev->name);
-+		break;
-+	}
-+	return NOTIFY_DONE;
-+}
-+
-+/*
-+ * Called when an ipsec mast device is initialized.
-+ * The ipsec mast device structure is passed to us.
-+ *
-+ * ipsec_mast_createnum() installs this function as the device's init
-+ * hook.  It
-+ *   - wires up the open/stop/hard_start_xmit/get_stats/do_ioctl and
-+ *     neigh_setup handlers and clears the header-related hooks;
-+ *   - allocates a zero-filled struct mastpriv and stores it in dev->priv;
-+ *   - clears the file-level "zeroes" buffer byte by byte;
-+ *   - sets hard_header_len to 8+20+20+8, mtu and addr_len to 0, type to
-+ *     ARPHRD_NONE, tx_queue_len to 10 and flags to IFF_NOARP.
-+ *
-+ * Returns 0 on success, -ENOMEM if the private area cannot be allocated.
-+ *
-+ * NOTE(review): the debug message below is tagged "ipsec_mast_init"
-+ * although the function is named ipsec_mast_probe.
-+ */
-+int
-+ipsec_mast_probe(struct net_device *dev)
-+{
-+	int i;
-+
-+	KLIPS_PRINT(debug_mast,
-+		    "klips_debug:ipsec_mast_init: "
-+		    "allocating %lu bytes initialising device: %s\n",
-+		    (unsigned long) sizeof(struct mastpriv),
-+		    dev->name ? dev->name : "NULL");
-+
-+	/* Add our mast functions to the device */
-+	dev->open = ipsec_mast_open;
-+	dev->stop = ipsec_mast_close;
-+	dev->hard_start_xmit = ipsec_mast_start_xmit;
-+	dev->get_stats = ipsec_mast_get_stats;
-+
-+	dev->priv = kmalloc(sizeof(struct mastpriv), GFP_KERNEL);
-+	if (dev->priv == NULL)
-+		return -ENOMEM;
-+	memset((caddr_t)(dev->priv), 0, sizeof(struct mastpriv));
-+
-+	for(i = 0; i < sizeof(zeroes); i++) {
-+		((__u8*)(zeroes))[i] = 0;
-+	}
-+
-+	dev->set_multicast_list = NULL;
-+	dev->do_ioctl = ipsec_mast_ioctl;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	dev->header_ops = NULL;
-+#else
-+	dev->hard_header = NULL;
-+	dev->rebuild_header = NULL;
-+	dev->header_cache_update= NULL;
-+#endif
-+	dev->set_mac_address = NULL;
-+	dev->neigh_setup = ipsec_mast_neigh_setup_dev;
-+	dev->hard_header_len = 8+20+20+8;
-+	dev->mtu = 0;
-+	dev->addr_len = 0;
-+	dev->type = ARPHRD_NONE;
-+	dev->tx_queue_len = 10;
-+	memset((caddr_t)(dev->broadcast),0xFF, ETH_ALEN);	/* what if this is not attached to ethernet? */
-+
-+	/* New-style flags. */
-+	dev->flags = IFF_NOARP;
-+
-+	/* We're done. Have I forgotten anything? */
-+	return 0;
-+}
-+
-+#ifdef alloc_netdev
-+static void ipsec_mast_netdev_setup(struct net_device *dev)
-+{	/* empty setup callback passed to alloc_netdev() by ipsec_mast_createnum(); real setup happens in ipsec_mast_probe() */
-+}
-+#endif
-+struct net_device *mastdevices[IPSEC_NUM_IFMAX];	/* registered mast devices, indexed by vifnum; NULL = slot unused */
-+int mastdevices_max=-1;	/* highest vifnum handed to ipsec_mast_createnum() so far, -1 while none */
-+
-+/*
-+ * Create and register mast device number vifnum.
-+ *
-+ * The device is allocated (via alloc_netdev() where that is available,
-+ * plain kmalloc() otherwise), named with MAST_DEV_FORMAT, initialised by
-+ * ipsec_mast_probe() through its init hook, registered, and stored in
-+ * mastdevices[vifnum] with one reference held on it.
-+ *
-+ * Returns 0 on success, or
-+ *   -ENOENT  vifnum is outside 0 .. IPSEC_NUM_IFMAX-1
-+ *   -EEXIST  the slot is already occupied
-+ *   -ENOMEM  the device could not be allocated
-+ *   -EIO     register_netdev() failed
-+ */
-+int ipsec_mast_createnum(int vifnum)
-+{
-+	struct net_device *im;
-+	char name[IFNAMSIZ];
-+
-+	/* mastdevices[] has IPSEC_NUM_IFMAX slots, so IPSEC_NUM_IFMAX itself
-+	 * and negative numbers are not valid indices */
-+	if(vifnum < 0 || vifnum >= IPSEC_NUM_IFMAX) {
-+		return -ENOENT;
-+	}
-+
-+	if(mastdevices[vifnum]!=NULL) {
-+		return -EEXIST;
-+	}
-+
-+	snprintf(name, IFNAMSIZ, MAST_DEV_FORMAT, vifnum);
-+
-+#ifdef alloc_netdev
-+	im = alloc_netdev(0, name, ipsec_mast_netdev_setup);
-+#else
-+	im = (struct net_device *)kmalloc(sizeof(struct net_device),GFP_KERNEL);
-+#endif
-+	if(im == NULL) {
-+		printk(KERN_ERR "failed to allocate space for mast%d device\n", vifnum);
-+		return -ENOMEM;
-+	}
-+
-+#ifndef alloc_netdev
-+	memset((caddr_t)im, 0, sizeof(struct net_device));
-+	memcpy(im->name, name, IFNAMSIZ);
-+#endif
-+
-+	im->init = ipsec_mast_probe;
-+
-+	if(register_netdev(im) != 0) {
-+		printk(KERN_ERR "ipsec_mast: failed to register %s\n",
-+		       im->name);
-+		/* nothing else owns the device yet: give back the private
-+		 * area ipsec_mast_probe() may have allocated (kfree(NULL)
-+		 * is a no-op) and the device itself */
-+		kfree(im->priv);
-+		im->priv = NULL;
-+#ifdef alloc_netdev
-+		free_netdev(im);
-+#else
-+		kfree(im);
-+#endif
-+		return -EIO;
-+	}
-+
-+	dev_hold(im);
-+	mastdevices[vifnum]=im;
-+
-+	/* only a device that really exists may raise the high-water mark */
-+	if(vifnum > mastdevices_max) {
-+		mastdevices_max=vifnum;
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Unregister mast device number vifnum and release its private area.
-+ *
-+ * The reference taken by ipsec_mast_createnum() is dropped before the
-+ * device is unregistered.
-+ *
-+ * Returns 0 on success, -ENOENT when vifnum is outside
-+ * 0 .. IPSEC_NUM_IFMAX-1 or no device occupies that slot.
-+ */
-+int
-+ipsec_mast_deletenum(int vifnum)
-+{
-+	struct net_device *dev_ipsec;
-+
-+	/* mastdevices[] has IPSEC_NUM_IFMAX slots, so IPSEC_NUM_IFMAX itself
-+	 * and negative numbers are not valid indices */
-+	if(vifnum < 0 || vifnum >= IPSEC_NUM_IFMAX) {
-+		return -ENOENT;
-+	}
-+
-+	dev_ipsec = mastdevices[vifnum];
-+	if(dev_ipsec == NULL) {
-+		return -ENOENT;
-+	}
-+
-+	/* release reference */
-+	mastdevices[vifnum]=NULL;
-+	ipsec_dev_put(dev_ipsec);
-+
-+	KLIPS_PRINT(debug_tunnel, "Unregistering %s (refcnt=%d)\n",
-+		    dev_ipsec->name,
-+		    atomic_read(&dev_ipsec->refcnt));
-+	unregister_netdev(dev_ipsec);
-+	KLIPS_PRINT(debug_tunnel, "Unregisted %s\n", dev_ipsec->name);
-+#ifndef NETDEV_23
-+	kfree(dev_ipsec->name);
-+	dev_ipsec->name=NULL;
-+#endif /* !NETDEV_23 */
-+	kfree(dev_ipsec->priv);
-+	dev_ipsec->priv=NULL;
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Look up the device that belongs to virtual interface number vifnum.
-+ *
-+ * Numbers above IPSECDEV_OFFSET are forwarded to
-+ * ipsec_tunnel_get_device() with the offset removed.  Anything else is
-+ * treated as a mast device number, after subtracting
-+ * MASTTRANSPORT_OFFSET when the number is at or above it.
-+ *
-+ * Returns the device with a reference held on it (dev_hold), or NULL
-+ * when the slot is empty or the number does not name a valid slot.
-+ */
-+struct net_device *
-+ipsec_mast_get_device(int vifnum)
-+{
-+	int ovifnum = vifnum;
-+	struct net_device *nd;
-+
-+	if(vifnum > IPSECDEV_OFFSET) {
-+		return ipsec_tunnel_get_device(vifnum-IPSECDEV_OFFSET);
-+	}
-+
-+	if(vifnum >= MASTTRANSPORT_OFFSET) {
-+		vifnum -= MASTTRANSPORT_OFFSET;
-+	}
-+
-+	/* never index mastdevices[] with a negative or too large number */
-+	if(vifnum < 0 || vifnum >= IPSEC_NUM_IFMAX || vifnum > mastdevices_max) {
-+		KLIPS_ERROR(debug_tunnel,
-+			    "no such vif %d (ovif=%d)\n", vifnum, ovifnum);
-+		return NULL;
-+	}
-+
-+	nd = mastdevices[vifnum];
-+
-+	if(nd) dev_hold(nd);
-+	return nd;
-+}
-+
-+/*
-+ * Return 1 when vifnum lies strictly between MASTTRANSPORT_OFFSET and
-+ * IPSECDEV_OFFSET, 0 otherwise.
-+ */
-+unsigned int
-+ipsec_mast_is_transport(int vifnum)
-+{
-+	return (vifnum > MASTTRANSPORT_OFFSET && vifnum < IPSECDEV_OFFSET) ? 1 : 0;
-+}
-+
-+/*
-+ * Create the initial mast device.
-+ *
-+ * mast0 is used for transport mode stuff, and generally is
-+ * the default unless the user decides to create more.
-+ *
-+ * The result of ipsec_mast_createnum() is not examined; this function
-+ * always returns 0.
-+ */
-+int
-+ipsec_mast_init_devices(void)
-+{
-+	(void) ipsec_mast_createnum(0);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Unregister every mast device that is still present and release its
-+ * private area.  Always returns 0.
-+ *
-+ * As in ipsec_mast_deletenum(), the reference taken by
-+ * ipsec_mast_createnum() is dropped *before* unregister_netdev() is
-+ * called, so the device is not still held by us while it is being
-+ * unregistered.  The table slot is cleared first, and the scan is
-+ * clamped to the size of mastdevices[].
-+ */
-+/* void */
-+int
-+ipsec_mast_cleanup_devices(void)
-+{
-+	int error = 0;
-+	int i;
-+	struct net_device *dev_mast;
-+
-+	for(i = 0; i <= mastdevices_max && i < IPSEC_NUM_IFMAX; i++) {
-+		dev_mast = mastdevices[i];
-+		if(dev_mast == NULL) {
-+			continue;
-+		}
-+
-+		/* release reference */
-+		mastdevices[i]=NULL;
-+		dev_put(dev_mast);
-+
-+		unregister_netdev(dev_mast);
-+		kfree(dev_mast->priv);
-+		dev_mast->priv=NULL;
-+	}
-+
-+	/* table is empty again, same as the initial state */
-+	mastdevices_max = -1;
-+
-+	return error;
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
-+
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_md5c.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,406 @@
-+/*
-+ * RCSID $Id: ipsec_md5c.c,v 1.10 2005/04/15 01:25:57 mcr Exp $
-+ */
-+
-+/*
-+ * The rest of the code is derived from MD5C.C by RSADSI.  Minor cosmetic
-+ * changes to accommodate it in the kernel by ji.
-+ */
-+
-+#include <asm/byteorder.h>
-+#include <linux/string.h>
-+
-+#include "openswan/ipsec_md5h.h"
-+
-+/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
-+ */
-+
-+/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-+rights reserved.
-+
-+License to copy and use this software is granted provided that it
-+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-+Algorithm" in all material mentioning or referencing this software
-+or this function.
-+
-+License is also granted to make and use derivative works provided
-+that such works are identified as "derived from the RSA Data
-+Security, Inc. MD5 Message-Digest Algorithm" in all material
-+mentioning or referencing the derived work.
-+
-+RSA Data Security, Inc. makes no representations concerning either
-+the merchantability of this software or the suitability of this
-+software for any particular purpose. It is provided "as is"
-+without express or implied warranty of any kind.
-+
-+These notices must be retained in any copies of any part of this
-+documentation and/or software.
-+ */
-+
-+/*
-+ * Additions by JI
-+ *
-+ * HAVEMEMCOPY is defined if mem* routines are available
-+ *
-+ * HAVEHTON is defined if htons() and htonl() can be used
-+ * for big/little endian conversions
-+ *
-+ * NOTE(review): the little-endian marker defined below is spelled
-+ * LITTLENDIAN, whereas the conditionals that select between the memcpy
-+ * shortcut and the byte-wise Encode()/Decode() routines in this file
-+ * test LITTLEENDIAN.  Unless LITTLEENDIAN is defined somewhere else
-+ * (not visible here -- confirm), the byte-wise routines are always the
-+ * ones compiled, on either byte order.
-+ */
-+
-+#define HAVEMEMCOPY
-+#ifdef __LITTLE_ENDIAN
-+#define LITTLENDIAN
-+#endif
-+#ifdef __BIG_ENDIAN
-+#define BIGENDIAN
-+#endif
-+
-+/* Constants for MD5Transform routine.
-+ */
-+
-+#define S11 7
-+#define S12 12
-+#define S13 17
-+#define S14 22
-+#define S21 5
-+#define S22 9
-+#define S23 14
-+#define S24 20
-+#define S31 4
-+#define S32 11
-+#define S33 16
-+#define S34 23
-+#define S41 6
-+#define S42 10
-+#define S43 15
-+#define S44 21
-+
-+static void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64]));
-+
-+#ifdef LITTLEENDIAN
-+#define Encode MD5_memcpy
-+#define Decode MD5_memcpy
-+#else
-+static void Encode PROTO_LIST
-+ ((unsigned char *, UINT4 *, unsigned int));
-+static void Decode PROTO_LIST
-+ ((UINT4 *, unsigned char *, unsigned int));
-+#endif
-+
-+#ifdef HAVEMEMCOPY
-+/* no need to include <memory.h> here; <linux/string.h> defines these */
-+#define MD5_memcpy memcpy
-+#define MD5_memset memset
-+#else
-+#ifdef HAVEBCOPY
-+#define MD5_memcpy(_a,_b,_c) bcopy((_b),(_a),(_c))
-+#define MD5_memset(_a,_b,_c) bzero((_a),(_c))
-+#else
-+static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
-+static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
-+#endif
-+#endif
-+static unsigned char PADDING[64] = {
-+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-+};
-+
-+/* F, G, H and I are basic MD5 functions.
-+ * Every use of an argument is parenthesised, including the operand of
-+ * the bitwise complement, so that an expression argument is complemented
-+ * as a whole.
-+ */
-+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
-+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
-+#define H(x, y, z) ((x) ^ (y) ^ (z))
-+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
-+
-+/* ROTATE_LEFT rotates x left n bits.
-+ */
-+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
-+
-+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
-+Rotation is separate from addition to prevent recomputation.
-+ */
-+#define FF(a, b, c, d, x, s, ac) { \
-+ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
-+ (a) = ROTATE_LEFT ((a), (s)); \
-+ (a) += (b); \
-+ }
-+#define GG(a, b, c, d, x, s, ac) { \
-+ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
-+ (a) = ROTATE_LEFT ((a), (s)); \
-+ (a) += (b); \
-+ }
-+#define HH(a, b, c, d, x, s, ac) { \
-+ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
-+ (a) = ROTATE_LEFT ((a), (s)); \
-+ (a) += (b); \
-+ }
-+#define II(a, b, c, d, x, s, ac) { \
-+ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
-+ (a) = ROTATE_LEFT ((a), (s)); \
-+ (a) += (b); \
-+ }
-+
-+/*
-+ * Start a fresh MD5 computation in the context that vcontext points to:
-+ * the bit counter is cleared and the four state words are loaded with
-+ * the MD5 initialisation constants.
-+ */
-+void osMD5Init(void *vcontext)
-+{
-+	MD5_CTX *ctx = vcontext;
-+
-+	/* nothing hashed so far */
-+	ctx->count[1] = 0;
-+	ctx->count[0] = 0;
-+
-+	/* magic initialization constants */
-+	ctx->state[0] = 0x67452301;
-+	ctx->state[1] = 0xefcdab89;
-+	ctx->state[2] = 0x98badcfe;
-+	ctx->state[3] = 0x10325476;
-+}
-+
-+/* Feed inputLen bytes from input into the running MD5 computation held
-+   in vcontext.  Complete 64-byte blocks are transformed right away; any
-+   remainder is kept in the context buffer for the next call.
-+ */
-+void osMD5Update (vcontext, input, inputLen)
-+	void *vcontext;
-+	unsigned char *input;	/* input block */
-+	__u32 inputLen;		/* length of input block */
-+{
-+	MD5_CTX *ctx = vcontext;
-+	UINT4 added_bits = ((UINT4)inputLen << 3);
-+	__u32 consumed = 0;
-+	unsigned int fill, room;
-+
-+	/* bytes already waiting in the buffer (byte count mod 64) */
-+	fill = (unsigned int)((ctx->count[0] >> 3) & 0x3F);
-+
-+	/* advance the 64-bit bit counter, carrying into the high word */
-+	ctx->count[0] += added_bits;
-+	if (ctx->count[0] < added_bits)
-+		ctx->count[1]++;
-+	ctx->count[1] += ((UINT4)inputLen >> 29);
-+
-+	room = 64 - fill;
-+
-+	if (inputLen >= room) {
-+		/* complete the buffered block and process it ... */
-+		MD5_memcpy((POINTER)&ctx->buffer[fill], (POINTER)input, room);
-+		MD5Transform (ctx->state, ctx->buffer);
-+
-+		/* ... then every further whole block straight from input */
-+		consumed = room;
-+		while (consumed + 63 < inputLen) {
-+			MD5Transform (ctx->state, &input[consumed]);
-+			consumed += 64;
-+		}
-+
-+		fill = 0;
-+	}
-+
-+	/* Buffer remaining input */
-+	MD5_memcpy((POINTER)&ctx->buffer[fill], (POINTER)&input[consumed],
-+		   inputLen - consumed);
-+}
-+
-+/* Finish an MD5 computation: pad the message to 56 mod 64 bytes and
-+   append the original bit length.  When digest is not NULL the 16-byte
-+   result is written to it and the context is zeroized; with a NULL
-+   digest only the padding is applied and the context is left intact.
-+ */
-+void osMD5Final (digest, vcontext)
-+unsigned char digest[16];	/* message digest */
-+void *vcontext;			/* context */
-+{
-+	MD5_CTX *ctx = vcontext;
-+	unsigned char length_bytes[8];
-+	unsigned int used, pad_bytes;
-+
-+	/* capture the bit count before the padding changes it */
-+	Encode (length_bytes, ctx->count, 8);
-+
-+	/* pad out to 56 mod 64 */
-+	used = (unsigned int)((ctx->count[0] >> 3) & 0x3f);
-+	if (used < 56)
-+		pad_bytes = 56 - used;
-+	else
-+		pad_bytes = 120 - used;
-+	osMD5Update (ctx, PADDING, pad_bytes);
-+
-+	/* append length (before padding) */
-+	osMD5Update (ctx, length_bytes, 8);
-+
-+	if (digest == NULL)	/* Bill Simpson's padding */
-+		return;
-+
-+	/* store state in digest */
-+	Encode (digest, ctx->state, 16);
-+
-+	/* zeroize sensitive information */
-+	MD5_memset ((POINTER)ctx, 0, sizeof (*ctx));
-+}
-+
-+/* MD5 basic transformation. Transforms state based on block.
-+ *
-+ * state  the four chaining words; updated in place by adding the result
-+ *        of the 64 steps below
-+ * block  one 64-byte message block, unpacked into sixteen words with
-+ *        Decode() before use
-+ *
-+ * The four rounds apply FF, GG, HH and II sixteen times each, using the
-+ * shift amounts S11..S44 defined at the top of the file.  The local copy
-+ * of the message words is wiped before returning.
-+ */
-+static void MD5Transform (state, block)
-+UINT4 state[4];
-+unsigned char block[64];
-+{
-+	UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
-+
-+	Decode (x, block, 64);
-+
-+	/* Round 1 */
-+	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
-+	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
-+	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
-+	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
-+	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
-+	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
-+	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
-+	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
-+	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
-+	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
-+	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
-+	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
-+	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
-+	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
-+	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
-+	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
-+
-+	/* Round 2 */
-+	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
-+	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
-+	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
-+	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
-+	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
-+	GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
-+	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
-+	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
-+	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
-+	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
-+	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
-+	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
-+	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
-+	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
-+	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
-+	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
-+
-+	/* Round 3 */
-+	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
-+	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
-+	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
-+	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
-+	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
-+	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
-+	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
-+	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
-+	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
-+	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
-+	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
-+	HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
-+	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
-+	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
-+	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
-+	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
-+
-+	/* Round 4 */
-+	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
-+	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
-+	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
-+	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
-+	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
-+	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
-+	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
-+	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
-+	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
-+	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
-+	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
-+	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
-+	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
-+	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
-+	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
-+	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
-+
-+	state[0] += a;
-+	state[1] += b;
-+	state[2] += c;
-+	state[3] += d;
-+
-+	/* Zeroize sensitive information.
-+*/
-+	MD5_memset ((POINTER)x, 0, sizeof (x));
-+}
-+
-+#ifndef LITTLEENDIAN
-+
-+/* Serialise UINT4 words from input into output, least significant byte
-+   first.  len is the number of output bytes and is assumed to be a
-+   multiple of 4.
-+ */
-+static void Encode (output, input, len)
-+unsigned char *output;
-+UINT4 *input;
-+unsigned int len;
-+{
-+	unsigned int word, pos, byte;
-+
-+	for (word = 0, pos = 0; pos < len; word++, pos += 4) {
-+		/* emit the four bytes of this word, low byte first */
-+		for (byte = 0; byte < 4; byte++) {
-+			output[pos + byte] =
-+				(unsigned char)((input[word] >> (8 * byte)) & 0xff);
-+		}
-+	}
-+}
-+
-+/* Assemble UINT4 words in output from the bytes in input, treating the
-+   first byte of each group of four as the least significant.  len is the
-+   number of input bytes and is assumed to be a multiple of 4.
-+ */
-+static void Decode (output, input, len)
-+UINT4 *output;
-+unsigned char *input;
-+unsigned int len;
-+{
-+	unsigned int word = 0;
-+	unsigned int pos = 0;
-+
-+	while (pos < len) {
-+		UINT4 value;
-+
-+		value  = ((UINT4)input[pos]);
-+		value |= (((UINT4)input[pos+1]) << 8);
-+		value |= (((UINT4)input[pos+2]) << 16);
-+		value |= (((UINT4)input[pos+3]) << 24);
-+		output[word] = value;
-+
-+		word++;
-+		pos += 4;
-+	}
-+}
-+
-+#endif
-+
-+#ifndef HAVEMEMCOPY
-+#ifndef HAVEBCOPY
-+/* Fallback byte copy, compiled only when neither HAVEMEMCOPY nor
-+   HAVEBCOPY is defined.  Copies len bytes from input to output in
-+   ascending order.
-+   Note: Replace "for loop" with standard memcpy if possible.
-+ */
-+
-+static void MD5_memcpy (output, input, len)
-+POINTER output;
-+POINTER input;
-+unsigned int len;
-+{
-+	unsigned int pos = 0;
-+
-+	while (pos < len) {
-+		output[pos] = input[pos];
-+		pos++;
-+	}
-+}
-+
-+/* Fallback byte fill, compiled only when neither HAVEMEMCOPY nor
-+   HAVEBCOPY is defined.  Stores (char)value into each of the first len
-+   bytes of output, in ascending order.
-+   Note: Replace "for loop" with standard memset if possible.
-+ */
-+
-+static void MD5_memset (output, value, len)
-+POINTER output;
-+int value;
-+unsigned int len;
-+{
-+	unsigned int pos = 0;
-+
-+	while (pos < len) {
-+		((char *)output)[pos] = (char)value;
-+		pos++;
-+	}
-+}
-+#endif
-+#endif
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_ocf.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,739 @@
-+/*
-+ * IPSEC OCF support
-+ *
-+ * This code written by David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+
-+#include <linux/interrupt.h>
-+
-+#include <net/ip.h>
-+
-+#include <openswan.h>
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_xmit.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_auth.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_ah.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "ipsec_ocf.h"
-+
-+extern int debug_pfkey;
-+extern int debug_rcv;
-+
-+int ipsec_ocf_crid = (CRYPTOCAP_F_HARDWARE|CRYPTOCAP_F_SOFTWARE);
-+#if 0 /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */
-+/*
-+ * allow users to force us to a particular OCF driver
-+ */
-+char *ipsec_ocf_driver = NULL;
-+module_parm(ipsec_ocf_driver, charp, 0644);
-+MODULE_PARM_DESC(ipsec_ocf_driver,
-+ "Driver name (ie., cryptosoft), hw, sw, both (default both)");
-+#endif
-+
-+/*
-+ * Tuning parameters, the settings below appear best for
-+ * the IXP
-+ */
-+#define USE_BATCH 1 /* enable batch mode */
-+#define USE_CBIMM 1 /* enable immediate callbacks */
-+#define FORCE_QS 0 /* force use of queues for continuation of state machine */
-+#ifdef DECLARE_TASKLET
-+#define USE_TASKLET 1 /* use tasklet for continuation of state machine */
-+#else
-+#define USE_TASKLET 0 /* don't use tasklet for continuation of state machine */
-+#endif
-+/*
-+ * Because some OCF operations are synchronous (i.e., software encryption)
-+ * we need to protect ourselves from destructive re-entry.  All we do
-+ * is track where we are at and either call back immediately or queue the
-+ * callback to avoid conflicts.  This allows us to deal with the fact that
-+ * OCF doesn't tell us if our crypto operations will be async or sync.
-+ */
-+/*
-+ * _INIT_WORK(wq, fn, arg) hides the two INIT_WORK() calling conventions.
-+ * From 2.6.20 on the handler receives the struct work_struct pointer and
-+ * INIT_WORK() takes no data argument (ipsec_rsm_wq below is written for
-+ * exactly that case), so "arg" is unused there.  Older kernels pass
-+ * "arg" to the handler as a void pointer.
-+ */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
-+#define _INIT_WORK(wq, fn, arg) INIT_WORK(&(wq), (void (*)(struct work_struct *))(fn))
-+#else
-+#define _INIT_WORK(wq, fn, arg) INIT_WORK(&(wq), (void (*)(void *))(fn), (void *)(arg))
-+#endif
-+
-+#define PROCESS_LATER(wq, sm, arg) \
-+ ({ \
-+ _INIT_WORK(wq, sm, arg); \
-+ schedule_work(&(wq)); \
-+ })
-+
-+#define PROCESS_NOW(sm, arg) \
-+ ({ \
-+ (*sm)(arg); \
-+ })
-+
-+#if USE_TASKLET == 1
-+ #define PROCESS_NEXT(this, wqsm, sm) ({ \
-+ tasklet_init(&this->tasklet, \
-+ (void (*)(unsigned long)) sm, (unsigned long)this); \
-+ tasklet_schedule(&this->tasklet); \
-+ })
-+#elif FORCE_QS == 0
-+ #define PROCESS_NEXT(this, wqsm, sm) \
-+ if (in_interrupt()) { \
-+ PROCESS_LATER(this->workq, wqsm, this); \
-+ } else { \
-+ PROCESS_NOW(sm, this); \
-+ }
-+#else
-+ #define PROCESS_NEXT(this, wqsm, sm) PROCESS_LATER(this->workq, wqsm, this)
-+#endif
-+
-+/*
-+ * convert openswan values to OCF values
-+ */
-+
-+/*
-+ * Map an openswan authentication algorithm number to the OCF HMAC
-+ * identifier; 0 is returned for anything other than AH_SHA and AH_MD5.
-+ */
-+static int
-+ipsec_ocf_authalg(int authalg)
-+{
-+	if (authalg == AH_SHA)
-+		return CRYPTO_SHA1_HMAC;
-+	if (authalg == AH_MD5)
-+		return CRYPTO_MD5_HMAC;
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Map an openswan ESP cipher number to the OCF CBC cipher identifier;
-+ * 0 is returned for ciphers that are not listed.
-+ */
-+static int
-+ipsec_ocf_encalg(int encalg)
-+{
-+	if (encalg == ESP_NULL)
-+		return CRYPTO_NULL_CBC;
-+	if (encalg == ESP_DES)
-+		return CRYPTO_DES_CBC;
-+	if (encalg == ESP_3DES)
-+		return CRYPTO_3DES_CBC;
-+	if (encalg == ESP_AES)
-+		return CRYPTO_AES_CBC;
-+	if (encalg == ESP_CAST)
-+		return CRYPTO_CAST_CBC;
-+	if (encalg == ESP_BLOWFISH)
-+		return CRYPTO_BLF_CBC;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Try to set up an OCF session for the given SA.
-+ *
-+ * Returns 1 when a session was created (ipsp->ocf_in_use is then set),
-+ * and also when an algorithm was requested whose key length is still 0;
-+ * in that case nothing is set up.  Returns 0 when neither algorithm was
-+ * requested or crypto_newsession() failed.
-+ *
-+ * The IV size/bits and auth bits fields of ipsp are filled in before the
-+ * session is requested, whatever the outcome.
-+ */
-+
-+int
-+ipsec_ocf_sa_init(struct ipsec_sa *ipsp, int authalg, int encalg)
-+{
-+	struct cryptoini crie, cria;
-+	struct cryptoini *first;
-+	int error;
-+
-+	KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf_sa_init(a=0x%x,e=0x%x)\n",
-+		    authalg, encalg);
-+
-+	if (authalg && ipsp->ips_key_bits_a == 0) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:ipsec_ocf_sa_init(a=0x%x,e=0x%x) a-key-bits=0\n",
-+			    authalg, encalg);
-+		/* pretend we are happy with this */
-+		return 1;
-+	}
-+
-+	if (encalg && ipsp->ips_key_bits_e == 0) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:ipsec_ocf_sa_init(a=0x%x,e=0x%x) e-key-bits=0\n",
-+			    authalg, encalg);
-+		/* pretend we are happy with this */
-+		return 1;
-+	}
-+
-+	/* describe the authentication half */
-+	memset(&cria, 0, sizeof(cria));
-+	cria.cri_alg = ipsec_ocf_authalg(authalg);
-+	cria.cri_klen = ipsp->ips_key_bits_a;
-+	cria.cri_key = ipsp->ips_key_a;
-+	cria.cri_mlen = 12;
-+
-+	/* describe the encryption half */
-+	memset(&crie, 0, sizeof(crie));
-+	crie.cri_alg = ipsec_ocf_encalg(encalg);
-+	crie.cri_klen = ipsp->ips_key_bits_e;
-+	crie.cri_key = ipsp->ips_key_e;
-+
-+	/* IV length follows the cipher: 16 for AES, 8 for DES/3DES, else 0 */
-+	if (crie.cri_alg == CRYPTO_AES_CBC) {
-+		ipsp->ips_iv_size = 16;
-+	} else if (crie.cri_alg == CRYPTO_DES_CBC ||
-+		   crie.cri_alg == CRYPTO_3DES_CBC) {
-+		ipsp->ips_iv_size = 8;
-+	} else {
-+		ipsp->ips_iv_size = 0;
-+	}
-+	ipsp->ips_iv_bits = ipsp->ips_iv_size * 8;
-+	ipsp->ips_auth_bits = ipsp->ips_key_bits_a;
-+
-+	if (!authalg && !encalg) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf_sa_init: "
-+			    "no authalg or encalg\n");
-+		return 0;
-+	}
-+
-+	/* encryption leads the chain when present, with authentication
-+	 * linked behind it; otherwise authentication stands alone */
-+	if (encalg) {
-+		if (authalg)
-+			crie.cri_next = &cria;
-+		first = &crie;
-+	} else {
-+		first = &cria;
-+	}
-+
-+	error = crypto_newsession(&ipsp->ocf_cryptoid, first, ipsec_ocf_crid);
-+	if (error) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf_sa_init: "
-+			    "crypto_newsession failed 0x%x\n", error);
-+		return 0;
-+	}
-+
-+	/* make sure no ALG stuff bites us */
-+	if (ipsp->ips_alg_enc)
-+		printk("We received an ALG initted SA\n");
-+	ipsp->ips_alg_enc = NULL;
-+
-+	ipsp->ocf_in_use = 1;
-+	return 1;
-+}
-+
-+
-+int
-+ipsec_ocf_sa_free(struct ipsec_sa *ipsp)
-+{ /* release the SA's OCF session and mark OCF as unused; always returns 1 */
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf_sa_free()\n");
-+ crypto_freesession(ipsp->ocf_cryptoid);
-+ ipsp->ocf_cryptoid = -1; /* the id was just freed; overwrite it */
-+ ipsp->ocf_in_use = 0;
-+ return 1;
-+}
-+
-+#if USE_TASKLET == 0 /* this handler is only built when tasklets are not used */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) /* handler receives the work_struct */
-+static void
-+ipsec_rsm_wq(struct work_struct *work)
-+{
-+ struct ipsec_rcv_state *irs = container_of(work, struct ipsec_rcv_state, workq); /* state embedding this work item */
-+ ipsec_rsm(irs);
-+}
-+#else /* older kernels: ipsec_rsm itself is used as the handler */
-+#define ipsec_rsm_wq ipsec_rsm
-+#endif /* LINUX_VERSION_CODE >= 2.6.20 */
-+#endif /* USE_TASKLET == 0 */
-+
-+static int
-+ipsec_ocf_rcv_cb(struct cryptop *crp)
-+{ /* completion callback installed by ipsec_ocf_rcv; always returns 0 */
-+ struct ipsec_rcv_state *irs = (struct ipsec_rcv_state *)crp->crp_opaque;
-+
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv_cb\n");
-+ if (irs == NULL) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv_cb: "
-+ "NULL irs in callback\n");
-+ return 0; /* NOTE(review): crp is not freed on this path -- confirm */
-+ }
-+
-+ /*
-+ * The state must be updated before handing back to the state machine.
-+ * On any error, processing is terminated by leaving the state at DONE;
-+ * only the success branches below move it forward.
-+ */
-+
-+ irs->state = IPSEC_RSM_DONE; /* assume it went badly */
-+ if (crp->crp_etype) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv_cb: "
-+ "error in processing 0x%x\n", crp->crp_etype);
-+ } else {
-+ if (!irs->ipsp->ips_encalg) { /* no cipher on the SA: treated as AH */
-+ /* AH post processing, put back fields we had to zero */
-+ irs->ipp->ttl = irs->ttl;
-+ irs->ipp->check = irs->check;
-+ irs->ipp->frag_off = irs->frag_off;
-+ irs->ipp->tos = irs->tos;
-+ irs->state = IPSEC_RSM_AUTH_CHK;
-+ /* pull the IP header off again (undoes the skb_push in ipsec_ocf_rcv) */
-+ skb_pull(irs->skb, ((unsigned char *)irs->protostuff.ahstuff.ahp) -
-+ ((unsigned char *)irs->ipp));
-+ } else if (ipsec_rcv_esp_post_decrypt(irs) == IPSEC_RCV_OK) {
-+ /* this one came up good, set next state */
-+ irs->state = IPSEC_RSM_DECAP_CONT;
-+ }
-+ }
-+
-+ crypto_freereq(crp);
-+ crp = NULL; /* only clears the local copy of the pointer */
-+
-+ /* setup the rest of the processing now */
-+ PROCESS_NEXT(irs, ipsec_rsm_wq, ipsec_rsm);
-+ return 0;
-+}
-+
-+enum ipsec_rcv_value
-+ipsec_ocf_rcv(struct ipsec_rcv_state *irs)
-+{ /* build an OCF request for the inbound skb; IPSEC_RCV_PENDING once dispatched */
-+ struct cryptop *crp;
-+ struct cryptodesc *crde, *crda = NULL; /* cipher / auth descriptors, NULL if unused */
-+ struct ipsec_sa *ipsp;
-+
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv\n");
-+
-+ ipsp = irs->ipsp;
-+ if (!ipsp) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv: "
-+ "no SA for rcv processing\n");
-+ return IPSEC_RCV_SAIDNOTFOUND;
-+ }
-+
-+ if (!irs->skb) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv: no skb\n");
-+ return IPSEC_RCV_SAIDNOTFOUND; /* same code as the missing-SA case */
-+ }
-+
-+ crp = crypto_getreq((ipsp->ips_authalg && ipsp->ips_encalg) ? 2 : 1); /* one descriptor per algorithm */
-+ if (!crp) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv: "
-+ "crypto_getreq returned NULL\n");
-+ return IPSEC_RCV_REALLYBAD;
-+ }
-+
-+ if (ipsp->ips_authalg) { /* inbound: auth descriptor first, cipher second */
-+ crda = crp->crp_desc;
-+ crde = crda->crd_next;
-+ } else {
-+ crde = crp->crp_desc;
-+ crda = crde->crd_next;
-+ }
-+
-+ if (crda) {
-+ /* Authentication descriptor */
-+ crda->crd_alg = ipsec_ocf_authalg(ipsp->ips_authalg);
-+ if (!crda->crd_alg) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv: "
-+ "bad auth alg 0x%x\n", ipsp->ips_authalg);
-+ crypto_freereq(crp);
-+ return IPSEC_RCV_BADPROTO;
-+ }
-+
-+ if (!crde) { /* assuming AH processing */
-+ /* push the IP header so we can authenticate it */
-+ skb_push(irs->skb, ((unsigned char *)irs->protostuff.ahstuff.ahp) -
-+ ((unsigned char *)irs->ipp));
-+ }
-+
-+ crda->crd_key = ipsp->ips_key_a;
-+ crda->crd_klen = ipsp->ips_key_bits_a;
-+ crda->crd_inject = irs->authenticator - irs->skb->data;
-+ /* Copy the authenticator to check against later */
-+ memcpy(irs->hash, irs->authenticator, 12);
-+
-+ if (!crde) { /* assume AH processing */
-+ /* AH processing, save fields we have to zero */
-+ irs->ttl = irs->ipp->ttl;
-+ irs->check = irs->ipp->check;
-+ irs->frag_off = irs->ipp->frag_off;
-+ irs->tos = irs->ipp->tos;
-+ irs->ipp->ttl = 0;
-+ irs->ipp->check = 0;
-+ irs->ipp->frag_off = 0;
-+ irs->ipp->tos = 0;
-+ crda->crd_len = irs->skb->len;
-+ crda->crd_skip = ((unsigned char *)irs->ipp) - irs->skb->data;
-+ memset(irs->authenticator, 0, 12); /* hash is computed over a zeroed field */
-+ } else {
-+ crda->crd_len = irs->ilen;
-+ crda->crd_skip =
-+ ((unsigned char *) irs->protostuff.espstuff.espp) -
-+ irs->skb->data;
-+ /*
-+ * It would be nice to clear the authenticator here
-+ * to be sure we do not see it again later when checking.
-+ * We cannot. Some HW actually expects to check the in-data
-+ * hash and flags an error if it is incorrect.
-+ *
-+ * What we do to allow this is to pass in the current in-data
-+ * value. Your OCF driver must ensure that it fails a request
-+ * for hash+decrypt with an invalid hash value, or returns the
-+ * computed in-data hash as requested.
-+ *
-+ * If your driver does not check the in-data hash but just
-+ * computes its value, you must ensure that it does not return
-+ * the original in-data hash by accident. It must invalidate the
-+ * in-data hash itself to force an auth check error.
-+ *
-+ * All existing drivers that do not care about the current
-+ * in-data hash do this by clearing the in-data hash before
-+ * processing, either directly or via their implementation.
-+ */
-+#if 0
-+ memset(irs->authenticator, 0, 12);
-+#endif
-+ }
-+ }
-+
-+ if (crde) { /* Encryption (decrypt) descriptor */
-+ crde->crd_alg = ipsec_ocf_encalg(ipsp->ips_encalg);
-+ if (!crde->crd_alg) {
-+ KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_ocf_rcv: "
-+ "bad enc alg 0x%x\n", ipsp->ips_encalg);
-+ crypto_freereq(crp);
-+ return IPSEC_RCV_BADPROTO;
-+ }
-+
-+ irs->esphlen = ESP_HEADER_LEN + ipsp->ips_iv_size;
-+ irs->ilen -= irs->esphlen; /* note: irs->ilen is modified in place */
-+ crde->crd_skip = (skb_transport_header(irs->skb) - irs->skb->data) + irs->esphlen;
-+ crde->crd_len = irs->ilen;
-+ crde->crd_inject = crde->crd_skip - ipsp->ips_iv_size; /* IV sits directly before the data */
-+ crde->crd_klen = ipsp->ips_key_bits_e;
-+ crde->crd_key = ipsp->ips_key_e;
-+ }
-+
-+ crp->crp_ilen = irs->skb->len; /* Total input length */
-+ crp->crp_flags =
-+ CRYPTO_F_SKBUF |
-+#if USE_CBIMM == 1
-+ CRYPTO_F_CBIMM |
-+#endif
-+#if USE_BATCH == 1
-+ CRYPTO_F_BATCH |
-+#endif
-+ 0;
-+ crp->crp_buf = (caddr_t) irs->skb;
-+ crp->crp_callback = ipsec_ocf_rcv_cb;
-+ crp->crp_sid = ipsp->ocf_cryptoid;
-+ crp->crp_opaque = (caddr_t) irs; /* handed back to ipsec_ocf_rcv_cb */
-+ if (crypto_dispatch(crp)){
-+ crypto_freereq(crp); /* NOTE(review): AH push/zeroing above is not undone here */
-+ return IPSEC_RCV_REALLYBAD;
-+ }
-+ return(IPSEC_RCV_PENDING);
-+}
-+
-+#if USE_TASKLET == 0 /* this handler is only built when tasklets are not used */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) /* handler receives the work_struct */
-+static void
-+ipsec_xsm_wq(struct work_struct *work)
-+{
-+ struct ipsec_xmit_state *ixs = container_of(work, struct ipsec_xmit_state, workq); /* state embedding this work item */
-+ ipsec_xsm(ixs);
-+}
-+#else /* older kernels: ipsec_xsm itself is used as the handler */
-+#define ipsec_xsm_wq ipsec_xsm
-+#endif /* LINUX_VERSION_CODE >= 2.6.20 */
-+#endif /* USE_TASKLET == 0 */
-+
-+static int
-+ipsec_ocf_xmit_cb(struct cryptop *crp)
-+{ /* completion callback installed by ipsec_ocf_xmit; always returns 0 */
-+ struct ipsec_xmit_state *ixs = (struct ipsec_xmit_state *)crp->crp_opaque;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit_cb\n");
-+
-+ if (ixs == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit_cb: "
-+ "NULL ixs in callback\n");
-+ return 0; /* NOTE(review): crp is not freed on this path -- confirm */
-+ }
-+
-+ /*
-+ * The state must be updated before handing back to the state machine.
-+ * On an error, processing is terminated by leaving the state at DONE;
-+ * otherwise it is moved to CONT below.
-+ */
-+
-+ ixs->state = IPSEC_XSM_DONE; /* assume bad xmit */
-+ if (crp->crp_etype) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit_cb: "
-+ "error in processing 0x%x\n", crp->crp_etype);
-+ } else {
-+ if (!ixs->ipsp->ips_encalg) { /* no cipher on the SA: treated as AH */
-+ /* AH post processing, put back fields we had to zero */
-+ ixs->iph->ttl = ixs->ttl;
-+ ixs->iph->check = ixs->check;
-+ ixs->iph->frag_off = ixs->frag_off;
-+ ixs->iph->tos = ixs->tos;
-+ }
-+ ixs->state = IPSEC_XSM_CONT; /* no error: continue, for AH and ESP alike */
-+ }
-+
-+ crypto_freereq(crp);
-+ crp = NULL; /* only clears the local copy of the pointer */
-+
-+ /* setup the rest of the processing now */
-+ PROCESS_NEXT(ixs, ipsec_xsm_wq, ipsec_xsm);
-+ return 0;
-+}
-+
-+
-+enum ipsec_xmit_value
-+ipsec_ocf_xmit(struct ipsec_xmit_state *ixs)
-+{	/* build an OCF request for the outbound skb; IPSEC_XMIT_PENDING once dispatched */
-+	struct cryptop *crp;
-+	struct cryptodesc *crde, *crda;	/* cipher / auth descriptors, NULL if unused */
-+	struct ipsec_sa *ipsp;
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit\n");
-+
-+	ipsp = ixs->ipsp;
-+	if (!ipsp) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit: "
-+				"no SA for xmit processing\n");
-+		return IPSEC_XMIT_SAIDNOTFOUND;
-+	}
-+
-+	if (!ixs->skb) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+				"klips_debug:ipsec_ocf_xmit: no skb\n");
-+		return IPSEC_XMIT_SAIDNOTFOUND;	/* same code as the missing-SA case */
-+	}
-+
-+	crp = crypto_getreq((ipsp->ips_authalg && ipsp->ips_encalg) ? 2 : 1);
-+	if (!crp) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit: "
-+				"crypto_getreq returned NULL\n");
-+		return IPSEC_XMIT_ERRMEMALLOC;
-+	}
-+
-+	if (ipsp->ips_encalg) {	/* outbound: cipher descriptor first, auth second */
-+		crde = crp->crp_desc;
-+		crda = crde->crd_next;
-+	} else {
-+		crda = crp->crp_desc;
-+		crde = crda->crd_next;
-+	}
-+
-+	if (crda) {
-+		/* Authentication descriptor */
-+		crda->crd_alg = ipsec_ocf_authalg(ipsp->ips_authalg);
-+		if (!crda->crd_alg) {
-+			KLIPS_PRINT(debug_tunnel&DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit: "
-+					"bad auth alg 0x%x\n", ipsp->ips_authalg);
-+			crypto_freereq(crp);
-+			return IPSEC_XMIT_BADPROTO;	/* xmit enum, not the rcv one */
-+		}
-+		if (!crde) { /* assume AH processing */
-+			/* AH processing, save fields we have to zero */
-+			crda->crd_skip = ((unsigned char *) ixs->iph) - ixs->skb->data;
-+			ixs->ttl = ixs->iph->ttl;
-+			ixs->check = ixs->iph->check;
-+			ixs->frag_off = ixs->iph->frag_off;
-+			ixs->tos = ixs->iph->tos;
-+			ixs->iph->ttl = 0;
-+			ixs->iph->check = 0;
-+			ixs->iph->frag_off = 0;
-+			ixs->iph->tos = 0;
-+			crda->crd_inject =
-+				(((struct ahhdr *)(ixs->dat + ixs->iphlen))->ah_data) -
-+					ixs->skb->data;
-+			crda->crd_len = ixs->len - ixs->authlen;
-+			memset(ixs->skb->data + crda->crd_inject, 0, 12); /* DM: zero the hash field first */
-+		} else {
-+			crda->crd_skip = ((unsigned char *) ixs->espp) - ixs->skb->data;
-+			crda->crd_inject = ixs->len - ixs->authlen;
-+			crda->crd_len = ixs->len - ixs->iphlen - ixs->authlen;
-+		}
-+		crda->crd_key = ipsp->ips_key_a;
-+		crda->crd_klen = ipsp->ips_key_bits_a;
-+	}
-+
-+	if (crde) {
-+		/* Encryption descriptor */
-+		crde->crd_alg = ipsec_ocf_encalg(ipsp->ips_encalg);
-+		if (!crde->crd_alg) {
-+			KLIPS_PRINT(debug_tunnel&DB_TN_XMIT, "klips_debug:ipsec_ocf_xmit: "
-+					"bad enc alg 0x%x\n", ipsp->ips_encalg);
-+			crypto_freereq(crp);
-+			return IPSEC_XMIT_BADPROTO;	/* xmit enum, not the rcv one */
-+		}
-+		crde->crd_flags = CRD_F_ENCRYPT;
-+		crde->crd_skip = ixs->idat - ixs->dat;
-+		crde->crd_len = ixs->ilen;
-+		crde->crd_inject = ((unsigned char *) ixs->espp->esp_iv) - ixs->dat;
-+		crde->crd_klen = ipsp->ips_key_bits_e;
-+		crde->crd_key = ipsp->ips_key_e;
-+	}
-+
-+	crp->crp_ilen = ixs->skb->len; /* Total input length */
-+	crp->crp_flags =
-+			CRYPTO_F_SKBUF |
-+#if USE_CBIMM == 1
-+			CRYPTO_F_CBIMM |
-+#endif
-+#if USE_BATCH == 1
-+			CRYPTO_F_BATCH |
-+#endif
-+			0;
-+	crp->crp_buf = (caddr_t) ixs->skb;
-+	crp->crp_callback = ipsec_ocf_xmit_cb;
-+	crp->crp_sid = ipsp->ocf_cryptoid;
-+	crp->crp_opaque = (caddr_t) ixs;	/* handed back to ipsec_ocf_xmit_cb */
-+	if (crypto_dispatch(crp)){
-+		crypto_freereq(crp);
-+		return IPSEC_XMIT_ERRMEMALLOC;
-+	}
-+	return(IPSEC_XMIT_PENDING);
-+}
-+
-+
-+
-+
-+#ifdef CONFIG_KLIPS_AH
-+static struct ipsec_alg_supported ocf_ah_algs[] = { /* AH candidates probed by ipsec_ocf_init */
-+ {
-+ .ias_name = "ocf-md5hmac",
-+ .ias_id = AH_MD5,
-+ .ias_exttype = SADB_EXT_SUPPORTED_AUTH,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 128,
-+ .ias_keymaxbits = 128,
-+ },
-+ {
-+ .ias_name = "ocf-sha1hmac",
-+ .ias_id = AH_SHA,
-+ .ias_exttype = SADB_EXT_SUPPORTED_AUTH,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 160,
-+ .ias_keymaxbits = 160,
-+ },
-+ { /* terminator: ipsec_ocf_init stops at the first NULL ias_name */
-+ .ias_name = NULL,
-+ .ias_id = 0,
-+ .ias_exttype = 0,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 0,
-+ .ias_keymaxbits = 0,
-+ }
-+};
-+#endif /* CONFIG_KLIPS_AH */
-+
-+static struct ipsec_alg_supported ocf_esp_algs[] = { /* ESP candidates probed by ipsec_ocf_init */
-+ { /* authentication entries reuse the AH_* ids */
-+ .ias_name = "ocf-md5hmac",
-+ .ias_id = AH_MD5,
-+ .ias_exttype = SADB_EXT_SUPPORTED_AUTH,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 128,
-+ .ias_keymaxbits = 128,
-+ },
-+ {
-+ .ias_name = "ocf-sha1hmac",
-+ .ias_id = AH_SHA,
-+ .ias_exttype = SADB_EXT_SUPPORTED_AUTH,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 160,
-+ .ias_keymaxbits = 160,
-+ },
-+ { /* cipher entries; ias_ivlen matches the IV sizes set in ipsec_ocf_sa_init */
-+ .ias_name = "ocf-aes",
-+ .ias_id = ESP_AES,
-+ .ias_exttype = SADB_EXT_SUPPORTED_ENCRYPT,
-+ .ias_ivlen = 16,
-+ .ias_keyminbits = 128,
-+ .ias_keymaxbits = 256,
-+ },
-+ {
-+ .ias_name = "ocf-3des",
-+ .ias_id = ESP_3DES,
-+ .ias_exttype = SADB_EXT_SUPPORTED_ENCRYPT,
-+ .ias_ivlen = 8,
-+ .ias_keyminbits = 192,
-+ .ias_keymaxbits = 192,
-+ },
-+ {
-+ .ias_name = "ocf-des",
-+ .ias_id = ESP_DES,
-+ .ias_exttype = SADB_EXT_SUPPORTED_ENCRYPT,
-+ .ias_ivlen = 8,
-+ .ias_keyminbits = 64,
-+ .ias_keymaxbits = 64,
-+ },
-+ { /* terminator: ipsec_ocf_init stops at the first NULL ias_name */
-+ .ias_name = NULL,
-+ .ias_id = 0,
-+ .ias_exttype = 0,
-+ .ias_ivlen = 0,
-+ .ias_keyminbits = 0,
-+ .ias_keymaxbits = 0,
-+ }
-+};
-+
-+static int
-+ipsec_ocf_check_alg(struct ipsec_alg_supported *s)
-+{ /* probe OCF with a throw-away session: 1 if it could be created, else 0 */
-+ struct cryptoini cri;
-+ int64_t cryptoid;
-+
-+ memset(&cri, 0, sizeof(cri));
-+ if (s->ias_exttype == SADB_EXT_SUPPORTED_ENCRYPT)
-+ cri.cri_alg = ipsec_ocf_encalg(s->ias_id);
-+ else /* every other exttype is looked up as an auth algorithm */
-+ cri.cri_alg = ipsec_ocf_authalg(s->ias_id);
-+ cri.cri_klen = s->ias_keyminbits; /* only the minimum key size is tried */
-+ cri.cri_key = "0123456789abcdefghijklmnopqrstuvwxyz"; /* dummy key for the probe */
-+
-+ if (crypto_newsession(&cryptoid, &cri, ipsec_ocf_crid)) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf:%s not supported\n",
-+ s->ias_name);
-+ return 0;
-+ }
-+ crypto_freesession(cryptoid); /* the session existed only for the probe */
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:ipsec_ocf:%s supported\n",
-+ s->ias_name);
-+ return 1;
-+}
-+
-+void
-+ipsec_ocf_init(void)
-+{ /* add every OCF-backed algorithm that passes the probe to the pfkey lists */
-+ struct ipsec_alg_supported *s;
-+
-+ for (s = ocf_esp_algs; s->ias_name; s++) { /* table ends at a NULL name */
-+ if (ipsec_ocf_check_alg(s))
-+ (void)pfkey_list_insert_supported(s,
-+ &(pfkey_supported_list[SADB_SATYPE_ESP])); /* result deliberately ignored */
-+ }
-+
-+#ifdef CONFIG_KLIPS_AH
-+ for (s = ocf_ah_algs; s->ias_name; s++) { /* table ends at a NULL name */
-+ if (ipsec_ocf_check_alg(s))
-+ (void)pfkey_list_insert_supported(s,
-+ &(pfkey_supported_list[SADB_SATYPE_AH])); /* result deliberately ignored */
-+ }
-+#endif
-+
-+ /* send register event to userspace */
-+ pfkey_register_reply(SADB_SATYPE_ESP, NULL);
-+ pfkey_register_reply(SADB_SATYPE_AH, NULL); /* sent even without CONFIG_KLIPS_AH */
-+}
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_ocf.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,41 @@
-+#ifndef _IPSEC_OCF_H_
-+#define _IPSEC_OCF_H_
-+/****************************************************************************/
-+/*
-+ * IPSEC OCF support
-+ *
-+ * This code written by David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h>
-+
-+#ifdef CONFIG_KLIPS_OCF
-+
-+#include <cryptodev.h>
-+
-+extern int ipsec_ocf_sa_init(struct ipsec_sa *ipsp, int authalg, int encalg);
-+extern int ipsec_ocf_sa_free(struct ipsec_sa *ipsp);
-+extern enum ipsec_rcv_value ipsec_ocf_rcv(struct ipsec_rcv_state *irs);
-+extern enum ipsec_xmit_value ipsec_ocf_xmit(struct ipsec_xmit_state *ixs);
-+extern void ipsec_ocf_init(void);
-+
-+#endif
-+
-+/****************************************************************************/
-+#endif /* _IPSEC_OCF_H_ */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_proc.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1056 @@
-+/*
-+ * @(#) /proc file system interface code.
-+ *
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * 2001 Michael Richardson <mcr@freeswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * Split out from ipsec_init.c version 1.70.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-+#include <linux/moduleparam.h>
-+#endif
-+#include <linux/kernel.h> /* printk() */
-+#include <linux/ip.h> /* struct iphdr */
-+
-+#include "openswan/ipsec_kversion.h"
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/in.h> /* struct sockaddr_in */
-+#include <linux/skbuff.h>
-+#include <asm/uaccess.h> /* copy_from_user */
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+#ifdef SPINLOCK_23
-+#include <linux/spinlock.h> /* *lock* */
-+#else /* SPINLOCK_23 */
-+#include <asm/spinlock.h> /* *lock* */
-+#endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+#ifdef CONFIG_PROC_FS
-+#include <linux/proc_fs.h>
-+#endif /* CONFIG_PROC_FS */
-+#ifdef NETLINK_SOCK
-+#include <linux/netlink.h>
-+#else
-+#include <net/netlink.h>
-+#endif
-+
-+#include "openswan/radij.h"
-+
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_stats.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_kern24.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include "openswan/ipsec_proto.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#ifdef CONFIG_PROC_FS
-+
-+#ifdef IPSEC_PROC_SUBDIRS /* directory handles, all NULL until assigned elsewhere */
-+static struct proc_dir_entry *proc_net_ipsec_dir = NULL;
-+static struct proc_dir_entry *proc_eroute_dir = NULL;
-+static struct proc_dir_entry *proc_spi_dir = NULL;
-+static struct proc_dir_entry *proc_spigrp_dir = NULL;
-+static struct proc_dir_entry *proc_birth_dir = NULL;
-+static struct proc_dir_entry *proc_stats_dir = NULL;
-+#endif /* IPSEC_PROC_SUBDIRS */
-+
-+struct ipsec_birth_reply ipsec_ipv4_birth_packet;
-+struct ipsec_birth_reply ipsec_ipv6_birth_packet;
-+
-+#ifdef CONFIG_KLIPS_DEBUG /* defined here only in debug builds; all 0 except the policy check */
-+int debug_esp = 0;
-+int debug_ah = 0;
-+int sysctl_ipsec_inbound_policy_check = 1;
-+int debug_tunnel = 0; /* tested against DB_TN_* bits in the KLIPS_PRINT calls */
-+int debug_xmit = 0;
-+int debug_xform = 0; /* non-zero adds reftable/refentry to the spi listing */
-+int debug_eroute = 0;
-+int debug_spi = 0;
-+int debug_radij = 0; /* DB_RJ_DUMPTREES bit triggers rj_dumptrees() in the eroute listing */
-+int debug_pfkey = 0;
-+int debug_rcv = 0;
-+int debug_netlink = 0;
-+int sysctl_ipsec_debug_verbose = 0;
-+int sysctl_ipsec_debug_ipcomp =0;
-+int sysctl_ipsec_icmp = 0;
-+int sysctl_ipsec_tos = 0;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+#define DECREMENT_UNSIGNED(X, amount) (((amount) < (X)) ? (X)-(amount) : 0) /* X - amount, floored at 0; args parenthesised */
-+
-+#ifdef CONFIG_KLIPS_ALG
-+extern int ipsec_xform_get_info(char *buffer, char **start,
-+ off_t offset, int length IPSEC_PROC_LAST_ARG);
-+#endif
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_eroute_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{ /* get_info handler: prints the eroute tree by walking rnh under eroute_lock */
-+ struct wsbuf w = {buffer, length, offset, 0, 0}; /* walker state; begin/len are read back below */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_radij & DB_RJ_DUMPTREES)
-+ rj_dumptrees(); /* XXXXXXXXX */
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_eroute_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ spin_lock_bh(&eroute_lock);
-+
-+ rj_walktree(rnh, ipsec_rj_walker_procprint, &w); /* the walker callback fills w */
-+/* rj_walktree(mask_rjhead, ipsec_rj_walker_procprint, &w); */
-+
-+ spin_unlock_bh(&eroute_lock);
-+
-+ *start = buffer + (offset - w.begin); /* Start of wanted data */
-+ return w.len - (offset - w.begin); /* bytes available from *start */
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_spi_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{ /* get_info handler: one text line per SA found in ipsec_sadb_hash */
-+ const int max_content = length > 0? length-1 : 0; /* limit of useful snprintf output */
-+ int len = 0; /* bytes currently held in buffer */
-+ off_t begin = 0; /* file position that buffer[0] corresponds to */
-+ int i;
-+ struct ipsec_sa *sa_p;
-+ char sa[SATOT_BUF];
-+ char buf_s[SUBNETTOA_BUF];
-+ char buf_d[SUBNETTOA_BUF];
-+ size_t sa_len;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_spi_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ spin_lock_bh(&tdb_lock); /* held for the whole walk, released at done_spi_i */
-+
-+ for (i = 0; i < SADB_HASHMOD; i++) {
-+ for (sa_p = ipsec_sadb_hash[i];
-+ sa_p;
-+ sa_p = sa_p->ips_hnext) {
-+ ipsec_sa_get(sa_p); /* paired with ipsec_sa_put() after the line is written */
-+ sa_len = satot(&sa_p->ips_said, 'x', sa, sizeof(sa));
-+ len += ipsec_snprintf(buffer+len, length-len, "%s ",
-+ sa_len ? sa : " (error)");
-+
-+ len += ipsec_snprintf(buffer+len, length-len, "%s%s%s",
-+ IPS_XFORM_NAME(sa_p));
-+
-+ len += ipsec_snprintf(buffer+len, length-len, ": dir=%s",
-+ (sa_p->ips_flags & EMT_INBOUND) ?
-+ "in " : "out");
-+
-+ if(sa_p->ips_addr_s) {
-+ addrtoa(((struct sockaddr_in*)(sa_p->ips_addr_s))->sin_addr,
-+ 0, buf_s, sizeof(buf_s));
-+ len += ipsec_snprintf(buffer+len, length-len, " src=%s",
-+ buf_s);
-+ }
-+
-+ if((sa_p->ips_said.proto == IPPROTO_IPIP)
-+ && (sa_p->ips_flags & SADB_X_SAFLAGS_INFLOW)) {
-+ subnettoa(sa_p->ips_flow_s.u.v4.sin_addr,
-+ sa_p->ips_mask_s.u.v4.sin_addr,
-+ 0,
-+ buf_s,
-+ sizeof(buf_s));
-+
-+ subnettoa(sa_p->ips_flow_d.u.v4.sin_addr,
-+ sa_p->ips_mask_d.u.v4.sin_addr,
-+ 0,
-+ buf_d,
-+ sizeof(buf_d));
-+
-+ len += ipsec_snprintf(buffer+len, length-len, " policy=%s->%s",
-+ buf_s, buf_d);
-+ }
-+
-+ if(sa_p->ips_iv_bits) {
-+ int j;
-+ len += ipsec_snprintf(buffer+len, length-len, " iv_bits=%dbits iv=0x",
-+ sa_p->ips_iv_bits);
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (!sa_p->ips_iv) {
-+ /* ocf doesn't set the IV, fake it for the UML tests */
-+ len += ipsec_snprintf(buffer+len, length-len, "0cf0");
-+ for (j = 0; j < (sa_p->ips_iv_bits / 8) - 2; j++) { /* "0cf0" stands in for two bytes */
-+ len += ipsec_snprintf(buffer+len, length-len, "%02x",
-+ (int) ((((long)sa_p) >> j) & 0xff)); /* filler derived from the SA address */
-+ }
-+ } else
-+#endif
-+ for(j = 0; j < sa_p->ips_iv_bits / 8; j++) {
-+ len += ipsec_snprintf(buffer+len, length-len, "%02x",
-+ (__u32)((__u8*)(sa_p->ips_iv))[j]);
-+ }
-+ }
-+
-+ if(sa_p->ips_encalg || sa_p->ips_authalg) { /* replay-window fields, printed only when non-zero */
-+ if(sa_p->ips_replaywin) {
-+ len += ipsec_snprintf(buffer+len, length-len, " ooowin=%d",
-+ sa_p->ips_replaywin);
-+ }
-+ if(sa_p->ips_errs.ips_replaywin_errs) {
-+ len += ipsec_snprintf(buffer+len, length-len, " ooo_errs=%d",
-+ sa_p->ips_errs.ips_replaywin_errs);
-+ }
-+ if(sa_p->ips_replaywin_lastseq) {
-+ len += ipsec_snprintf(buffer+len, length-len, " seq=%d",
-+ sa_p->ips_replaywin_lastseq);
-+ }
-+ if(sa_p->ips_replaywin_bitmap) {
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
-+ len += ipsec_snprintf(buffer+len, length-len, " bit=0x%Lx",
-+ sa_p->ips_replaywin_bitmap);
-+#else
-+ len += ipsec_snprintf(buffer+len, length-len, " bit=0x%x%08x",
-+ (__u32)(sa_p->ips_replaywin_bitmap >> 32),
-+ (__u32)sa_p->ips_replaywin_bitmap);
-+#endif
-+ }
-+ if(sa_p->ips_replaywin_maxdiff) {
-+ len += ipsec_snprintf(buffer+len, length-len, " max_seq_diff=%d",
-+ sa_p->ips_replaywin_maxdiff);
-+ }
-+ }
-+ if(sa_p->ips_flags & ~EMT_INBOUND) {
-+ len += ipsec_snprintf(buffer+len, length-len, " flags=0x%x",
-+ sa_p->ips_flags & ~EMT_INBOUND);
-+ len += ipsec_snprintf(buffer+len, length-len, "<");
-+ /* flag printing goes here (nothing is printed between the brackets yet) */
-+ len += ipsec_snprintf(buffer+len, length-len, ">");
-+ }
-+ if(sa_p->ips_auth_bits) {
-+ len += ipsec_snprintf(buffer+len, length-len, " alen=%d",
-+ sa_p->ips_auth_bits);
-+ }
-+ if(sa_p->ips_key_bits_a) {
-+ len += ipsec_snprintf(buffer+len, length-len, " aklen=%d",
-+ sa_p->ips_key_bits_a);
-+ }
-+ if(sa_p->ips_errs.ips_auth_errs) {
-+ len += ipsec_snprintf(buffer+len, length-len, " auth_errs=%d",
-+ sa_p->ips_errs.ips_auth_errs);
-+ }
-+ if(sa_p->ips_key_bits_e) {
-+ len += ipsec_snprintf(buffer+len, length-len, " eklen=%d",
-+ sa_p->ips_key_bits_e);
-+ }
-+ if(sa_p->ips_errs.ips_encsize_errs) {
-+ len += ipsec_snprintf(buffer+len, length-len, " encr_size_errs=%d",
-+ sa_p->ips_errs.ips_encsize_errs);
-+ }
-+ if(sa_p->ips_errs.ips_encpad_errs) {
-+ len += ipsec_snprintf(buffer+len, length-len, " encr_pad_errs=%d",
-+ sa_p->ips_errs.ips_encpad_errs);
-+ }
-+
-+ len += ipsec_snprintf(buffer+len, length-len, " life(c,s,h)=");
-+
-+ len += ipsec_lifetime_format(buffer + len,
-+ length - len,
-+ "alloc",
-+ ipsec_life_countbased,
-+ &sa_p->ips_life.ipl_allocations);
-+
-+ len += ipsec_lifetime_format(buffer + len,
-+ length - len,
-+ "bytes",
-+ ipsec_life_countbased,
-+ &sa_p->ips_life.ipl_bytes);
-+
-+ len += ipsec_lifetime_format(buffer + len,
-+ length - len,
-+ "addtime",
-+ ipsec_life_timebased,
-+ &sa_p->ips_life.ipl_addtime);
-+
-+ len += ipsec_lifetime_format(buffer + len,
-+ length - len,
-+ "usetime",
-+ ipsec_life_timebased,
-+ &sa_p->ips_life.ipl_usetime);
-+
-+ len += ipsec_lifetime_format(buffer + len,
-+ length - len,
-+ "packets",
-+ ipsec_life_countbased,
-+ &sa_p->ips_life.ipl_packets);
-+
-+ if(sa_p->ips_life.ipl_usetime.ipl_last) { /* XXX-MCR should be last? */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
-+ len += ipsec_snprintf(buffer+len, length-len, " idle=%Ld",
-+ jiffies / HZ - sa_p->ips_life.ipl_usetime.ipl_last);
-+#else
-+ len += ipsec_snprintf(buffer+len, length-len, " idle=%lu",
-+ jiffies / HZ - (unsigned long)sa_p->ips_life.ipl_usetime.ipl_last);
-+#endif
-+ }
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ if(sa_p->ips_said.proto == IPPROTO_COMP &&
-+ (sa_p->ips_comp_ratio_dbytes ||
-+ sa_p->ips_comp_ratio_cbytes)) {
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0)
-+ len += ipsec_snprintf(buffer+len, length-len, " ratio=%Ld:%Ld",
-+ sa_p->ips_comp_ratio_dbytes,
-+ sa_p->ips_comp_ratio_cbytes);
-+#else
-+ len += ipsec_snprintf(buffer+len, length-len, " ratio=%lu:%lu",
-+ (unsigned long)sa_p->ips_comp_ratio_dbytes,
-+ (unsigned long)sa_p->ips_comp_ratio_cbytes);
-+#endif
-+ }
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ {
-+ char *natttype_name;
-+
-+ switch(sa_p->ips_natt_type)
-+ {
-+ case 0:
-+ natttype_name="none";
-+ break;
-+ case ESPINUDP_WITH_NON_IKE:
-+ natttype_name="nonike";
-+ break;
-+ case ESPINUDP_WITH_NON_ESP:
-+ natttype_name="nonesp";
-+ break;
-+ default:
-+ natttype_name = "unknown";
-+ break;
-+ }
-+
-+ len += ipsec_snprintf(buffer + len, length-len, " natencap=%s",
-+ natttype_name);
-+
-+ len += ipsec_snprintf(buffer + len, length-len, " natsport=%d",
-+ sa_p->ips_natt_sport);
-+
-+ len += ipsec_snprintf(buffer + len,length-len, " natdport=%d",
-+ sa_p->ips_natt_dport);
-+ }
-+#else
-+ len += ipsec_snprintf(buffer + len, length-len, " natencap=na");
-+#endif /* CONFIG_IPSEC_NAT_TRAVERSAL */
-+
-+ /* subtract one: the ipsec_sa_get() at the top of this loop body took a reference for the dump */
-+ len += ipsec_snprintf(buffer + len,length-len, " refcount=%d",
-+ atomic_read(&sa_p->ips_refcount)-1);
-+
-+ len += ipsec_snprintf(buffer+len, length-len, " ref=%d",
-+ sa_p->ips_ref);
-+ len += ipsec_snprintf(buffer+len, length-len, " refhim=%d",
-+ sa_p->ips_refhim);
-+
-+ if(sa_p->ips_out) {
-+ len += ipsec_snprintf(buffer+len, length-len, " outif=%s:%d",
-+ sa_p->ips_out->name,
-+ sa_p->ips_transport_direct);
-+ }
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_xform) {
-+ len += ipsec_snprintf(buffer+len, length-len, " reftable=%lu refentry=%lu",
-+ (unsigned long)IPsecSAref2table(sa_p->ips_ref),
-+ (unsigned long)IPsecSAref2entry(sa_p->ips_ref));
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ len += ipsec_snprintf(buffer+len, length-len, "\n");
-+
-+ ipsec_sa_put(sa_p);
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loops */
-+ len = max_content; /* cut off whatever did not fit */
-+ goto done_spi_i;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of what we've generated */
-+
-+ if (pos <= offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+ }
-+ }
-+
-+done_spi_i:
-+ spin_unlock_bh(&tdb_lock);
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ return len - (offset - begin); /* bytes available from *start */
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_spigrp_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{ /* get_info handler: per hashed SA, one line listing the SAs chained via ips_next */
-+ /* Limit of useful snprintf output */
-+ const int max_content = length > 0? length-1 : 0;
-+
-+ int len = 0; /* bytes currently held in buffer */
-+ off_t begin = 0; /* file position that buffer[0] corresponds to */
-+ int i;
-+ struct ipsec_sa *sa_p, *sa_p2;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_spigrp_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ spin_lock_bh(&tdb_lock); /* held for the whole walk, released at done_spigrp_i */
-+
-+ for (i = 0; i < SADB_HASHMOD; i++) {
-+ for (sa_p = ipsec_sadb_hash[i];
-+ sa_p != NULL;
-+ sa_p = sa_p->ips_hnext)
-+ {
-+ sa_p2 = sa_p;
-+ while(sa_p2 != NULL) { /* follow the ips_next chain starting at this SA */
-+ struct ipsec_sa *sa2n;
-+ sa_len = satot(&sa_p2->ips_said,
-+ 'x', sa, sizeof(sa));
-+
-+ len += ipsec_snprintf(buffer+len, length-len, "%s ",
-+ sa_len ? sa : " (error)");
-+
-+ sa2n = sa_p2->ips_next;
-+ sa_p2 = sa2n;
-+ }
-+ len += ipsec_snprintf(buffer+len, length-len, "\n");
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loops */
-+ len = max_content; /* cut off whatever did not fit */
-+ goto done_spigrp_i;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of generated text */
-+
-+ if (pos <= offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+ }
-+ }
-+
-+done_spigrp_i:
-+ spin_unlock_bh(&tdb_lock);
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ return len - (offset - begin); /* bytes available from *start */
-+}
-+
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_tncfg_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{ /* get_info handler: one line per existing ipsec device with its attached device and MTUs */
-+ /* limit of useful snprintf output */
-+ const int max_content = length > 0? length-1 : 0;
-+ int len = 0; /* bytes currently held in buffer */
-+ off_t begin = 0; /* file position that buffer[0] corresponds to */
-+ int i;
-+ char name[9];
-+ struct net_device *dev, *privdev;
-+ struct ipsecpriv *priv;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_tncfg_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ for(i = 0; i < IPSEC_NUM_IF; i++) {
-+ ipsec_snprintf(name, (ssize_t) sizeof(name), IPSEC_DEV_FORMAT, i); /* build the i-th device name */
-+ dev = __ipsec_dev_get(name);
-+ if(dev) { /* indexes without a device produce no output */
-+ priv = (struct ipsecpriv *)(dev->priv);
-+ len += ipsec_snprintf(buffer+len, length-len, "%s",
-+ dev->name);
-+ if(priv) {
-+ privdev = (struct net_device *)(priv->dev);
-+ len += ipsec_snprintf(buffer+len, length-len, " -> %s",
-+ privdev ? privdev->name : "NULL");
-+ len += ipsec_snprintf(buffer+len, length-len, " mtu=%d(%d) -> %d",
-+ dev->mtu,
-+ priv->mtu,
-+ privdev ? privdev->mtu : 0);
-+ } else {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_tncfg_get_info: device '%s' has no private data space!\n",
-+ dev->name);
-+ }
-+ len += ipsec_snprintf(buffer+len, length-len, "\n");
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loop */
-+ len = max_content; /* cut off whatever did not fit */
-+ break;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of generated text */
-+ if (pos <= offset) { /* everything so far lies before offset: discard it */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+ }
-+ }
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ len -= (offset - begin); /* Start slop */
-+ if (len > length) /* never report more than the caller's buffer size */
-+ len = length;
-+ return len;
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_version_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{
-+ int len = 0;
-+ off_t begin = 0;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_version_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ len += ipsec_snprintf(buffer + len,length-len, "Openswan version: %s\n",
-+ ipsec_version_code());
-+#if 0
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_version_get_info: "
-+ "ipsec_init version: %s\n",
-+ ipsec_init_c_version);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_version_get_info: "
-+ "ipsec_tunnel version: %s\n",
-+ ipsec_tunnel_c_version);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_version_get_info: "
-+ "ipsec_netlink version: %s\n",
-+ ipsec_netlink_c_version);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_version_get_info: "
-+ "radij_c_version: %s\n",
-+ radij_c_version);
-+#endif
-+
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ len -= (offset - begin); /* Start slop */
-+ if (len > length)
-+ len = length;
-+ return len;
-+}
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+unsigned int natt_available = 1;
-+#else
-+unsigned int natt_available = 0;
-+#endif
-+module_param(natt_available,int,0644);
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_natt_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{
-+ int len = 0;
-+ off_t begin = 0;
-+
-+ len += ipsec_snprintf(buffer + len,
-+ length-len, "%d\n",
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ 1
-+#else
-+ 0
-+#endif
-+ );
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ len -= (offset - begin); /* Start slop */
-+ if (len > length)
-+ len = length;
-+ return len;
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_birth_info(char *page,
-+ char **start,
-+ off_t offset,
-+ int count,
-+ int *eof,
-+ void *data)
-+{
-+ struct ipsec_birth_reply *ibr = (struct ipsec_birth_reply *)data;
-+ int len;
-+
-+ if(offset >= ibr->packet_template_len) {
-+ if(eof) {
-+ *eof=1;
-+ }
-+ return 0;
-+ }
-+
-+ len = ibr->packet_template_len;
-+ len -= offset;
-+ if (len > count)
-+ len = count;
-+
-+ memcpy(page + offset, ibr->packet_template+offset, len);
-+
-+ return len;
-+}
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_birth_set(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ struct ipsec_birth_reply *ibr = (struct ipsec_birth_reply *)data;
-+ int len;
-+
-+ KLIPS_INC_USE;
-+ if(count > IPSEC_BIRTH_TEMPLATE_MAXLEN) {
-+ len = IPSEC_BIRTH_TEMPLATE_MAXLEN;
-+ } else {
-+ len = count;
-+ }
-+
-+ if(copy_from_user(ibr->packet_template, buffer, len)) {
-+ KLIPS_DEC_USE;
-+ return -EFAULT;
-+ }
-+ ibr->packet_template_len = len;
-+
-+ KLIPS_DEC_USE;
-+
-+ return len;
-+}
-+
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_klipsdebug_get_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length IPSEC_PROC_LAST_ARG)
-+{
-+ int len = 0;
-+ off_t begin = 0;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_PROCFS,
-+ "klips_debug:ipsec_klipsdebug_get_info: "
-+ "buffer=0p%p, *start=0p%p, offset=%d, length=%d\n",
-+ buffer,
-+ *start,
-+ (int)offset,
-+ length);
-+
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_tunnel=%08x.\n", debug_tunnel);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_xform=%08x.\n", debug_xform);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_eroute=%08x.\n", debug_eroute);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_spi=%08x.\n", debug_spi);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_radij=%08x.\n", debug_radij);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_esp=%08x.\n", debug_esp);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_ah=%08x.\n", debug_ah);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_rcv=%08x.\n", debug_rcv);
-+ len += ipsec_snprintf(buffer+len, length-len, "debug_pfkey=%08x.\n", debug_pfkey);
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ len -= (offset - begin); /* Start slop */
-+ if (len > length)
-+ len = length;
-+ return len;
-+}
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+IPSEC_PROCFS_DEBUG_NO_STATIC
-+int
-+ipsec_stats_get_int_info(char *buffer,
-+ char **start,
-+ off_t offset,
-+ int length,
-+ int *eof,
-+ void *data)
-+{
-+
-+ const int max_content = length > 0? length-1 : 0;
-+ int len = 0;
-+ int *thing;
-+
-+ thing = (int *)data;
-+
-+ len = ipsec_snprintf(buffer+len, length-len, "%08x\n", *thing);
-+
-+ if (len >= max_content)
-+ len = max_content; /* truncate crap */
-+
-+ *start = buffer + offset; /* Start of wanted data */
-+ return len > offset? len - offset : 0;
-+
-+}
-+
-+#ifndef PROC_FS_2325
-+struct proc_dir_entry ipsec_eroute =
-+{
-+ 0,
-+ 12, "ipsec_eroute",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_eroute_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+
-+struct proc_dir_entry ipsec_spi =
-+{
-+ 0,
-+ 9, "ipsec_spi",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_spi_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+
-+struct proc_dir_entry ipsec_spigrp =
-+{
-+ 0,
-+ 12, "ipsec_spigrp",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_spigrp_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+
-+struct proc_dir_entry ipsec_tncfg =
-+{
-+ 0,
-+ 11, "ipsec_tncfg",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_tncfg_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+
-+struct proc_dir_entry ipsec_version =
-+{
-+ 0,
-+ 13, "ipsec_version",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_version_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+struct proc_dir_entry ipsec_klipsdebug =
-+{
-+ 0,
-+ 16, "ipsec_klipsdebug",
-+ S_IFREG | S_IRUGO, 1, 0, 0, 0,
-+ &proc_net_inode_operations,
-+ ipsec_klipsdebug_get_info,
-+ NULL, NULL, NULL, NULL, NULL
-+};
-+#endif /* CONFIG_KLIPS_DEBUG */
-+#endif /* !PROC_FS_2325 */
-+#endif /* CONFIG_PROC_FS */
-+
-+#if defined(PROC_FS_2325)
-+struct ipsec_proc_list {
-+ char *name;
-+ struct proc_dir_entry **parent;
-+ struct proc_dir_entry **dir;
-+ read_proc_t *readthing;
-+ write_proc_t *writething;
-+ void *data;
-+};
-+static struct ipsec_proc_list proc_items[]={
-+#ifdef CONFIG_KLIPS_DEBUG
-+ {"klipsdebug", &proc_net_ipsec_dir, NULL, ipsec_klipsdebug_get_info, NULL, NULL},
-+#endif
-+ {"eroute", &proc_net_ipsec_dir, &proc_eroute_dir, NULL, NULL, NULL},
-+ {"all", &proc_eroute_dir, NULL, ipsec_eroute_get_info, NULL, NULL},
-+ {"spi", &proc_net_ipsec_dir, &proc_spi_dir, NULL, NULL, NULL},
-+ {"all", &proc_spi_dir, NULL, ipsec_spi_get_info, NULL, NULL},
-+ {"spigrp", &proc_net_ipsec_dir, &proc_spigrp_dir, NULL, NULL, NULL},
-+ {"all", &proc_spigrp_dir, NULL, ipsec_spigrp_get_info, NULL, NULL},
-+ {"birth", &proc_net_ipsec_dir, &proc_birth_dir, NULL, NULL, NULL},
-+ {"ipv4", &proc_birth_dir, NULL, ipsec_birth_info, ipsec_birth_set, (void *)&ipsec_ipv4_birth_packet},
-+ {"ipv6", &proc_birth_dir, NULL, ipsec_birth_info, ipsec_birth_set, (void *)&ipsec_ipv6_birth_packet},
-+ {"tncfg", &proc_net_ipsec_dir, NULL, ipsec_tncfg_get_info, NULL, NULL},
-+#ifdef CONFIG_KLIPS_ALG
-+
-+ {"xforms", &proc_net_ipsec_dir, NULL, ipsec_xform_get_info, NULL, NULL},
-+#endif
-+ {"stats", &proc_net_ipsec_dir, &proc_stats_dir, NULL, NULL, NULL},
-+ {"trap_count", &proc_stats_dir, NULL, ipsec_stats_get_int_info, NULL, &ipsec_xmit_trap_count},
-+ {"trap_sendcount", &proc_stats_dir, NULL, ipsec_stats_get_int_info, NULL, &ipsec_xmit_trap_sendcount},
-+ {"natt", &proc_net_ipsec_dir, NULL, ipsec_natt_get_info, NULL, NULL},
-+ {"version", &proc_net_ipsec_dir, NULL, ipsec_version_get_info, NULL, NULL},
-+ {NULL, NULL, NULL, NULL, NULL, NULL}
-+};
-+#endif
-+
-+int
-+ipsec_proc_init()
-+{
-+ int error = 0;
-+#ifdef IPSEC_PROC_SUBDIRS
-+ struct proc_dir_entry *item;
-+#endif
-+
-+ /*
-+ * just complain because pluto won't run without /proc!
-+ */
-+#ifndef CONFIG_PROC_FS
-+#error You must have PROC_FS built in to use KLIPS
-+#endif
-+
-+ /* for 2.0 kernels */
-+#if !defined(PROC_FS_2325) && !defined(PROC_FS_21)
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_eroute);
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_spi);
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_spigrp);
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_tncfg);
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_version);
-+#ifdef CONFIG_KLIPS_DEBUG
-+ error |= proc_register_dynamic(&PROC_NET, &ipsec_klipsdebug);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+#endif
-+
-+ /* for 2.2 kernels */
-+#if !defined(PROC_FS_2325) && defined(PROC_FS_21)
-+ error |= proc_register(PROC_NET, &ipsec_eroute);
-+ error |= proc_register(PROC_NET, &ipsec_spi);
-+ error |= proc_register(PROC_NET, &ipsec_spigrp);
-+ error |= proc_register(PROC_NET, &ipsec_tncfg);
-+ error |= proc_register(PROC_NET, &ipsec_version);
-+#ifdef CONFIG_KLIPS_DEBUG
-+ error |= proc_register(PROC_NET, &ipsec_klipsdebug);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+#endif
-+
-+ /* for 2.4 kernels */
-+#if defined(PROC_FS_2325)
-+ /* create /proc/net/ipsec */
-+
-+ /* zero these out before we initialize /proc/net/ipsec/birth/stuff */
-+ memset(&ipsec_ipv4_birth_packet, 0, sizeof(struct ipsec_birth_reply));
-+ memset(&ipsec_ipv6_birth_packet, 0, sizeof(struct ipsec_birth_reply));
-+
-+ proc_net_ipsec_dir = proc_mkdir("ipsec", PROC_NET);
-+ if(proc_net_ipsec_dir == NULL) {
-+ /* no point in continuing */
-+ return 1;
-+ }
-+
-+ {
-+ struct ipsec_proc_list *it;
-+
-+ it=proc_items;
-+ while(it->name!=NULL) {
-+ if(it->dir) {
-+ /* make a dir instead */
-+ item = proc_mkdir(it->name, *it->parent);
-+ *it->dir = item;
-+ } else {
-+ item = create_proc_entry(it->name, 0400, *it->parent);
-+ }
-+ if(item) {
-+ item->read_proc = it->readthing;
-+ item->write_proc = it->writething;
-+ item->data = it->data;
-+#ifdef MODULE
-+ item->owner = THIS_MODULE;
-+#endif
-+ } else {
-+ error |= 1;
-+ }
-+ it++;
-+ }
-+ }
-+
-+ /* now create some symlinks to provide compatibility */
-+ proc_symlink("ipsec_eroute", PROC_NET, "ipsec/eroute/all");
-+ proc_symlink("ipsec_spi", PROC_NET, "ipsec/spi/all");
-+ proc_symlink("ipsec_spigrp", PROC_NET, "ipsec/spigrp/all");
-+ proc_symlink("ipsec_tncfg", PROC_NET, "ipsec/tncfg");
-+ proc_symlink("ipsec_version",PROC_NET, "ipsec/version");
-+ proc_symlink("ipsec_klipsdebug",PROC_NET,"ipsec/klipsdebug");
-+
-+#endif /* !PROC_FS_2325 */
-+
-+ return error;
-+}
-+
-+void
-+ipsec_proc_cleanup()
-+{
-+
-+ /* for 2.0 and 2.2 kernels */
-+#if !defined(PROC_FS_2325)
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (proc_net_unregister(ipsec_klipsdebug.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_klipsdebug\n");
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ if (proc_net_unregister(ipsec_version.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_version\n");
-+ if (proc_net_unregister(ipsec_eroute.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_eroute\n");
-+ if (proc_net_unregister(ipsec_spi.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_spi\n");
-+ if (proc_net_unregister(ipsec_spigrp.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_spigrp\n");
-+ if (proc_net_unregister(ipsec_tncfg.low_ino) != 0)
-+ printk("klips_debug:ipsec_cleanup: "
-+ "cannot unregister /proc/net/ipsec_tncfg\n");
-+#endif
-+
-+ /* for 2.4 kernels */
-+#if defined(PROC_FS_2325)
-+ {
-+ struct ipsec_proc_list *it;
-+
-+ /* find end of list */
-+ it=proc_items;
-+ while(it->name!=NULL) {
-+ it++;
-+ }
-+ it--;
-+
-+ do {
-+ remove_proc_entry(it->name, *it->parent);
-+ it--;
-+ } while(it >= proc_items);
-+ }
-+
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ remove_proc_entry("ipsec_klipsdebug", PROC_NET);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ remove_proc_entry("ipsec_eroute", PROC_NET);
-+ remove_proc_entry("ipsec_spi", PROC_NET);
-+ remove_proc_entry("ipsec_spigrp", PROC_NET);
-+ remove_proc_entry("ipsec_tncfg", PROC_NET);
-+ remove_proc_entry("ipsec_version", PROC_NET);
-+ remove_proc_entry("ipsec", PROC_NET);
-+#endif /* 2.4 kernel */
-+}
-+
-+/*
-+ *
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_radij.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,554 @@
-+/*
-+ * Interface between the IPSEC code and the radix (radij) tree code
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, struct net_device_stats and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* 23_SPINLOCK */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* 23_SPINLOCK */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "openswan/ipsec_eroute.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_tunnel.h" /* struct ipsecpriv */
-+#include "openswan/ipsec_xform.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+
-+struct radij_node_head *rnh = NULL;
-+#ifdef SPINLOCK
-+spinlock_t eroute_lock = SPIN_LOCK_UNLOCKED;
-+#else /* SPINLOCK */
-+spinlock_t eroute_lock;
-+#endif /* SPINLOCK */
-+
-+int
-+ipsec_radijinit(void)
-+{
-+ maj_keylen = sizeof (struct sockaddr_encap);
-+
-+ rj_init();
-+
-+ if (rj_inithead((void **)&rnh, /*16*/offsetof(struct sockaddr_encap, sen_type) * sizeof(__u8)) == 0) /* 16 is bit offset of sen_type */
-+ return -1;
-+ return 0;
-+}
-+
-+int
-+ipsec_radijcleanup(void)
-+{
-+ int error = 0;
-+
-+ spin_lock_bh(&eroute_lock);
-+
-+ error = radijcleanup();
-+
-+ spin_unlock_bh(&eroute_lock);
-+
-+ return error;
-+}
-+
-+int
-+ipsec_cleareroutes(void)
-+{
-+ int error;
-+
-+ spin_lock_bh(&eroute_lock);
-+
-+ error = radijcleartree();
-+
-+ spin_unlock_bh(&eroute_lock);
-+
-+ return error;
-+}
-+
-+int
-+ipsec_breakroute(struct sockaddr_encap *eaddr,
-+ struct sockaddr_encap *emask,
-+ struct sk_buff **first,
-+ struct sk_buff **last)
-+{
-+ struct eroute *ro;
-+ struct radij_node *rn;
-+ int error;
-+#ifdef CONFIG_KLIPS_DEBUG
-+
-+ if (debug_eroute) {
-+ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF];
-+ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_breakroute: "
-+ "attempting to delete eroute for %s:%d->%s:%d %d\n",
-+ buf1, ntohs(eaddr->sen_sport),
-+ buf2, ntohs(eaddr->sen_dport), eaddr->sen_proto);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ spin_lock_bh(&eroute_lock);
-+
-+ if ((error = rj_delete(eaddr, emask, rnh, &rn)) != 0) {
-+ spin_unlock_bh(&eroute_lock);
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_breakroute: "
-+ "node not found, eroute delete failed.\n");
-+ return error;
-+ }
-+
-+ spin_unlock_bh(&eroute_lock);
-+
-+ ro = (struct eroute *)rn;
-+
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_breakroute: "
-+ "deleted eroute=0p%p, ident=0p%p->0p%p, first=0p%p, last=0p%p\n",
-+ ro,
-+ ro->er_ident_s.data,
-+ ro->er_ident_d.data,
-+ ro->er_first,
-+ ro->er_last);
-+
-+ if (ro->er_ident_s.data != NULL) {
-+ kfree(ro->er_ident_s.data);
-+ }
-+ if (ro->er_ident_d.data != NULL) {
-+ kfree(ro->er_ident_d.data);
-+ }
-+ if (ro->er_first != NULL) {
-+#if 0
-+ struct net_device_stats *stats = (struct net_device_stats *) &(((struct ipsecpriv *)(ro->er_first->dev->priv))->mystats);
-+ stats->tx_dropped--;
-+#endif
-+ *first = ro->er_first;
-+ }
-+ if (ro->er_last != NULL) {
-+#if 0
-+ struct net_device_stats *stats = (struct net_device_stats *) &(((struct ipsecpriv *)(ro->er_last->dev->priv))->mystats);
-+ stats->tx_dropped--;
-+#endif
-+ *last = ro->er_last;
-+ }
-+
-+ if (rn->rj_flags & (RJF_ACTIVE | RJF_ROOT))
-+ panic ("ipsec_breakroute RMT_DELEROUTE root or active node\n");
-+ memset((caddr_t)rn, 0, sizeof (struct eroute));
-+ kfree(rn);
-+
-+ return 0;
-+}
-+
-+int
-+ipsec_makeroute(struct sockaddr_encap *eaddr,
-+ struct sockaddr_encap *emask,
-+ ip_said said,
-+ uint32_t pid,
-+ struct sk_buff *skb,
-+ struct ident *ident_s,
-+ struct ident *ident_d)
-+{
-+ struct eroute *retrt;
-+ int error;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+
-+ if (debug_eroute) {
-+
-+ {
-+ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF];
-+
-+ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ sa_len = satot(&said, 0, sa, sizeof(sa));
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "attempting to allocate %lu bytes to insert eroute for %s->%s, SA: %s, PID:%d, skb=0p%p, ident:%s->%s\n",
-+ (unsigned long) sizeof(struct eroute),
-+ buf1,
-+ buf2,
-+ sa_len ? sa : " (error)",
-+ pid,
-+ skb,
-+ (ident_s ? (ident_s->data ? ident_s->data : "NULL") : "NULL"),
-+ (ident_d ? (ident_d->data ? ident_d->data : "NULL") : "NULL"));
-+ }
-+ {
-+ char buf1[sizeof(struct sockaddr_encap)*2 + 1],
-+ buf2[sizeof(struct sockaddr_encap)*2 + 1];
-+ int i;
-+ unsigned char *b1 = buf1,
-+ *b2 = buf2,
-+ *ea = (unsigned char *)eaddr,
-+ *em = (unsigned char *)emask;
-+
-+
-+ for (i=0; i<sizeof(struct sockaddr_encap); i++) {
-+ sprintf(b1, "%02x", ea[i]);
-+ sprintf(b2, "%02x", em[i]);
-+ b1+=2;
-+ b2+=2;
-+ }
-+ KLIPS_PRINT(debug_eroute, "klips_debug:ipsec_makeroute: %s / %s \n", buf1, buf2);
-+ }
-+
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ retrt = (struct eroute *)kmalloc(sizeof (struct eroute), GFP_ATOMIC);
-+ if (retrt == NULL) {
-+ printk("klips_error:ipsec_makeroute: "
-+ "not able to allocate kernel memory");
-+ return -ENOMEM;
-+ }
-+ memset((caddr_t)retrt, 0, sizeof (struct eroute));
-+
-+ retrt->er_eaddr = *eaddr;
-+ retrt->er_emask = *emask;
-+ retrt->er_said = said;
-+ retrt->er_pid = pid;
-+ retrt->er_count = 0;
-+ retrt->er_lasttime = jiffies/HZ;
-+
-+ {
-+ /* this is because gcc 3. doesn't like cast's as lvalues */
-+ struct rjtentry *rje = (struct rjtentry *)&(retrt->er_rjt);
-+ caddr_t er = (caddr_t)&(retrt->er_eaddr);
-+
-+ rje->rd_nodes->rj_key= er;
-+ }
-+
-+ if (ident_s && ident_s->type != SADB_IDENTTYPE_RESERVED) {
-+ int data_len = ident_s->len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+
-+ retrt->er_ident_s.type = ident_s->type;
-+ retrt->er_ident_s.id = ident_s->id;
-+ retrt->er_ident_s.len = ident_s->len;
-+ if(data_len) {
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "attempting to allocate %u bytes for ident_s.\n",
-+ data_len);
-+ if(!(retrt->er_ident_s.data = kmalloc(data_len, GFP_KERNEL))) {
-+ kfree(retrt);
-+ printk("klips_error:ipsec_makeroute: not able to allocate kernel memory (%d)\n", data_len);
-+ return ENOMEM;
-+ }
-+ memcpy(retrt->er_ident_s.data, ident_s->data, data_len);
-+ } else {
-+ retrt->er_ident_s.data = NULL;
-+ }
-+ }
-+
-+ if (ident_d && ident_d->type != SADB_IDENTTYPE_RESERVED) {
-+ int data_len = ident_d->len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+
-+ retrt->er_ident_d.type = ident_d->type;
-+ retrt->er_ident_d.id = ident_d->id;
-+ retrt->er_ident_d.len = ident_d->len;
-+ if(data_len) {
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "attempting to allocate %u bytes for ident_d.\n",
-+ data_len);
-+ if(!(retrt->er_ident_d.data = kmalloc(data_len, GFP_KERNEL))) {
-+ if (retrt->er_ident_s.data)
-+ kfree(retrt->er_ident_s.data);
-+ kfree(retrt);
-+ printk("klips_error:ipsec_makeroute: not able to allocate kernel memory (%d)\n", data_len);
-+ return ENOMEM;
-+ }
-+ memcpy(retrt->er_ident_d.data, ident_d->data, data_len);
-+ } else {
-+ retrt->er_ident_d.data = NULL;
-+ }
-+ }
-+ retrt->er_first = skb;
-+ retrt->er_last = NULL;
-+
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "calling rj_addroute now\n");
-+
-+ spin_lock_bh(&eroute_lock);
-+
-+ error = rj_addroute(&(retrt->er_eaddr), &(retrt->er_emask),
-+ rnh, retrt->er_rjt.rd_nodes);
-+
-+ spin_unlock_bh(&eroute_lock);
-+
-+ if(error) {
-+ sa_len = KLIPS_SATOT(debug_eroute, &said, 0, sa, sizeof(sa));
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "rj_addroute not able to insert eroute for SA:%s (error:%d)\n",
-+ sa_len ? sa : " (error)", error);
-+ if (retrt->er_ident_s.data)
-+ kfree(retrt->er_ident_s.data);
-+ if (retrt->er_ident_d.data)
-+ kfree(retrt->er_ident_d.data);
-+
-+ kfree(retrt);
-+
-+ return error;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_eroute) {
-+ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF];
-+/*
-+ subnettoa(eaddr->sen_ip_src, emask->sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(eaddr->sen_ip_dst, emask->sen_ip_dst, 0, buf2, sizeof(buf2));
-+*/
-+ subnettoa(rd_key((&(retrt->er_rjt)))->sen_ip_src, rd_mask((&(retrt->er_rjt)))->sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(rd_key((&(retrt->er_rjt)))->sen_ip_dst, rd_mask((&(retrt->er_rjt)))->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ sa_len = satot(&retrt->er_said, 0, sa, sizeof(sa));
-+
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "pid=%05d "
-+ "count=%10d "
-+ "lasttime=%6d "
-+ "%-18s -> %-18s => %s\n",
-+ retrt->er_pid,
-+ retrt->er_count,
-+ (int)(jiffies/HZ - retrt->er_lasttime),
-+ buf1,
-+ buf2,
-+ sa_len ? sa : " (error)");
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_makeroute: "
-+ "succeeded.\n");
-+ return 0;
-+}
-+
-+struct eroute *
-+ipsec_findroute(struct sockaddr_encap *eaddr)
-+{
-+ struct radij_node *rn;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ char buf1[ADDRTOA_BUF], buf2[ADDRTOA_BUF];
-+
-+ if (debug_radij & DB_RJ_FINDROUTE) {
-+ addrtoa(eaddr->sen_ip_src, 0, buf1, sizeof(buf1));
-+ addrtoa(eaddr->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:ipsec_findroute: "
-+ "%s:%d->%s:%d %d\n",
-+ buf1, ntohs(eaddr->sen_sport),
-+ buf2, ntohs(eaddr->sen_dport),
-+ eaddr->sen_proto);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ rn = rj_match((caddr_t)eaddr, rnh);
-+ if(rn) {
-+ KLIPS_PRINT(debug_eroute && sysctl_ipsec_debug_verbose,
-+ "klips_debug:ipsec_findroute: "
-+ "found, points to proto=%d, spi=%x, dst=%x.\n",
-+ ((struct eroute*)rn)->er_said.proto,
-+ ntohl(((struct eroute*)rn)->er_said.spi),
-+ ntohl(((struct eroute*)rn)->er_said.dst.u.v4.sin_addr.s_addr));
-+ }
-+ return (struct eroute *)rn;
-+}
-+
-+#ifdef CONFIG_PROC_FS
-+/** ipsec_rj_walker_procprint: print one line of eroute table output.
-+ *
-+ * Theoretical BUG: if w->length is less than the length
-+ * of some line we should produce, that line will never
-+ * be finished. In effect, the "file" will stop part way
-+ * through that line.
-+ */
-+int
-+ipsec_rj_walker_procprint(struct radij_node *rn, void *w0)
-+{
-+ struct eroute *ro = (struct eroute *)rn;
-+ struct rjtentry *rd = (struct rjtentry *)rn;
-+ struct wsbuf *w = (struct wsbuf *)w0;
-+ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF];
-+ char buf3[16];
-+ char sa[SATOT_BUF];
-+ size_t sa_len, buf_len;
-+ struct sockaddr_encap *key, *mask;
-+
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:ipsec_rj_walker_procprint: "
-+ "rn=0p%p, w0=0p%p\n",
-+ rn,
-+ w0);
-+ if (rn->rj_b >= 0) {
-+ return 0;
-+ }
-+
-+ key = rd_key(rd);
-+ mask = rd_mask(rd);
-+
-+ if (key == NULL || mask == NULL) {
-+ return 0;
-+ }
-+
-+ buf_len = subnettoa(key->sen_ip_src, mask->sen_ip_src, 0, buf1, sizeof(buf1));
-+ if(key->sen_sport != 0) {
-+ sprintf(buf1+buf_len-1, ":%d", ntohs(key->sen_sport));
-+ }
-+
-+ buf_len = subnettoa(key->sen_ip_dst, mask->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ if(key->sen_dport != 0) {
-+ sprintf(buf2+buf_len-1, ":%d", ntohs(key->sen_dport));
-+ }
-+
-+ buf3[0]='\0';
-+ if(key->sen_proto != 0) {
-+ sprintf(buf3, ":%d", key->sen_proto);
-+ }
-+
-+ sa_len = satot(&ro->er_said, 'x', sa, sizeof(sa));
-+ w->len += ipsec_snprintf(w->buffer + w->len,
-+ w->length - w->len,
-+ "%-10d "
-+ "%-18s -> %-18s => %s%s\n",
-+ ro->er_count,
-+ buf1,
-+ buf2,
-+ sa_len ? sa : " (error)",
-+ buf3);
-+
-+ {
-+ /* snprintf can only fill the last character with NUL
-+ * so the maximum useful character is w->length-1.
-+ * However, if w->length == 0, we cannot go back.
-+ * (w->length surely cannot be negative.)
-+ */
-+ int max_content = w->length > 0? w->length-1 : 0;
-+
-+ if (w->len >= max_content) {
-+ /* we've done all that can fit -- stop treewalking */
-+ w->len = max_content; /* truncate crap */
-+ return -ENOBUFS;
-+ } else {
-+ const off_t pos = w->begin + w->len; /* file position of end of what we've generated */
-+
-+ if (pos <= w->offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ w->len = 0;
-+ w->begin = pos;
-+ }
-+ return 0;
-+ }
-+ }
-+}
-+#endif /* CONFIG_PROC_FS */
-+
-+int
-+ipsec_rj_walker_delete(struct radij_node *rn, void *w0)
-+{
-+ struct eroute *ro;
-+ struct rjtentry *rd = (struct rjtentry *)rn;
-+ struct radij_node *rn2;
-+ int error;
-+ struct sockaddr_encap *key, *mask;
-+
-+ key = rd_key(rd);
-+ mask = rd_mask(rd);
-+
-+ if(!key || !mask) {
-+ return -ENODATA;
-+ }
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_radij) {
-+ char buf1[SUBNETTOA_BUF], buf2[SUBNETTOA_BUF];
-+ subnettoa(key->sen_ip_src, mask->sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(key->sen_ip_dst, mask->sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:ipsec_rj_walker_delete: "
-+ "deleting: %s -> %s\n",
-+ buf1,
-+ buf2);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ if((error = rj_delete(key, mask, rnh, &rn2))) {
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:ipsec_rj_walker_delete: "
-+ "rj_delete failed with error=%d.\n", error);
-+ return error;
-+ }
-+
-+ if(rn2 != rn) {
-+ printk("klips_debug:ipsec_rj_walker_delete: "
-+ "tried to delete a different node?!? This should never happen!\n");
-+ }
-+
-+ ro = (struct eroute *)rn;
-+
-+ if (ro->er_ident_s.data)
-+ kfree(ro->er_ident_s.data);
-+ if (ro->er_ident_d.data)
-+ kfree(ro->er_ident_d.data);
-+
-+ memset((caddr_t)rn, 0, sizeof (struct eroute));
-+ kfree(rn);
-+
-+ return 0;
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_rcv.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,2124 @@
-+/*
-+ * receive code
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998-2003 Richard Guy Briggs.
-+ * Copyright (C) 2004-2007 Michael Richardson <mcr@xelerance.com>
-+ * Copyright (C) 2007-2008 Paul Wouters <paul@xelerance.com>
-+ *
-+ * OCF/receive state machine written by
-+ * David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2004-2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <net/xfrm.h>
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# ifdef NEED_SPINLOCK_TYPES
-+# include <linux/spinlock_types.h>
-+# endif
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "openswan/ipsec_kern24.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_mast.h"
-+#include "openswan/ipsec_rcv.h"
-+
-+#include "openswan/ipsec_auth.h"
-+
-+#include "openswan/ipsec_esp.h"
-+
-+#ifdef CONFIG_KLIPS_AH
-+#include "openswan/ipsec_ah.h"
-+#endif /* CONFIG_KLIPS_AH */
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipsec_ipcomp.h"
-+#endif /* CONFIG_KLIPS_COMP */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+#include "openswan/ipsec_kern24.h"
-+
-+#ifdef CONFIG_KLIPS_OCF
-+#include "ipsec_ocf.h"
-+#endif
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+#include <linux/udp.h>
-+#endif
-+
-+/* This is a private use protocol, and AT&T should be ashamed. They should have
-+ * used protocol # 59, which is "no next header" instead of 0xFE.
-+ */
-+#ifndef IPPROTO_ATT_HEARTBEAT
-+#define IPPROTO_ATT_HEARTBEAT 0xFE
-+#endif
-+
-+/* management of buffers */
-+static struct ipsec_rcv_state *ipsec_rcv_state_new (void);
-+static void ipsec_rcv_state_delete (struct ipsec_rcv_state *irs);
-+
-+/*
-+ * Check-replay-window routine, adapted from the original
-+ * by J. Hughes, from draft-ietf-ipsec-esp-des-md5-03.txt
-+ *
-+ * This is a routine that implements a 64 packet window. This is intend-
-+ * ed on being an implementation sample.
-+ */
-+
-+/*
-+ * Test whether sequence number seq would be accepted by the anti-replay
-+ * window of ipsp. The SA is only read here, never modified.
-+ *
-+ * Returns 1 if the packet is acceptable (replay checking is switched off,
-+ * seq is newer than the last sequence number recorded, or seq lies inside
-+ * the window and is not yet marked as seen). Returns 0 if seq is zero,
-+ * too old for the window, or already marked in the bitmap.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_checkreplaywindow(struct ipsec_sa*ipsp, __u32 seq)
-+{
-+	__u32 diff;
-+
-+	if (ipsp->ips_replaywin == 0)	/* replay shut off */
-+		return 1;
-+	if (seq == 0)
-+		return 0;		/* first == 0 or wrapped */
-+
-+	/* new larger sequence number */
-+	if (seq > ipsp->ips_replaywin_lastseq)
-+		return 1;		/* larger is good */
-+
-+	diff = ipsp->ips_replaywin_lastseq - seq;
-+
-+	/* too old or wrapped */ /* if wrapped, kill off SA? */
-+	if (diff >= ipsp->ips_replaywin)
-+		return 0;
-+
-+	/*
-+	 * The shift count below must stay smaller than the width of the
-+	 * bitmap; a packet further back than that cannot be tracked, so it
-+	 * is treated as too old.
-+	 */
-+	if (diff >= 8 * sizeof(ipsp->ips_replaywin_bitmap))
-+		return 0;
-+
-+	/*
-+	 * this packet already seen
-+	 * (the 1 is widened before shifting: shifting a plain int constant
-+	 * by 31 bits or more is undefined)
-+	 */
-+	if (ipsp->ips_replaywin_bitmap & ((__u64)1 << diff))
-+		return 0;
-+	return 1;			/* out of order but good */
-+}
-+
-+/*
-+ * Record sequence number seq in the anti-replay window of ipsp.
-+ *
-+ * Applies the same acceptance rules as ipsec_checkreplaywindow(), but also
-+ * updates ips_replaywin_bitmap, ips_replaywin_lastseq and
-+ * ips_replaywin_maxdiff when the packet is accepted.
-+ *
-+ * Returns 1 if the packet was accepted (or replay checking is switched
-+ * off), 0 if seq is zero, too old for the window, or already seen.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_updatereplaywindow(struct ipsec_sa*ipsp, __u32 seq)
-+{
-+	/* number of packets the bitmap can physically remember */
-+	const __u32 bitmap_bits = 8 * sizeof(ipsp->ips_replaywin_bitmap);
-+	__u32 diff;
-+
-+	if (ipsp->ips_replaywin == 0)	/* replay shut off */
-+		return 1;
-+	if (seq == 0)
-+		return 0;		/* first == 0 or wrapped */
-+
-+	/* new larger sequence number */
-+	if (seq > ipsp->ips_replaywin_lastseq) {
-+		diff = seq - ipsp->ips_replaywin_lastseq;
-+
-+		/*
-+		 * In win, set bit for this pkt. The shift is only done while
-+		 * it is smaller than the bitmap width; a larger jump pushes
-+		 * every remembered packet out of the bitmap anyway.
-+		 */
-+		if (diff < ipsp->ips_replaywin && diff < bitmap_bits)
-+			ipsp->ips_replaywin_bitmap =
-+				(ipsp->ips_replaywin_bitmap << diff) | 1;
-+		else
-+			/* This packet has way larger seq num */
-+			ipsp->ips_replaywin_bitmap = 1;
-+
-+		/* keep track of the largest gap of skipped sequence numbers */
-+		if(seq - ipsp->ips_replaywin_lastseq - 1 > ipsp->ips_replaywin_maxdiff) {
-+			ipsp->ips_replaywin_maxdiff = seq - ipsp->ips_replaywin_lastseq - 1;
-+		}
-+		ipsp->ips_replaywin_lastseq = seq;
-+		return 1;		/* larger is good */
-+	}
-+	diff = ipsp->ips_replaywin_lastseq - seq;
-+
-+	/* too old or wrapped */ /* if wrapped, kill off SA? */
-+	if (diff >= ipsp->ips_replaywin) {
-+/*
-+		if(seq < 0.25*max && ipsp->ips_replaywin_lastseq > 0.75*max) {
-+			ipsec_sa_delchain(ipsp);
-+		}
-+*/
-+		return 0;
-+	}
-+
-+	/* further back than the bitmap can represent: cannot be tracked */
-+	if (diff >= bitmap_bits)
-+		return 0;
-+
-+	/*
-+	 * this packet already seen
-+	 * (the 1 is widened before shifting: shifting a plain int constant
-+	 * by 31 bits or more is undefined)
-+	 */
-+	if (ipsp->ips_replaywin_bitmap & ((__u64)1 << diff))
-+		return 0;
-+	ipsp->ips_replaywin_bitmap |= ((__u64)1 << diff);	/* mark as seen */
-+	return 1;			/* out of order but good */
-+}
-+
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+/*
-+ * Built-in MD5 authenticator description: three hash routines followed by
-+ * AHMD596_ALEN. NOTE(review): presumably init/update/final callbacks and
-+ * the authenticator length -- confirm against struct auth_alg.
-+ */
-+struct auth_alg ipsec_rcv_md5[]={
-+ {osMD5Init, osMD5Update, osMD5Final, AHMD596_ALEN}
-+};
-+
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+/* Built-in SHA1 authenticator description, same layout as the MD5 entry. */
-+struct auth_alg ipsec_rcv_sha1[]={
-+ {SHA1Init, SHA1Update, SHA1Final, AHSHA196_ALEN}
-+};
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+
-+
-+/*
-+ * Refresh the printable source and destination addresses cached in irs
-+ * (ipsaddr_txt / ipdaddr_txt) from the IP header that irs->ipp points at.
-+ */
-+static inline void ipsec_rcv_redodebug(struct ipsec_rcv_state *irs)
-+{
-+	struct iphdr *hdr = irs->ipp;
-+	struct in_addr addr;
-+
-+	/* source address first ... */
-+	addr.s_addr = hdr->saddr;
-+	addrtoa(addr, 0, irs->ipsaddr_txt, sizeof(irs->ipsaddr_txt));
-+
-+	/* ... then the destination, reusing the same scratch variable */
-+	addr.s_addr = hdr->daddr;
-+	addrtoa(addr, 0, irs->ipdaddr_txt, sizeof(irs->ipdaddr_txt));
-+}
-+
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+/* expands to one switch case that returns the spelled-out name of code */
-+#define IPSEC_RCV_ERR_NAME(code)	case code: return #code
-+
-+/*
-+ * Translate a receive-path status code into a printable name for debug
-+ * output. A code without a case below is rendered as a decimal number in
-+ * a static buffer, so that particular result is overwritten by the next
-+ * call that takes the same path.
-+ */
-+DEBUG_NO_STATIC char *
-+ipsec_rcv_err(int err)
-+{
-+	static char numbuf[32];
-+
-+	switch (err) {
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_PENDING);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_LASTPROTO);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_OK);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_BADPROTO);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_BADLEN);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_ESP_BADALG);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_3DES_BADBLOCKING);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_ESP_DECAPFAIL);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_DECAPFAIL);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_SAIDNOTFOUND);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_IPCOMPALONE);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_IPCOMPFAILED);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_SAIDNOTLIVE);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_FAILEDINBOUND);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_LIFETIMEFAILED);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_BADAUTH);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_REPLAYFAILED);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_AUTHFAILED);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_REPLAYROLLED);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_BAD_DECRYPT);
-+	IPSEC_RCV_ERR_NAME(IPSEC_RCV_REALLYBAD);
-+	default:
-+		break;
-+	}
-+
-+	/* unknown code: fall back to its numeric value */
-+	snprintf(numbuf, sizeof(numbuf), "%d", err);
-+	return numbuf;
-+}
-+#undef IPSEC_RCV_ERR_NAME
-+#endif
-+
-+/*
-+ * here is a state machine to handle receiving ipsec packets.
-+ * basically we keep getting re-entered until processing is
-+ * complete. For the simple case we step down the states and finish.
-+ * each state is ideally some logical part of the process. If a state
-+ * can pend (ie., require async processing to complete), then this
-+ * should be the part of last action before it returns IPSEC_RCV_PENDING
-+ *
-+ * Any particular action may alter the next_state in irs to move us to
-+ * a state other than the preferred "next_state", but this is the
-+ * exception and is highlighted when it is done.
-+ *
-+ * prototypes for state action
-+ */
-+
-+static enum ipsec_rcv_value ipsec_rcv_init(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_decap_init(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_decap_lookup(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_auth_init(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_auth_decap(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_auth_calc(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_auth_chk(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_decrypt(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_decap_cont(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_cleanup(struct ipsec_rcv_state *irs);
-+static enum ipsec_rcv_value ipsec_rcv_complete(struct ipsec_rcv_state *irs);
-+
-+/*
-+ * the state table and each action
-+ */
-+
-+/*
-+ * One entry per receive state, indexed by the IPSEC_RSM_* state value.
-+ * Each entry names the action function for that state and the state that
-+ * normally follows it. The terminal IPSEC_RSM_DONE entry has no action and
-+ * points back at itself.
-+ */
-+struct {
-+ enum ipsec_rcv_value (*action)(struct ipsec_rcv_state *irs);
-+ int next_state;
-+} rcv_state_table[] = {
-+ [IPSEC_RSM_INIT] = {ipsec_rcv_init, IPSEC_RSM_DECAP_INIT },
-+ [IPSEC_RSM_DECAP_INIT] = {ipsec_rcv_decap_init, IPSEC_RSM_DECAP_LOOKUP },
-+ [IPSEC_RSM_DECAP_LOOKUP] = {ipsec_rcv_decap_lookup,IPSEC_RSM_AUTH_INIT },
-+ [IPSEC_RSM_AUTH_INIT] = {ipsec_rcv_auth_init, IPSEC_RSM_AUTH_DECAP },
-+ [IPSEC_RSM_AUTH_DECAP] = {ipsec_rcv_auth_decap, IPSEC_RSM_AUTH_CALC },
-+ [IPSEC_RSM_AUTH_CALC] = {ipsec_rcv_auth_calc, IPSEC_RSM_AUTH_CHK },
-+ [IPSEC_RSM_AUTH_CHK] = {ipsec_rcv_auth_chk, IPSEC_RSM_DECRYPT },
-+ [IPSEC_RSM_DECRYPT] = {ipsec_rcv_decrypt, IPSEC_RSM_DECAP_CONT },
-+ [IPSEC_RSM_DECAP_CONT] = {ipsec_rcv_decap_cont, IPSEC_RSM_CLEANUP },
-+ [IPSEC_RSM_CLEANUP] = {ipsec_rcv_cleanup, IPSEC_RSM_COMPLETE },
-+ [IPSEC_RSM_COMPLETE] = {ipsec_rcv_complete, IPSEC_RSM_DONE },
-+
-+ /* terminal state: nothing left to run */
-+ [IPSEC_RSM_DONE] = {NULL, IPSEC_RSM_DONE},
-+};
-+
-+
-+
-+/*
-+ * Make sure the receive path owns a private, writable skb.
-+ *
-+ * A cloned skb (most likely because a packet sniffer such as tcpdump is
-+ * attached to the interface) is copied, including irs->hard_header_len
-+ * bytes of link-layer header in front of the data. An skb that is not
-+ * cloned is handed back untouched.
-+ *
-+ * Returns the skb to continue with, or NULL on failure, in which case the
-+ * skb has been passed to ipsec_kfree_skb().
-+ */
-+struct sk_buff *ipsec_rcv_unclone(struct sk_buff *skb,
-+				  struct ipsec_rcv_state *irs)
-+{
-+	/* nothing to do when nobody else shares the buffer */
-+	if(!skb_cloned(skb)) {
-+		return skb;
-+	}
-+
-+	/* include any mac header while copying.. */
-+	if(skb_headroom(skb) < irs->hard_header_len) {
-+		printk(KERN_WARNING "klips_error:ipsec_rcv: "
-+		       "tried to skb_push hhlen=%d, %d available. This should never happen, please report.\n",
-+		       irs->hard_header_len,
-+		       skb_headroom(skb));
-+		goto drop;
-+	}
-+	skb_push(skb, irs->hard_header_len);
-+
-+	/* the two skb_cow() flavours report failure differently */
-+#ifdef SKB_COW_NEW
-+	if(skb_cow(skb, skb_headroom(skb)) != 0) {
-+		goto drop;
-+	}
-+#else /* SKB_COW_NEW */
-+	if((skb = skb_cow(skb, skb_headroom(skb))) == NULL) {
-+		goto drop;
-+	}
-+#endif /* SKB_COW_NEW */
-+
-+	/* strip the mac header again so skb->data is back where it was */
-+	if(skb->len < irs->hard_header_len) {
-+		printk(KERN_WARNING "klips_error:ipsec_rcv: "
-+		       "tried to skb_pull hhlen=%d, %d available. This should never happen, please report.\n",
-+		       irs->hard_header_len,
-+		       skb->len);
-+		goto drop;
-+	}
-+	skb_pull(skb, irs->hard_header_len);
-+	return skb;
-+
-+drop:
-+	ipsec_kfree_skb(skb);
-+	return NULL;
-+}
-+
-+
-+
-+
-+#if !defined(NET_26) && defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+/*
-+ * decapsulate a UDP encapsulated ESP packet
-+ *
-+ * Only classifies the packet: on a match it records the UDP ports, the
-+ * encapsulation type and the number of encapsulation bytes in irs
-+ * (natt_sport, natt_dport, natt_type, natt_len). The packet itself is not
-+ * modified here.
-+ *
-+ * Returns skb when processing should continue (also for an skb that has
-+ * no socket or is not UDP), or NULL when this code will not handle the
-+ * packet. *udp_decap_ret_p is 0 except for the "IKE packet" case, where
-+ * it is set to -1.
-+ */
-+struct sk_buff *ipsec_rcv_natt_decap(struct sk_buff *skb
-+ , struct ipsec_rcv_state *irs
-+ , int *udp_decap_ret_p)
-+{
-+ *udp_decap_ret_p = 0;
-+ if (skb->sk && ip_hdr(skb) && ip_hdr(skb)->protocol==IPPROTO_UDP) {
-+ /**
-+ * Packet comes from udp_queue_rcv_skb so it is already defrag,
-+ * checksum verified, ... (ie safe to use)
-+ *
-+ * If the packet is not for us, return -1 and udp_queue_rcv_skb
-+ * will continue to handle it (do not kfree skb !!).
-+ */
-+
-+#ifndef UDP_OPT_IN_SOCK
-+ /* no udp_opt in the socket: overlay a one-field stand-in on af_tcp */
-+ struct udp_opt {
-+ __u32 esp_in_udp;
-+ };
-+ struct udp_opt *tp = (struct udp_opt *)&(skb->sk->tp_pinfo.af_tcp);
-+#else
-+ struct udp_opt *tp = &(skb->sk->tp_pinfo.af_udp);
-+#endif
-+
-+ struct iphdr *ip = ip_hdr(skb);
-+ /* UDP header starts ihl 32-bit words after the IP header */
-+ struct udphdr *udp = (struct udphdr *)((__u32 *)ip+ip->ihl);
-+ __u8 *udpdata = (__u8 *)udp + sizeof(struct udphdr);
-+ __u32 *udpdata32 = (__u32 *)udpdata;
-+
-+ irs->natt_sport = ntohs(udp->source);
-+ irs->natt_dport = ntohs(udp->dest);
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "suspected ESPinUDP packet (NAT-Traversal) [%d].\n",
-+ tp->esp_in_udp);
-+ KLIPS_IP_PRINT(debug_rcv, ip);
-+
-+ if (udpdata < skb->tail) {
-+ /* number of bytes that follow the UDP header */
-+ unsigned int len = skb->tail - udpdata;
-+ if ((len==1) && (udpdata[0]==0xff)) {
-+ /* a single 0xff byte of payload is a keepalive: nothing to decapsulate */
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ /* not IPv6 compliant message */
-+ "NAT-keepalive from %d.%d.%d.%d.\n", NIPQUAD(ip->saddr));
-+ *udp_decap_ret_p = 0;
-+ return NULL;
-+ }
-+ else if ( (tp->esp_in_udp == ESPINUDP_WITH_NON_IKE) &&
-+ (len > (2*sizeof(__u32) + sizeof(struct esphdr))) &&
-+ (udpdata32[0]==0) && (udpdata32[1]==0) ) {
-+ /* ESP Packet with Non-IKE header */
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "ESPinUDP pkt with Non-IKE - spi=0x%x\n",
-+ ntohl(udpdata32[2]));
-+ irs->natt_type = ESPINUDP_WITH_NON_IKE;
-+ /* UDP header plus the two zero words precede the ESP header */
-+ irs->natt_len = sizeof(struct udphdr)+(2*sizeof(__u32));
-+ }
-+ else if ( (tp->esp_in_udp == ESPINUDP_WITH_NON_ESP) &&
-+ (len > sizeof(struct esphdr)) &&
-+ (udpdata32[0]!=0) ) {
-+ /* ESP Packet without Non-ESP header */
-+ irs->natt_type = ESPINUDP_WITH_NON_ESP;
-+ /* only the UDP header precedes the ESP header */
-+ irs->natt_len = sizeof(struct udphdr);
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "ESPinUDP pkt without Non-ESP - spi=0x%x\n",
-+ ntohl(udpdata32[0]));
-+ }
-+ else {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "IKE packet - not handled here\n");
-+ *udp_decap_ret_p = -1;
-+ return NULL;
-+ }
-+ }
-+ else {
-+ /* no payload after the UDP header at all */
-+ return NULL;
-+ }
-+ }
-+ return skb;
-+}
-+#endif
-+
-+#ifdef HAVE_IPSEC_SAREF
-+/*
-+ * Attach the IPsec SA references of a received packet to msg as an
-+ * IP_IPSEC_REFINFO control message (level SOL_IP).
-+ *
-+ * The control message carries two xfrm_sec_unique_t values: refs[0] is the
-+ * reference stored in the skb's sec_path, refs[1] is ips_refhim of the SA
-+ * that reference resolves to. refs[1] is zero when the SA lookup fails.
-+ * Nothing is attached when the skb has no sec_path.
-+ */
-+void ip_cmsg_recv_ipsec(struct msghdr *msg, struct sk_buff *skb)
-+{
-+	struct ipsec_sa *sa1;
-+	struct sec_path *sp;
-+	xfrm_sec_unique_t refs[2];
-+
-+	sp = skb->sp;
-+
-+	if(sp==NULL) return;
-+
-+	KLIPS_PRINT(debug_rcv, "retrieving saref=%u from skb=%p\n",
-+		    sp->ref, skb);
-+
-+	/*
-+	 * Both entries are copied out through put_cmsg(), so start from a
-+	 * fully defined array: a failed lookup must not leave refs[1]
-+	 * holding whatever happened to be on the stack.
-+	 */
-+	memset(refs, 0, sizeof(refs));
-+	refs[0]=sp->ref;
-+
-+	/*
-+	 * NOTE(review): if ipsec_sa_getbyref() takes a reference on the SA,
-+	 * it is never dropped here -- confirm against its definition.
-+	 */
-+	sa1 = ipsec_sa_getbyref(sp->ref);
-+	if(sa1) {
-+		refs[1]= sa1->ips_refhim;
-+	}
-+
-+	put_cmsg(msg, SOL_IP, IP_IPSEC_REFINFO,
-+		 sizeof(xfrm_sec_unique_t)*2, &refs);
-+}
-+#endif
-+
-+
-+/*
-+ * Re-home the packet on the interface configured for the current SA.
-+ *
-+ * When irs->ipsp has an ips_out device, irs->skb->dev is switched to it
-+ * and, if that device provides get_stats, irs->stats is pointed at the
-+ * statistics it returns. Nothing happens when there is no skb, no current
-+ * SA, or the SA has no ips_out device.
-+ */
-+void ipsec_rcv_setoutif(struct ipsec_rcv_state *irs)
-+{
-+	struct sk_buff *skb = irs->skb;
-+
-+	/*
-+	 * irs->ipsp can legitimately be NULL here: ipsec_rcv_auth_decap()
-+	 * replaces it with ips_next (possibly NULL) when it skips an IPCOMP
-+	 * SA and then re-enters through this function.
-+	 */
-+	if(skb!=NULL && irs->ipsp!=NULL && irs->ipsp->ips_out) {
-+		if(skb->dev != irs->ipsp->ips_out) {
-+			/* skb->dev may be NULL, so do not dereference it blindly */
-+			KLIPS_PRINT(debug_rcv,
-+				    "changing originating interface from %s to %s\n",
-+				    skb->dev ? skb->dev->name : "NULL",
-+				    irs->ipsp->ips_out->name);
-+		}
-+		skb->dev = irs->ipsp->ips_out;
-+
-+		if(skb->dev && skb->dev->get_stats) {
-+			struct net_device_stats *stats = skb->dev->get_stats(skb->dev);
-+			irs->stats = stats;
-+		}
-+	}
-+}
-+
-+/*
-+ * Strip the outer IP header of an IPIP tunnel packet and validate the
-+ * inner packet against the SA in irs->ipsp.
-+ *
-+ * The packet must carry protocol IPPROTO_IPIP or IPPROTO_ATT_HEARTBEAT.
-+ * Only IPPROTO_IPIP packets are actually decapsulated: the SA lifetime
-+ * counters are updated, the outer header is pulled off, the network and
-+ * transport header offsets are re-based on the inner packet and irs->ipp
-+ * is pointed at the inner IP header.
-+ *
-+ * With sysctl_ipsec_inbound_policy_check set, the packet is also dropped
-+ * when another SA follows the IPIP SA, when the outer source address is
-+ * not the SA's source address, or when the addresses finally in irs->ipp
-+ * fall outside the SA's source/destination flow.
-+ *
-+ * Returns IPSEC_RCV_OK on success and IPSEC_RCV_DECAPFAIL on any failure.
-+ * The skb is not freed here.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_decap_ipip(struct ipsec_rcv_state *irs)
-+{
-+	struct ipsec_sa *ipsp = NULL;
-+	struct ipsec_sa* ipsnext = NULL;
-+	struct iphdr *ipp;
-+	struct sk_buff *skb;
-+	enum ipsec_rcv_value result = IPSEC_RCV_DECAPFAIL;
-+
-+	ipp = irs->ipp;
-+	ipsp = irs->ipsp;
-+	skb = irs->skb;
-+	irs->sa_len = satot(&irs->said, 0, irs->sa, sizeof(irs->sa));
-+	if((ipp->protocol != IPPROTO_IPIP) &&
-+	   (ipp->protocol != IPPROTO_ATT_HEARTBEAT)) {  /* AT&T heartbeats to SIG/GIG */
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "SA:%s, Hey!  How did this get through?  Dropped.\n",
-+			    irs->sa_len ? irs->sa : " (error)");
-+		if(irs->stats) {
-+			irs->stats->rx_dropped++;
-+		}
-+		goto rcvleave;
-+	}
-+	if(sysctl_ipsec_inbound_policy_check) {
-+		struct sockaddr_in *psin = (struct sockaddr_in*)(ipsp->ips_addr_s);
-+		/* the IPIP SA has to be the last one in its chain */
-+		if((ipsnext = ipsp->ips_next)) {
-+			char sa2[SATOT_BUF];
-+			size_t sa_len2;
-+			sa_len2 = satot(&ipsnext->ips_said, 0, sa2, sizeof(sa2));
-+			KLIPS_PRINT(debug_rcv,
-+				    "klips_debug:ipsec_rcv: "
-+				    "unexpected SA:%s after IPIP SA:%s\n",
-+				    sa_len2 ? sa2 : " (error)",
-+				    irs->sa_len ? irs->sa : " (error)");
-+			if(irs->stats) {
-+				irs->stats->rx_dropped++;
-+			}
-+			goto rcvleave;
-+		}
-+		/* the outer source must be the tunnel peer recorded in the SA */
-+		if(ipp->saddr != psin->sin_addr.s_addr) {
-+			KLIPS_PRINT(debug_rcv,
-+				    "klips_debug:ipsec_rcv: "
-+				    "SA:%s, src=%s(%08x) does not match expected 0x%08x.\n",
-+				    irs->sa_len ? irs->sa : " (error)",
-+				    irs->ipsaddr_txt,
-+				    ipp->saddr, psin->sin_addr.s_addr);
-+			if(irs->stats) {
-+				irs->stats->rx_dropped++;
-+			}
-+			goto rcvleave;
-+		}
-+	}
-+
-+	ipsec_rcv_setoutif(irs);
-+
-+	if(ipp->protocol == IPPROTO_IPIP)  /* added to support AT&T heartbeats to SIG/GIG */
-+	{
-+		/*
-+		 * XXX this needs to be locked from when it was first looked
-+		 * up in the decapsulation loop.  Perhaps it is better to put
-+		 * the IPIP decap inside the loop.
-+		 */
-+		ipsp->ips_life.ipl_bytes.ipl_count += skb->len;
-+		ipsp->ips_life.ipl_bytes.ipl_last = skb->len;
-+
-+		/* first use of this SA: remember when that was */
-+		if(!ipsp->ips_life.ipl_usetime.ipl_count) {
-+			ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ;
-+		}
-+		ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ;
-+		ipsp->ips_life.ipl_packets.ipl_count += 1;
-+
-+		if(skb->len < irs->iphlen) {
-+			printk(KERN_WARNING "klips_debug:ipsec_rcv: "
-+			       "tried to skb_pull iphlen=%d, %d available.  This should never happen, please report.\n",
-+			       irs->iphlen,
-+			       (int)(skb->len));
-+
-+			goto rcvleave;
-+		}
-+
-+		/*
-+		 * we need to pull up by size of IP header,
-+		 * options, but also by any UDP/ESP encap there might
-+		 * have been, and this deals with all cases.
-+		 */
-+		skb_pull(skb, (skb_transport_header(skb) - skb_network_header(skb)));
-+
-+		/* new L3 header is where L4 payload was */
-+		skb_set_network_header(skb, ipsec_skb_offset(skb, skb_transport_header(skb)));
-+
-+		/* now setup new L4 payload location */
-+		ipp = (struct iphdr *)skb_network_header(skb);
-+		skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) + (ipp->ihl << 2)));
-+
-+
-+		/* remove any saved options that we might have,
-+		 * since we have a new IP header.
-+		 */
-+		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-+
-+#if 0
-+		KLIPS_PRINT(debug_rcv, "csum: %d\n", ip_fast_csum((u8 *)ipp, ipp->ihl));
-+#endif
-+
-+		/* re-do any strings for debugging */
-+		irs->ipp = ipp;
-+		ipsec_rcv_redodebug(irs);
-+
-+		skb->protocol = htons(ETH_P_IP);
-+		skb->ip_summed = 0;
-+		KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+			    "klips_debug:ipsec_rcv: "
-+			    "IPIP tunnel stripped.\n");
-+		KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, ipp);
-+	}
-+
-+	if(sysctl_ipsec_inbound_policy_check
-+	   /*
-+	      Note: "xor" (^) logically replaces "not equal"
-+	      (!=) and "bitwise or" (|) logically replaces
-+	      "boolean or" (||).  This is done to speed up
-+	      execution by doing only bitwise operations and
-+	      no branch operations
-+	   */
-+	   && (((ipp->saddr & ipsp->ips_mask_s.u.v4.sin_addr.s_addr)
-+		^ ipsp->ips_flow_s.u.v4.sin_addr.s_addr)
-+	       | ((ipp->daddr & ipsp->ips_mask_d.u.v4.sin_addr.s_addr)
-+		  ^ ipsp->ips_flow_d.u.v4.sin_addr.s_addr)) )
-+	{
-+		char sflow_txt[SUBNETTOA_BUF], dflow_txt[SUBNETTOA_BUF];
-+
-+		subnettoa(ipsp->ips_flow_s.u.v4.sin_addr,
-+			  ipsp->ips_mask_s.u.v4.sin_addr,
-+			  0, sflow_txt, sizeof(sflow_txt));
-+		subnettoa(ipsp->ips_flow_d.u.v4.sin_addr,
-+			  ipsp->ips_mask_d.u.v4.sin_addr,
-+			  0, dflow_txt, sizeof(dflow_txt));
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "SA:%s, inner tunnel policy [%s -> %s] does not agree with pkt contents [%s -> %s].\n",
-+			    irs->sa_len ? irs->sa : " (error)",
-+			    sflow_txt,
-+			    dflow_txt,
-+			    irs->ipsaddr_txt,
-+			    irs->ipdaddr_txt);
-+		if(irs->stats) {
-+			irs->stats->rx_dropped++;
-+		}
-+		goto rcvleave;
-+	}
-+#ifdef CONFIG_NETFILTER
-+	/* replace the SA-reference bits of nfmark with this SA's reference */
-+	skb->nfmark = (skb->nfmark & (~(IPsecSAref2NFmark(IPSEC_SA_REF_TABLE_MASK))))
-+		| IPsecSAref2NFmark(IPsecSA2SAref(ipsp));
-+	KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+		    "klips_debug:ipsec_rcv: "
-+		    "IPIP SA sets skb->nfmark=0x%x.\n",
-+		    (unsigned)skb->nfmark);
-+#endif /* CONFIG_NETFILTER */
-+
-+	result = IPSEC_RCV_OK;
-+
-+rcvleave:
-+	return result;
-+}
-+
-+/*
-+ * get all the initial checking and setup done. None of this can be
-+ * offloaded by any currently supported hardware.
-+ *
-+ * the following things should be setup when we exit this function.
-+ *
-+ * irs->stats == stats structure (or NULL)
-+ * irs->ipp = IP header.
-+ * irs->len = total length of packet
-+ * skb->nh.iph = ipp;
-+ * skb->h.raw = start of payload
-+ * irs->ipsp = NULL.
-+ * irs->iphlen = N/A = is recalculated.
-+ * irs->ilen = 0;
-+ * irs->authlen = 0;
-+ * irs->authfuncs = NULL;
-+ * irs->skb = the skb;
-+ *
-+ * proto_funcs should be from ipsec_esp.c, ipsec_ah.c or ipsec_ipcomp.c.
-+ *
-+ */
-+
-+/*
-+ * First state of the receive state machine: validate the incoming skb and
-+ * prepare irs for decapsulation.
-+ *
-+ * Returns IPSEC_RCV_OK when the packet is an AH or ESP packet that can be
-+ * processed further, IPSEC_RCV_REALLYBAD otherwise (NULL irs/skb/data,
-+ * failure to obtain a private linear copy, undersized ESPinUDP packet, or
-+ * a protocol that is not handled here).
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_init(struct ipsec_rcv_state *irs)
-+{
-+#ifdef CONFIG_KLIPS_DEBUG
-+	struct net_device *dev;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+	unsigned char protoc;
-+	struct iphdr *ipp;
-+	struct net_device_stats *stats = NULL;	/* This device's statistics */
-+	int i;
-+	struct sk_buff *skb;
-+
-+	/* irs has to be checked before anything, tracing included, reads it */
-+	if (irs == NULL) {
-+		KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_rcv_init: NULL irs.");
-+		return IPSEC_RCV_REALLYBAD;
-+	}
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	skb = irs->skb;
-+	if (!skb) {
-+		KLIPS_PRINT(debug_rcv, "klips_debug:ipsec_rcv_init: NULL skb.");
-+		return IPSEC_RCV_REALLYBAD;
-+	}
-+#ifdef CONFIG_KLIPS_DEBUG
-+	/* dev only exists in debug builds; remember the original device */
-+	dev = skb->dev;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+	if (skb->data == NULL) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "NULL skb->data passed in, packet is bogus, dropping.\n");
-+		return IPSEC_RCV_REALLYBAD;
-+	}
-+
-+	/* dev->hard_header_len is unreliable and should not be used */
-+	/* klips26_rcv_encap will have already set hard_header_len for us */
-+	if (irs->hard_header_len == 0) {
-+		irs->hard_header_len = skb_mac_header(skb) ? (skb_network_header(skb) - skb_mac_header(skb)) : 0;
-+		if((irs->hard_header_len < 0) || (irs->hard_header_len > skb_headroom(skb)))
-+			irs->hard_header_len = 0;
-+	}
-+
-+	/* from here on skb may be a different buffer than irs->skb */
-+	skb = ipsec_rcv_unclone(skb, irs);
-+	if(skb == NULL) {
-+		return IPSEC_RCV_REALLYBAD;
-+	}
-+
-+#if IP_FRAGMENT_LINEARIZE
-+	/* In Linux 2.4.4, we may have to reassemble fragments. They are
-+	   not assembled automatically to save TCP from having to copy
-+	   twice.
-+	*/
-+	if (skb_is_nonlinear(skb)) {
-+#ifdef HAVE_NEW_SKB_LINEARIZE
-+		if (skb_linearize_cow(skb) != 0)
-+#else
-+		if (skb_linearize(skb, GFP_ATOMIC) != 0)
-+#endif
-+		{
-+			return IPSEC_RCV_REALLYBAD;
-+		}
-+	}
-+#endif /* IP_FRAGMENT_LINEARIZE */
-+
-+	ipp = ip_hdr(skb);
-+	irs->ipp = ipp;
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(NET_26)
-+	if (irs->natt_len) {
-+		/**
-+		 * Now, we are sure packet is ESPinUDP, and we have a private
-+		 * copy that has been linearized, remove natt_len bytes
-+		 * from packet and modify protocol to ESP.
-+		 */
-+		if (((unsigned char *)skb->data > (unsigned char *)ip_hdr(skb))
-+		    && ((unsigned char *)ip_hdr(skb) > (unsigned char *)skb->head))
-+		{
-+			unsigned int _len = (unsigned char *)skb->data -
-+				(unsigned char *)ip_hdr(skb);
-+			KLIPS_PRINT(debug_rcv,
-+				    "klips_debug:ipsec_rcv: adjusting skb: skb_push(%u)\n",
-+				    _len);
-+			skb_push(skb, _len);
-+		}
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "removing %d bytes from ESPinUDP packet\n"
-+			    , irs->natt_len);
-+
-+		ipp = (struct iphdr *)skb->data;
-+		irs->iphlen = ipp->ihl << 2;
-+		ipp->tot_len = htons(ntohs(ipp->tot_len) - irs->natt_len);
-+		if (skb->len < irs->iphlen + irs->natt_len) {
-+			printk(KERN_WARNING
-+			       "klips_error:ipsec_rcv: "
-+			       "ESPinUDP packet is too small (%d < %d+%d). "
-+			       "This should never happen, please report.\n",
-+			       (int)(skb->len), irs->iphlen, irs->natt_len);
-+			return IPSEC_RCV_REALLYBAD;
-+		}
-+
-+		/* advance payload pointer to point past the UDP header */
-+		skb->h.raw = skb->h.raw + irs->natt_len;
-+
-+		/* modify protocol */
-+		ipp->protocol = IPPROTO_ESP;
-+
-+		skb->sk = NULL;
-+
-+		KLIPS_IP_PRINT(debug_rcv, ip_hdr(skb));
-+	}
-+#endif
-+
-+	if (debug_rcv)
-+		ipsec_rcv_redodebug(irs);
-+
-+	irs->iphlen = ipp->ihl << 2;
-+
-+	KLIPS_PRINT(debug_rcv,
-+		    "klips_debug:ipsec_rcv: "
-+		    "<<< Info -- ");
-+	KLIPS_PRINTMORE(debug_rcv && skb->dev, "skb->dev=%s ",
-+			skb->dev->name ? skb->dev->name : "NULL");
-+	KLIPS_PRINTMORE(debug_rcv && dev, "dev=%s ",
-+			dev->name ? dev->name : "NULL");
-+	KLIPS_PRINTMORE(debug_rcv, "\n");
-+
-+	KLIPS_PRINT(debug_rcv && !(skb->dev && dev && (skb->dev == dev)),
-+		    "klips_debug:ipsec_rcv: "
-+		    "Informational -- **if this happens, find out why** skb->dev:%s is not equal to dev:%s\n",
-+		    skb->dev ? (skb->dev->name ? skb->dev->name : "NULL") : "NULL",
-+		    dev ? (dev->name ? dev->name : "NULL") : "NULL");
-+
-+	protoc = ipp->protocol;
-+#ifndef NET_21
-+	if((!protocol) || (protocol->protocol != protoc)) {
-+		KLIPS_PRINT(debug_rcv & DB_RX_IPSA,
-+			    "klips_debug:ipsec_rcv: "
-+			    "protocol arg is NULL or unequal to the packet contents, this is odd, using value in packet.\n");
-+	}
-+#endif /* !NET_21 */
-+
-+	/* only AH and ESP are accepted at this point */
-+	if( (protoc != IPPROTO_AH) &&
-+#ifdef CONFIG_KLIPS_IPCOMP_disabled_until_we_register_IPCOMP_HANDLER
-+	    (protoc != IPPROTO_COMP) &&
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+	    (protoc != IPPROTO_ESP) ) {
-+		KLIPS_PRINT(debug_rcv & DB_RX_IPSA,
-+			    "klips_debug:ipsec_rcv: Why the hell is someone "
-+			    "passing me a non-ipsec protocol = %d packet? -- dropped.\n",
-+			    protoc);
-+		return IPSEC_RCV_REALLYBAD;
-+	}
-+
-+	/*
-+	 * if there is an attached ipsec device, then use that device for
-+	 * stats until we know better.
-+	 */
-+	if(skb->dev) {
-+		struct ipsecpriv  *prvdev = NULL;
-+		struct net_device *ipsecdev = NULL;
-+
-+		/* look for the ipsec device that is attached to skb->dev */
-+		for(i = 0; i <= ipsecdevices_max; i++) {
-+			if(ipsecdevices[i] == NULL) continue;
-+			prvdev = ipsecdevices[i]->priv;
-+
-+			if(prvdev == NULL) continue;
-+
-+			if(prvdev->dev == skb->dev) {
-+				ipsecdev = ipsecdevices[i];
-+				break;
-+			}
-+		}
-+
-+		if(ipsecdev) {
-+			skb->dev = ipsecdev;
-+		} else {
-+			skb->dev = ipsec_mast_get_device(0);
-+
-+			/* ipsec_mast_get takes the device */
-+			if(skb->dev) dev_put(skb->dev);
-+		}
-+
-+		/*
-+		 * NOTE(review): when no device matched, prvdev still holds
-+		 * the private data of the last device the loop looked at,
-+		 * so the packet is counted against that device -- confirm
-+		 * this is intended.
-+		 */
-+		if(prvdev) {
-+			stats = (struct net_device_stats *) &(prvdev->mystats);
-+		}
-+	}
-+
-+	if(stats) {
-+		stats->rx_packets++;
-+	}
-+
-+	KLIPS_IP_PRINT(debug_rcv, ipp);
-+
-+	/* set up for decap */
-+	irs->stats= stats;
-+	irs->ipp = ipp;
-+	irs->ipsp = NULL;
-+	irs->ilen = 0;
-+	irs->authlen=0;
-+	irs->authfuncs=NULL;
-+	irs->skb = skb;
-+	return IPSEC_RCV_OK;
-+}
-+
-+
-+/*
-+ * Pick the transform function table that matches the protocol of the
-+ * packet in irs->ipp and store it in irs->proto_funcs.
-+ *
-+ * Returns IPSEC_RCV_OK when a table was selected. For any protocol that
-+ * is not compiled in, rx_errors is counted (when stats are available) and
-+ * IPSEC_RCV_BADPROTO is returned.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_decap_init(struct ipsec_rcv_state *irs)
-+{
-+	int proto;
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	proto = irs->ipp->protocol;
-+
-+	if (proto == IPPROTO_ESP) {
-+		irs->proto_funcs = esp_xform_funcs;
-+		return IPSEC_RCV_OK;
-+	}
-+
-+#ifdef CONFIG_KLIPS_AH
-+	if (proto == IPPROTO_AH) {
-+		irs->proto_funcs = ah_xform_funcs;
-+		return IPSEC_RCV_OK;
-+	}
-+#endif /* CONFIG_KLIPS_AH */
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+	if (proto == IPPROTO_COMP) {
-+		irs->proto_funcs = ipcomp_xform_funcs;
-+		return IPSEC_RCV_OK;
-+	}
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+	/* no transform available for this protocol */
-+	if (irs->stats) {
-+		irs->stats->rx_errors++;
-+	}
-+	return IPSEC_RCV_BADPROTO;
-+}
-+
-+
-+/*
-+ * Collect the per-packet values needed to look up the SA: packet length,
-+ * protocol, IP header length and the destination part of irs->said. The
-+ * IP header checksum field is cleared, and the protocol's rcv_checks
-+ * hook, when present, is run last.
-+ *
-+ * Returns whatever rcv_checks returns, or IPSEC_RCV_OK when the protocol
-+ * has no such hook.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_decap_lookup(struct ipsec_rcv_state *irs)
-+{
-+	struct iphdr *hdr;
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	/* reset per-pass state */
-+	irs->replay = 0;
-+#ifdef CONFIG_KLIPS_ALG
-+	irs->ixt_a = NULL;
-+#endif /* CONFIG_KLIPS_ALG */
-+
-+	hdr = irs->ipp;
-+	irs->len = irs->skb->len;
-+	irs->proto = hdr->protocol;
-+	if (debug_rcv) {
-+		ipsec_rcv_redodebug(irs);
-+	}
-+
-+	/* ihl counts 32-bit words */
-+	irs->iphlen = hdr->ihl * 4;
-+	hdr->check = 0;		/* we know the sum is good */
-+
-+	irs->said.dst.u.v4.sin_family = AF_INET;
-+	irs->said.dst.u.v4.sin_addr.s_addr = hdr->daddr;
-+
-+	/* note: rcv_checks set up the said.spi value, if appropriate */
-+	if (irs->proto_funcs->rcv_checks == NULL) {
-+		return IPSEC_RCV_OK;
-+	}
-+	return (*irs->proto_funcs->rcv_checks)(irs, irs->skb);
-+}
-+
-+
-+/*
-+ * Look up the SA for irs->said and run the inbound policy checks on it.
-+ *
-+ * On success the SA returned by ipsec_sa_getbyid() is stored in irs->ipsp
-+ * and IPSEC_RCV_OK is returned. On every failure after a successful
-+ * lookup the SA is handed back with ipsec_sa_put() and rx_dropped is
-+ * counted when stats are available.
-+ *
-+ * Returns IPSEC_RCV_SAIDNOTFOUND when no SA exists, IPSEC_RCV_SAIDNOTLIVE
-+ * when the SA is larval or dead, and IPSEC_RCV_FAILEDINBOUND when a policy
-+ * check (source address, SA chaining, NAT-T type) fails. The policy checks
-+ * only run while sysctl_ipsec_inbound_policy_check is set.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_auth_init(struct ipsec_rcv_state *irs)
-+{
-+ struct ipsec_sa *newipsp;
-+
-+ KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+ irs->state, irs->next_state);
-+
-+ irs->said.proto = irs->proto;
-+ /* the textual SA id is only built when debugging is on */
-+ if (debug_rcv) {
-+ irs->sa_len = satot(&irs->said, 0, irs->sa, sizeof(irs->sa));
-+ if(irs->sa_len == 0) {
-+ strcpy(irs->sa, "(error)");
-+ }
-+ } else
-+ irs->sa_len = 0;
-+
-+ newipsp = ipsec_sa_getbyid(&irs->said);
-+ if (newipsp == NULL) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "no ipsec_sa for SA:%s: incoming packet with no SA dropped\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ return IPSEC_RCV_SAIDNOTFOUND;
-+ }
-+
-+ /* If it is in larval state, drop the packet, we cannot process yet. */
-+ if(newipsp->ips_state == K_SADB_SASTATE_LARVAL) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "ipsec_sa in larval state, cannot be used yet, dropping packet.\n");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_SAIDNOTLIVE;
-+ }
-+
-+ /* a dead SA must not be used either */
-+ if(newipsp->ips_state == K_SADB_SASTATE_DEAD) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "ipsec_sa in dead state, cannot be used any more, dropping packet.\n");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_SAIDNOTLIVE;
-+ }
-+
-+ if(sysctl_ipsec_inbound_policy_check) {
-+ /* the packet has to come from the source address recorded in the SA */
-+ if(irs->ipp->saddr != ((struct sockaddr_in*)(newipsp->ips_addr_s))->sin_addr.s_addr) {
-+ KLIPS_ERROR(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s, src=%s of pkt does not agree with expected SA source address policy.\n",
-+ irs->sa_len ? irs->sa : " (error)",
-+ irs->ipsaddr_txt);
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s, src=%s of pkt agrees with expected SA source address policy.\n",
-+ irs->sa_len ? irs->sa : " (error)",
-+ irs->ipsaddr_txt);
-+
-+ /*
-+ * at this point, we have looked up a new SA, and we want to
-+ * make sure that if this isn't the first SA in the list,
-+ * that the previous SA actually points at this one.
-+ */
-+ if(irs->ipsp) {
-+ if(irs->ipsp->ips_next != newipsp) {
-+ KLIPS_ERROR(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "unexpected SA:%s: does not agree with ips->inext policy, dropped\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s grouping from previous SA is OK.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ } else {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s First SA in group.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ }
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ /* for ESP, the encapsulation seen on the wire must match the SA's */
-+ if (irs->proto == IPPROTO_ESP) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "natt_type=%u tdbp->ips_natt_type=%u : %s\n",
-+ irs->natt_type, newipsp->ips_natt_type,
-+ (irs->natt_type==newipsp->ips_natt_type)?"ok":"bad");
-+ if (irs->natt_type != newipsp->ips_natt_type) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s does not agree with expected NAT-T policy.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+ }
-+#endif
-+ }
-+
-+ /* all checks passed: this SA becomes the current one */
-+ irs->ipsp=newipsp;
-+
-+ return IPSEC_RCV_OK;
-+}
-+
-+static enum ipsec_rcv_value
-+ipsec_rcv_auth_decap(struct ipsec_rcv_state *irs)
-+{
-+ ipsec_rcv_setoutif(irs);
-+
-+ irs->proto_funcs = irs->ipsp->ips_xformfuncs;
-+ if (irs->proto_funcs == NULL)
-+ return IPSEC_RCV_BADPROTO;
-+
-+ if (irs->proto_funcs->protocol != irs->ipp->protocol) {
-+ if(irs->proto_funcs->protocol == IPPROTO_COMP) {
-+ /* looks like an IPCOMP that we can skip */
-+ struct ipsec_sa *newipsp = NULL;
-+
-+ newipsp = irs->ipsp->ips_next;
-+ if(newipsp) {
-+ ipsec_sa_get(newipsp);
-+ }
-+ if(irs->lastipsp) {
-+ ipsec_sa_put(irs->lastipsp);
-+ }
-+ irs->lastipsp = irs->ipsp;
-+ irs->ipsp=newipsp;
-+
-+ /* come back into here with the next transform */
-+ irs->next_state = IPSEC_RSM_AUTH_DECAP;
-+ return IPSEC_RCV_OK;
-+ }
-+
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+
-+ if (debug_rcv)
-+ ipsec_rcv_redodebug(irs);
-+
-+ /* now check the lifetimes */
-+ if(ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_bytes, "bytes",
-+ irs->sa, ipsec_life_countbased, ipsec_incoming,
-+ irs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_addtime, "addtime",
-+ irs->sa, ipsec_life_timebased, ipsec_incoming,
-+ irs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_addtime, "usetime",
-+ irs->sa, ipsec_life_timebased, ipsec_incoming,
-+ irs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&irs->ipsp->ips_life.ipl_packets, "packets",
-+ irs->sa, ipsec_life_countbased, ipsec_incoming,
-+ irs->ipsp) == ipsec_life_harddied) {
-+
-+ /*
-+ * disconnect SA from the hash table, so it can not be
-+ * found again.
-+ */
-+ ipsec_sa_rm(irs->ipsp);
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv_decap_once: "
-+ "decap (%d) failed lifetime check\n",
-+ irs->proto);
-+
-+ return IPSEC_RCV_LIFETIMEFAILED;
-+ }
-+
-+#if 0
-+ /*
-+ * This is removed for some reasons:
-+ * 1) it needs to happen *after* authentication.
-+ * 2) do we really care, if it authenticates, if it came
-+ * from the wrong location?
-+ * 3) the NAT_KA messages in IKE will also get to pluto
-+ * and it will figure out that stuff has moved.
-+ * 4) the 2.6 udp-esp encap function does not pass us
-+ * the originating port number, and I can't tell
-+ * if skb->sk is guaranteed to be valid here.
-+ * 2005-04-16: mcr@xelerance.com
-+ */
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ /*
-+ *
-+ * XXX we should ONLY update pluto if the SA passes all checks,
-+ * which we clearly do not now.
-+ */
-+ if ((irs->natt_type) &&
-+ ( (irs->ipp->saddr != (((struct sockaddr_in*)(newipsp->ips_addr_s))->sin_addr.s_addr)) ||
-+ (irs->natt_sport != newipsp->ips_natt_sport)
-+ )) {
-+ struct sockaddr sipaddr;
-+ struct sockaddr_in *psin = (struct sockaddr_in*)(newipsp->ips_addr_s);
-+
-+ /** Advertise NAT-T addr change to pluto **/
-+ sipaddr.sa_family = AF_INET;
-+ ((struct sockaddr_in*)&sipaddr)->sin_addr.s_addr = irs->ipp->saddr;
-+ ((struct sockaddr_in*)&sipaddr)->sin_port = htons(irs->natt_sport);
-+ pfkey_nat_t_new_mapping(newipsp, &sipaddr, irs->natt_sport);
-+
-+ /**
-+ * Then allow or block packet depending on
-+ * sysctl_ipsec_inbound_policy_check.
-+ *
-+ * In all cases, pluto will update SA if new mapping is
-+ * accepted.
-+ */
-+ if (sysctl_ipsec_inbound_policy_check) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s, src=%s:%u of pkt does not agree with expected "
-+ "SA source address [%08x:%u] (notifying pluto of change).\n",
-+ irs->sa_len ? irs->sa : " (error)",
-+ irs->ipsaddr_txt, irs->natt_sport,
-+ psin->sin_addr.s_addr,
-+ newipsp->ips_natt_sport);
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ ipsec_sa_put(newipsp);
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+ }
-+#endif
-+#endif
-+
-+ irs->authfuncs=NULL;
-+
-+ /* authenticate, if required */
-+#ifdef CONFIG_KLIPS_OCF
-+ if (irs->ipsp->ocf_in_use) {
-+ irs->authlen = AHHMAC_HASHLEN;
-+ irs->authfuncs = NULL;
-+ irs->ictx = NULL;
-+ irs->octx = NULL;
-+ irs->ictx_len = 0;
-+ irs->octx_len = 0;
-+ } else
-+#endif /* CONFIG_KLIPS_OCF */
-+#ifdef CONFIG_KLIPS_ALG
-+ /* authenticate, if required */
-+ if ((irs->ixt_a=irs->ipsp->ips_alg_auth)) {
-+ irs->authlen = AHHMAC_HASHLEN;
-+ irs->authfuncs = NULL;
-+ irs->ictx = NULL;
-+ irs->octx = NULL;
-+ irs->ictx_len = 0;
-+ irs->octx_len = 0;
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "authalg=%d authlen=%d\n",
-+ irs->ipsp->ips_authalg,
-+ irs->authlen);
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+ switch(irs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5:
-+ irs->authlen = AHHMAC_HASHLEN;
-+ irs->authfuncs = ipsec_rcv_md5;
-+ irs->ictx = (void *)&((struct md5_ctx*)(irs->ipsp->ips_key_a))->ictx;
-+ irs->octx = (void *)&((struct md5_ctx*)(irs->ipsp->ips_key_a))->octx;
-+ irs->ictx_len = sizeof(((struct md5_ctx*)(irs->ipsp->ips_key_a))->ictx);
-+ irs->octx_len = sizeof(((struct md5_ctx*)(irs->ipsp->ips_key_a))->octx);
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA:
-+ irs->authlen = AHHMAC_HASHLEN;
-+ irs->authfuncs = ipsec_rcv_sha1;
-+ irs->ictx = (void *)&((struct sha1_ctx*)(irs->ipsp->ips_key_a))->ictx;
-+ irs->octx = (void *)&((struct sha1_ctx*)(irs->ipsp->ips_key_a))->octx;
-+ irs->ictx_len = sizeof(((struct sha1_ctx*)(irs->ipsp->ips_key_a))->ictx);
-+ irs->octx_len = sizeof(((struct sha1_ctx*)(irs->ipsp->ips_key_a))->octx);
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ case AH_NONE:
-+ irs->authlen = 0;
-+ irs->authfuncs = NULL;
-+ irs->ictx = NULL;
-+ irs->octx = NULL;
-+ irs->ictx_len = 0;
-+ irs->octx_len = 0;
-+ break;
-+ default:
-+ irs->ipsp->ips_errs.ips_alg_errs += 1;
-+ if(irs->stats) {
-+ irs->stats->rx_errors++;
-+ }
-+ return IPSEC_RCV_BADAUTH;
-+ }
-+
-+ /* ilen counts number of bytes in ESP portion */
-+ irs->ilen = ((irs->skb->data + irs->skb->len) - skb_transport_header(irs->skb)) - irs->authlen;
-+ if(irs->ilen <= 0) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "runt %s packet with no data, dropping.\n",
-+ (irs->proto == IPPROTO_ESP ? "esp" : "ah"));
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ return IPSEC_RCV_BADLEN;
-+ }
-+
-+ if(irs->authfuncs ||
-+#ifdef CONFIG_KLIPS_OCF
-+ irs->ipsp->ocf_in_use ||
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+ irs->ixt_a ||
-+#endif
-+ 0) {
-+ if(irs->proto_funcs->rcv_setup_auth)
-+ return (*irs->proto_funcs->rcv_setup_auth)(irs, irs->skb,
-+ &irs->replay, &irs->authenticator);
-+ }
-+ return IPSEC_RCV_OK;
-+}
-+
-+
-+/*
-+ * State-machine step: validate the replay number and hand the packet to
-+ * the protocol's rcv_calc_auth hook.
-+ *
-+ * Returns IPSEC_RCV_OK when the SA needs no authentication; otherwise
-+ * IPSEC_RCV_BADAUTH (no authenticator or no calc hook),
-+ * IPSEC_RCV_REPLAYFAILED (replay window check failed), or whatever the
-+ * protocol's rcv_calc_auth hook returns.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_auth_calc(struct ipsec_rcv_state *irs)
-+{
-+	int need_auth;
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	/* authentication is needed if any configured engine provides it */
-+	need_auth = (irs->authfuncs != NULL);
-+#ifdef CONFIG_KLIPS_OCF
-+	need_auth = need_auth || irs->ipsp->ocf_in_use;
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+	need_auth = need_auth || (irs->ixt_a != NULL);
-+#endif
-+
-+	if (!need_auth) {
-+		return IPSEC_RCV_OK;
-+	}
-+
-+	/* the setup step must have located the authenticator in the packet */
-+	if (irs->authenticator == NULL) {
-+		irs->ipsp->ips_errs.ips_auth_errs += 1;
-+		if (irs->stats) {
-+			irs->stats->rx_dropped++;
-+		}
-+		return IPSEC_RCV_BADAUTH;
-+	}
-+
-+	if (!ipsec_checkreplaywindow(irs->ipsp, irs->replay)) {
-+		irs->ipsp->ips_errs.ips_replaywin_errs += 1;
-+		KLIPS_PRINT(debug_rcv & DB_RX_REPLAY,
-+			    "klips_debug:ipsec_rcv: "
-+			    "duplicate frame from %s, packet dropped\n",
-+			    irs->ipsaddr_txt);
-+		if (irs->stats) {
-+			irs->stats->rx_dropped++;
-+		}
-+		return IPSEC_RCV_REPLAYFAILED;
-+	}
-+
-+	/*
-+	 * verify authenticator
-+	 */
-+	KLIPS_PRINT(debug_rcv,
-+		    "klips_debug:ipsec_rcv: "
-+		    "encalg = %d, authalg = %d.\n",
-+		    irs->ipsp->ips_encalg,
-+		    irs->ipsp->ips_authalg);
-+
-+	/* calculate authenticator */
-+	if (irs->proto_funcs->rcv_calc_auth == NULL) {
-+		return IPSEC_RCV_BADAUTH;
-+	}
-+	return (*irs->proto_funcs->rcv_calc_auth)(irs, irs->skb);
-+}
-+
-+/*
-+ * State-machine step: compare the calculated hash against the
-+ * authenticator carried in the packet, then update the replay window.
-+ *
-+ * The check runs at most once per state object (irs->auth_checked).
-+ *
-+ * Returns IPSEC_RCV_OK when no authentication is configured, the check
-+ * was already done, or the check succeeds; IPSEC_RCV_AUTHFAILED on hash
-+ * mismatch; IPSEC_RCV_REPLAYROLLED when the sequence number is 0 with a
-+ * replay window configured (the SA is removed from the hash table);
-+ * IPSEC_RCV_REPLAYFAILED when the replay window update rejects the frame.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_auth_chk(struct ipsec_rcv_state *irs)
-+{
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d) - %s\n", __FUNCTION__,
-+		    irs->state, irs->next_state,
-+		    irs->auth_checked ? "already checked" : "will check");
-+
-+	if (irs->auth_checked)
-+		return IPSEC_RCV_OK;
-+
-+	if(irs->authfuncs ||
-+#ifdef CONFIG_KLIPS_OCF
-+	   irs->ipsp->ocf_in_use ||
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+	   irs->ixt_a ||
-+#endif
-+	   0) {
-+		if (memcmp(irs->hash, irs->authenticator, irs->authlen)) {
-+			irs->ipsp->ips_errs.ips_auth_errs += 1;
-+			/* NOTE(review): prints 12 bytes of each hash; assumes authlen >= 12 */
-+			KLIPS_ERROR(debug_rcv & DB_RX_INAU,
-+				    "klips_debug:ipsec_rcv: "
-+				    "auth failed on incoming packet from %s (replay=%d): calculated hash=%08x%08x%08x received hash=%08x%08x%08x, dropped\n",
-+				    irs->ipsaddr_txt,
-+				    irs->replay,
-+				    ntohl(*(__u32*)&irs->hash[0]),
-+				    ntohl(*(__u32*)&irs->hash[4]),
-+				    ntohl(*(__u32*)&irs->hash[8]),
-+				    ntohl(*(__u32*)irs->authenticator),
-+				    ntohl(*((__u32*)irs->authenticator + 1)),
-+				    ntohl(*((__u32*)irs->authenticator + 2)));
-+			if(irs->stats) {
-+				irs->stats->rx_dropped++;
-+			}
-+			return IPSEC_RCV_AUTHFAILED;
-+		} else {
-+			KLIPS_PRINT(debug_rcv,
-+				    "klips_debug:ipsec_rcv: "
-+				    "authentication successful.\n");
-+		}
-+
-+		/* Crypto hygiene: clear memory used to calculate authenticator.
-+		 * The length varies with the algorithm.
-+		 */
-+		memset(irs->hash, 0, irs->authlen);
-+
-+		/* If the sequence number == 0, expire SA, it had rolled */
-+		if(irs->ipsp->ips_replaywin && !irs->replay /* !irs->ipsp->ips_replaywin_lastseq */) {
-+			/* we need to remove it from the sadb hash, so that it can't be found again */
-+			ipsec_sa_rm(irs->ipsp);
-+
-+			KLIPS_ERROR(debug_rcv,
-+				    "klips_debug:ipsec_rcv: "
-+				    "replay window counter rolled, expiring SA.\n");
-+			if(irs->stats) {
-+				irs->stats->rx_dropped++;
-+			}
-+			return IPSEC_RCV_REPLAYROLLED;
-+		}
-+
-+		/* now update the replay counter */
-+		if (!ipsec_updatereplaywindow(irs->ipsp, irs->replay)) {
-+			irs->ipsp->ips_errs.ips_replaywin_errs += 1;
-+			KLIPS_ERROR(debug_rcv & DB_RX_REPLAY,
-+				    "klips_debug:ipsec_rcv: "
-+				    "duplicate frame from %s, packet dropped\n",
-+				    irs->ipsaddr_txt);
-+			if(irs->stats) {
-+				irs->stats->rx_dropped++;
-+			}
-+			/*
-+			 * A rejected window update is a replay failure, not a
-+			 * counter roll-over; report it the same way
-+			 * ipsec_rcv_auth_calc() reports a duplicate frame.
-+			 */
-+			return IPSEC_RCV_REPLAYFAILED;
-+		}
-+		irs->auth_checked = 1;
-+	}
-+	return IPSEC_RCV_OK;
-+}
-+
-+/*
-+ * State-machine step: run the protocol's rcv_decrypt hook, if it has one.
-+ * Protocols without a decrypt hook pass straight through with
-+ * IPSEC_RCV_OK.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_decrypt(struct ipsec_rcv_state *irs)
-+{
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	if (irs->proto_funcs->rcv_decrypt == NULL) {
-+		return IPSEC_RCV_OK;
-+	}
-+
-+	return (*irs->proto_funcs->rcv_decrypt)(irs);
-+}
-+
-+/*
-+ * here we decide if there is more decapsulating required and
-+ * change the next state appropriately
-+ *
-+ * In order, this step:
-+ * - runs ipsec_rcv_auth_chk() in case authentication was deferred;
-+ * - re-reads the IP header pointers from the skb;
-+ * - rewrites the IP protocol field to irs->next_header and recomputes
-+ * the IP header checksum;
-+ * - when sysctl_ipsec_inbound_policy_check is set and the SA has a
-+ * successor, drops packets whose inner protocol is not one of the
-+ * expected encapsulations;
-+ * - updates the SA byte/usetime/packet lifetime counters;
-+ * - moves irs->ipsp to ips_next, remembering the SA just used in
-+ * irs->lastipsp;
-+ * - sets next_state to IPSEC_RSM_AUTH_DECAP when the inner protocol is
-+ * ESP/AH (or IPCOMP) and another SA is available.
-+ *
-+ * Returns IPSEC_RCV_OK, IPSEC_RCV_FAILEDINBOUND, or the error from
-+ * ipsec_rcv_auth_chk().
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_decap_cont(struct ipsec_rcv_state *irs)
-+{
-+ struct sk_buff *skb;
-+ struct iphdr *ipp;
-+ struct ipsec_sa *ipsnext = NULL; /* next SA towards inside of packet */
-+ enum ipsec_rcv_value rv;
-+
-+ KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+ irs->state, irs->next_state);
-+
-+ /*
-+ * if we haven't checked the auth values yet, do it now.
-+ * This is needed for the case where drivers do crypt+hash
-+ * in one operation.
-+ */
-+ rv = ipsec_rcv_auth_chk(irs);
-+ if (rv != IPSEC_RCV_OK)
-+ return rv;
-+
-+ /*
-+ * Adjust pointers after decrypt
-+ */
-+ skb = irs->skb;
-+ irs->len = skb->len;
-+ ipp = irs->ipp = ip_hdr(skb);
-+ /* ihl counts 32-bit words; convert to bytes */
-+ irs->iphlen = ipp->ihl<<2;
-+ skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) + irs->iphlen));
-+
-+ /* zero any options that there might be */
-+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-+
-+ if (debug_rcv)
-+ ipsec_rcv_redodebug(irs);
-+
-+ /*
-+ * Discard the original ESP/AH header
-+ */
-+ ipp->protocol = irs->next_header;
-+
-+ ipp->check = 0; /* NOTE: this will be included in checksum */
-+ /* ip_fast_csum takes the header length in 32-bit words */
-+ ipp->check = ip_fast_csum((unsigned char *)ip_hdr(skb), irs->iphlen >> 2);
-+
-+ /* NOTE(review): three %s for one IPS_XFORM_NAME() argument - presumably the macro expands to three strings; confirm */
-+ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+ "klips_debug:ipsec_rcv: "
-+ "after <%s%s%s>, SA:%s:\n",
-+ IPS_XFORM_NAME(irs->ipsp),
-+ irs->sa_len ? irs->sa : " (error)");
-+ KLIPS_IP_PRINT(debug_rcv & DB_RX_PKTRX, ipp);
-+
-+ skb->protocol = htons(ETH_P_IP);
-+ skb->ip_summed = 0;
-+
-+ ipsnext = irs->ipsp->ips_next;
-+ if(sysctl_ipsec_inbound_policy_check) {
-+ if(ipsnext) {
-+ /*
-+ * Another SA is chained after this one, so the inner
-+ * protocol must be one of the encapsulations listed
-+ * below; anything else is dropped.
-+ */
-+ if(
-+ ipp->protocol != IPPROTO_AH
-+ && ipp->protocol != IPPROTO_ESP
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ && ipp->protocol != IPPROTO_COMP
-+ && (ipsnext->ips_said.proto != IPPROTO_COMP
-+ || ipsnext->ips_next)
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ && ipp->protocol != IPPROTO_IPIP
-+ && ipp->protocol != IPPROTO_ATT_HEARTBEAT /* heartbeats to AT&T SIG/GIG */
-+ ) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "packet with incomplete policy dropped, last successful SA:%s.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ if(irs->stats) {
-+ irs->stats->rx_dropped++;
-+ }
-+ return IPSEC_RCV_FAILEDINBOUND;
-+ }
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "SA:%s, Another IPSEC header to process.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ } else {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "No ips_inext from this SA:%s.\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ }
-+ }
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ /* update ipcomp ratio counters, even if no ipcomp packet is present */
-+ if (ipsnext
-+ && ipsnext->ips_said.proto == IPPROTO_COMP
-+ && ipp->protocol != IPPROTO_COMP) {
-+ ipsnext->ips_comp_ratio_cbytes += ntohs(ipp->tot_len);
-+ ipsnext->ips_comp_ratio_dbytes += ntohs(ipp->tot_len);
-+ }
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+ /* lifetime accounting for the SA that just processed this packet */
-+ irs->ipsp->ips_life.ipl_bytes.ipl_count += irs->len;
-+ irs->ipsp->ips_life.ipl_bytes.ipl_last = irs->len;
-+
-+ /* record first-use time (in seconds) only once */
-+ if(!irs->ipsp->ips_life.ipl_usetime.ipl_count) {
-+ irs->ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ;
-+ }
-+ irs->ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ;
-+ irs->ipsp->ips_life.ipl_packets.ipl_count += 1;
-+
-+#ifdef CONFIG_NETFILTER
-+ if(irs->proto == IPPROTO_ESP || irs->proto == IPPROTO_AH) {
-+ /* replace the SA-reference bits of nfmark with this SA's reference */
-+ skb->nfmark = (skb->nfmark & (~(IPsecSAref2NFmark(IPSEC_SA_REF_MASK))))
-+ | IPsecSAref2NFmark(IPsecSA2SAref(irs->ipsp));
-+ KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+ "klips_debug:ipsec_rcv: "
-+ "%s SA sets skb->nfmark=0x%x.\n",
-+ irs->proto == IPPROTO_ESP ? "ESP" : "AH",
-+ (unsigned)skb->nfmark);
-+ }
-+#endif /* CONFIG_NETFILTER */
-+
-+ /* okay, acted on this SA, so free any previous SA, and record a new one */
-+ /* NOTE(review): irs->ipsp was already dereferenced unconditionally above, so this NULL test cannot be false here */
-+ if(irs->ipsp) {
-+ struct ipsec_sa *newipsp = NULL;
-+ newipsp = irs->ipsp->ips_next;
-+ if(newipsp) {
-+ ipsec_sa_get(newipsp);
-+ }
-+ if(irs->lastipsp) {
-+ ipsec_sa_put(irs->lastipsp);
-+ }
-+ irs->lastipsp = irs->ipsp;
-+ irs->ipsp=newipsp;
-+ }
-+
-+ /* do we need to do more decapsulation */
-+ if ((irs->ipp->protocol == IPPROTO_ESP ||
-+ irs->ipp->protocol == IPPROTO_AH ||
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ irs->ipp->protocol == IPPROTO_COMP ||
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ 0) && irs->ipsp != NULL) {
-+ irs->next_state = IPSEC_RSM_AUTH_DECAP;
-+ }
-+ return IPSEC_RCV_OK;
-+}
-+
-+
-+/*
-+ * State-machine step run after the last decapsulation: skips a trailing
-+ * IPCOMP SA, restores the pre-NAT source address when NAT-OA is known,
-+ * performs IPIP decapsulation if the remaining SA is an IPIP one, updates
-+ * rx_bytes, attaches the SA reference (HAVE_IPSEC_SAREF), releases the
-+ * stale dst and puts the original MAC header back in front of the IP
-+ * header.
-+ *
-+ * Returns IPSEC_RCV_OK, or the error from ipsec_rcv_decap_ipip().
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_cleanup(struct ipsec_rcv_state *irs)
-+{
-+	struct sk_buff *skb;
-+	struct iphdr *ipp;
-+	struct ipsec_sa *ipsp = NULL;
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	/* set up for decap loop */
-+	ipp = irs->ipp;
-+	ipsp = irs->ipsp;
-+	skb = irs->skb;
-+
-+	/* if there is an IPCOMP, but we don't have an IPPROTO_COMP,
-+	 * then we can just skip it
-+	 */
-+#ifdef CONFIG_KLIPS_IPCOMP
-+	if(irs->ipsp && irs->ipsp->ips_said.proto == IPPROTO_COMP) {
-+		struct ipsec_sa *newipsp = NULL;
-+		newipsp = irs->ipsp->ips_next;
-+		if(newipsp) {
-+			ipsec_sa_get(newipsp);
-+		}
-+		if(irs->lastipsp) {
-+			ipsec_sa_put(irs->lastipsp);
-+		}
-+		irs->lastipsp = irs->ipsp;
-+		irs->ipsp=newipsp;
-+	}
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+	if ((irs->natt_type) && (ipp->protocol != IPPROTO_IPIP)) {
-+		/**
-+		 * NAT-Traversal and Transport Mode:
-+		 * we need to correct TCP/UDP checksum
-+		 *
-+		 * If we've got NAT-OA, we can fix checksum without recalculation.
-+		 *
-+		 * ipsp is the SA *after* the last one processed and may be
-+		 * NULL when the chain is exhausted, so guard the dereference.
-+		 */
-+		__u32 natt_oa = (ipsp && ipsp->ips_natt_oa) ?
-+			((struct sockaddr_in*)(ipsp->ips_natt_oa))->sin_addr.s_addr : 0;
-+
-+		if(natt_oa != 0) {
-+			/* reset source address to what it was before NAT */
-+			ipp->saddr = natt_oa;
-+			ipp->check = 0;
-+			ipp->check = ip_fast_csum((unsigned char *)ipp, ipp->ihl);
-+			KLIPS_PRINT(debug_rcv, "csum: %04x\n", ipp->check);
-+		}
-+	}
-+#endif
-+
-+	/*
-+	 * the SA is still locked from the loop
-+	 */
-+	if(irs->ipsp && irs->ipsp->ips_xformfuncs->protocol == IPPROTO_IPIP) {
-+		enum ipsec_rcv_value decap_stat;
-+
-+		decap_stat = ipsec_rcv_decap_ipip(irs);
-+		if(decap_stat != IPSEC_RCV_OK) {
-+			return decap_stat;
-+		}
-+	}
-+
-+	if(irs->stats) {
-+		irs->stats->rx_bytes += skb->len;
-+	}
-+
-+	/*
-+	 * if we are supposed to return the packet directly to the transport
-+	 * layer, then dump it out correctly.
-+	 *
-+	 * lastipsp is not expected to be NULL here, but if it ever is we
-+	 * only complain and must not dereference it.
-+	 */
-+	if(unlikely(!irs->lastipsp)) {
-+		printk("%s,%d: %s lastipsp should never be NULL\n",
-+		       __FILE__, __LINE__, __FUNCTION__);
-+	} else if(irs->lastipsp->ips_transport_direct) {
-+		KLIPS_PRINT(debug_rcv, "receiving packet as transport direct\n");
-+		skb->ip_summed=CHECKSUM_UNNECESSARY;
-+		/* STUFF */
-+	}
-+
-+#ifdef HAVE_IPSEC_SAREF
-+	if(skb->sp) {
-+		secpath_put(skb->sp);
-+	}
-+	skb->sp = secpath_dup(NULL);
-+	/* secpath_dup() allocates and may fail; lastipsp may be missing */
-+	if(skb->sp && irs->lastipsp) {
-+		skb->sp->ref = irs->lastipsp->ips_ref;
-+	}
-+#endif
-+
-+	/* release the dst that was attached, since we have likely
-+	 * changed the actual destination of the packet.
-+	 */
-+	if(skb->dst) {
-+		dst_release(skb->dst);
-+		skb->dst = NULL;
-+	}
-+	skb->pkt_type = PACKET_HOST;
-+	if(irs->hard_header_len &&
-+	   (skb_mac_header(skb) != (skb_network_header(skb) - irs->hard_header_len)) &&
-+	   (irs->hard_header_len <= skb_headroom(skb))) {
-+		/* copy back original MAC header */
-+		memmove(skb_network_header(skb) - irs->hard_header_len,
-+			skb_mac_header(skb), irs->hard_header_len);
-+		skb_set_mac_header(skb, ipsec_skb_offset(skb, skb_network_header(skb) - irs->hard_header_len));
-+	}
-+	return IPSEC_RCV_OK;
-+}
-+
-+
-+/*
-+ * Final state-machine step: rewind the skb so its data starts at the IP
-+ * header, reset netfilter state and hand the packet to netif_rx().
-+ * Ownership of the skb passes to the stack, so irs->skb is cleared.
-+ * Always returns IPSEC_RCV_OK.
-+ */
-+static enum ipsec_rcv_value
-+ipsec_rcv_complete(struct ipsec_rcv_state *irs)
-+{
-+	struct sk_buff *pkt = irs->skb;
-+
-+	KLIPS_PRINT(debug_rcv, "klips_debug: %s(st=%d,nxt=%d)\n", __FUNCTION__,
-+		    irs->state, irs->next_state);
-+
-+	/*
-+	 * make sure that data now starts at IP header, since we are going
-+	 * to pass this back to ip_input (aka netif_rx). Rules for where the
-+	 * pointers wind up are different for 2.6 vs 2.4, so we just fudge
-+	 * it here.
-+	 */
-+#ifdef NET_26
-+	pkt->data = skb_push(pkt, skb_transport_header(pkt) - skb_network_header(pkt));
-+#else
-+	pkt->data = skb_network_header(pkt);
-+	/* length becomes the total length recorded in the IP header */
-+	pkt->len = ntohs(ip_hdr(pkt)->tot_len);
-+#endif
-+
-+	ipsec_nf_reset(pkt);
-+
-+	KLIPS_PRINT(debug_rcv & DB_RX_PKTRX,
-+		    "klips_debug:ipsec_rcv: "
-+		    "netif_rx(%s) called.\n", pkt->dev->name);
-+	netif_rx(pkt);
-+	irs->skb = NULL;
-+
-+	return IPSEC_RCV_OK;
-+}
-+
-+
-+
-+/*
-+ * ipsec_rsm is responsible for walking us through the state machine
-+ * it is the only entry point into the receive processing and does
-+ * appropriate checks and state changes for us.
-+ *
-+ * Each iteration looks up the current state in rcv_state_table, preloads
-+ * irs->next_state from the table and calls the state's action. An action
-+ * returning IPSEC_RCV_PENDING makes this function return with irs left
-+ * intact; any other non-OK result ends processing. When the
-+ * machine reaches IPSEC_RSM_DONE the SA references, any remaining skb and
-+ * the state object itself are released.
-+ */
-+
-+void
-+ipsec_rsm(struct ipsec_rcv_state *irs)
-+{
-+ if (irs == NULL) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rsm: "
-+ "irs == NULL.\n");
-+ return;
-+ }
-+
-+ /*
-+ * make sure nothing is removed from underneath us
-+ */
-+ spin_lock_bh(&tdb_lock);
-+
-+ /*
-+ * if we have a valid said, then we must check it here to ensure it
-+ * hasn't gone away while we were waiting for a task to complete
-+ */
-+
-+ /* NOTE(review): the SA returned by ipsec_sa_getbyid() is discarded; confirm whether it takes a reference that must be dropped */
-+ if (irs->said.proto && ipsec_sa_getbyid(&irs->said) == NULL) {
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rcv: "
-+ "no ipsec_sa for SA:%s: incoming packet with no SA dropped\n",
-+ irs->sa_len ? irs->sa : " (error)");
-+ if (irs->stats)
-+ irs->stats->rx_dropped++;
-+
-+ /* drop through and cleanup */
-+ irs->state = IPSEC_RSM_DONE;
-+ }
-+
-+ while (irs->state != IPSEC_RSM_DONE) {
-+ int rc;
-+
-+ /* default successor; the action may override irs->next_state */
-+ irs->next_state = rcv_state_table[irs->state].next_state;
-+
-+ rc = rcv_state_table[irs->state].action(irs);
-+
-+ if (rc == IPSEC_RCV_OK) {
-+ /* some functions change the next state, see the state table */
-+ irs->state = irs->next_state;
-+ } else if (rc == IPSEC_RCV_PENDING) {
-+ /*
-+ * things are on hold until we return here in the next/new state
-+ * we check our SA is valid when we return
-+ */
-+ /* irs, its skb and SA references are deliberately left alive here */
-+ spin_unlock_bh(&tdb_lock);
-+ return;
-+ } else {
-+ /* bad result, force state change to done */
-+ KLIPS_PRINT(debug_rcv,
-+ "klips_debug:ipsec_rsm: "
-+ "processing completed due to %s.\n",
-+ ipsec_rcv_err(rc));
-+ irs->state = IPSEC_RSM_DONE;
-+ }
-+ }
-+
-+ /*
-+ * all done with anything needing locks
-+ */
-+ spin_unlock_bh(&tdb_lock);
-+
-+ /* drop the references held on the last-used and next SA */
-+ if (irs->lastipsp) {
-+ ipsec_sa_put(irs->lastipsp);
-+ irs->lastipsp=NULL;
-+ }
-+
-+ if (irs->ipsp) {
-+ ipsec_sa_put(irs->ipsp);
-+ irs->ipsp=NULL;
-+ }
-+
-+ /* ipsec_rcv_complete() clears irs->skb after netif_rx(); a non-NULL skb here has not been delivered and is freed */
-+ if (irs->skb) {
-+ ipsec_kfree_skb(irs->skb);
-+ irs->skb = NULL;
-+ }
-+
-+ ipsec_rcv_state_delete(irs);
-+
-+ KLIPS_DEC_USE; /* once less packet using the driver */
-+}
-+
-+
-+/*
-+ * Protocol-handler entry point for received IPsec packets.
-+ *
-+ * Allocates a receive state object, performs UDP (NAT-T) decapsulation on
-+ * pre-2.6 kernels, then hands the packet to the ipsec_rsm() state machine,
-+ * which takes over ownership of both the skb and the state object.
-+ *
-+ * Returns 0 in all cases except the pre-2.6 NAT-T path, where the value
-+ * reported by ipsec_rcv_natt_decap() is returned so the UDP code can
-+ * pass the packet upstream.
-+ */
-+int
-+ipsec_rcv(struct sk_buff *skb
-+#ifndef PROTO_HANDLER_SINGLE_PARM
-+	  , unsigned short xlen
-+#endif /* PROTO_HANDLER_SINGLE_PARM */
-+	  )
-+{
-+	struct ipsec_rcv_state *irs = NULL;
-+
-+	/* Don't unlink in the middle of a turnaround */
-+	KLIPS_INC_USE;
-+
-+	if (skb == NULL) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "NULL skb passed in.\n");
-+		goto rcvleave;
-+	}
-+
-+	if (skb->data == NULL) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "NULL skb->data passed in, packet is bogus, dropping.\n");
-+		goto rcvleave;
-+	}
-+
-+	irs = ipsec_rcv_state_new();
-+	if (unlikely (! irs)) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "failled to allocate a rcv state object\n");
-+		goto rcvleave;
-+	}
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) && !defined(NET_26)
-+	{
-+		/* NET_26 NAT-T is handled by separate function */
-+		struct sk_buff *nskb;
-+		int udp_decap_ret = 0;
-+
-+		nskb = ipsec_rcv_natt_decap(skb, irs, &udp_decap_ret);
-+		if(nskb == NULL) {
-+			/* return with non-zero, because UDP.c code
-+			 * need to send it upstream.
-+			 */
-+			if(skb && udp_decap_ret == 0) {
-+				ipsec_kfree_skb(skb);
-+			}
-+			if (irs) {
-+				ipsec_rcv_state_delete(irs);
-+			}
-+			KLIPS_DEC_USE;
-+			return(udp_decap_ret);
-+		}
-+		skb = nskb;
-+	}
-+#endif /* NAT_T */
-+
-+	irs->skb = skb;
-+
-+	/*
-+	 * we hand off real early to the state machine because we just cannot
-+	 * know how much processing it is off-loading
-+	 */
-+	ipsec_rsm(irs);
-+
-+	return(0);
-+
-+ rcvleave:
-+	/* error exit: release whatever was acquired before the failure */
-+	if (irs) {
-+		ipsec_rcv_state_delete(irs);
-+	}
-+	if (skb) {
-+		ipsec_kfree_skb(skb);
-+	}
-+	KLIPS_DEC_USE;
-+	return(0);
-+}
-+
-+
-+#ifdef NET_26
-+/*
-+ * this entry point is not a protocol entry point, so the entry
-+ * is a bit different.
-+ *
-+ * skb->iph->tot_len has been byte-swapped, and reduced by the size of
-+ * the IP header (and options).
-+ *
-+ * skb->h.raw has been pulled up the ESP header.
-+ *
-+ * skb->iph->protocol = 50 IPPROTO_ESP;
-+ *
-+ */
-+/* UDP encap hook: forwards the skb with the socket's encap type. */
-+int klips26_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
-+{
-+	__u16 encap_type = udp_sk(sk)->encap_type;
-+
-+	return klips26_rcv_encap(skb, encap_type);
-+}
-+
-+/*
-+ * 2.6 entry point for ESP-in-UDP packets.
-+ *
-+ * Attributes the packet to the first ipsec device, records the NAT-T
-+ * flavour derived from encap_type, then hands the packet to ipsec_rsm().
-+ * The packet is dropped when no state object can be allocated, when the
-+ * ipsec device is not available, or when encap_type is unknown.
-+ *
-+ * Always returns 0.
-+ */
-+int klips26_rcv_encap(struct sk_buff *skb, __u16 encap_type)
-+{
-+	struct ipsec_rcv_state *irs = NULL;
-+
-+	/* Don't unlink in the middle of a turnaround */
-+	KLIPS_INC_USE;
-+
-+	if (skb == NULL) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "NULL skb passed in.\n");
-+		goto rcvleave;
-+	}
-+
-+	if (skb->data == NULL) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "NULL skb->data passed in, packet is bogus, dropping.\n");
-+		goto rcvleave;
-+	}
-+
-+	irs = ipsec_rcv_state_new();
-+	if (unlikely (! irs)) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "failled to allocate a rcv state object\n");
-+		goto rcvleave;
-+	}
-+
-+	/* XXX fudge it so that all nat-t stuff comes from ipsec0 */
-+	/* eventually, the SA itself will determine which device
-+	 * it comes from
-+	 */
-+	skb->dev = ipsec_get_device(0);
-+	if (unlikely (skb->dev == NULL)) {
-+		/* no ipsec device to attribute the packet to: drop it
-+		 * rather than dereference a NULL device below
-+		 */
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv: "
-+			    "no ipsec device available, dropping.\n");
-+		goto rcvleave;
-+	}
-+	irs->hard_header_len = skb->dev->hard_header_len;
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+	switch(encap_type) {
-+	case UDP_ENCAP_ESPINUDP:
-+		irs->natt_type = ESPINUDP_WITH_NON_ESP;
-+		break;
-+
-+	case UDP_ENCAP_ESPINUDP_NON_IKE:
-+		irs->natt_type = ESPINUDP_WITH_NON_IKE;
-+		break;
-+
-+	default:
-+		if(printk_ratelimit()) {
-+			printk(KERN_INFO "KLIPS received unknown UDP-ESP encap type %u\n",
-+			       encap_type);
-+		}
-+		goto rcvleave;
-+	}
-+#endif /* NAT_T */
-+
-+	irs->skb = skb;
-+
-+	/*
-+	 * we hand off real early to the state machine because we just cannot
-+	 * know how much processing it is off-loading
-+	 */
-+	ipsec_rsm(irs);
-+
-+	return(0);
-+
-+ rcvleave:
-+	/* error exit: release whatever was acquired before the failure */
-+	if (irs) {
-+		ipsec_rcv_state_delete(irs);
-+	}
-+	if (skb) {
-+		ipsec_kfree_skb(skb);
-+	}
-+	KLIPS_DEC_USE;
-+	return(0);
-+}
-+#endif
-+
-+// ------------------------------------------------------------------------
-+// this handles creating and managing state for recv path
-+
-+/* taken around allocation, freeing and the outstanding-object counter */
-+static spinlock_t irs_cache_lock = SPIN_LOCK_UNLOCKED;
-+/* slab cache for struct ipsec_rcv_state; created by ipsec_rcv_state_cache_init() */
-+#ifdef HAVE_KMEM_CACHE_MACRO
-+static struct kmem_cache *irs_cache_allocator = NULL;
-+#else
-+static kmem_cache_t *irs_cache_allocator = NULL;
-+#endif
-+/* number of state objects currently allocated and not yet deleted */
-+static unsigned irs_cache_allocated_count = 0;
-+
-+/* allocation is refused once the outstanding count reaches this limit */
-+int ipsec_irs_cache_allocated_max = 1000;
-+module_param(ipsec_irs_cache_allocated_max,int,0644);
-+MODULE_PARM_DESC(ipsec_irs_cache_allocated_max,
-+ "Maximum outstanding receive packets (before they are dropped)");
-+
-+/*
-+ * Create the slab cache used for receive state objects.
-+ * Returns 0 on success, -EBUSY if the cache already exists and -ENOMEM
-+ * if it cannot be created.
-+ */
-+int
-+ipsec_rcv_state_cache_init (void)
-+{
-+	if (irs_cache_allocator != NULL) {
-+		return -EBUSY;
-+	}
-+
-+	spin_lock_init(&irs_cache_lock);
-+
-+	/* kmem_cache_create() takes one argument fewer with the macro API */
-+#ifdef HAVE_KMEM_CACHE_MACRO
-+	irs_cache_allocator = kmem_cache_create ("ipsec_irs",
-+						 sizeof (struct ipsec_rcv_state), 0,
-+						 0, NULL);
-+#else
-+	irs_cache_allocator = kmem_cache_create ("ipsec_irs",
-+						 sizeof (struct ipsec_rcv_state), 0,
-+						 0, NULL, NULL);
-+#endif
-+
-+	return (irs_cache_allocator != NULL) ? 0 : -ENOMEM;
-+}
-+
-+/*
-+ * Destroy the receive state cache, warning if objects are still
-+ * outstanding, and reset the outstanding counter.
-+ */
-+void
-+ipsec_rcv_state_cache_cleanup (void)
-+{
-+	if (unlikely (irs_cache_allocated_count != 0)) {
-+		printk ("ipsec: deleting ipsec_irs kmem_cache while in use\n");
-+	}
-+
-+	if (irs_cache_allocator != NULL) {
-+		kmem_cache_destroy (irs_cache_allocator);
-+		irs_cache_allocator = NULL;
-+	}
-+
-+	irs_cache_allocated_count = 0;
-+}
-+
-+/*
-+ * Allocate a zero-filled receive state object from the cache.
-+ * Returns NULL when the outstanding-object limit has been reached or the
-+ * atomic allocation fails.
-+ */
-+static struct ipsec_rcv_state *
-+ipsec_rcv_state_new (void)
-+{
-+	struct ipsec_rcv_state *state = NULL;
-+	int limit_hit;
-+
-+	spin_lock_bh (&irs_cache_lock);
-+
-+	limit_hit = (irs_cache_allocated_count >= ipsec_irs_cache_allocated_max);
-+	if (!limit_hit) {
-+		state = kmem_cache_alloc (irs_cache_allocator, GFP_ATOMIC);
-+		if (likely (state != NULL)) {
-+			irs_cache_allocated_count++;
-+		}
-+	}
-+
-+	spin_unlock_bh (&irs_cache_lock);
-+
-+	if (limit_hit) {
-+		KLIPS_PRINT(debug_rcv,
-+			    "klips_debug:ipsec_rcv_state_new: "
-+			    "exceeded maximum outstanding RX packet cnt %d\n",
-+			    irs_cache_allocated_count);
-+		return NULL;
-+	}
-+
-+	if (unlikely (state == NULL)) {
-+		return NULL;
-+	}
-+
-+	/* start every packet from a fully cleared state object */
-+	memset((caddr_t)state, 0, sizeof(*state));
-+
-+	return state;
-+}
-+
-+/*
-+ * Return a receive state object to the cache and decrement the
-+ * outstanding counter. A NULL pointer is ignored.
-+ */
-+static void
-+ipsec_rcv_state_delete (struct ipsec_rcv_state *irs)
-+{
-+	if (likely (irs != NULL)) {
-+		spin_lock_bh (&irs_cache_lock);
-+
-+		irs_cache_allocated_count--;
-+		kmem_cache_free (irs_cache_allocator, irs);
-+
-+		spin_unlock_bh (&irs_cache_lock);
-+	}
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-set-style: linux
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_sa.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1575 @@
-+/*
-+ * Common routines for IPsec SA maintenance routines.
-+ *
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001, 2002 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_sa.c,v 1.31 2005/11/11 04:38:56 paul Exp $
-+ *
-+ * This is the file formerly known as "ipsec_xform.h"
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/vmalloc.h> /* vmalloc() */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <openswan.h>
-+#ifdef SPINLOCK
-+#ifdef SPINLOCK_23
-+#include <linux/spinlock.h> /* *lock* */
-+#else /* SPINLOCK_23 */
-+#include <asm/spinlock.h> /* *lock* */
-+#endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "openswan/radij.h"
-+
-+#include "openswan/ipsec_stats.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_xform.h"
-+
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_ipe4.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_ipip.h"
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipsec_ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+
-+#include "ipsec_ocf.h"
-+
-+
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+struct ipsec_sa *ipsec_sadb_hash[SADB_HASHMOD];
-+#ifdef SPINLOCK
-+spinlock_t tdb_lock = SPIN_LOCK_UNLOCKED;
-+#else /* SPINLOCK */
-+spinlock_t tdb_lock;
-+#endif /* SPINLOCK */
-+
-+struct ipsec_sadb ipsec_sadb;
-+
-+/* the sub table must be narrower (or equal) in bits than the variable type
-+ in the main table to count the number of unused entries in it. */
-+typedef struct {
-+ int testSizeOf_refSubTable :
-+ ((sizeof(IPsecRefTableUnusedCount) * 8) < IPSEC_SA_REF_SUBTABLE_IDX_WIDTH ? -1 : 1);
-+} dummy;
-+
-+
-+/* The field where the saref will be hosted in the skb must be wide enough to
-+ accommodate the information it needs to store. */
-+typedef struct {
-+ int testSizeOf_refField :
-+ (IPSEC_SA_REF_HOST_FIELD_WIDTH < IPSEC_SA_REF_TABLE_IDX_WIDTH ? -1 : 1 );
-+} dummy2;
-+
-+
-+#define IPS_HASH(said) (((said)->spi + (said)->dst.u.v4.sin_addr.s_addr + (said)->proto) % SADB_HASHMOD)
-+
-+/*
-+ * Refill the SAref free list by scanning the reference table for unused
-+ * (NULL) slots, starting at ipsec_sadb.refFreeListCont.
-+ *
-+ * At most one new sub-table is allocated per call (tracked by addone).
-+ *
-+ * Returns 0 when the list was refilled (fully or partly), -ENOSPC when no
-+ * free slot was found, or the error from ipsec_SArefSubTable_alloc().
-+ */
-+int
-+ipsec_SAref_recycle(void)
-+{
-+ int table, i;
-+ int error = 0;
-+ int entry;
-+ int addone;
-+
-+ /* start from an empty free list */
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_NULL;
-+ ipsec_sadb.refFreeListTail = IPSEC_SAREF_NULL;
-+
-+ /* wrap the scan position once it has run off the end of the table */
-+ if(ipsec_sadb.refFreeListCont == IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES * IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_SAref_recycle: "
-+ "end of table reached, continuing at start..\n");
-+ ipsec_sadb.refFreeListCont = IPSEC_SAREF_FIRST;
-+ }
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_SAref_recycle: "
-+ "recycling, continuing from SAref=%d (0p%p), table=%d, entry=%d.\n",
-+ ipsec_sadb.refFreeListCont,
-+ (ipsec_sadb.refTable[IPsecSAref2table(ipsec_sadb.refFreeListCont)] != NULL) ? IPsecSAref2SA(ipsec_sadb.refFreeListCont) : NULL,
-+ IPsecSAref2table(ipsec_sadb.refFreeListCont),
-+ IPsecSAref2entry(ipsec_sadb.refFreeListCont));
-+
-+ /* add one additional table entry */
-+ addone = 0;
-+
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_FIRST;
-+ for(i = 0; i < IPSEC_SA_REF_FREELIST_NUM_ENTRIES; i++) {
-+ table = IPsecSAref2table(ipsec_sadb.refFreeListCont);
-+ /* NOTE(review): once addone is set, a later missing sub-table is not allocated, yet refTable[table] is still dereferenced below - confirm this cannot be NULL */
-+ if(addone == 0 && ipsec_sadb.refTable[table] == NULL) {
-+ addone = 1;
-+ error = ipsec_SArefSubTable_alloc(table);
-+ if(error) {
-+ return error;
-+ }
-+ }
-+ /* collect every unused slot from the scan position to the end of this sub-table */
-+ for(entry = IPsecSAref2entry(ipsec_sadb.refFreeListCont);
-+ entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES;
-+ entry++) {
-+ if(ipsec_sadb.refTable[table]->entry[entry] == NULL) {
-+ ipsec_sadb.refFreeList[++ipsec_sadb.refFreeListTail] = IPsecSArefBuild(table, entry);
-+ /* free list is full: remember where to resume and stop */
-+ if(ipsec_sadb.refFreeListTail == (IPSEC_SA_REF_FREELIST_NUM_ENTRIES - 1)) {
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_FIRST;
-+ ipsec_sadb.refFreeListCont = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListTail] + 1;
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_SAref_recycle: "
-+ "SArefFreeList refilled.\n");
-+ return 0;
-+ }
-+ }
-+ }
-+ /* NOTE(review): Cont advances by one reference (not to the next sub-table) and Tail is overwritten with the loop index - confirm both are intended */
-+ ipsec_sadb.refFreeListCont++;
-+ ipsec_sadb.refFreeListTail=i;
-+ }
-+
-+ if(ipsec_sadb.refFreeListTail == IPSEC_SAREF_NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_SAref_recycle: "
-+ "out of room in the SArefTable.\n");
-+
-+ return(-ENOSPC);
-+ }
-+
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_FIRST;
-+ ipsec_sadb.refFreeListCont = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListTail] + 1;
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_SAref_recycle: "
-+ "SArefFreeList partly refilled to %d of %d.\n",
-+ ipsec_sadb.refFreeListTail,
-+ IPSEC_SA_REF_FREELIST_NUM_ENTRIES);
-+ return 0;
-+}
-+
-+/*
-+ * Allocate sub-table number 'table' of the SA reference table, hook it
-+ * into the main table and set every entry to NULL.
-+ * Returns 0 on success or -ENOMEM if vmalloc() fails.
-+ */
-+int
-+ipsec_SArefSubTable_alloc(unsigned table)
-+{
-+	struct IPsecSArefSubTable *subtable;
-+	unsigned slot;
-+
-+	KLIPS_PRINT(debug_xform,
-+		    "klips_debug:ipsec_SArefSubTable_alloc: "
-+		    "allocating %lu bytes for table %u of %u.\n",
-+		    (unsigned long) (IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES * sizeof(struct ipsec_sa *)),
-+		    table,
-+		    IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES);
-+
-+	/* allocate another sub-table */
-+	subtable = vmalloc(IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES * sizeof(struct ipsec_sa *));
-+	if (!subtable) {
-+		KLIPS_PRINT(debug_xform,
-+			    "klips_debug:ipsec_SArefSubTable_alloc: "
-+			    "error allocating memory for table %u of %u!\n",
-+			    table,
-+			    IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES);
-+		return -ENOMEM;
-+	}
-+
-+	/* add this sub-table to the main table */
-+	ipsec_sadb.refTable[table] = subtable;
-+
-+	/* initialise each element to NULL */
-+	KLIPS_PRINT(debug_xform,
-+		    "klips_debug:ipsec_SArefSubTable_alloc: "
-+		    "initialising %u elements (2 ^ %u) of table %u.\n",
-+		    IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES,
-+		    IPSEC_SA_REF_SUBTABLE_IDX_WIDTH,
-+		    table);
-+	slot = 0;
-+	while (slot < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES) {
-+		subtable->entry[slot] = NULL;
-+		slot++;
-+	}
-+
-+	return 0;
-+}
-+
-+int
-+ipsec_saref_verify_slot(IPsecSAref_t ref)
-+{
-+ int ref_table=IPsecSAref2table(ref);
-+
-+ if(ipsec_sadb.refTable[ref_table] == NULL) {
-+ return ipsec_SArefSubTable_alloc(ref_table);
-+ }
-+ return 0;
-+}
-+
-+int
-+ipsec_saref_freelist_init(void)
-+{
-+ int i;
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_saref_freelist_init: "
-+ "initialising %u elements of FreeList.\n",
-+ IPSEC_SA_REF_FREELIST_NUM_ENTRIES);
-+
-+ for(i = 0; i < IPSEC_SA_REF_FREELIST_NUM_ENTRIES; i++) {
-+ ipsec_sadb.refFreeList[i] = IPSEC_SAREF_NULL;
-+ }
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_NULL;
-+ ipsec_sadb.refFreeListCont = IPSEC_SAREF_FIRST;
-+ ipsec_sadb.refFreeListTail = IPSEC_SAREF_NULL;
-+
-+ return 0;
-+}
-+
-+int
-+ipsec_sadb_init(void)
-+{
-+ int error = 0;
-+ unsigned i;
-+
-+ for(i = 0; i < SADB_HASHMOD; i++) {
-+ ipsec_sadb_hash[i] = NULL;
-+ }
-+ /* parts above are for the old style SADB hash table */
-+
-+
-+ /* initialise SA reference table */
-+
-+ /* initialise the main table */
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_init: "
-+ "initialising main table of size %u (2 ^ %u).\n",
-+ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES,
-+ IPSEC_SA_REF_MAINTABLE_IDX_WIDTH);
-+ {
-+ unsigned table;
-+ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) {
-+ ipsec_sadb.refTable[table] = NULL;
-+ }
-+ }
-+
-+ /* allocate the first sub-table */
-+ error = ipsec_SArefSubTable_alloc(0);
-+ if(error) {
-+ return error;
-+ }
-+
-+ error = ipsec_saref_freelist_init();
-+ return error;
-+}
-+
-+IPsecSAref_t
-+ipsec_SAref_alloc(int*error) /* pass in error var by pointer */
-+{
-+ IPsecSAref_t SAref;
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_SAref_alloc: "
-+ "SAref requested... head=%d, cont=%d, tail=%d, listsize=%d.\n",
-+ ipsec_sadb.refFreeListHead,
-+ ipsec_sadb.refFreeListCont,
-+ ipsec_sadb.refFreeListTail,
-+ IPSEC_SA_REF_FREELIST_NUM_ENTRIES);
-+
-+ if(ipsec_sadb.refFreeListHead == IPSEC_SAREF_NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_SAref_alloc: "
-+ "FreeList empty, recycling...\n");
-+ *error = ipsec_SAref_recycle();
-+ if(*error) {
-+ return IPSEC_SAREF_NULL;
-+ }
-+ }
-+
-+ SAref = ipsec_sadb.refFreeList[ipsec_sadb.refFreeListHead];
-+ if(SAref == IPSEC_SAREF_NULL) {
-+ KLIPS_ERROR(debug_xform,
-+ "ipsec_SAref_alloc: "
-+ "unexpected error, refFreeListHead = %d points to invalid entry.\n",
-+ ipsec_sadb.refFreeListHead);
-+ *error = -ESPIPE;
-+ return IPSEC_SAREF_NULL;
-+ }
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_SAref_alloc: "
-+ "allocating SAref=%d, table=%u, entry=%u of %u.\n",
-+ SAref,
-+ IPsecSAref2table(SAref),
-+ IPsecSAref2entry(SAref),
-+ IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES * IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES);
-+
-+ ipsec_sadb.refFreeList[ipsec_sadb.refFreeListHead] = IPSEC_SAREF_NULL;
-+ ipsec_sadb.refFreeListHead++;
-+ if(ipsec_sadb.refFreeListHead > ipsec_sadb.refFreeListTail) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_SAref_alloc: "
-+ "last FreeList entry allocated, resetting list head to empty.\n");
-+ ipsec_sadb.refFreeListHead = IPSEC_SAREF_NULL;
-+ }
-+
-+ return SAref;
-+}
-+
-+int
-+ipsec_sa_print(struct ipsec_sa *ips)
-+{
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+ printk(KERN_INFO "klips_debug: SA:");
-+ if(ips == NULL) {
-+ printk("NULL\n");
-+ return -ENOENT;
-+ }
-+ printk(" ref=%d", ips->ips_ref);
-+ printk(" refcount=%d", atomic_read(&ips->ips_refcount));
-+ if(ips->ips_hnext != NULL) {
-+ printk(" hnext=0p%p", ips->ips_hnext);
-+ }
-+ if(ips->ips_next != NULL) {
-+ printk(" next=0p%p", ips->ips_next);
-+ }
-+ sa_len = satot(&ips->ips_said, 0, sa, sizeof(sa));
-+ printk(" said=%s", sa_len ? sa : " (error)");
-+ if(ips->ips_seq) {
-+ printk(" seq=%u", ips->ips_seq);
-+ }
-+ if(ips->ips_pid) {
-+ printk(" pid=%u", ips->ips_pid);
-+ }
-+ if(ips->ips_authalg) {
-+ printk(" authalg=%u", ips->ips_authalg);
-+ }
-+ if(ips->ips_encalg) {
-+ printk(" encalg=%u", ips->ips_encalg);
-+ }
-+ printk(" XFORM=%s%s%s", IPS_XFORM_NAME(ips));
-+ if(ips->ips_replaywin) {
-+ printk(" ooowin=%u", ips->ips_replaywin);
-+ }
-+ if(ips->ips_flags) {
-+ printk(" flags=%u", ips->ips_flags);
-+ }
-+ if(ips->ips_addr_s) {
-+ char buf[SUBNETTOA_BUF];
-+ addrtoa(((struct sockaddr_in*)(ips->ips_addr_s))->sin_addr,
-+ 0, buf, sizeof(buf));
-+ printk(" src=%s", buf);
-+ }
-+ if(ips->ips_addr_d) {
-+ char buf[SUBNETTOA_BUF];
-+ addrtoa(((struct sockaddr_in*)(ips->ips_addr_s))->sin_addr,
-+ 0, buf, sizeof(buf));
-+ printk(" dst=%s", buf);
-+ }
-+ if(ips->ips_addr_p) {
-+ char buf[SUBNETTOA_BUF];
-+ addrtoa(((struct sockaddr_in*)(ips->ips_addr_p))->sin_addr,
-+ 0, buf, sizeof(buf));
-+ printk(" proxy=%s", buf);
-+ }
-+ if(ips->ips_key_bits_a) {
-+ printk(" key_bits_a=%u", ips->ips_key_bits_a);
-+ }
-+ if(ips->ips_key_bits_e) {
-+ printk(" key_bits_e=%u", ips->ips_key_bits_e);
-+ }
-+
-+ printk("\n");
-+ return 0;
-+}
-+
-+struct ipsec_sa*
-+ipsec_sa_alloc(int*error) /* pass in error var by pointer */
-+{
-+ struct ipsec_sa* ips;
-+
-+ if((ips = kmalloc(sizeof(*ips), GFP_ATOMIC) ) == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_alloc: "
-+ "memory allocation error\n");
-+ *error = -ENOMEM;
-+ return NULL;
-+ }
-+ memset((caddr_t)ips, 0, sizeof(*ips));
-+
-+ /* return with at least counter = 1 */
-+ ipsec_sa_get(ips);
-+
-+ *error = 0;
-+ return(ips);
-+}
-+
-+void
-+ipsec_sa_untern(struct ipsec_sa *ips)
-+{
-+ IPsecSAref_t ref = ips->ips_ref;
-+ int error;
-+
-+ /* verify that we are removing correct item! */
-+ error = ipsec_saref_verify_slot(ref);
-+ if(error) {
-+ return;
-+ }
-+
-+ if(IPsecSAref2SA(ref) == ips) {
-+ IPsecSAref2SA(ref) = NULL;
-+ ipsec_sa_put(ips);
-+ } else {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_untern: "
-+ "ref=%u -> %p but untern'ing %p\n", ref,
-+ IPsecSAref2SA(ref), ips);
-+ }
-+
-+}
-+
-+int
-+ipsec_sa_intern(struct ipsec_sa *ips)
-+{
-+ int error;
-+ IPsecSAref_t ref = ips->ips_ref;
-+
-+ if(ref == IPSEC_SAREF_NULL) {
-+ ref = ipsec_SAref_alloc(&error); /* pass in error return by pointer */
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_intern: "
-+ "allocated ref=%u for sa %p\n", ref, ips);
-+
-+ if(ref == IPSEC_SAREF_NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_intern: "
-+ "SAref allocation error\n");
-+ return error;
-+ }
-+
-+ ips->ips_ref = ref;
-+ }
-+
-+ error = ipsec_saref_verify_slot(ref);
-+ if(error) {
-+ return error;
-+ }
-+
-+ ipsec_sa_get(ips);
-+ /*
-+ * if there is an existing SA at this reference, then free it
-+ * note, that nsa might == ips!. That's okay, we just incremented
-+ * the reference count above.
-+ */
-+ {
-+ struct ipsec_sa *nsa = IPsecSAref2SA(ref);
-+ if(nsa) {
-+ ipsec_sa_put(nsa);
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_alloc: "
-+ "SAref[%d]=%p\n",
-+ ips->ips_ref, ips);
-+ IPsecSAref2SA(ips->ips_ref) = ips;
-+
-+ /* return OK */
-+ return 0;
-+}
-+
-+
-+struct ipsec_sa *
-+ipsec_sa_getbyid(ip_said *said)
-+{
-+ int hashval;
-+ struct ipsec_sa *ips;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+ if(said == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_getbyid: "
-+ "null pointer passed in!\n");
-+ return NULL;
-+ }
-+
-+ hashval = IPS_HASH(said);
-+
-+ sa_len = KLIPS_SATOT(debug_xform, said, 0, sa, sizeof(sa));
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_getbyid: "
-+ "linked entry in ipsec_sa table for hash=%d of SA:%s requested.\n",
-+ hashval,
-+ sa_len ? sa : " (error)");
-+
-+ if((ips = ipsec_sadb_hash[hashval]) == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_getbyid: "
-+ "no entries in ipsec_sa table for hash=%d of SA:%s.\n",
-+ hashval,
-+ sa_len ? sa : " (error)");
-+ return NULL;
-+ }
-+
-+ for (; ips; ips = ips->ips_hnext) {
-+ if ((ips->ips_said.spi == said->spi) &&
-+ (ips->ips_said.dst.u.v4.sin_addr.s_addr == said->dst.u.v4.sin_addr.s_addr) &&
-+ (ips->ips_said.proto == said->proto)) {
-+ ipsec_sa_get(ips);
-+ return ips;
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_getbyid: "
-+ "no entry in linked list for hash=%d of SA:%s.\n",
-+ hashval,
-+ sa_len ? sa : " (error)");
-+ return NULL;
-+}
-+
-+struct ipsec_sa *
-+ipsec_sa_getbyref(IPsecSAref_t ref)
-+{
-+ struct ipsec_sa *ips;
-+ struct IPsecSArefSubTable *st = ipsec_sadb.refTable[IPsecSAref2table(ref)];
-+
-+ if(st == NULL) {
-+ return NULL;
-+ }
-+
-+ ips = st->entry[IPsecSAref2entry(ref)];
-+ if(ips) {
-+ ipsec_sa_get(ips);
-+ }
-+ return ips;
-+}
-+
-+
-+void
-+__ipsec_sa_put(struct ipsec_sa *ips, const char *func, int line)
-+{
-+ if(ips == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_put: "
-+ "null pointer passed in!\n");
-+ return;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_xform) {
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ sa_len = satot(&ips->ips_said, 0, sa, sizeof(sa));
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_put: "
-+ "ipsec_sa %p SA:%s, ref:%d reference count (%d--) decremented by %s:%d.\n",
-+ ips,
-+ sa_len ? sa : " (error)",
-+ ips->ips_ref,
-+ atomic_read(&ips->ips_refcount),
-+ func, line);
-+ }
-+#endif
-+
-+ if(atomic_dec_and_test(&ips->ips_refcount)) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_put: freeing %p\n",
-+ ips);
-+ /* it was zero */
-+ ipsec_sa_wipe(ips);
-+ }
-+
-+ return;
-+}
-+
-+struct ipsec_sa *
-+__ipsec_sa_get(struct ipsec_sa *ips, const char *func, int line)
-+{
-+ if (ips == NULL)
-+ return NULL;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_xform) {
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ sa_len = satot(&ips->ips_said, 0, sa, sizeof(sa));
-+
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_get: "
-+ "ipsec_sa %p SA:%s, ref:%d reference count (%d++) incremented by %s:%d.\n",
-+ ips,
-+ sa_len ? sa : " (error)",
-+ ips->ips_ref,
-+ atomic_read(&ips->ips_refcount),
-+ func, line);
-+ }
-+#endif
-+
-+ atomic_inc(&ips->ips_refcount);
-+
-+#if 0
-+ /*
-+ * DAVIDM: if we include this code it means the SA is freed immediately
-+ * on creation and then reused ! Not sure why it is here.
-+ */
-+
-+ if(atomic_dec_and_test(&ips->ips_refcount)) {
-+ KLIPS_PRINT(debug_xform,
-+ "ipsec_sa_get: freeing %p\n",
-+ ips);
-+ /* it was zero */
-+ ipsec_sa_wipe(ips);
-+ }
-+#endif
-+
-+ return ips;
-+}
-+
-+/*
-+ The ipsec_sa table better *NOT* be locked before it is handed in, or SMP locks will happen
-+*/
-+int
-+ipsec_sa_add(struct ipsec_sa *ips)
-+{
-+ int error = 0;
-+ unsigned int hashval;
-+
-+ ips = ipsec_sa_get(ips);
-+
-+ if(ips == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_error:ipsec_sa_add: "
-+ "null pointer passed in!\n");
-+ return -ENODATA;
-+ }
-+ hashval = IPS_HASH(&ips->ips_said);
-+
-+ ipsec_sa_get(ips);
-+ spin_lock_bh(&tdb_lock);
-+
-+ ips->ips_hnext = ipsec_sadb_hash[hashval];
-+ ipsec_sadb_hash[hashval] = ips;
-+
-+ spin_unlock_bh(&tdb_lock);
-+
-+ return error;
-+}
-+
-+/*
-+ * remove it from the hash chain, decrementing hash count
-+ */
-+void ipsec_sa_rm(struct ipsec_sa *ips)
-+{
-+ unsigned int hashval;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+
-+ if(ips == NULL) return;
-+
-+
-+ hashval = IPS_HASH(&ips->ips_said);
-+
-+ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa));
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "unhashing SA:%s (ref=%u), hashval=%d.\n",
-+ sa_len ? sa : " (error)",
-+ ips->ips_ref,
-+ hashval);
-+
-+ if(ipsec_sadb_hash[hashval] == NULL) {
-+ return;
-+ }
-+
-+ if (ips == ipsec_sadb_hash[hashval]) {
-+ ipsec_sadb_hash[hashval] = ipsec_sadb_hash[hashval]->ips_hnext;
-+ ips->ips_hnext = NULL;
-+ ipsec_sa_put(ips);
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "successfully unhashed first ipsec_sa in chain.\n");
-+ return;
-+ } else {
-+ struct ipsec_sa *ipstp;
-+
-+ for (ipstp = ipsec_sadb_hash[hashval];
-+ ipstp;
-+ ipstp = ipstp->ips_hnext) {
-+ if (ipstp->ips_hnext == ips) {
-+ ipstp->ips_hnext = ips->ips_hnext;
-+ ips->ips_hnext = NULL;
-+ ipsec_sa_put(ips);
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "successfully unhashed link in ipsec_sa chain.\n");
-+ return;
-+ }
-+ }
-+ }
-+}
-+
-+
-+#if 0
-+/*
-+ * The ipsec_sa table better be locked before it is handed in,
-+ * or races might happen.
-+ *
-+ * this routine assumes the SA has a refcount==0, and we free it.
-+ * we also assume that the pointers are already cleaned up.
-+ */
-+static int
-+ipsec_sa_del(struct ipsec_sa *ips)
-+{
-+ unsigned int hashval;
-+ struct ipsec_sa *ipstp;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+ if(ips == NULL) {
-+ KLIPS_ERROR(debug_xform,
-+ "klips_error:ipsec_sa_del: "
-+ "null pointer passed in!\n");
-+ return -ENODATA;
-+ }
-+
-+ if(ips->ips_next) {
-+ struct ipsec_sa *in = ips->ips_next;
-+
-+ ips->ips_next=NULL;
-+ ipsec_sa_put(in);
-+ }
-+
-+ sa_len = KLIPS_SATOT(debug_xform, &ips->ips_said, 0, sa, sizeof(sa));
-+ hashval = IPS_HASH(&ips->ips_said);
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "deleting SA:%s (ref=%u), hashval=%d.\n",
-+ sa_len ? sa : " (error)",
-+ ips->ips_ref,
-+ hashval);
-+
-+ if(ipsec_sadb_hash[hashval] == NULL) {
-+ /* if this is NULL, then we can be sure that the SA was never
-+ * added to the SADB, so we just free it.
-+ */
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "no entries in ipsec_sa table for hash=%d (ref=%u) of SA:%s.\n",
-+ hashval,
-+ ips->ips_ref,
-+ sa_len ? sa : " (error)");
-+ return -ENOENT;
-+ }
-+
-+ if (ips == ipsec_sadb_hash[hashval]) {
-+ ipsec_sadb_hash[hashval] = ipsec_sadb_hash[hashval]->ips_hnext;
-+ ips->ips_hnext = NULL;
-+
-+ ipsec_sa_put(ips);
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "successfully deleted first ipsec_sa in chain.\n");
-+ return 0;
-+ } else {
-+ for (ipstp = ipsec_sadb_hash[hashval];
-+ ipstp;
-+ ipstp = ipstp->ips_hnext) {
-+ if (ipstp->ips_hnext == ips) {
-+ ipstp->ips_hnext = ips->ips_hnext;
-+ ips->ips_hnext = NULL;
-+ ipsec_sa_put(ips);
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "successfully deleted link in ipsec_sa chain.\n");
-+ return 0;
-+ }
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_del: "
-+ "no entries in linked list for hash=%d of SA:%s.\n",
-+ hashval,
-+ sa_len ? sa : " (error)");
-+ return -ENOENT;
-+}
-+#endif
-+
-+int
-+ipsec_sadb_cleanup(__u8 proto)
-+{
-+ unsigned i;
-+ int error = 0;
-+ struct ipsec_sa *ips;
-+ //struct ipsec_sa *ipsnext, **ipsprev;
-+ //char sa[SATOT_BUF];
-+ //size_t sa_len;
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_cleanup: "
-+ "cleaning up proto=%d.\n",
-+ proto);
-+
-+ spin_lock_bh(&tdb_lock);
-+
-+ for (i = 0; i < SADB_HASHMOD; i++) {
-+ ips = ipsec_sadb_hash[i];
-+
-+ while(ips) {
-+ ipsec_sadb_hash[i]=ips->ips_hnext;
-+ ips->ips_hnext=NULL;
-+ ipsec_sa_put(ips);
-+
-+ ips = ipsec_sadb_hash[i];
-+ }
-+ }
-+
-+//errlab:
-+
-+ spin_unlock_bh(&tdb_lock);
-+
-+
-+#if IPSEC_SA_REF_CODE
-+ /* clean up SA reference table */
-+
-+ /* go through the ref table and clean out all the SAs */
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_cleanup: "
-+ "removing SAref entries and tables.");
-+ {
-+ unsigned table, entry;
-+ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_cleanup: "
-+ "cleaning SAref table=%u.\n",
-+ table);
-+ if(ipsec_sadb.refTable[table] == NULL) {
-+ printk("\n");
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_cleanup: "
-+ "cleaned %u used refTables.\n",
-+ table);
-+ break;
-+ }
-+ for(entry = 0; entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; entry++) {
-+ if(ipsec_sadb.refTable[table]->entry[entry] != NULL) {
-+ struct ipsec_sa *sa1 = ipsec_sadb.refTable[table]->entry[entry];
-+ ipsec_sa_put(sa1);
-+ ipsec_sadb.refTable[table]->entry[entry] = NULL;
-+ }
-+ }
-+ }
-+ }
-+#endif /* IPSEC_SA_REF_CODE */
-+
-+ return(error);
-+}
-+
-+int
-+ipsec_sadb_free(void)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_free: "
-+ "freeing SArefTable memory.\n");
-+
-+ /* clean up SA reference table */
-+
-+ /* go through the ref table and clean out all the SAs if any are
-+ left and free table memory */
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_free: "
-+ "removing SAref entries and tables.\n");
-+ {
-+ unsigned table, entry;
-+ for(table = 0; table < IPSEC_SA_REF_MAINTABLE_NUM_ENTRIES; table++) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_free: "
-+ "removing SAref table=%u.\n",
-+ table);
-+ if(ipsec_sadb.refTable[table] == NULL) {
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sadb_free: "
-+ "removed %u used refTables.\n",
-+ table);
-+ break;
-+ }
-+ for(entry = 0; entry < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES; entry++) {
-+ if(ipsec_sadb.refTable[table]->entry[entry] != NULL) {
-+ struct ipsec_sa *sa1 = ipsec_sadb.refTable[table]->entry[entry];
-+
-+ BUG_ON(atomic_read(&sa1->ips_refcount) == 1);
-+ ipsec_sa_put(sa1);
-+ ipsec_sadb.refTable[table]->entry[entry] = NULL;
-+ }
-+ }
-+ vfree(ipsec_sadb.refTable[table]);
-+ ipsec_sadb.refTable[table] = NULL;
-+ }
-+ }
-+
-+ return(error);
-+}
-+
-+int
-+ipsec_sa_wipe(struct ipsec_sa *ips)
-+{
-+ if(ips == NULL) {
-+ return -ENODATA;
-+ }
-+
-+#if IPSEC_SA_REF_CODE
-+ /* remove me from the SArefTable */
-+ if(debug_xform)
-+ {
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ struct IPsecSArefSubTable *subtable = NULL;
-+
-+ if(IPsecSAref2table(IPsecSA2SAref(ips))<IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES
-+ && ipsec_sadb.refTable != NULL) {
-+ subtable = ipsec_sadb.refTable[IPsecSAref2table(IPsecSA2SAref(ips))];
-+ }
-+
-+ sa_len = satot(&ips->ips_said, 0, sa, sizeof(sa));
-+ KLIPS_PRINT(debug_xform,
-+ "klips_debug:ipsec_sa_wipe: "
-+ "removing SA=%s(0p%p), SAref=%d, table=%d(0p%p), entry=%d from the refTable.\n",
-+ sa_len ? sa : " (error)",
-+ ips,
-+ ips->ips_ref,
-+ IPsecSAref2table(IPsecSA2SAref(ips)),
-+ subtable,
-+ subtable ? IPsecSAref2entry(IPsecSA2SAref(ips)) : 0);
-+ }
-+
-+ if(ips->ips_ref != IPSEC_SAREF_NULL) {
-+ struct IPsecSArefSubTable *subtable = NULL;
-+ int ref_table=IPsecSAref2table(IPsecSA2SAref(ips));
-+ int ref_entry=IPsecSAref2entry(IPsecSA2SAref(ips));
-+
-+ if(ref_table < IPSEC_SA_REF_SUBTABLE_NUM_ENTRIES) {
-+ subtable = ipsec_sadb.refTable[ref_table];
-+ if(subtable!=NULL && subtable->entry[ref_entry] == ips) {
-+
-+ subtable->entry[ref_entry] = NULL;
-+ }
-+ }
-+ ips->ips_ref = IPSEC_SAREF_NULL;
-+ }
-+#endif /* IPSEC_SA_REF_CODE */
-+
-+ /* paranoid clean up */
-+ if(ips->ips_addr_s != NULL) {
-+ memset((caddr_t)(ips->ips_addr_s), 0, ips->ips_addr_s_size);
-+ kfree(ips->ips_addr_s);
-+ }
-+ ips->ips_addr_s = NULL;
-+
-+ if(ips->ips_addr_d != NULL) {
-+ memset((caddr_t)(ips->ips_addr_d), 0, ips->ips_addr_d_size);
-+ kfree(ips->ips_addr_d);
-+ }
-+ ips->ips_addr_d = NULL;
-+
-+ if(ips->ips_addr_p != NULL) {
-+ memset((caddr_t)(ips->ips_addr_p), 0, ips->ips_addr_p_size);
-+ kfree(ips->ips_addr_p);
-+ }
-+ ips->ips_addr_p = NULL;
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ if(ips->ips_natt_oa) {
-+ memset((caddr_t)(ips->ips_natt_oa), 0, ips->ips_natt_oa_size);
-+ kfree(ips->ips_natt_oa);
-+ }
-+ ips->ips_natt_oa = NULL;
-+#endif
-+
-+ if(ips->ips_key_a != NULL) {
-+ memset((caddr_t)(ips->ips_key_a), 0, ips->ips_key_a_size);
-+ kfree(ips->ips_key_a);
-+ }
-+ ips->ips_key_a = NULL;
-+
-+ if(ips->ips_key_e != NULL) {
-+#ifdef CONFIG_KLIPS_ALG
-+ if (ips->ips_alg_enc &&
-+ ips->ips_alg_enc->ixt_e_destroy_key)
-+ {
-+ ips->ips_alg_enc->ixt_e_destroy_key(ips->ips_alg_enc,
-+ ips->ips_key_e);
-+ } else
-+#endif
-+ {
-+ memset((caddr_t)(ips->ips_key_e), 0, ips->ips_key_e_size);
-+ kfree(ips->ips_key_e);
-+ }
-+ }
-+ ips->ips_key_e = NULL;
-+
-+ if(ips->ips_iv != NULL) {
-+ memset((caddr_t)(ips->ips_iv), 0, ips->ips_iv_size);
-+ kfree(ips->ips_iv);
-+ }
-+ ips->ips_iv = NULL;
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ips->ocf_in_use)
-+ ipsec_ocf_sa_free(ips);
-+#endif
-+
-+ if(ips->ips_ident_s.data != NULL) {
-+ memset((caddr_t)(ips->ips_ident_s.data),
-+ 0,
-+ ips->ips_ident_s.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident));
-+ kfree(ips->ips_ident_s.data);
-+ }
-+ ips->ips_ident_s.data = NULL;
-+
-+ if(ips->ips_ident_d.data != NULL) {
-+ memset((caddr_t)(ips->ips_ident_d.data),
-+ 0,
-+ ips->ips_ident_d.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident));
-+ kfree(ips->ips_ident_d.data);
-+ }
-+ ips->ips_ident_d.data = NULL;
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ if (ips->ips_alg_enc||ips->ips_alg_auth) {
-+ ipsec_alg_sa_wipe(ips);
-+ }
-+#endif
-+
-+ BUG_ON(atomic_read(&ips->ips_refcount) != 0);
-+
-+ memset((caddr_t)ips, 0, sizeof(*ips));
-+ kfree(ips);
-+ ips = NULL;
-+
-+ return 0;
-+}
-+
-+extern int sysctl_ipsec_debug_verbose;
-+
-+int ipsec_sa_init(struct ipsec_sa *ipsp)
-+{
-+ int error = 0;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ char ipaddr_txt[ADDRTOA_BUF];
-+ char ipaddr2_txt[ADDRTOA_BUF];
-+#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || defined (CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+ unsigned char kb[AHMD596_BLKLEN];
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+ struct ipsec_alg_enc *ixt_e = NULL;
-+ struct ipsec_alg_auth *ixt_a = NULL;
-+ int i;
-+#endif
-+
-+ if(ipsp == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "ipsp is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ sa_len = KLIPS_SATOT(debug_pfkey, &ipsp->ips_said, 0, sa, sizeof(sa));
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "(pfkey defined) called for SA:%s\n",
-+ sa_len ? sa : " (error)");
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "calling init routine of %s%s%s\n",
-+ IPS_XFORM_NAME(ipsp));
-+
-+ switch(ipsp->ips_said.proto) {
-+#ifdef CONFIG_KLIPS_IPIP
-+ case IPPROTO_IPIP: {
-+ ipsp->ips_xformfuncs = ipip_xform_funcs;
-+ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_s))->sin_addr,
-+ 0,
-+ ipaddr_txt, sizeof(ipaddr_txt));
-+ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr,
-+ 0,
-+ ipaddr2_txt, sizeof(ipaddr_txt));
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "(pfkey defined) IPIP ipsec_sa set for %s->%s.\n",
-+ ipaddr_txt,
-+ ipaddr2_txt);
-+ }
-+ break;
-+#endif /* !CONFIG_KLIPS_IPIP */
-+
-+#ifdef CONFIG_KLIPS_AH
-+ case IPPROTO_AH:
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, 0))
-+ break;
-+#endif
-+
-+ ipsp->ips_xformfuncs = ah_xform_funcs;
-+ switch(ipsp->ips_authalg) {
-+# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5: {
-+ unsigned char *akp;
-+ unsigned int aks;
-+ MD5_CTX *ictx;
-+ MD5_CTX *octx;
-+
-+ if(ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "incorrect key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/,
-+ ipsp->ips_key_bits_a, AHMD596_KLEN * 8);
-+ SENDERR(EINVAL);
-+ }
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "hmac md5-96 key is 0x%08x %08x %08x %08x\n",
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+3)));
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+
-+ ipsp->ips_auth_bits = AHMD596_ALEN * 8;
-+
-+ /* save the pointer to the key material */
-+ akp = ipsp->ips_key_a;
-+ aks = ipsp->ips_key_a_size;
-+
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "allocating %lu bytes for md5_ctx.\n",
-+ (unsigned long) sizeof(struct md5_ctx));
-+ if((ipsp->ips_key_a = (caddr_t)
-+ kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) {
-+ ipsp->ips_key_a = akp;
-+ SENDERR(ENOMEM);
-+ }
-+ ipsp->ips_key_a_size = sizeof(struct md5_ctx);
-+
-+ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) {
-+ kb[i] = akp[i] ^ HMAC_IPAD;
-+ }
-+ for (; i < AHMD596_BLKLEN; i++) {
-+ kb[i] = HMAC_IPAD;
-+ }
-+
-+ ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx);
-+ osMD5Init(ictx);
-+ osMD5Update(ictx, kb, AHMD596_BLKLEN);
-+
-+ for (i = 0; i < AHMD596_BLKLEN; i++) {
-+ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);
-+ }
-+
-+ octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx);
-+ osMD5Init(octx);
-+ osMD5Update(octx, kb, AHMD596_BLKLEN);
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
-+ ((__u32*)ictx)[0],
-+ ((__u32*)ictx)[1],
-+ ((__u32*)ictx)[2],
-+ ((__u32*)ictx)[3],
-+ ((__u32*)octx)[0],
-+ ((__u32*)octx)[1],
-+ ((__u32*)octx)[2],
-+ ((__u32*)octx)[3] );
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+
-+ /* zero key buffer -- paranoid */
-+ memset(akp, 0, aks);
-+ kfree(akp);
-+ }
-+ break;
-+# endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA: {
-+ unsigned char *akp;
-+ unsigned int aks;
-+ SHA1_CTX *ictx;
-+ SHA1_CTX *octx;
-+
-+ if(ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "incorrect key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/,
-+ ipsp->ips_key_bits_a, AHSHA196_KLEN * 8);
-+ SENDERR(EINVAL);
-+ }
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "hmac sha1-96 key is 0x%08x %08x %08x %08x\n",
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+3)));
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+
-+ ipsp->ips_auth_bits = AHSHA196_ALEN * 8;
-+
-+ /* save the pointer to the key material */
-+ akp = ipsp->ips_key_a;
-+ aks = ipsp->ips_key_a_size;
-+
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "allocating %lu bytes for sha1_ctx.\n",
-+ (unsigned long) sizeof(struct sha1_ctx));
-+ if((ipsp->ips_key_a = (caddr_t)
-+ kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) {
-+ ipsp->ips_key_a = akp;
-+ SENDERR(ENOMEM);
-+ }
-+ ipsp->ips_key_a_size = sizeof(struct sha1_ctx);
-+
-+ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) {
-+ kb[i] = akp[i] ^ HMAC_IPAD;
-+ }
-+ for (; i < AHMD596_BLKLEN; i++) {
-+ kb[i] = HMAC_IPAD;
-+ }
-+
-+ ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx);
-+ SHA1Init(ictx);
-+ SHA1Update(ictx, kb, AHSHA196_BLKLEN);
-+
-+ for (i = 0; i < AHSHA196_BLKLEN; i++) {
-+ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);
-+ }
-+
-+ octx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->octx);
-+ SHA1Init(octx);
-+ SHA1Update(octx, kb, AHSHA196_BLKLEN);
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
-+ ((__u32*)ictx)[0],
-+ ((__u32*)ictx)[1],
-+ ((__u32*)ictx)[2],
-+ ((__u32*)ictx)[3],
-+ ((__u32*)octx)[0],
-+ ((__u32*)octx)[1],
-+ ((__u32*)octx)[2],
-+ ((__u32*)octx)[3] );
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+ /* zero key buffer -- paranoid */
-+ memset(akp, 0, aks);
-+ kfree(akp);
-+ }
-+ break;
-+# endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "authalg=%d support not available in the kernel",
-+ ipsp->ips_authalg);
-+ SENDERR(EINVAL);
-+ }
-+ break;
-+#endif /* CONFIG_KLIPS_AH */
-+
-+#ifdef CONFIG_KLIPS_ESP
-+ case IPPROTO_ESP:
-+ ipsp->ips_xformfuncs = esp_xform_funcs;
-+ {
-+#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || defined (CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+ unsigned char *akp;
-+ unsigned int aks;
-+#endif
-+
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, ipsp->ips_encalg))
-+ break;
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ ipsec_alg_sa_init(ipsp);
-+ ixt_e=ipsp->ips_alg_enc;
-+
-+ if (ixt_e == NULL) {
-+ if(printk_ratelimit()) {
-+ printk(KERN_ERR
-+ "ipsec_sa_init: "
-+ "encalg=%d support not available in the kernel",
-+ ipsp->ips_encalg);
-+ }
-+ SENDERR(ENOENT);
-+ }
-+
-+ ipsp->ips_iv_size = ixt_e->ixt_common.ixt_support.ias_ivlen/8;
-+
-+ /* Create IV */
-+ if (ipsp->ips_iv_size) {
-+ if((ipsp->ips_iv = (caddr_t)
-+ kmalloc(ipsp->ips_iv_size, GFP_ATOMIC)) == NULL) {
-+ SENDERR(ENOMEM);
-+ }
-+ prng_bytes(&ipsec_prng,
-+ (char *)ipsp->ips_iv,
-+ ipsp->ips_iv_size);
-+ ipsp->ips_iv_bits = ipsp->ips_iv_size * 8;
-+ }
-+
-+ if ((error=ipsec_alg_enc_key_create(ipsp)) < 0)
-+ SENDERR(-error);
-+
-+ if ((ixt_a=ipsp->ips_alg_auth)) {
-+ if ((error=ipsec_alg_auth_key_create(ipsp)) < 0)
-+ SENDERR(-error);
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+
-+ switch(ipsp->ips_authalg) {
-+# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5: {
-+ MD5_CTX *ictx;
-+ MD5_CTX *octx;
-+
-+ if(ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "incorrect authorisation key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/,
-+ ipsp->ips_key_bits_a,
-+ AHMD596_KLEN * 8);
-+ SENDERR(EINVAL);
-+ }
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "hmac md5-96 key is 0x%08x %08x %08x %08x\n",
-+ ntohl(*(((__u32 *)(ipsp->ips_key_a))+0)),
-+ ntohl(*(((__u32 *)(ipsp->ips_key_a))+1)),
-+ ntohl(*(((__u32 *)(ipsp->ips_key_a))+2)),
-+ ntohl(*(((__u32 *)(ipsp->ips_key_a))+3)));
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+ ipsp->ips_auth_bits = AHMD596_ALEN * 8;
-+
-+ /* save the pointer to the key material */
-+ akp = ipsp->ips_key_a;
-+ aks = ipsp->ips_key_a_size;
-+
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "allocating %lu bytes for md5_ctx.\n",
-+ (unsigned long) sizeof(struct md5_ctx));
-+ if((ipsp->ips_key_a = (caddr_t)
-+ kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) {
-+ ipsp->ips_key_a = akp;
-+ SENDERR(ENOMEM);
-+ }
-+ ipsp->ips_key_a_size = sizeof(struct md5_ctx);
-+
-+ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) {
-+ kb[i] = akp[i] ^ HMAC_IPAD;
-+ }
-+ for (; i < AHMD596_BLKLEN; i++) {
-+ kb[i] = HMAC_IPAD;
-+ }
-+
-+ ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx);
-+ osMD5Init(ictx);
-+ osMD5Update(ictx, kb, AHMD596_BLKLEN);
-+
-+ for (i = 0; i < AHMD596_BLKLEN; i++) {
-+ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);
-+ }
-+
-+ octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx);
-+ osMD5Init(octx);
-+ osMD5Update(octx, kb, AHMD596_BLKLEN);
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
-+ ((__u32*)ictx)[0],
-+ ((__u32*)ictx)[1],
-+ ((__u32*)ictx)[2],
-+ ((__u32*)ictx)[3],
-+ ((__u32*)octx)[0],
-+ ((__u32*)octx)[1],
-+ ((__u32*)octx)[2],
-+ ((__u32*)octx)[3] );
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+ /* paranoid */
-+ memset(akp, 0, aks);
-+ kfree(akp);
-+ break;
-+ }
-+# endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA: {
-+ SHA1_CTX *ictx;
-+ SHA1_CTX *octx;
-+
-+ if(ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "incorrect authorisation key size: %d bits -- must be %d bits\n"/*octets (bytes)\n"*/,
-+ ipsp->ips_key_bits_a,
-+ AHSHA196_KLEN * 8);
-+ SENDERR(EINVAL);
-+ }
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "hmac sha1-96 key is 0x%08x %08x %08x %08x\n",
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+0)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+1)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+2)),
-+ ntohl(*(((__u32 *)ipsp->ips_key_a)+3)));
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+ ipsp->ips_auth_bits = AHSHA196_ALEN * 8;
-+
-+ /* save the pointer to the key material */
-+ akp = ipsp->ips_key_a;
-+ aks = ipsp->ips_key_a_size;
-+
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "allocating %lu bytes for sha1_ctx.\n",
-+ (unsigned long) sizeof(struct sha1_ctx));
-+ if((ipsp->ips_key_a = (caddr_t)
-+ kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) {
-+ ipsp->ips_key_a = akp;
-+ SENDERR(ENOMEM);
-+ }
-+ ipsp->ips_key_a_size = sizeof(struct sha1_ctx);
-+
-+ for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) {
-+ kb[i] = akp[i] ^ HMAC_IPAD;
-+ }
-+ for (; i < AHMD596_BLKLEN; i++) {
-+ kb[i] = HMAC_IPAD;
-+ }
-+
-+ ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx);
-+ SHA1Init(ictx);
-+ SHA1Update(ictx, kb, AHSHA196_BLKLEN);
-+
-+ for (i = 0; i < AHSHA196_BLKLEN; i++) {
-+ kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);
-+ }
-+
-+ octx = &((struct sha1_ctx*)(ipsp->ips_key_a))->octx;
-+ SHA1Init(octx);
-+ SHA1Update(octx, kb, AHSHA196_BLKLEN);
-+
-+# if KLIPS_DIVULGE_HMAC_KEY
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "ipsec_sa_init: "
-+ "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
-+ ((__u32*)ictx)[0],
-+ ((__u32*)ictx)[1],
-+ ((__u32*)ictx)[2],
-+ ((__u32*)ictx)[3],
-+ ((__u32*)octx)[0],
-+ ((__u32*)octx)[1],
-+ ((__u32*)octx)[2],
-+ ((__u32*)octx)[3] );
-+# endif /* KLIPS_DIVULGE_HMAC_KEY */
-+ memset(akp, 0, aks);
-+ kfree(akp);
-+ break;
-+ }
-+# endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ case AH_NONE:
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "ipsec_sa_init: "
-+ "authalg=%d support not available in the kernel.\n",
-+ ipsp->ips_authalg);
-+ SENDERR(EINVAL);
-+ }
-+ }
-+ break;
-+#endif /* !CONFIG_KLIPS_ESP */
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ case IPPROTO_COMP:
-+ ipsp->ips_xformfuncs = ipcomp_xform_funcs;
-+ ipsp->ips_comp_adapt_tries = 0;
-+ ipsp->ips_comp_adapt_skip = 0;
-+ ipsp->ips_comp_ratio_cbytes = 0;
-+ ipsp->ips_comp_ratio_dbytes = 0;
-+ break;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ default:
-+ printk(KERN_ERR "KLIPS sa initialization: "
-+ "proto=%d unknown.\n",
-+ ipsp->ips_said.proto);
-+ SENDERR(EINVAL);
-+ }
-+
-+ errlab:
-+ return(error);
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_sha1.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,177 @@
-+/*
-+ * RCSID $Id: ipsec_sha1.c,v 1.9 2004/04/06 02:49:26 mcr Exp $
-+ */
-+
-+/*
-+ * The rest of the code is derived from sha1.c by Steve Reid, which is
-+ * public domain.
-+ * Minor cosmetic changes to accomodate it in the Linux kernel by ji.
-+ */
-+
-+#include <asm/byteorder.h>
-+#include <linux/string.h>
-+
-+#include "openswan/ipsec_sha1.h"
-+
-+#if defined(rol)
-+#undef rol
-+#endif
-+
-+#define SHA1HANDSOFF
-+
-+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
-+
-+/* blk0() and blk() perform the initial expand. */
-+/* I got the idea of expanding during the round function from SSLeay */
-+#ifdef __LITTLE_ENDIAN
-+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
-+ |(rol(block->l[i],8)&0x00FF00FF))
-+#else
-+#define blk0(i) block->l[i]
-+#endif
-+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
-+ ^block->l[(i+2)&15]^block->l[i&15],1))
-+
-+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
-+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
-+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
-+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
-+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
-+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
-+
-+
-+/* Hash a single 512-bit block. This is the core of the algorithm. */
-+
-+void SHA1Transform(__u32 state[5], __u8 buffer[64])
-+{
-+__u32 a, b, c, d, e;
-+typedef union {
-+ unsigned char c[64];
-+ __u32 l[16];
-+} CHAR64LONG16;
-+CHAR64LONG16* block;
-+#ifdef SHA1HANDSOFF
-+static unsigned char workspace[64];
-+ block = (CHAR64LONG16*)workspace;
-+ memcpy(block, buffer, 64);
-+#else
-+ block = (CHAR64LONG16*)buffer;
-+#endif
-+ /* Copy context->state[] to working vars */
-+ a = state[0];
-+ b = state[1];
-+ c = state[2];
-+ d = state[3];
-+ e = state[4];
-+ /* 4 rounds of 20 operations each. Loop unrolled. */
-+ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
-+ R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
-+ R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
-+ R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
-+ R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
-+ R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
-+ R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
-+ R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
-+ R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
-+ R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
-+ R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
-+ R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
-+ R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
-+ R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
-+ R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
-+ R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
-+ R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
-+ R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
-+ R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
-+ R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
-+ /* Add the working vars back into context.state[] */
-+ state[0] += a;
-+ state[1] += b;
-+ state[2] += c;
-+ state[3] += d;
-+ state[4] += e;
-+ /* Wipe variables */
-+ a = b = c = d = e = 0;
-+}
-+
-+
-+/* SHA1Init - Initialize new context */
-+
-+void SHA1Init(void *vcontext)
-+{
-+ SHA1_CTX* context = vcontext;
-+
-+ /* SHA1 initialization constants */
-+ context->state[0] = 0x67452301;
-+ context->state[1] = 0xEFCDAB89;
-+ context->state[2] = 0x98BADCFE;
-+ context->state[3] = 0x10325476;
-+ context->state[4] = 0xC3D2E1F0;
-+ context->count[0] = context->count[1] = 0;
-+}
-+
-+
-+/* Run your data through this. */
-+
-+void SHA1Update(void *vcontext, unsigned char* data, __u32 len)
-+{
-+ SHA1_CTX* context = vcontext;
-+ __u32 i, j;
-+
-+ j = context->count[0];
-+ if ((context->count[0] += len << 3) < j)
-+ context->count[1]++;
-+ context->count[1] += (len>>29);
-+ j = (j >> 3) & 63;
-+ if ((j + len) > 63) {
-+ memcpy(&context->buffer[j], data, (i = 64-j));
-+ SHA1Transform(context->state, context->buffer);
-+ for ( ; i + 63 < len; i += 64) {
-+ SHA1Transform(context->state, &data[i]);
-+ }
-+ j = 0;
-+ }
-+ else i = 0;
-+ memcpy(&context->buffer[j], &data[i], len - i);
-+}
-+
-+
-+/* Add padding and return the message digest. */
-+
-+void SHA1Final(unsigned char digest[20], void *vcontext)
-+{
-+ __u32 i, j;
-+ unsigned char finalcount[8];
-+ SHA1_CTX* context = vcontext;
-+
-+ for (i = 0; i < 8; i++) {
-+ finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
-+ >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
-+ }
-+ SHA1Update(context, (unsigned char *)"\200", 1);
-+ while ((context->count[0] & 504) != 448) {
-+ SHA1Update(context, (unsigned char *)"\0", 1);
-+ }
-+ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
-+ for (i = 0; i < 20; i++) {
-+ digest[i] = (unsigned char)
-+ ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
-+ }
-+ /* Wipe variables */
-+ i = j = 0;
-+ memset(context->buffer, 0, 64);
-+ memset(context->state, 0, 20);
-+ memset(context->count, 0, 8);
-+ memset(&finalcount, 0, 8);
-+#ifdef SHA1HANDSOFF /* make SHA1Transform overwrite its own static vars */
-+ SHA1Transform(context->state, context->buffer);
-+#endif
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_snprintf.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,121 @@
-+/*
-+ * @(#) ipsec_snprintf() function
-+ *
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ * 2001 Michael Richardson <mcr@freeswan.org>
-+ * Copyright (C) 2005 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * Split out from ipsec_proc.c.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_kversion.h"
-+#include "openswan/ipsec_param.h"
-+
-+#include <net/ip.h>
-+
-+#include "openswan/radij.h"
-+
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_stats.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_xmit.h"
-+
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_kern24.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include "openswan/ipsec_proto.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+/* ipsec_snprintf: like snprintf except
-+ * - size is signed and a negative value is treated as if it were 0
-+ * - the returned result is never negative --
-+ * an error generates a "?" or null output (depending on space).
-+ * (Our callers are too lazy to check for an error return.)
-+ *
-+ * @param buf String buffer
-+ * @param size Size of the string
-+ * @param fmt printf string
-+ * @param ... Variables to be displayed in fmt
-+ * @return int Return code
-+ */
-+int ipsec_snprintf(char *buf, ssize_t size, const char *fmt, ...)
-+{
-+ va_list args;
-+ int i;
-+ size_t possize = size < 0? 0 : size;
-+ va_start(args, fmt);
-+ i = vsnprintf(buf,possize,fmt,args);
-+ va_end(args);
-+ if (i < 0) {
-+ /* create empty output in place of error */
-+ i = 0;
-+ if (size > 0) {
-+ *buf = '\0';
-+ }
-+ }
-+ return i;
-+}
-+
-+
-+void ipsec_dmp_block(char *s, caddr_t bb, int len)
-+{
-+ int i;
-+ unsigned char *b = bb;
-+
-+ printk(KERN_INFO "klips_dmp: "
-+ "at %s, len=%d:\n", s, len);
-+
-+ for(i = 0; i < len; i++ /*, c++*/) {
-+ if(!(i % 16)) {
-+ printk(KERN_INFO
-+ "klips_debug: @%03x:",
-+ i);
-+ }
-+ printk(" %02x", b[i]);
-+ if(!((i + 1) % 16)) {
-+ printk("\n");
-+ }
-+ }
-+ if(i % 16) {
-+ printk("\n");
-+ }
-+}
-+
-+/*
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_tunnel.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,2004 @@
-+/*
-+ * IPSEC Tunneling code. Heavily based on drivers/net/new_tunnel.c
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Richard Guy Briggs.
-+ *
-+ * OCF/receive state machine written by
-+ * David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2004-2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif /* for CONFIG_IP_FORWARD */
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <linux/skbuff.h>
-+
-+#include <linux/netdevice.h> /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <net/arp.h>
-+#include <linux/skbuff.h>
-+
-+#include <openswan.h>
-+
-+#ifdef NET_21
-+# include <linux/in6.h>
-+# define IS_MYADDR RTN_LOCAL
-+# include <net/dst.h>
-+# undef dev_kfree_skb
-+# define dev_kfree_skb(a,b) kfree_skb(a)
-+# define PHYSDEV_TYPE
-+#endif /* NET_21 */
-+
-+#ifndef NETDEV_TX_BUSY
-+# ifdef NETDEV_XMIT_CN
-+# define NETDEV_TX_BUSY NETDEV_XMIT_CN
-+# else
-+# define NETDEV_TX_BUSY 1
-+# endif
-+#endif
-+
-+#include <net/icmp.h> /* icmp_send() */
-+#include <net/ip.h>
-+#include <net/arp.h>
-+#ifdef NETDEV_23
-+# include <linux/netfilter_ipv4.h>
-+#endif /* NETDEV_23 */
-+
-+#include <linux/if_arp.h>
-+#include <net/arp.h>
-+
-+#include "openswan/ipsec_kversion.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_eroute.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_xmit.h"
-+#include "openswan/ipsec_ipe4.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_kern24.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+#include <linux/udp.h>
-+#endif
-+
-+static __u32 zeroes[64];
-+
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_open(struct net_device *dev)
-+{
-+ struct ipsecpriv *prv = dev->priv;
-+
-+ /*
-+ * Can't open until attached.
-+ */
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_tunnel_open: "
-+ "dev = %s, prv->dev = %s\n",
-+ dev->name, prv->dev?prv->dev->name:"NONE");
-+
-+ if (prv->dev == NULL)
-+ return -ENODEV;
-+
-+ KLIPS_INC_USE;
-+ return 0;
-+}
-+
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_close(struct net_device *dev)
-+{
-+ KLIPS_DEC_USE;
-+ return 0;
-+}
-+
-+static inline int ipsec_tunnel_xmit2(struct sk_buff *skb)
-+{
-+
-+#ifdef NETDEV_25 /* 2.6 kernels */
-+ return dst_output(skb);
-+#else
-+ return ip_send(skb);
-+#endif
-+}
-+
-+enum ipsec_xmit_value
-+ipsec_tunnel_strip_hard_header(struct ipsec_xmit_state *ixs)
-+{
-+ /* ixs->physdev->hard_header_len is unreliable and should not be used */
-+ ixs->hard_header_len = (unsigned char *)(ixs->iph) - ixs->skb->data;
-+
-+ if(ixs->hard_header_len < 0) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_error:ipsec_xmit_strip_hard_header: "
-+ "Negative hard_header_len (%d)?!\n", ixs->hard_header_len);
-+ ixs->stats->tx_dropped++;
-+ return IPSEC_XMIT_BADHHLEN;
-+ }
-+
-+ /* while ixs->physdev->hard_header_len is unreliable and
-+ * should not be trusted, it accurate and required for ATM, GRE and
-+ * some other interfaces to work. Thanks to Willy Tarreau
-+ * <willy@w.ods.org>.
-+ */
-+ if(ixs->hard_header_len == 0) { /* no hard header present */
-+ ixs->hard_header_stripped = 1;
-+ ixs->hard_header_len = ixs->physdev->hard_header_len;
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_tunnel & DB_TN_XMIT) {
-+ int i;
-+ char c;
-+
-+ printk(KERN_INFO "klips_debug:ipsec_xmit_strip_hard_header: "
-+ ">>> skb->len=%ld hard_header_len:%d",
-+ (unsigned long int)ixs->skb->len, ixs->hard_header_len);
-+ c = ' ';
-+ for (i=0; i < ixs->hard_header_len; i++) {
-+ printk("%c%02x", c, ixs->skb->data[i]);
-+ c = ':';
-+ }
-+ printk(" \n");
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ixs->iph);
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_strip_hard_header: "
-+ "Original head,tailroom: %d,%d\n",
-+ skb_headroom(ixs->skb), skb_tailroom(ixs->skb));
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+enum ipsec_xmit_value
-+ipsec_tunnel_SAlookup(struct ipsec_xmit_state *ixs)
-+{
-+ unsigned int bypass;
-+
-+ bypass = FALSE;
-+
-+ /*
-+ * First things first -- look us up in the erouting tables.
-+ */
-+ ixs->matcher.sen_len = sizeof (struct sockaddr_encap);
-+ ixs->matcher.sen_family = AF_ENCAP;
-+ ixs->matcher.sen_type = SENT_IP4;
-+ ixs->matcher.sen_ip_src.s_addr = ixs->iph->saddr;
-+ ixs->matcher.sen_ip_dst.s_addr = ixs->iph->daddr;
-+ ixs->matcher.sen_proto = ixs->iph->protocol;
-+ ipsec_extract_ports(ixs->iph, &ixs->matcher);
-+
-+ /*
-+ * The spinlock is to prevent any other process from accessing or deleting
-+ * the eroute while we are using and updating it.
-+ */
-+ spin_lock_bh(&eroute_lock);
-+
-+ ixs->eroute = ipsec_findroute(&ixs->matcher);
-+
-+ if(ixs->iph->protocol == IPPROTO_UDP) {
-+ struct udphdr *t = NULL;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:udp port check: "
-+ "fragoff: %d len: %d>%ld \n",
-+ ntohs(ixs->iph->frag_off) & IP_OFFSET,
-+ (ixs->skb->len - ixs->hard_header_len),
-+ (unsigned long int) ((ixs->iph->ihl << 2) + sizeof(struct udphdr)));
-+
-+ if((ntohs(ixs->iph->frag_off) & IP_OFFSET) == 0 &&
-+ ((ixs->skb->len - ixs->hard_header_len) >=
-+ ((ixs->iph->ihl << 2) + sizeof(struct udphdr))))
-+ {
-+ t =((struct udphdr*)((caddr_t)ixs->iph+(ixs->iph->ihl<<2)));
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:udp port in packet: "
-+ "port %d -> %d\n",
-+ ntohs(t->source), ntohs(t->dest));
-+ }
-+
-+ ixs->sport=0; ixs->dport=0;
-+
-+ if(ixs->skb->sk) {
-+#ifdef NET_26
-+#ifdef HAVE_INET_SK_SPORT
-+ ixs->sport = ntohs(inet_sk(ixs->skb->sk)->sport);
-+ ixs->dport = ntohs(inet_sk(ixs->skb->sk)->dport);
-+#else
-+ struct udp_sock *us;
-+
-+ us = (struct udp_sock *)ixs->skb->sk;
-+
-+ ixs->sport = ntohs(us->inet.sport);
-+ ixs->dport = ntohs(us->inet.dport);
-+#endif
-+#else
-+ ixs->sport = ntohs(ixs->skb->sk->sport);
-+ ixs->dport = ntohs(ixs->skb->sk->dport);
-+#endif
-+
-+ }
-+
-+ if(t != NULL) {
-+ if(ixs->sport == 0) {
-+ ixs->sport = ntohs(t->source);
-+ }
-+ if(ixs->dport == 0) {
-+ ixs->dport = ntohs(t->dest);
-+ }
-+ }
-+ }
-+
-+ /*
-+ * practically identical to above, but let's be careful about
-+ * tcp vs udp headers
-+ */
-+ if(ixs->iph->protocol == IPPROTO_TCP) {
-+ struct tcphdr *t = NULL;
-+
-+ if((ntohs(ixs->iph->frag_off) & IP_OFFSET) == 0 &&
-+ ((ixs->skb->len - ixs->hard_header_len) >=
-+ ((ixs->iph->ihl << 2) + sizeof(struct tcphdr)))) {
-+ t =((struct tcphdr*)((caddr_t)ixs->iph+(ixs->iph->ihl<<2)));
-+ }
-+
-+ ixs->sport=0; ixs->dport=0;
-+
-+ if(ixs->skb->sk) {
-+#ifdef NET_26
-+#ifdef HAVE_INET_SK_SPORT
-+ ixs->sport = ntohs(inet_sk(ixs->skb->sk)->sport);
-+ ixs->dport = ntohs(inet_sk(ixs->skb->sk)->dport);
-+#else
-+ struct tcp_tw_bucket *tw;
-+ tw = (struct tcp_tw_bucket *)ixs->skb->sk;
-+ ixs->sport = ntohs(tw->tw_sport);
-+ ixs->dport = ntohs(tw->tw_dport);
-+#endif
-+#else
-+ ixs->sport = ntohs(ixs->skb->sk->sport);
-+ ixs->dport = ntohs(ixs->skb->sk->dport);
-+#endif
-+ }
-+
-+ if(t != NULL) {
-+ if(ixs->sport == 0) {
-+ ixs->sport = ntohs(t->source);
-+ }
-+ if(ixs->dport == 0) {
-+ ixs->dport = ntohs(t->dest);
-+ }
-+ }
-+ }
-+
-+ /* default to a %drop eroute */
-+ ixs->outgoing_said.proto = IPPROTO_INT;
-+ ixs->outgoing_said.spi = htonl(SPI_DROP);
-+ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr = INADDR_ANY;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "checking for local udp/500 IKE packet "
-+ "saddr=%x, er=0p%p, daddr=%x, er_dst=%x, proto=%d sport=%d dport=%d\n",
-+ ntohl((unsigned int)ixs->iph->saddr),
-+ ixs->eroute,
-+ ntohl((unsigned int)ixs->iph->daddr),
-+ ixs->eroute ? ntohl((unsigned int)ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr) : 0,
-+ ixs->iph->protocol,
-+ ixs->sport,
-+ ixs->dport);
-+
-+ /*
-+ * cheat for now...are we udp/500? If so, let it through
-+ * without interference since it is most likely an IKE packet.
-+ */
-+
-+ if (ip_chk_addr((unsigned long)ixs->iph->saddr) == IS_MYADDR
-+ && (ixs->eroute==NULL
-+ || ixs->iph->daddr == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr
-+ || INADDR_ANY == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr)
-+ && (ixs->iph->protocol == IPPROTO_UDP &&
-+ (ixs->sport == 500 || ixs->sport == 4500))) {
-+ /* Whatever the eroute, this is an IKE message
-+ * from us (i.e. not being forwarded).
-+ * Furthermore, if there is a tunnel eroute,
-+ * the destination is the peer for this eroute.
-+ * So %pass the packet: modify the default %drop.
-+ */
-+
-+ ixs->outgoing_said.spi = htonl(SPI_PASS);
-+ if(!(ixs->skb->sk) && ((ntohs(ixs->iph->frag_off) & IP_MF) != 0)) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "local UDP/500 (probably IKE) passthrough: base fragment, rest of fragments will probably get filtered.\n");
-+ }
-+ bypass = TRUE;
-+ }
-+
-+#ifdef KLIPS_EXCEPT_DNS53
-+ /*
-+ *
-+ * if we are udp/53 or tcp/53, also let it through a %trap or %hold,
-+ * since it is DNS, but *also* follow the %trap.
-+ *
-+ * we do not do this for tunnels, only %trap's and %hold's.
-+ *
-+ */
-+
-+ if (ip_chk_addr((unsigned long)ixs->iph->saddr) == IS_MYADDR
-+ && (ixs->eroute==NULL
-+ || ixs->iph->daddr == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr
-+ || INADDR_ANY == ixs->eroute->er_said.dst.u.v4.sin_addr.s_addr)
-+ && ((ixs->iph->protocol == IPPROTO_UDP
-+ || ixs->iph->protocol == IPPROTO_TCP)
-+ && ixs->dport == 53)) {
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "possible DNS packet\n");
-+
-+ if(ixs->eroute)
-+ {
-+ if(ixs->eroute->er_said.spi == htonl(SPI_TRAP)
-+ || ixs->eroute->er_said.spi == htonl(SPI_HOLD))
-+ {
-+ ixs->outgoing_said.spi = htonl(SPI_PASSTRAP);
-+ bypass = TRUE;
-+ }
-+ }
-+ else
-+ {
-+ ixs->outgoing_said.spi = htonl(SPI_PASSTRAP);
-+ bypass = TRUE;
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "bypass = %d\n", bypass);
-+
-+ if(bypass
-+ && !(ixs->skb->sk)
-+ && ((ntohs(ixs->iph->frag_off) & IP_MF) != 0))
-+ {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "local port 53 (probably DNS) passthrough:"
-+ "base fragment, rest of fragments will "
-+ "probably get filtered.\n");
-+ }
-+ }
-+#endif
-+
-+ if (bypass==FALSE && ixs->eroute) {
-+ ixs->eroute->er_count++;
-+ ixs->eroute->er_lasttime = jiffies/HZ;
-+ if(ixs->eroute->er_said.proto==IPPROTO_INT
-+ && ixs->eroute->er_said.spi==htonl(SPI_HOLD))
-+ {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "shunt SA of HOLD: skb stored in HOLD.\n");
-+ if(ixs->eroute->er_last != NULL) {
-+ kfree_skb(ixs->eroute->er_last);
-+ }
-+ ixs->eroute->er_last = ixs->skb;
-+ ixs->skb = NULL;
-+ ixs->stats->tx_dropped++;
-+ spin_unlock_bh(&eroute_lock);
-+ return IPSEC_XMIT_STOLEN;
-+ }
-+ ixs->outgoing_said = ixs->eroute->er_said;
-+ ixs->eroute_pid = ixs->eroute->er_pid;
-+
-+ /* Copy of the ident for the TRAP/TRAPSUBNET eroutes */
-+ if(ixs->outgoing_said.proto==IPPROTO_INT
-+ && (ixs->outgoing_said.spi==htonl(SPI_TRAP)
-+ || (ixs->outgoing_said.spi==htonl(SPI_TRAPSUBNET)))) {
-+ int len;
-+
-+ ixs->ips.ips_ident_s.type = ixs->eroute->er_ident_s.type;
-+ ixs->ips.ips_ident_s.id = ixs->eroute->er_ident_s.id;
-+ ixs->ips.ips_ident_s.len = ixs->eroute->er_ident_s.len;
-+ if (ixs->ips.ips_ident_s.len)
-+ {
-+ len = ixs->ips.ips_ident_s.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "allocating %d bytes for ident_s shunt SA of HOLD: skb stored in HOLD.\n",
-+ len);
-+ if ((ixs->ips.ips_ident_s.data = kmalloc(len, GFP_ATOMIC)) == NULL) {
-+ printk(KERN_WARNING "klips_debug:ipsec_xmit_SAlookup: "
-+ "Failed, tried to allocate %d bytes for source ident.\n",
-+ len);
-+ ixs->stats->tx_dropped++;
-+ spin_unlock_bh(&eroute_lock);
-+ return IPSEC_XMIT_ERRMEMALLOC;
-+ }
-+ memcpy(ixs->ips.ips_ident_s.data, ixs->eroute->er_ident_s.data, len);
-+ }
-+ ixs->ips.ips_ident_d.type = ixs->eroute->er_ident_d.type;
-+ ixs->ips.ips_ident_d.id = ixs->eroute->er_ident_d.id;
-+ ixs->ips.ips_ident_d.len = ixs->eroute->er_ident_d.len;
-+ if (ixs->ips.ips_ident_d.len)
-+ {
-+ len = ixs->ips.ips_ident_d.len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_SAlookup: "
-+ "allocating %d bytes for ident_d shunt SA of HOLD: skb stored in HOLD.\n",
-+ len);
-+ if ((ixs->ips.ips_ident_d.data = kmalloc(len, GFP_ATOMIC)) == NULL) {
-+ printk(KERN_WARNING "klips_debug:ipsec_xmit_SAlookup: "
-+ "Failed, tried to allocate %d bytes for dest ident.\n",
-+ len);
-+ ixs->stats->tx_dropped++;
-+ spin_unlock_bh(&eroute_lock);
-+ return IPSEC_XMIT_ERRMEMALLOC;
-+ }
-+ memcpy(ixs->ips.ips_ident_d.data, ixs->eroute->er_ident_d.data, len);
-+ }
-+ }
-+ }
-+
-+ spin_unlock_bh(&eroute_lock);
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+enum ipsec_xmit_value
-+ipsec_tunnel_restore_hard_header(struct ipsec_xmit_state*ixs)
-+{
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_restore_hard_header: "
-+ "After recursive xforms -- head,tailroom: %d,%d\n",
-+ skb_headroom(ixs->skb),
-+ skb_tailroom(ixs->skb));
-+
-+ if(ixs->saved_header) {
-+ if(skb_headroom(ixs->skb) < ixs->hard_header_len) {
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_xmit_restore_hard_header: "
-+ "tried to skb_push hhlen=%d, %d available. This should never happen, please report.\n",
-+ ixs->hard_header_len,
-+ skb_headroom(ixs->skb));
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_PUSHPULLERR;
-+
-+ }
-+ skb_push(ixs->skb, ixs->hard_header_len);
-+ {
-+ int i;
-+ for (i = 0; i < ixs->hard_header_len; i++) {
-+ ixs->skb->data[i] = ixs->saved_header[i];
-+ }
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_restore_hard_header: "
-+ "With hard_header, final head,tailroom: %d,%d\n",
-+ skb_headroom(ixs->skb),
-+ skb_tailroom(ixs->skb));
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+/*
-+ * when encap processing is complete it call this for us to continue
-+ */
-+
-+void
-+ipsec_tunnel_xsm_complete(
-+ struct ipsec_xmit_state *ixs,
-+ enum ipsec_xmit_value stat)
-+{
-+ if(stat != IPSEC_XMIT_OK) {
-+ if(stat == IPSEC_XMIT_PASS) {
-+ goto bypass;
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_tunnel_start_xmit: encap_bundle failed: %d\n",
-+ stat);
-+ goto cleanup;
-+ }
-+
-+ ixs->matcher.sen_ip_src.s_addr = ixs->iph->saddr;
-+ ixs->matcher.sen_ip_dst.s_addr = ixs->iph->daddr;
-+ ixs->matcher.sen_proto = ixs->iph->protocol;
-+ ipsec_extract_ports(ixs->iph, &ixs->matcher);
-+
-+ spin_lock_bh(&eroute_lock);
-+ ixs->eroute = ipsec_findroute(&ixs->matcher);
-+ if(ixs->eroute) {
-+ ixs->outgoing_said = ixs->eroute->er_said;
-+ ixs->eroute_pid = ixs->eroute->er_pid;
-+ ixs->eroute->er_count++;
-+ ixs->eroute->er_lasttime = jiffies/HZ;
-+ }
-+ spin_unlock_bh(&eroute_lock);
-+
-+ KLIPS_PRINT((debug_tunnel & DB_TN_XMIT) &&
-+ /* ((ixs->orgdst != ixs->newdst) || (ixs->orgsrc != ixs->newsrc)) */
-+ (ixs->orgedst != ixs->outgoing_said.dst.u.v4.sin_addr.s_addr) &&
-+ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr &&
-+ ixs->eroute,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "We are recursing here.\n");
-+
-+ if (/*((ixs->orgdst != ixs->newdst) || (ixs->orgsrc != ixs->newsrc))*/
-+ (ixs->orgedst != ixs->outgoing_said.dst.u.v4.sin_addr.s_addr) &&
-+ ixs->outgoing_said.dst.u.v4.sin_addr.s_addr &&
-+ ixs->eroute) {
-+ ipsec_xsm(ixs);
-+ return;
-+ }
-+
-+ stat = ipsec_nat_encap(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+ stat = ipsec_tunnel_restore_hard_header(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+bypass:
-+ stat = ipsec_tunnel_send(ixs);
-+
-+cleanup:
-+ ipsec_xmit_cleanup(ixs);
-+ ipsec_xmit_state_delete(ixs);
-+}
-+
-+
-+/*
-+ * This function assumes it is being called from dev_queue_xmit()
-+ * and that skb is filled properly by that function.
-+ */
-+int
-+ipsec_tunnel_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+ struct ipsec_xmit_state *ixs = NULL;
-+ enum ipsec_xmit_value stat;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "\n\nipsec_tunnel_start_xmit: STARTING");
-+
-+ stat = IPSEC_XMIT_ERRMEMALLOC;
-+ ixs = ipsec_xmit_state_new();
-+ if (! ixs) {
-+ goto alloc_error;
-+ }
-+
-+ ixs->dev = dev;
-+ ixs->skb = skb;
-+
-+ stat = ipsec_xmit_sanity_check_dev(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+ stat = ipsec_xmit_sanity_check_skb(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+ stat = ipsec_tunnel_strip_hard_header(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ goto cleanup;
-+ }
-+
-+ stat = ipsec_tunnel_SAlookup(ixs);
-+ if(stat != IPSEC_XMIT_OK) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_tunnel_start_xmit: SAlookup failed: %d\n",
-+ stat);
-+ goto cleanup;
-+ }
-+
-+ ixs->innersrc = ixs->iph->saddr;
-+
-+ ixs->xsm_complete = ipsec_tunnel_xsm_complete;
-+
-+ ipsec_xsm(ixs);
-+ return 0;
-+
-+ cleanup:
-+ ipsec_xmit_cleanup(ixs);
-+ ipsec_xmit_state_delete(ixs);
-+alloc_error:
-+ return 0;
-+}
-+
-+DEBUG_NO_STATIC struct net_device_stats *
-+ipsec_tunnel_get_stats(struct net_device *dev)
-+{
-+ return &(((struct ipsecpriv *)(dev->priv))->mystats);
-+}
-+
-+/*
-+ * Revectored calls.
-+ * For each of these calls, a field exists in our private structure.
-+ */
-+
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_hard_header(struct sk_buff *skb, struct net_device *dev,
-+ unsigned short type, const void *daddr, const void *saddr, unsigned len)
-+{
-+ struct ipsecpriv *prv = dev->priv;
-+ struct net_device *tmp;
-+ int ret;
-+ struct net_device_stats *stats; /* This device's statistics */
-+
-+ if(skb == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "no skb...\n");
-+ return -ENODATA;
-+ }
-+
-+ if(dev == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "no device...\n");
-+ return -ENODEV;
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "skb->dev=%s dev=%s.\n",
-+ skb->dev ? skb->dev->name : "NULL",
-+ dev->name);
-+
-+ if(prv == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "no private space associated with dev=%s\n",
-+ dev->name ? dev->name : "NULL");
-+ return -ENODEV;
-+ }
-+
-+ stats = (struct net_device_stats *) &(prv->mystats);
-+
-+ if(prv->dev == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "no physical device associated with dev=%s\n",
-+ dev->name ? dev->name : "NULL");
-+ stats->tx_dropped++;
-+ return -ENODEV;
-+ }
-+
-+ /* check if we have to send a IPv6 packet. It might be a Router
-+ Solicitation, where the building of the packet happens in
-+ reverse order:
-+ 1. ll hdr,
-+ 2. IPv6 hdr,
-+ 3. ICMPv6 hdr
-+ -> skb->nh.raw is still uninitialized when this function is
-+ called!! If this is no IPv6 packet, we can print debugging
-+ messages, otherwise we skip all debugging messages and just
-+ build the ll header */
-+ if(type != ETH_P_IPV6) {
-+ /* execute this only, if we don't have to build the
-+ header for a IPv6 packet */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+ if(!prv->header_ops->create)
-+#else
-+ if(!prv->hard_header)
-+#endif
-+ {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "physical device has been detached, packet dropped 0p%p->0p%p len=%d type=%d dev=%s->NULL ",
-+ saddr,
-+ daddr,
-+ len,
-+ type,
-+ dev->name);
-+#ifdef NET_21
-+ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+ "ip=%08x->%08x\n",
-+ (__u32)ntohl(ip_hdr(skb)->saddr),
-+ (__u32)ntohl(ip_hdr(skb)->daddr) );
-+#else /* NET_21 */
-+ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+ "ip=%08x->%08x\n",
-+ (__u32)ntohl(skb->ip_hdr->saddr),
-+ (__u32)ntohl(skb->ip_hdr->daddr) );
-+#endif /* NET_21 */
-+ stats->tx_dropped++;
-+ return -ENODEV;
-+ }
-+
-+#define da ((struct net_device *)(prv->dev))->dev_addr
-+ KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "Revectored 0p%p->0p%p len=%d type=%d dev=%s->%s dev_addr=%02x:%02x:%02x:%02x:%02x:%02x ",
-+ saddr,
-+ daddr,
-+ len,
-+ type,
-+ dev->name,
-+ prv->dev->name,
-+ da[0], da[1], da[2], da[3], da[4], da[5]);
-+#ifdef NET_21
-+ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+ "ip=%08x->%08x\n",
-+ (__u32)ntohl(ip_hdr(skb)->saddr),
-+ (__u32)ntohl(ip_hdr(skb)->daddr) );
-+#else /* NET_21 */
-+ KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+ "ip=%08x->%08x\n",
-+ (__u32)ntohl(skb->ip_hdr->saddr),
-+ (__u32)ntohl(skb->ip_hdr->daddr) );
-+#endif /* NET_21 */
-+ } else {
-+ KLIPS_PRINT(debug_tunnel,
-+ "klips_debug:ipsec_tunnel_hard_header: "
-+ "is IPv6 packet, skip debugging messages, only revector and build linklocal header.\n");
-+ }
-+ tmp = skb->dev;
-+ skb->dev = prv->dev;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+ ret = prv->header_ops->create(skb, prv->dev, type, (void *)daddr, (void *)saddr, len);
-+#else
-+ ret = prv->hard_header(skb, prv->dev, type, (void *)daddr, (void *)saddr, len);
-+#endif
-+ skb->dev = tmp;
-+ return ret;
-+}
-+
-+/*
-+ * Revector a rebuild_header request from the virtual ipsec device that
-+ * owns skb to the physical device attached to it.
-+ *
-+ * skb->dev is pointed at the physical device while that device's own
-+ * rebuild routine runs and is restored before returning.
-+ *
-+ * Returns the physical device's rebuild result, or -ENODEV when skb has
-+ * no device, the device has no private area, no physical device is
-+ * attached, or the saved rebuild hook of the physical device is unset.
-+ */
-+DEBUG_NO_STATIC int
-+#ifdef NET_21
-+ipsec_tunnel_rebuild_header(struct sk_buff *skb)
-+#else /* NET_21 */
-+ipsec_tunnel_rebuild_header(void *buff, struct net_device *dev,
-+			    unsigned long raddr, struct sk_buff *skb)
-+#endif /* NET_21 */
-+{
-+	struct ipsecpriv *prv;
-+	struct net_device *tmp;
-+	int ret;
-+	struct net_device_stats *stats;	/* This device's statistics */
-+
-+	if(skb->dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_rebuild_header: "
-+			    "no device...\n");
-+		return -ENODEV;
-+	}
-+
-+	/* Only read the private area once skb->dev is known to be valid;
-+	 * dereferencing it in the declaration made the check above useless. */
-+	prv = skb->dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_rebuild_header: "
-+			    "no private space associated with dev=%s\n",
-+			    skb->dev->name ? skb->dev->name : "NULL");
-+		return -ENODEV;
-+	}
-+
-+	stats = (struct net_device_stats *) &(prv->mystats);
-+
-+	if(prv->dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_rebuild_header: "
-+			    "no physical device associated with dev=%s\n",
-+			    skb->dev->name ? skb->dev->name : "NULL");
-+		stats->tx_dropped++;
-+		return -ENODEV;
-+	}
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	if(!prv->header_ops || !prv->header_ops->rebuild)
-+#else
-+	if(!prv->rebuild_header)
-+#endif
-+	{
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_rebuild_header: "
-+			    "physical device has been detached, packet dropped skb->dev=%s->NULL ",
-+			    skb->dev->name);
-+		/* continuation of the line above, hence KLIPS_PRINTMORE */
-+#ifdef NET_21
-+		KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+				"ip=%08x->%08x\n",
-+				(__u32)ntohl(ip_hdr(skb)->saddr),
-+				(__u32)ntohl(ip_hdr(skb)->daddr) );
-+#else /* NET_21 */
-+		KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+				"ip=%08x->%08x\n",
-+				(__u32)ntohl(skb->ip_hdr->saddr),
-+				(__u32)ntohl(skb->ip_hdr->daddr) );
-+#endif /* NET_21 */
-+		stats->tx_dropped++;
-+		return -ENODEV;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel: "
-+		    "Revectored rebuild_header dev=%s->%s ",
-+		    skb->dev->name, prv->dev->name);
-+#ifdef NET_21
-+	KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+			"ip=%08x->%08x\n",
-+			(__u32)ntohl(ip_hdr(skb)->saddr),
-+			(__u32)ntohl(ip_hdr(skb)->daddr) );
-+#else /* NET_21 */
-+	KLIPS_PRINTMORE(debug_tunnel & DB_TN_REVEC,
-+			"ip=%08x->%08x\n",
-+			(__u32)ntohl(skb->ip_hdr->saddr),
-+			(__u32)ntohl(skb->ip_hdr->daddr) );
-+#endif /* NET_21 */
-+
-+	/* borrow the skb for the physical device, then hand it back */
-+	tmp = skb->dev;
-+	skb->dev = prv->dev;
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	ret = prv->header_ops->rebuild(skb);
-+#else
-+#ifdef NET_21
-+	ret = prv->rebuild_header(skb);
-+#else /* NET_21 */
-+	ret = prv->rebuild_header(buff, prv->dev, raddr, skb);
-+#endif /* NET_21 */
-+#endif
-+	skb->dev = tmp;
-+	return ret;
-+}
-+
-+/*
-+ * Revector a set_mac_address request on the virtual ipsec device to the
-+ * set_mac_address hook saved from the attached physical device.
-+ *
-+ * Returns that hook's result, or -ENODEV when dev is NULL, has no
-+ * private area, has no physical device attached, or no hook was saved.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_set_mac_address(struct net_device *dev, void *addr)
-+{
-+	struct ipsecpriv *prv;
-+	struct net_device_stats *stats;	/* This device's statistics */
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_set_mac_address: "
-+			    "no device...\n");
-+		return -ENODEV;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_set_mac_address: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return -ENODEV;
-+	}
-+
-+	stats = (struct net_device_stats *) &(prv->mystats);
-+
-+	if(prv->dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_set_mac_address: "
-+			    "no physical device associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		stats->tx_dropped++;
-+		return -ENODEV;
-+	}
-+
-+	if(!prv->set_mac_address) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_set_mac_address: "
-+			    "physical device has been detached, cannot set - skb->dev=%s->NULL\n",
-+			    dev->name);
-+		return -ENODEV;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel_set_mac_address: "
-+		    "Revectored dev=%s->%s addr=0p%p\n",
-+		    dev->name, prv->dev->name, addr);
-+	return prv->set_mac_address(prv->dev, addr);
-+}
-+
-+#ifndef NET_21
-+/*
-+ * Revector a header_cache_bind request on the virtual ipsec device to
-+ * the header_cache_bind hook saved from the attached physical device.
-+ * Only built when NET_21 is not defined.
-+ *
-+ * Silently returns (after a debug message) when dev is NULL, has no
-+ * private area, has no physical device attached, or no hook was saved.
-+ */
-+DEBUG_NO_STATIC void
-+ipsec_tunnel_cache_bind(struct hh_cache **hhp, struct net_device *dev,
-+			unsigned short htype, __u32 daddr)
-+{
-+	struct ipsecpriv *prv;
-+	struct net_device_stats *stats;	/* This device's statistics */
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_bind: "
-+			    "no device...\n");
-+		return;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_bind: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return;
-+	}
-+
-+	stats = (struct net_device_stats *) &(prv->mystats);
-+
-+	if(prv->dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_bind: "
-+			    "no physical device associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		stats->tx_dropped++;
-+		return;
-+	}
-+
-+	if(!prv->header_cache_bind) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_bind: "
-+			    "physical device has been detached, cannot set - skb->dev=%s->NULL\n",
-+			    dev->name);
-+		stats->tx_dropped++;
-+		return;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel_cache_bind: "
-+		    "Revectored \n");
-+	prv->header_cache_bind(hhp, prv->dev, htype, daddr);
-+	return;
-+}
-+#endif /* !NET_21 */
-+
-+
-+/*
-+ * Revector a header cache_update request on the virtual ipsec device to
-+ * the cache_update hook saved from the attached physical device.
-+ *
-+ * Silently returns (after a debug message) when dev is NULL, has no
-+ * private area, has no physical device attached, or no hook was saved.
-+ */
-+DEBUG_NO_STATIC void
-+ipsec_tunnel_cache_update(struct hh_cache *hh, const struct net_device *dev,
-+			  const unsigned char * haddr)
-+{
-+	struct ipsecpriv *prv;
-+	struct net_device_stats *stats;	/* This device's statistics */
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_update: "
-+			    "no device...\n");
-+		return;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_update: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return;
-+	}
-+
-+	stats = (struct net_device_stats *) &(prv->mystats);
-+
-+	if(prv->dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_update: "
-+			    "no physical device associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		stats->tx_dropped++;
-+		return;
-+	}
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	if(!prv->header_ops || !prv->header_ops->cache_update)
-+#else
-+	if(!prv->header_cache_update)
-+#endif
-+	{
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_cache_update: "
-+			    "physical device has been detached, cannot set - skb->dev=%s->NULL\n",
-+			    dev->name);
-+		return;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel: "
-+		    "Revectored cache_update\n");
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	prv->header_ops->cache_update(hh, prv->dev, haddr);
-+#else
-+	prv->header_cache_update(hh, prv->dev, haddr);
-+#endif
-+	return;
-+}
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+/*
-+ * Link-layer header operations installed on a virtual ipsec device by
-+ * ipsec_tunnel_attach() when the physical device provides header_ops.
-+ * Each entry is one of the revectoring wrappers defined in this file.
-+ */
-+const struct header_ops ipsec_tunnel_header_ops = {
-+ .create = ipsec_tunnel_hard_header,
-+ .rebuild = ipsec_tunnel_rebuild_header,
-+ .cache_update = ipsec_tunnel_cache_update,
-+};
-+#endif
-+
-+#ifdef NET_21
-+/*
-+ * Per-neighbour setup hook installed by ipsec_tunnel_neigh_setup_dev().
-+ * A neighbour still in NUD_NONE is switched to arp_broken_ops and its
-+ * output routine is taken from those ops.  Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_neigh_setup(struct neighbour *n)
-+{
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel_neigh_setup:\n");
-+
-+	/* neighbours that already left NUD_NONE are left untouched */
-+	if (n->nud_state != NUD_NONE)
-+		return 0;
-+
-+	n->ops = &arp_broken_ops;
-+	n->output = n->ops->output;
-+	return 0;
-+}
-+
-+/*
-+ * Device-level neigh_setup hook for virtual ipsec devices.  For the
-+ * AF_INET neighbour table it installs ipsec_tunnel_neigh_setup() and
-+ * sets both unicast and multicast probe counts to 0; other address
-+ * families are left unchanged.  Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
-+{
-+	KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+		    "klips_debug:ipsec_tunnel_neigh_setup_dev: "
-+		    "setting up %s\n",
-+		    dev ? dev->name : "NULL");
-+
-+	if (p->tbl->family != AF_INET)
-+		return 0;
-+
-+	p->neigh_setup = ipsec_tunnel_neigh_setup;
-+	p->ucast_probes = 0;
-+	p->mcast_probes = 0;
-+	return 0;
-+}
-+#endif /* NET_21 */
-+
-+/*
-+ * We call the attach routine to attach another device.
-+ *
-+ * Binds the virtual ipsec device dev to the physical device physdev:
-+ * the physical device's transmit, statistics, link-layer header and
-+ * set_mac_address hooks are saved in the private area, the matching
-+ * revectoring wrappers are installed on dev, and header length, address
-+ * length and hardware address are copied from physdev.
-+ *
-+ * No reference is taken on physdev here; ipsec_tunnel_detach() drops one
-+ * with ipsec_dev_put(), so the caller is expected to pass in a held
-+ * device (ipsec_tunnel_ioctl() obtains it with ipsec_dev_get()).
-+ *
-+ * Returns 0 on success, -ENODEV if dev or physdev is NULL, -ENODATA if
-+ * dev has no private area.
-+ */
-+
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_attach(struct net_device *dev, struct net_device *physdev)
-+{
-+	int i;
-+	struct ipsecpriv *prv;
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_attach: "
-+			    "no device...\n");
-+		return -ENODEV;
-+	}
-+
-+	/* every field below is copied from physdev, so it must exist */
-+	if(physdev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_attach: "
-+			    "no physical device to attach to dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return -ENODEV;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_attach: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return -ENODATA;
-+	}
-+
-+	prv->dev = physdev;
-+	prv->hard_start_xmit = physdev->hard_start_xmit;
-+	prv->get_stats = physdev->get_stats;
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	if (physdev->header_ops) {
-+		prv->header_ops = physdev->header_ops;
-+		dev->header_ops = &ipsec_tunnel_header_ops;
-+	} else
-+		dev->header_ops = NULL;
-+#else
-+	if (physdev->hard_header) {
-+		prv->hard_header = physdev->hard_header;
-+		dev->hard_header = &ipsec_tunnel_hard_header;
-+	} else
-+		dev->hard_header = NULL;
-+
-+	if (physdev->rebuild_header) {
-+		prv->rebuild_header = physdev->rebuild_header;
-+		dev->rebuild_header = ipsec_tunnel_rebuild_header;
-+	} else
-+		dev->rebuild_header = NULL;
-+
-+#ifndef NET_21
-+	if (physdev->header_cache_bind) {
-+		prv->header_cache_bind = physdev->header_cache_bind;
-+		dev->header_cache_bind = ipsec_tunnel_cache_bind;
-+	} else
-+		dev->header_cache_bind = NULL;
-+#endif /* !NET_21 */
-+
-+	if (physdev->header_cache_update) {
-+		prv->header_cache_update = physdev->header_cache_update;
-+		dev->header_cache_update = ipsec_tunnel_cache_update;
-+	} else
-+		dev->header_cache_update = NULL;
-+#endif
-+
-+	if (physdev->set_mac_address) {
-+		prv->set_mac_address = physdev->set_mac_address;
-+		dev->set_mac_address = ipsec_tunnel_set_mac_address;
-+	} else
-+		dev->set_mac_address = NULL;
-+
-+	dev->hard_header_len = physdev->hard_header_len;
-+
-+#ifdef NET_21
-+/*	prv->neigh_setup        = physdev->neigh_setup; */
-+	dev->neigh_setup        = ipsec_tunnel_neigh_setup_dev;
-+#endif /* NET_21 */
-+	dev->mtu = 16260; /* 0xfff0; */ /* dev->mtu; */
-+	prv->mtu = physdev->mtu;
-+
-+#ifdef PHYSDEV_TYPE
-+	dev->type = physdev->type; /* ARPHRD_TUNNEL; */
-+#endif /*  PHYSDEV_TYPE */
-+
-+	/* mirror the physical device's hardware address */
-+	dev->addr_len = physdev->addr_len;
-+	for (i=0; i<dev->addr_len; i++) {
-+		dev->dev_addr[i] = physdev->dev_addr[i];
-+	}
-+#ifdef CONFIG_KLIPS_DEBUG
-+	if(debug_tunnel & DB_TN_INIT) {
-+		/* first byte uses %02x like the rest so the MAC prints uniformly */
-+		printk(KERN_INFO "klips_debug:ipsec_tunnel_attach: "
-+		       "physical device %s being attached has HW address: %02x",
-+		       physdev->name, physdev->dev_addr[0]);
-+		for (i=1; i < physdev->addr_len; i++) {
-+			printk(":%02x", physdev->dev_addr[i]);
-+		}
-+		printk("\n");
-+	}
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+	return 0;
-+}
-+
-+/*
-+ * We call the detach routine to detach the ipsec tunnel from another device.
-+ *
-+ * Drops the reference on the attached physical device (if any), clears
-+ * every hook saved from it in the private area, and resets the virtual
-+ * device's header length, hardware address and address length.  With
-+ * DETACH_AND_DOWN defined the hooks installed on dev itself and its MTU
-+ * are cleared as well.
-+ *
-+ * Returns 0 on success, -ENODEV if dev is NULL, -ENODATA if dev has no
-+ * private area.
-+ */
-+
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_detach(struct net_device *dev)
-+{
-+	int i;
-+	struct ipsecpriv *prv;
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_detach: "
-+			    "no device...\n");
-+		return -ENODEV;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	if(prv == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_REVEC,
-+			    "klips_debug:ipsec_tunnel_detach: "
-+			    "no private space associated with dev=%s\n",
-+			    dev->name ? dev->name : "NULL");
-+		return -ENODATA;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_detach: "
-+		    "physical device %s being detached from virtual device %s\n",
-+		    prv->dev ? prv->dev->name : "NULL",
-+		    dev->name);
-+
-+	/* the message above allows for prv->dev == NULL, so must the put */
-+	if(prv->dev != NULL) {
-+		ipsec_dev_put(prv->dev);
-+	}
-+	prv->dev = NULL;
-+	prv->hard_start_xmit = NULL;
-+	prv->get_stats = NULL;
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	prv->header_ops = NULL;
-+#else
-+	prv->hard_header = NULL;
-+	prv->rebuild_header = NULL;
-+	prv->header_cache_update = NULL;
-+#ifndef NET_21
-+	prv->header_cache_bind = NULL;
-+#else
-+/*	prv->neigh_setup        = NULL; */
-+#endif
-+#endif
-+	prv->set_mac_address = NULL;
-+	dev->hard_header_len = 0;
-+
-+#ifdef DETACH_AND_DOWN
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	dev->header_ops = NULL;
-+#else
-+	dev->hard_header = NULL;
-+	dev->rebuild_header = NULL;
-+	dev->header_cache_update = NULL;
-+#ifndef NET_21
-+	dev->header_cache_bind = NULL;
-+#else
-+	dev->neigh_setup        = NULL;
-+#endif
-+#endif
-+	dev->set_mac_address = NULL;
-+	dev->mtu = 0;
-+#endif /* DETACH_AND_DOWN */
-+
-+	prv->mtu = 0;
-+	for (i=0; i<MAX_ADDR_LEN; i++) {
-+		dev->dev_addr[i] = 0;
-+	}
-+	dev->addr_len = 0;
-+#ifdef PHYSDEV_TYPE
-+	dev->type = ARPHRD_VOID; /* ARPHRD_TUNNEL; */
-+#endif /*  PHYSDEV_TYPE */
-+
-+	return 0;
-+}
-+
-+/*
-+ * We call the clear routine to detach all ipsec tunnels from other devices.
-+ *
-+ * Walks the first IPSEC_NUM_IF entries of ipsecdevices[] and detaches
-+ * every virtual device that has a physical device attached.  Stops at,
-+ * and returns, the first non-zero result of ipsec_tunnel_detach();
-+ * returns 0 when every attached device was detached.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_clear(void)
-+{
-+	int slot;
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_clear: .\n");
-+
-+	for(slot = 0; slot < IPSEC_NUM_IF; slot++) {
-+		struct net_device *virt = ipsecdevices[slot];
-+		struct net_device *phys;
-+		struct ipsecpriv *priv;
-+		int err;
-+
-+		/* skip empty slots and devices with nothing attached */
-+		if(virt == NULL) {
-+			continue;
-+		}
-+		priv = (struct ipsecpriv *)(virt->priv);
-+		if(priv == NULL) {
-+			continue;
-+		}
-+		phys = (struct net_device *)(priv->dev);
-+		if(phys == NULL) {
-+			continue;
-+		}
-+
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_clear: "
-+			    "physical device for device %s is %s\n",
-+			    virt->name, phys->name);
-+
-+		err = ipsec_tunnel_detach(virt);
-+		if(err) {
-+			KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+				    "klips_debug:ipsec_tunnel_clear: "
-+				    "error %d detatching device %s from device %s.\n",
-+				    err, virt->name, phys->name);
-+			return err;
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * ioctl handler for virtual ipsec devices (the "tncfg" service calls).
-+ *
-+ *   IPSEC_SET_DEV  attach dev to the physical device named in the
-+ *                  request (for CONFIG_IP_ALIAS the ":alias" suffix is
-+ *                  stripped first); -ENXIO if it cannot be found,
-+ *                  -EBUSY if dev already has a device attached.
-+ *   IPSEC_DEL_DEV  detach dev; -ENODEV if nothing is attached.
-+ *   IPSEC_CLR_DEV  detach every ipsec device.
-+ *   IPSEC_UDP_ENCAP_CONVERT (only with HAVE_UDP_ENCAP_CONVERT)
-+ *                  switch the UDP socket whose descriptor is carried in
-+ *                  the request to non-IKE ESP-in-UDP encapsulation.
-+ *
-+ * Any other command yields -EOPNOTSUPP; a NULL dev yields -ENODEV.
-+ */
-+DEBUG_NO_STATIC int
-+ipsec_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-+{
-+	struct ipsectunnelconf *cf = (struct ipsectunnelconf *)&ifr->ifr_data;
-+	struct ipsecpriv *prv;
-+	struct net_device *them; /* physical device */
-+	int ret;
-+#ifdef CONFIG_IP_ALIAS
-+	char *colon;
-+	char realphysname[IFNAMSIZ];
-+#endif /* CONFIG_IP_ALIAS */
-+
-+	if(dev == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_ioctl: "
-+			    "device not supplied.\n");
-+		return -ENODEV;
-+	}
-+
-+	/* read dev->priv only after dev has been validated */
-+	prv = dev->priv;
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_ioctl: "
-+		    "tncfg service call #%d for dev=%s\n",
-+		    cmd,
-+		    dev->name ? dev->name : "NULL");
-+	switch (cmd) {
-+	/* attach a virtual ipsec? device to a physical device */
-+	case IPSEC_SET_DEV:
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_ioctl: "
-+			    "calling ipsec_tunnel_attatch...\n");
-+#ifdef CONFIG_IP_ALIAS
-+		/* If this is an IP alias interface, get its real physical name */
-+		strncpy(realphysname, cf->cf_name, IFNAMSIZ);
-+		realphysname[IFNAMSIZ-1] = 0;
-+		colon = strchr(realphysname, ':');
-+		if (colon) *colon = 0;
-+		them = ipsec_dev_get(realphysname);
-+#else /* CONFIG_IP_ALIAS */
-+		them = ipsec_dev_get(cf->cf_name);
-+#endif /* CONFIG_IP_ALIAS */
-+
-+		if (them == NULL) {
-+			KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+				    "klips_debug:ipsec_tunnel_ioctl: "
-+				    "physical device %s requested is null\n",
-+				    cf->cf_name);
-+			return -ENXIO;
-+		}
-+
-+#if 0
-+		if (them->flags & IFF_UP) {
-+			KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+				    "klips_debug:ipsec_tunnel_ioctl: "
-+				    "physical device %s requested is not up.\n",
-+				    cf->cf_name);
-+			ipsec_dev_put(them);
-+			return -ENXIO;
-+		}
-+#endif
-+
-+		if (prv && prv->dev) {
-+			KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+				    "klips_debug:ipsec_tunnel_ioctl: "
-+				    "virtual device is already connected to %s.\n",
-+				    prv->dev->name ? prv->dev->name : "NULL");
-+			ipsec_dev_put(them);
-+			return -EBUSY;
-+		}
-+
-+		ret = ipsec_tunnel_attach(dev, them);
-+		if (ret) {
-+			/* attach did not keep the device: give back our reference */
-+			ipsec_dev_put(them);
-+		}
-+		return ret;
-+
-+	case IPSEC_DEL_DEV:
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_ioctl: "
-+			    "calling ipsec_tunnel_detatch.\n");
-+		/* prv itself may be missing, not just the attached device */
-+		if (prv == NULL || prv->dev == NULL) {
-+			KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+				    "klips_debug:ipsec_tunnel_ioctl: "
-+				    "physical device not connected.\n");
-+			return -ENODEV;
-+		}
-+		return ipsec_tunnel_detach(dev);
-+
-+	case IPSEC_CLR_DEV:
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_ioctl: "
-+			    "calling ipsec_tunnel_clear.\n");
-+		return ipsec_tunnel_clear();
-+
-+#ifdef HAVE_UDP_ENCAP_CONVERT
-+	case IPSEC_UDP_ENCAP_CONVERT:
-+	{
-+		unsigned int *socknum =(unsigned int *)&ifr->ifr_data;
-+		struct socket *sock;
-+		int err;
-+
-+		/* sockfd_lookup_light() is static in socket.c, so use the
-+		 * exported sockfd_lookup() and pair it with sockfd_put(). */
-+		sock = sockfd_lookup(*socknum, &err);
-+		if (!sock)
-+			return err;
-+
-+		/* check that it's a UDP socket before touching udp_sk() */
-+		if (sock->sk == NULL || sock->sk->sk_protocol != IPPROTO_UDP) {
-+			err = -EPROTONOSUPPORT;
-+		} else {
-+			udp_sk(sock->sk)->encap_type = UDP_ENCAP_ESPINUDP_NON_IKE;
-+			udp_sk(sock->sk)->encap_rcv = klips26_udp_encap_rcv;
-+
-+			KLIPS_PRINT(debug_tunnel
-+				    , "UDP socket: %u set to NON-IKE encap mode\n"
-+				    , *socknum);
-+
-+			err = 0;
-+		}
-+
-+		sockfd_put(sock);
-+		return err;
-+	}
-+#endif
-+
-+	default:
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_ioctl: "
-+			    "unknown command %d.\n",
-+			    cmd);
-+		return -EOPNOTSUPP;
-+	}
-+}
-+
-+/*
-+ * Return the virtual ipsec device stored in slot inst of ipsecdevices[],
-+ * or NULL when inst is outside 0 .. IPSEC_NUM_IF-1 or the slot is empty.
-+ * No reference is taken on the returned device.
-+ */
-+struct net_device *ipsec_get_device(int inst)
-+{
-+	/* inst is signed: reject negatives as well as too-large values */
-+	if(inst < 0 || inst >= IPSEC_NUM_IF) {
-+		return NULL;
-+	}
-+
-+	return ipsecdevices[inst];
-+}
-+
-+/*
-+ * Network device notifier callback.
-+ *
-+ * Events for a NULL device or a loopback device are ignored.  On
-+ * NETDEV_DOWN (and, with NET_21, NETDEV_UNREGISTER) the first virtual
-+ * ipsec device found attached to dev is detached from it.  Every other
-+ * event is only logged.  Always returns NOTIFY_DONE.
-+ */
-+int
-+ipsec_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-+{
-+ struct net_device *dev = ptr;
-+ struct net_device *ipsec_dev;
-+ struct ipsecpriv *priv;
-+ int i;
-+
-+ if (dev == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "dev=NULL for event type %ld.\n",
-+ event);
-+ return(NOTIFY_DONE);
-+ }
-+
-+ /* check for loopback devices */
-+ if (dev && (dev->flags & IFF_LOOPBACK)) {
-+ return(NOTIFY_DONE);
-+ }
-+
-+ switch (event) {
-+ case NETDEV_DOWN:
-+ /* look very carefully at the scope of these compiler
-+ directives before changing anything... -- RGB */
-+ /* With NET_21 both DOWN and UNREGISTER enter here; the nested
-+ switch below only selects which message is logged, then both
-+ fall into the shared detach loop. */
-+#ifdef NET_21
-+ case NETDEV_UNREGISTER:
-+ switch (event) {
-+ case NETDEV_DOWN:
-+#endif /* NET_21 */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_DOWN dev=%s flags=%x\n",
-+ dev->name,
-+ dev->flags);
-+ if(strncmp(dev->name, "ipsec", strlen("ipsec")) == 0) {
-+ printk(KERN_CRIT "IPSEC EVENT: KLIPS device %s shut down.\n",
-+ dev->name);
-+ }
-+#ifdef NET_21
-+ break;
-+ case NETDEV_UNREGISTER:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_UNREGISTER dev=%s flags=%x\n",
-+ dev->name,
-+ dev->flags);
-+ break;
-+ }
-+#endif /* NET_21 */
-+
-+ /* find the attached physical device and detach it. */
-+ for(i = 0; i < IPSEC_NUM_IF; i++) {
-+ ipsec_dev = ipsecdevices[i];
-+
-+ if(ipsec_dev) {
-+ priv = (struct ipsecpriv *)(ipsec_dev->priv);
-+ if(priv) {
-+ ;
-+ if(((struct net_device *)(priv->dev)) == dev) {
-+ /* dev_close(ipsec_dev); */
-+ /* return */ ipsec_tunnel_detach(ipsec_dev);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "device '%s' has been detached.\n",
-+ ipsec_dev->name);
-+ /* leaves the for loop: only the first match is detached */
-+ break;
-+ }
-+ } else {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "device '%s' has no private data space!\n",
-+ ipsec_dev->name);
-+ }
-+ }
-+ }
-+ break;
-+ case NETDEV_UP:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_UP dev=%s\n",
-+ dev->name);
-+ break;
-+#ifdef NET_21
-+ case NETDEV_REBOOT:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_REBOOT dev=%s\n",
-+ dev->name);
-+ break;
-+ case NETDEV_CHANGE:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_CHANGE dev=%s flags=%x\n",
-+ dev->name,
-+ dev->flags);
-+ break;
-+ case NETDEV_REGISTER:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_REGISTER dev=%s\n",
-+ dev->name);
-+ break;
-+ case NETDEV_CHANGEMTU:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_CHANGEMTU dev=%s to mtu=%d\n",
-+ dev->name,
-+ dev->mtu);
-+ break;
-+ case NETDEV_CHANGEADDR:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_CHANGEADDR dev=%s\n",
-+ dev->name);
-+ break;
-+ case NETDEV_GOING_DOWN:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_GOING_DOWN dev=%s\n",
-+ dev->name);
-+ break;
-+ case NETDEV_CHANGENAME:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "NETDEV_CHANGENAME dev=%s\n",
-+ dev->name);
-+ break;
-+#endif /* NET_21 */
-+ default:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+ "klips_debug:ipsec_device_event: "
-+ "event type %ld unrecognised for dev=%s\n",
-+ event,
-+ dev->name);
-+ break;
-+ }
-+ return NOTIFY_DONE;
-+}
-+
-+/*
-+ *	Called when an ipsec tunnel device is initialized.
-+ *	The ipsec tunnel device structure is passed to us.
-+ *
-+ *	Installs the tunnel's open/stop/xmit/stats/ioctl handlers, allocates
-+ *	and zeroes the struct ipsecpriv private area, clears the shared
-+ *	"zeroes" buffer and resets the link-layer related fields of dev.
-+ *	Returns 0, or -ENOMEM if the private area cannot be allocated.
-+ */
-+
-+int
-+ipsec_tunnel_init(struct net_device *dev)
-+{
-+#ifndef NET_21
-+	int bufidx;
-+#endif /* !NET_21 */
-+	struct ipsecpriv *priv;
-+
-+	KLIPS_PRINT(debug_tunnel,
-+		    "klips_debug:ipsec_tunnel_init: "
-+		    "allocating %lu bytes initialising device: %s\n",
-+		    (unsigned long) sizeof(struct ipsecpriv),
-+		    dev->name ? dev->name : "NULL");
-+
-+	/* Add our tunnel functions to the device */
-+	dev->open		= ipsec_tunnel_open;
-+	dev->stop		= ipsec_tunnel_close;
-+	dev->hard_start_xmit	= ipsec_tunnel_start_xmit;
-+	dev->get_stats		= ipsec_tunnel_get_stats;
-+
-+	/* private area: allocate, publish on the device, then zero it */
-+	priv = kmalloc(sizeof(struct ipsecpriv), GFP_KERNEL);
-+	dev->priv = priv;
-+	if (priv == NULL)
-+		return -ENOMEM;
-+	memset((caddr_t)priv, 0, sizeof(struct ipsecpriv));
-+
-+	/* clear every byte of the shared zeroes buffer */
-+	memset((void *)zeroes, 0, sizeof(zeroes));
-+
-+#ifndef NET_21
-+	/* Initialize the tunnel device structure */
-+	for (bufidx = 0; bufidx < DEV_NUMBUFFS; bufidx++)
-+		skb_queue_head_init(&dev->buffs[bufidx]);
-+#endif /* !NET_21 */
-+
-+	dev->set_multicast_list = NULL;
-+	dev->do_ioctl		= ipsec_tunnel_ioctl;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+	dev->header_ops = NULL;
-+#else
-+	dev->hard_header	= NULL;
-+	dev->rebuild_header 	= NULL;
-+	dev->set_mac_address 	= NULL;
-+#ifndef NET_21
-+	dev->header_cache_bind 	= NULL;
-+#endif /* !NET_21 */
-+	dev->header_cache_update= NULL;
-+#endif
-+
-+#ifdef NET_21
-+/*	prv->neigh_setup        = NULL; */
-+	dev->neigh_setup        = ipsec_tunnel_neigh_setup_dev;
-+#endif /* NET_21 */
-+
-+	/* nothing is attached yet: no link-layer header, MTU or address */
-+	dev->hard_header_len 	= 0;
-+	dev->mtu		= 0;
-+	dev->addr_len		= 0;
-+	dev->type		= ARPHRD_VOID; /* ARPHRD_TUNNEL; */ /* ARPHRD_ETHER; */
-+	dev->tx_queue_len	= 10;		/* Small queue */
-+	memset((caddr_t)(dev->broadcast),0xFF, ETH_ALEN);	/* what if this is not attached to ethernet? */
-+
-+	/* New-style flags. */
-+	dev->flags		= IFF_NOARP /* 0 */ /* Petr Novak */;
-+
-+	/* We're done.  Have I forgotten anything? */
-+	return 0;
-+}
-+
-+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-+/* Module specific interface (but it links with the rest of IPSEC) */
-+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-+
-+/*
-+ * Device init hook assigned to dev->init by ipsec_tunnel_createnum().
-+ * Returns the result of ipsec_tunnel_init() so that an allocation
-+ * failure (-ENOMEM) is reported instead of being turned into success.
-+ */
-+int
-+ipsec_tunnel_probe(struct net_device *dev)
-+{
-+	return ipsec_tunnel_init(dev);
-+}
-+
-+#ifdef alloc_netdev
-+/*
-+ * Setup callback handed to alloc_netdev() by ipsec_tunnel_createnum().
-+ * Intentionally empty: the device is configured later through
-+ * dev->init (ipsec_tunnel_probe).
-+ */
-+static void ipsec_tunnel_netdev_setup(struct net_device *dev)
-+{
-+}
-+#endif
-+
-+struct net_device *ipsecdevices[IPSEC_NUM_IFMAX];
-+int ipsecdevices_max=-1;
-+
-+/*
-+ * Allocate and register virtual ipsec device number ifnum and record it
-+ * in ipsecdevices[ifnum].  A reference is held on the device for as long
-+ * as it sits in the table.
-+ *
-+ * Returns 0 on success, -ENOENT if ifnum is not a valid table index,
-+ * -EEXIST if the slot is already in use, -ENOMEM on allocation failure,
-+ * -EIO if register_netdev() fails.  On every failure the table slot is
-+ * left empty and the memory allocated here is released.
-+ */
-+int
-+ipsec_tunnel_createnum(int ifnum)
-+{
-+	char name[IFNAMSIZ];
-+	struct net_device *dev_ipsec;
-+	int vifentry;
-+
-+	/* ipsecdevices[] has IPSEC_NUM_IFMAX slots: 0 .. IPSEC_NUM_IFMAX-1 */
-+	if(ifnum < 0 || ifnum >= IPSEC_NUM_IFMAX) {
-+		return -ENOENT;
-+	}
-+
-+	if(ipsecdevices[ifnum]!=NULL) {
-+		return -EEXIST;
-+	}
-+
-+	/* no identical device */
-+	if(ifnum > ipsecdevices_max) {
-+		ipsecdevices_max=ifnum;
-+	}
-+	vifentry = ifnum;
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_init_devices: "
-+		    "creating and registering IPSEC_NUM_IF=%u device\n",
-+		    ifnum);
-+
-+	/* bounded: the name must fit in IFNAMSIZ bytes including the NUL */
-+	snprintf(name, sizeof(name), IPSEC_DEV_FORMAT, ifnum);
-+#ifdef alloc_netdev
-+	dev_ipsec = alloc_netdev(0, name, ipsec_tunnel_netdev_setup);
-+#else
-+	dev_ipsec = (struct net_device*)kmalloc(sizeof(struct net_device), GFP_KERNEL);
-+#endif
-+	if (dev_ipsec == NULL) {
-+		printk(KERN_ERR "klips_debug:ipsec_tunnel_init_devices: "
-+		       "failed to allocate memory for device %s, quitting device init.\n",
-+		       name);
-+		return -ENOMEM;
-+	}
-+#ifndef alloc_netdev
-+	memset((caddr_t)dev_ipsec, 0, sizeof(struct net_device));
-+#ifdef NETDEV_23
-+	strncpy(dev_ipsec->name, name, sizeof(dev_ipsec->name));
-+#else /* NETDEV_23 */
-+	dev_ipsec->name = (char*)kmalloc(IFNAMSIZ, GFP_KERNEL);
-+	if (dev_ipsec->name == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_init_devices: "
-+			    "failed to allocate memory for device %s name, quitting device init.\n",
-+			    name);
-+		/* do not leak the device structure allocated above */
-+		kfree(dev_ipsec);
-+		return -ENOMEM;
-+	}
-+	memset((caddr_t)dev_ipsec->name, 0, IFNAMSIZ);
-+	strncpy(dev_ipsec->name, name, IFNAMSIZ);
-+#endif /* NETDEV_23 */
-+#ifdef PAUL_FIXME
-+	dev_ipsec->next = NULL;
-+#endif
-+#endif /* alloc_netdev */
-+	dev_ipsec->init = &ipsec_tunnel_probe;
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_init_devices: "
-+		    "registering device %s\n",
-+		    dev_ipsec->name);
-+
-+	/* reference and hold the device reference */
-+	dev_hold(dev_ipsec);
-+	ipsecdevices[vifentry]=dev_ipsec;
-+
-+	if (register_netdev(dev_ipsec) != 0) {
-+		KLIPS_PRINT(1 || debug_tunnel & DB_TN_INIT,
-+			    "klips_debug:ipsec_tunnel_init_devices: "
-+			    "registering device %s failed, quitting device init.\n",
-+			    dev_ipsec->name);
-+
-+		/* roll back: empty the slot, drop our reference, free memory */
-+		ipsecdevices[vifentry]=NULL;
-+		ipsec_dev_put(dev_ipsec);
-+		/* priv may already have been allocated by ipsec_tunnel_init();
-+		 * kfree(NULL) is a no-op otherwise */
-+		kfree(dev_ipsec->priv);
-+		dev_ipsec->priv=NULL;
-+#ifdef alloc_netdev
-+		free_netdev(dev_ipsec);
-+#else
-+#ifndef NETDEV_23
-+		kfree(dev_ipsec->name);
-+#endif /* !NETDEV_23 */
-+		kfree(dev_ipsec);
-+#endif /* alloc_netdev */
-+		return -EIO;
-+	}
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_init_devices: "
-+		    "registering device %s succeeded, continuing...\n",
-+		    dev_ipsec->name);
-+	return 0;
-+}
-+
-+
-+/*
-+ * Create and register the first IPSEC_NUM_IF virtual ipsec devices,
-+ * stopping at the first one that cannot be created.
-+ *
-+ * Always returns 0.
-+ * NOTE(review): a failure of ipsec_tunnel_createnum() only ends the loop
-+ * and is not reported to the caller -- confirm that this best-effort
-+ * behaviour is intended.
-+ */
-+int
-+ipsec_tunnel_init_devices(void)
-+{
-+	int ifnum = 0;
-+
-+	KLIPS_PRINT(debug_tunnel & DB_TN_INIT,
-+		    "klips_debug:ipsec_tunnel_init_devices: "
-+		    "creating and registering IPSEC_NUM_IF=%u devices, allocating %lu per device, IFNAMSIZ=%u.\n",
-+		    IPSEC_NUM_IF,
-+		    (unsigned long) (sizeof(struct net_device) + IFNAMSIZ),
-+		    IFNAMSIZ);
-+
-+	/* keep going while there are slots left and creation succeeds */
-+	while(ifnum < IPSEC_NUM_IF && ipsec_tunnel_createnum(ifnum) == 0) {
-+		ifnum++;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Unregister and free virtual ipsec device number vifnum.
-+ *
-+ * The table slot is emptied and the table's reference dropped before
-+ * the device is unregistered.  The private area allocated by
-+ * ipsec_tunnel_init() is freed before the device structure itself, and
-+ * nothing is written to the device after free_netdev().
-+ *
-+ * Returns 0 on success, -ENOENT if vifnum is not a valid table index or
-+ * the slot is empty.
-+ */
-+int
-+ipsec_tunnel_deletenum(int vifnum)
-+{
-+	struct net_device *dev_ipsec;
-+
-+	/* ipsecdevices[] has IPSEC_NUM_IFMAX slots: 0 .. IPSEC_NUM_IFMAX-1 */
-+	if(vifnum < 0 || vifnum >= IPSEC_NUM_IFMAX) {
-+		return -ENOENT;
-+	}
-+
-+	dev_ipsec = ipsecdevices[vifnum];
-+	if(dev_ipsec == NULL) {
-+		return -ENOENT;
-+	}
-+
-+	/* release reference */
-+	ipsecdevices[vifnum]=NULL;
-+	ipsec_dev_put(dev_ipsec);
-+
-+	KLIPS_PRINT(debug_tunnel, "Unregistering %s (refcnt=%d)\n",
-+		    dev_ipsec->name,
-+		    atomic_read(&dev_ipsec->refcnt));
-+	unregister_netdev(dev_ipsec);
-+	KLIPS_PRINT(debug_tunnel, "Unregisted %s\n", dev_ipsec->name);
-+
-+	/* the private area is kmalloc()ed separately in ipsec_tunnel_init() */
-+	kfree(dev_ipsec->priv);
-+	dev_ipsec->priv=NULL;
-+#ifdef alloc_netdev
-+	/* dev_ipsec must not be touched after this call */
-+	free_netdev(dev_ipsec);
-+#else
-+#ifndef NETDEV_23
-+	kfree(dev_ipsec->name);
-+	dev_ipsec->name=NULL;
-+#endif /* !NETDEV_23 */
-+	/* NOTE(review): the kmalloc()ed struct net_device is not freed on
-+	 * this path (unchanged from before) -- confirm who owns it. */
-+#endif /* alloc_netdev */
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Look up virtual ipsec device number vifnum and return it with an
-+ * extra reference held (dev_hold), or NULL when vifnum is out of range
-+ * or the slot is empty.
-+ *
-+ * ipsecdevices_max holds the highest index ever created (it starts at
-+ * -1), so the comparison is inclusive.
-+ */
-+struct net_device *
-+ipsec_tunnel_get_device(int vifnum)
-+{
-+	struct net_device *nd;
-+
-+	if(vifnum < 0 || vifnum > ipsecdevices_max
-+	   || vifnum >= IPSEC_NUM_IFMAX) {
-+		return NULL;
-+	}
-+
-+	nd = ipsecdevices[vifnum];
-+	if(nd) dev_hold(nd);
-+	return nd;
-+}
-+
-+/*
-+ * Unregister and free every virtual ipsec device found in the first
-+ * IPSEC_NUM_IF slots of ipsecdevices[].  (Formerly void.)
-+ *
-+ * For each device the table slot is emptied, the table's reference is
-+ * dropped, the device is unregistered, and the private area allocated
-+ * by ipsec_tunnel_init() is freed before the device structure itself.
-+ * Nothing is written to a device after free_netdev().
-+ *
-+ * Always returns 0.
-+ */
-+int
-+ipsec_tunnel_cleanup_devices(void)
-+{
-+	int error = 0;
-+	int i;
-+	struct net_device *dev_ipsec;
-+
-+	for(i = 0; i < IPSEC_NUM_IF; i++) {
-+		dev_ipsec = ipsecdevices[i];
-+		if(dev_ipsec == NULL) {
-+			continue;
-+		}
-+
-+		/* release reference */
-+		ipsecdevices[i]=NULL;
-+		ipsec_dev_put(dev_ipsec);
-+
-+		KLIPS_PRINT(debug_tunnel, "Unregistering %s (refcnt=%d)\n",
-+			    dev_ipsec->name,
-+			    atomic_read(&dev_ipsec->refcnt));
-+		unregister_netdev(dev_ipsec);
-+		KLIPS_PRINT(debug_tunnel, "Unregisted %s\n", dev_ipsec->name);
-+
-+		/* the private area is kmalloc()ed separately in ipsec_tunnel_init() */
-+		kfree(dev_ipsec->priv);
-+		dev_ipsec->priv=NULL;
-+#ifdef alloc_netdev
-+		/* dev_ipsec must not be touched after this call */
-+		free_netdev(dev_ipsec);
-+#else
-+#ifndef NETDEV_23
-+		kfree(dev_ipsec->name);
-+		dev_ipsec->name=NULL;
-+#endif /* !NETDEV_23 */
-+		/* NOTE(review): the kmalloc()ed struct net_device is not freed
-+		 * on this path (unchanged from before) -- confirm who owns it. */
-+#endif /* alloc_netdev */
-+	}
-+	return error;
-+}
-+
-+// ------------------------------------------------------------------------
-+// this handles creating and managing state for xmit path
-+
-+static spinlock_t ixs_cache_lock = SPIN_LOCK_UNLOCKED;
-+#ifdef HAVE_KMEM_CACHE_MACRO
-+static struct kmem_cache *ixs_cache_allocator = NULL;
-+#else
-+static kmem_cache_t *ixs_cache_allocator = NULL;
-+#endif
-+static unsigned ixs_cache_allocated_count = 0;
-+
-+#if !defined(MODULE_PARM) && defined(module_param)
-+/*
-+ * As of 2.6.17 MODULE_PARM no longer exists, use module_param instead.
-+ */
-+#define MODULE_PARM(a,b) module_param(a,int,0644)
-+#endif
-+
-+int ipsec_ixs_cache_allocated_count_max = 1000;
-+MODULE_PARM(ipsec_ixs_cache_allocated_count_max, "i");
-+MODULE_PARM_DESC(ipsec_ixs_cache_allocated_count_max,
-+ "Maximum outstanding transmit packets");
-+
-+/*
-+ * Create the "ipsec_ixs" slab cache used for struct ipsec_xmit_state
-+ * objects and initialise the lock guarding its bookkeeping.
-+ *
-+ * Returns 0 on success, -EBUSY if the cache already exists, -ENOMEM if
-+ * it cannot be created.
-+ */
-+int
-+ipsec_xmit_state_cache_init (void)
-+{
-+	if (ixs_cache_allocator != NULL)
-+		return -EBUSY;
-+
-+	spin_lock_init(&ixs_cache_lock);
-+
-+	/* kmem_cache_create() lost its destructor argument on kernels that
-+	 * define HAVE_KMEM_CACHE_MACRO */
-+#ifdef HAVE_KMEM_CACHE_MACRO
-+	/* ixs_cache_allocator = KMEM_CACHE(ipsec_ixs,0); */
-+	ixs_cache_allocator = kmem_cache_create ("ipsec_ixs",
-+			sizeof (struct ipsec_xmit_state), 0,
-+			0, NULL);
-+#else
-+	ixs_cache_allocator = kmem_cache_create ("ipsec_ixs",
-+			sizeof (struct ipsec_xmit_state), 0,
-+			0, NULL, NULL);
-+#endif
-+
-+	return ixs_cache_allocator ? 0 : -ENOMEM;
-+}
-+
-+/*
-+ * Destroy the "ipsec_ixs" slab cache, if it exists, and reset the
-+ * outstanding-object counter.  A warning is printed first when objects
-+ * are still accounted as allocated.
-+ */
-+void
-+ipsec_xmit_state_cache_cleanup (void)
-+{
-+	if (unlikely (ixs_cache_allocated_count != 0))
-+		printk ("ipsec: deleting ipsec_ixs kmem_cache while in use\n");
-+
-+	if (ixs_cache_allocator != NULL) {
-+		kmem_cache_destroy (ixs_cache_allocator);
-+		ixs_cache_allocator = NULL;
-+	}
-+
-+	ixs_cache_allocated_count = 0;
-+}
-+
-+/*
-+ * Allocate a zero-filled struct ipsec_xmit_state from the ixs cache.
-+ *
-+ * The number of outstanding objects is capped by the module parameter
-+ * ipsec_ixs_cache_allocated_count_max; the counter is maintained under
-+ * ixs_cache_lock.  Returns NULL when the cap has been reached or the
-+ * GFP_ATOMIC allocation fails.
-+ */
-+struct ipsec_xmit_state *
-+ipsec_xmit_state_new (void)
-+{
-+	struct ipsec_xmit_state *ixs = NULL;
-+	int at_limit;
-+
-+	spin_lock_bh (&ixs_cache_lock);
-+
-+	at_limit = (ixs_cache_allocated_count >= ipsec_ixs_cache_allocated_count_max);
-+	if (!at_limit) {
-+		ixs = kmem_cache_alloc (ixs_cache_allocator, GFP_ATOMIC);
-+		if (likely (ixs != NULL))
-+			ixs_cache_allocated_count++;
-+	}
-+
-+	spin_unlock_bh (&ixs_cache_lock);
-+
-+	if (at_limit) {
-+		KLIPS_PRINT(debug_tunnel,
-+			    "klips_debug:ipsec_xmit_state_new: "
-+			    "exceeded maximum outstanding TX packet cnt %d\n",
-+			    ixs_cache_allocated_count);
-+		return NULL;
-+	}
-+
-+	if (unlikely (ixs == NULL))
-+		return NULL;
-+
-+	/* initialize the object: the whole structure is cleared (the
-+	 * field-by-field alternative was compiled out and is not kept) */
-+	memset((caddr_t)ixs, 0, sizeof(*ixs));
-+
-+	return ixs;
-+}
-+
-+/*
-+ * Return a transmit-state object to the slab cache and decrement the
-+ * outstanding-object counter, both under ixs_cache_lock.
-+ * A NULL argument is ignored.
-+ */
-+void
-+ipsec_xmit_state_delete (struct ipsec_xmit_state *ixs)
-+{
-+	if (likely(ixs != NULL)) {
-+		spin_lock_bh(&ixs_cache_lock);
-+		ixs_cache_allocated_count--;
-+		kmem_cache_free(ixs_cache_allocator, ixs);
-+		spin_unlock_bh(&ixs_cache_lock);
-+	}
-+}
-+
-+/*
-+ * Local Variables:
-+ * c-style: linux
-+ * End:
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_xform.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,69 @@
-+/*
-+ * Common routines for IPSEC transformations.
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: ipsec_xform.c,v 1.65 2005/04/29 05:10:22 mcr Exp $
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "freeswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#include <linux/random.h> /* get_random_bytes() */
-+#include <freeswan.h>
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+
-+#include <net/ip.h>
-+
-+#include "freeswan/radij.h"
-+#include "freeswan/ipsec_encap.h"
-+#include "freeswan/ipsec_radij.h"
-+#include "freeswan/ipsec_xform.h"
-+#include "freeswan/ipsec_ipe4.h"
-+#include "freeswan/ipsec_ah.h"
-+#include "freeswan/ipsec_esp.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ipsec_xmit.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,2273 @@
-+/*
-+ * IPSEC Transmit code.
-+ * Copyright (C) 1996, 1997 John Ioannidis.
-+ * Copyright (C) 1998-2003 Richard Guy Briggs.
-+ * Copyright (C) 2004-2005 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * OCF/receive state machine written by
-+ * David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2004-2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ */
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif /* for CONFIG_IP_FORWARD */
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, struct net_device_stats, dev_queue_xmit() and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <linux/skbuff.h>
-+
-+#include <asm/uaccess.h>
-+#include <asm/checksum.h>
-+#include <openswan.h>
-+#ifdef NET_21
-+# define MSS_HACK_ /* experimental */
-+# include <linux/in6.h>
-+# include <net/dst.h>
-+# define proto_priv cb
-+#endif /* NET_21 */
-+
-+#include <net/icmp.h> /* icmp_send() */
-+#include <net/ip.h>
-+#ifdef NETDEV_23
-+# include <linux/netfilter_ipv4.h>
-+#endif /* NETDEV_23 */
-+
-+#include <linux/if_arp.h>
-+#ifdef MSS_HACK
-+# include <net/tcp.h> /* TCP options */
-+#endif /* MSS_HACK */
-+
-+#include "openswan/ipsec_kern24.h"
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_life.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_eroute.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xmit.h"
-+#include "openswan/ipsec_sa.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_ipe4.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+#include "openswan/ipcomp.h"
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+#include "ipsec_ocf.h"
-+
-+
-+/*
-+ * Stupid kernel API differences in APIs. Not only do some
-+ * kernels not have ip_select_ident, but some have differing APIs,
-+ * and SuSE has one with one parameter, but no way of checking to
-+ * see what is really what.
-+ */
-+
-+/*
-+ * KLIPS_IP_SELECT_IDENT(iph, skb) sets the IP identification field of iph.
-+ * The conditions below are mutually exclusive, so exactly one definition
-+ * is selected, depending on SUSE_LINUX_2_4_19_IS_STUPID, IP_SELECT_IDENT
-+ * and IP_SELECT_IDENT_NEW.
-+ */
-+#ifdef SUSE_LINUX_2_4_19_IS_STUPID
-+/* one-argument ip_select_ident(); the skb argument is unused */
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph)
-+#else
-+
-+/* simplest case, nothing */
-+/* no ip_select_ident() at all: take the next value of ip_id_count */
-+#if !defined(IP_SELECT_IDENT)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) do { iph->id = htons(ip_id_count++); } while(0)
-+#endif
-+
-+/* kernels > 2.3.37-ish */
-+/* two-argument form: header plus the skb's dst entry */
-+#if defined(IP_SELECT_IDENT) && !defined(IP_SELECT_IDENT_NEW)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst)
-+#endif
-+
-+/* kernels > 2.4.2 */
-+/* three-argument form; the third argument is always passed as NULL */
-+#if defined(IP_SELECT_IDENT) && defined(IP_SELECT_IDENT_NEW)
-+#define KLIPS_IP_SELECT_IDENT(iph, skb) ip_select_ident(iph, skb->dst, NULL)
-+#endif
-+
-+#endif /* SUSE_LINUX_2_4_19_IS_STUPID */
-+
-+
-+
-+/*
-+ * All-zero buffer (static storage, never written in this part of the file).
-+ * ipsec_xmit_ah() feeds AHHMAC_HASHLEN bytes of it to the hash in place of
-+ * the not-yet-computed authentication data.
-+ */
-+#if defined(CONFIG_KLIPS_AH)
-+#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+static __u32 zeroes[64];
-+#endif
-+#endif
-+
-+/* Global counters; NOTE(review): their users are outside this part of the file. */
-+int ipsec_xmit_trap_count = 0;
-+int ipsec_xmit_trap_sendcount = 0;
-+
-+/*
-+ * dmp(label, buf, len): dump a block via ipsec_dmp_block() when both
-+ * debug_xmit and sysctl_ipsec_debug_verbose are set; expands to nothing
-+ * without CONFIG_KLIPS_DEBUG.
-+ * The debug form expands to a bare `if`, so an `else` written directly
-+ * after a dmp() call would bind to that hidden `if`.
-+ */
-+#ifdef CONFIG_KLIPS_DEBUG
-+#define dmp(_x,_y,_z) if(debug_xmit && sysctl_ipsec_debug_verbose) ipsec_dmp_block(_x,_y,_z)
-+#else /* CONFIG_KLIPS_DEBUG */
-+#define dmp(_x, _y, _z)
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+
-+#if !defined(SKB_COPY_EXPAND) || defined(KLIPS_UNIT_TESTS)
-+/*
-+ * This is mostly skbuff.c:skb_copy().
-+ *
-+ * Fallback skb_copy_expand(): allocate a new sk_buff whose buffer is the
-+ * source buffer size plus `headroom` + `tailroom` extra bytes, copy the
-+ * source buffer into it shifted up by `headroom`, and replicate the
-+ * bookkeeping fields (which set depends on NET_21 / NETDEV_23).
-+ *
-+ * skb      - source buffer (not modified)
-+ * headroom - extra bytes wanted in front of the existing data
-+ * tailroom - extra bytes wanted after the existing buffer end
-+ * priority - allocation flags handed to alloc_skb()
-+ *
-+ * Returns the copy, or NULL if either room value (or their sum) is
-+ * negative, if alloc_skb() fails, or if the new buffer unexpectedly has
-+ * too little tailroom for skb->len.
-+ */
-+struct sk_buff *
-+skb_copy_expand(const struct sk_buff *skb, int headroom,
-+ int tailroom, int priority)
-+{
-+ struct sk_buff *n;
-+ unsigned long offset;
-+
-+ /*
-+ * Do sanity checking
-+ */
-+ /* the sum is tested as well, which catches signed overflow of the two */
-+ if((headroom < 0) || (tailroom < 0) || ((headroom+tailroom) < 0)) {
-+ printk(KERN_WARNING
-+ "klips_error:skb_copy_expand: "
-+ "Illegal negative head,tailroom %d,%d\n",
-+ headroom,
-+ tailroom);
-+ return NULL;
-+ }
-+ /*
-+ * Allocate the copy buffer
-+ */
-+
-+#ifndef NET_21
-+ IS_SKB(skb);
-+#endif /* !NET_21 */
-+
-+
-+ n=alloc_skb(skb->end - skb->head + headroom + tailroom, priority);
-+
-+ /* only fields of the source skb are printed, so this is safe before the NULL check on n */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:skb_copy_expand: "
-+ "allocating %d bytes, head=0p%p data=0p%p tail=0p%p end=0p%p end-head=%d tail-data=%d\n",
-+ skb->end - skb->head + headroom + tailroom,
-+ skb->head,
-+ skb->data,
-+ skb->tail,
-+ skb->end,
-+ skb->end - skb->head,
-+ skb->tail - skb->data);
-+
-+ if(n==NULL)
-+ return NULL;
-+
-+ /*
-+ * Shift between the two data areas in bytes
-+ */
-+
-+ /* Set the data pointer */
-+ skb_reserve(n,skb->data-skb->head+headroom);
-+ /* Set the tail pointer and length */
-+ if(skb_tailroom(n) < skb->len) {
-+ printk(KERN_WARNING "klips_error:skb_copy_expand: "
-+ "tried to skb_put %ld, %d available. This should never happen, please report.\n",
-+ (unsigned long int)skb->len,
-+ skb_tailroom(n));
-+ ipsec_kfree_skb(n);
-+ return NULL;
-+ }
-+ skb_put(n,skb->len);
-+
-+ /* displacement from a position in the old buffer to the same position in the copy */
-+ offset=n->head + headroom - skb->head;
-+
-+ /* Copy the bytes */
-+ /* the whole old buffer (head..end) is copied, not just data..tail */
-+ memcpy(n->head + headroom, skb->head,skb->end-skb->head);
-+#ifdef NET_21
-+ n->csum=skb->csum;
-+ n->priority=skb->priority;
-+ n->dst=dst_clone(skb->dst);
-+ /* header pointers are rebased by `offset` only when set in the source */
-+ if(skb->nh.raw)
-+ n->nh.raw=skb->nh.raw+offset;
-+#ifndef NETDEV_23
-+ n->is_clone=0;
-+#endif /* NETDEV_23 */
-+ atomic_set(&n->users, 1);
-+ n->destructor = NULL;
-+#ifdef HAVE_SOCK_SECURITY
-+ n->security=skb->security;
-+#endif
-+#else /* NET_21 */
-+ n->link3=NULL;
-+ n->when=skb->when;
-+ if(skb->ip_hdr)
-+ n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset);
-+ n->saddr=skb->saddr;
-+ n->daddr=skb->daddr;
-+ n->raddr=skb->raddr;
-+ n->seq=skb->seq;
-+ n->end_seq=skb->end_seq;
-+ n->ack_seq=skb->ack_seq;
-+ n->acked=skb->acked;
-+ n->free=1;
-+ n->arp=skb->arp;
-+ n->tries=0;
-+ n->lock=0;
-+ n->users=0;
-+#endif /* NET_21 */
-+ /* fields common to both kernel generations; the copy is detached from any list and socket */
-+ n->protocol=skb->protocol;
-+ n->list=NULL;
-+ n->sk=NULL;
-+ n->dev=skb->dev;
-+ if(skb->h.raw)
-+ n->h.raw=skb->h.raw+offset;
-+ if(skb->mac.raw)
-+ n->mac.raw=skb->mac.raw+offset;
-+ /* proto_priv is #defined to cb under NET_21 near the top of this file */
-+ memcpy(n->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
-+#ifndef NETDEV_23
-+ n->used=skb->used;
-+#endif /* !NETDEV_23 */
-+ n->pkt_type=skb->pkt_type;
-+ n->stamp=skb->stamp;
-+
-+#ifndef NET_21
-+ IS_SKB(n);
-+#endif /* !NET_21 */
-+ return n;
-+}
-+#endif /* !SKB_COPY_EXPAND */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+/*
-+ * Debug helper: print a one-line summary of an IPv4 header via printk().
-+ *
-+ * The line shows the header fields, a protocol tag for the protocols
-+ * known here, the source and destination addresses (with ports for
-+ * UDP/TCP) and the ICMP type/code.  When sysctl_ipsec_debug_verbose is
-+ * set, everything after the IP header is additionally hex-dumped.
-+ */
-+void
-+ipsec_print_ip(struct iphdr *ip)
-+{
-+	char buf[ADDRTOA_BUF];
-+	int hdrlen = ip->ihl << 2;		/* IP header length in bytes */
-+	caddr_t l4 = (caddr_t)ip + hdrlen;	/* first byte after the IP header */
-+	int is_udp = (ip->protocol == IPPROTO_UDP);
-+	int is_tcp = (ip->protocol == IPPROTO_TCP);
-+
-+	printk(KERN_INFO "klips_debug: IP:");
-+	printk(" ihl:%d", hdrlen);
-+	printk(" ver:%d", ip->version);
-+	printk(" tos:%d", ip->tos);
-+	printk(" tlen:%d", ntohs(ip->tot_len));
-+	printk(" id:%d", ntohs(ip->id));
-+	printk(" %s%s%sfrag_off:%d",
-+	       ip->frag_off & __constant_htons(IP_CE) ? "CE " : "",
-+	       ip->frag_off & __constant_htons(IP_DF) ? "DF " : "",
-+	       ip->frag_off & __constant_htons(IP_MF) ? "MF " : "",
-+	       (ntohs(ip->frag_off) & IP_OFFSET) << 3);
-+	printk(" ttl:%d", ip->ttl);
-+	printk(" proto:%d", ip->protocol);
-+
-+	/* symbolic tag for the protocols we know about */
-+	switch (ip->protocol) {
-+	case IPPROTO_UDP:
-+		printk(" (UDP)");
-+		break;
-+	case IPPROTO_TCP:
-+		printk(" (TCP)");
-+		break;
-+	case IPPROTO_ICMP:
-+		printk(" (ICMP)");
-+		break;
-+	case IPPROTO_ESP:
-+		printk(" (ESP)");
-+		break;
-+	case IPPROTO_AH:
-+		printk(" (AH)");
-+		break;
-+	case IPPROTO_COMP:
-+		printk(" (COMP)");
-+		break;
-+	default:
-+		break;
-+	}
-+
-+	printk(" chk:%d", ntohs(ip->check));
-+
-+	/* source address, plus source port for UDP/TCP */
-+	addrtoa(*((struct in_addr*)(&ip->saddr)), 0, buf, sizeof(buf));
-+	printk(" saddr:%s", buf);
-+	if (is_udp)
-+		printk(":%d", ntohs(((struct udphdr*)l4)->source));
-+	if (is_tcp)
-+		printk(":%d", ntohs(((struct tcphdr*)l4)->source));
-+
-+	/* destination address, plus destination port for UDP/TCP */
-+	addrtoa(*((struct in_addr*)(&ip->daddr)), 0, buf, sizeof(buf));
-+	printk(" daddr:%s", buf);
-+	if (is_udp)
-+		printk(":%d", ntohs(((struct udphdr*)l4)->dest));
-+	if (is_tcp)
-+		printk(":%d", ntohs(((struct tcphdr*)l4)->dest));
-+
-+	if (ip->protocol == IPPROTO_ICMP) {
-+		struct icmphdr *icmph = (struct icmphdr*)l4;
-+
-+		printk(" type:code=%d:%d", icmph->type, icmph->code);
-+	}
-+	printk("\n");
-+
-+	if (sysctl_ipsec_debug_verbose) {
-+		/* dump everything that follows the IP header */
-+		int len = ntohs(ip->tot_len) - hdrlen;
-+		__u8 *c = ((__u8*)ip) + hdrlen;
-+
-+		ipsec_dmp_block("ip_print", c, len);
-+	}
-+}
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+#ifdef MSS_HACK
-+/*
-+ * Issues:
-+ * 1) Fragments arriving in the tunnel should probably be rejected.
-+ * 2) How does this affect syncookies, mss_cache, dst cache ?
-+ * 3) Path MTU discovery handling needs to be reviewed. For example,
-+ * if we receive an ICMP 'packet too big' message from an intermediate
-+ * router specifying its next hop MTU, our stack may process this and
-+ * adjust the MSS without taking our AH/ESP overheads into account.
-+ */
-+
-+
-+/*
-+ * Recalculate checksum using differences between changed datum,
-+ * borrowed from netfilter.
-+ */
-+/*
-+ * Incrementally update a 16-bit checksum after a value changed.
-+ *
-+ * oldvalinv - the old value as supplied by the caller (the caller in this
-+ *             file passes the bitwise complement of the old value)
-+ * newval    - the replacement value
-+ * oldcheck  - the checksum before the change
-+ *
-+ * Returns the folded checksum over { oldvalinv, newval } seeded with
-+ * oldcheck ^ 0xFFFF.
-+ */
-+DEBUG_NO_STATIC u_int16_t
-+ipsec_fast_csum(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-+{
-+	u_int32_t diffs[2];
-+
-+	diffs[0] = oldvalinv;
-+	diffs[1] = newval;
-+
-+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-+				      oldcheck ^ 0xFFFF));
-+}
-+
-+/*
-+ * Determine effective MSS.
-+ *
-+ * Note that we assume that there is always an MSS option for our own
-+ * SYN segments, which is mentioned in tcp_syn_build_options(), kernel 2.2.x.
-+ * This could change, and we should probably parse TCP options instead.
-+ *
-+ */
-+/*
-+ * Rewrite the MSS value in a TCP header and patch the TCP checksum.
-+ *
-+ * The 32-bit word directly following the fixed TCP header is assumed to
-+ * be the MSS option (see the note above); options are not parsed.  The
-+ * new MSS is whatever tcp_sync_mss() returns for the socket owning the
-+ * skb and the given mtu.
-+ *
-+ * Always returns 1.
-+ */
-+DEBUG_NO_STATIC u_int8_t
-+ipsec_adjust_mss(struct sk_buff *skb, struct tcphdr *tcph, u_int16_t mtu)
-+{
-+	/* first 32-bit word after the fixed TCP header */
-+	u_int32_t *mssp = &((u_int32_t *)tcph)[sizeof(struct tcphdr) / sizeof(u_int32_t)];
-+	u_int16_t newmss = tcp_sync_mss(skb->sk, mtu);
-+	u_int16_t oldmss;
-+
-+	printk(KERN_INFO "klips: setting mss to %u\n", newmss);
-+
-+	/* low 16 bits of the option word hold the current MSS */
-+	oldmss = ntohl(*mssp) & 0x0000FFFF;
-+	*mssp = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | newmss);
-+
-+	tcph->check = ipsec_fast_csum(htons(~oldmss),
-+				      htons(newmss), tcph->check);
-+	return 1;
-+}
-+#endif /* MSS_HACK */
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+/*
-+ * Translate an ipsec_xmit_value code into its symbolic name.
-+ *
-+ * Known codes yield a string literal spelling the constant's name.
-+ * Unknown codes are formatted as a decimal number into a function-static
-+ * buffer, so that result is overwritten by the next such call.
-+ */
-+DEBUG_NO_STATIC char *
-+ipsec_xmit_err(int err)
-+{
-+	static char tmp[32];
-+
-+/* one switch arm per code; the name is produced by stringification */
-+#define XMIT_ERR_NAME(code)	case code: return(#code)
-+	switch ((int) err) {
-+	XMIT_ERR_NAME(IPSEC_XMIT_STOLEN);
-+	XMIT_ERR_NAME(IPSEC_XMIT_PASS);
-+	XMIT_ERR_NAME(IPSEC_XMIT_OK);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ERRMEMALLOC);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ESP_BADALG);
-+	XMIT_ERR_NAME(IPSEC_XMIT_BADPROTO);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ESP_PUSHPULLERR);
-+	XMIT_ERR_NAME(IPSEC_XMIT_BADLEN);
-+	XMIT_ERR_NAME(IPSEC_XMIT_AH_BADALG);
-+	XMIT_ERR_NAME(IPSEC_XMIT_SAIDNOTFOUND);
-+	XMIT_ERR_NAME(IPSEC_XMIT_SAIDNOTLIVE);
-+	XMIT_ERR_NAME(IPSEC_XMIT_REPLAYROLLED);
-+	XMIT_ERR_NAME(IPSEC_XMIT_LIFETIMEFAILED);
-+	XMIT_ERR_NAME(IPSEC_XMIT_CANNOTFRAG);
-+	XMIT_ERR_NAME(IPSEC_XMIT_MSSERR);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ERRSKBALLOC);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ENCAPFAIL);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NODEV);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NOPRIVDEV);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NOPHYSDEV);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NOSKB);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NOIPV6);
-+	XMIT_ERR_NAME(IPSEC_XMIT_NOIPOPTIONS);
-+	XMIT_ERR_NAME(IPSEC_XMIT_TTLEXPIRED);
-+	XMIT_ERR_NAME(IPSEC_XMIT_BADHHLEN);
-+	XMIT_ERR_NAME(IPSEC_XMIT_PUSHPULLERR);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ROUTEERR);
-+	XMIT_ERR_NAME(IPSEC_XMIT_RECURSDETECT);
-+	XMIT_ERR_NAME(IPSEC_XMIT_IPSENDFAILURE);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ESPUDP);
-+	XMIT_ERR_NAME(IPSEC_XMIT_ESPUDP_BADTYPE);
-+	XMIT_ERR_NAME(IPSEC_XMIT_PENDING);
-+	}
-+#undef XMIT_ERR_NAME
-+
-+	/* not a code we know by name: fall back to the number */
-+	snprintf(tmp, sizeof(tmp), "%d", err);
-+	return tmp;
-+}
-+#endif
-+
-+/*
-+ * Sanity checks
-+ */
-+/*
-+ * Validate the device chain of a transmit state and cache derived values.
-+ *
-+ * Walks ixs->dev -> private structure -> physical device, storing each
-+ * step in ixs->prv / ixs->physdev as it goes.  On success the physical
-+ * MTU is recorded in both ixs->physmtu and ixs->cur_mtu, and ixs->stats
-+ * is pointed at the private structure's statistics block.
-+ *
-+ * Returns IPSEC_XMIT_OK, or IPSEC_XMIT_NODEV / IPSEC_XMIT_NOPRIVDEV /
-+ * IPSEC_XMIT_NOPHYSDEV for the first missing link.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_sanity_check_dev(struct ipsec_xmit_state *ixs)
-+{
-+	if (!ixs->dev) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+			    "klips_error:ipsec_xmit_sanity_check_dev: "
-+			    "No device associated with skb!\n" );
-+		return IPSEC_XMIT_NODEV;
-+	}
-+
-+	if ((ixs->prv = ixs->dev->priv) == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+			    "klips_error:ipsec_xmit_sanity_check_dev: "
-+			    "Device has no private structure!\n" );
-+		return IPSEC_XMIT_NOPRIVDEV;
-+	}
-+
-+	if ((ixs->physdev = ixs->prv->dev) == NULL) {
-+		KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+			    "klips_error:ipsec_xmit_sanity_check_dev: "
-+			    "Device is not attached to physical device!\n" );
-+		return IPSEC_XMIT_NOPHYSDEV;
-+	}
-+
-+	/* everything is in place: cache statistics block and MTU */
-+	ixs->stats = (struct net_device_stats *) &(ixs->prv->mystats);
-+	ixs->physmtu = ixs->physdev->mtu;
-+	ixs->cur_mtu = ixs->physdev->mtu;
-+
-+	return IPSEC_XMIT_OK;
-+}
-+
-+/*
-+ * Validate the packet attached to a transmit state before processing.
-+ *
-+ * Checks, in order: an skb is present; a cloned skb is made private via
-+ * skb_cow(); the packet is IPv4; (if IPSEC_DISALLOW_IPOPTIONS) the header
-+ * carries no options; (on pre-NET_21 kernels) the TTL has not run out.
-+ * On the way, ixs->iph is set to the skb's IP header.
-+ *
-+ * Returns IPSEC_XMIT_OK, or IPSEC_XMIT_NOSKB, IPSEC_XMIT_ERRSKBALLOC,
-+ * IPSEC_XMIT_NOIPV6, IPSEC_XMIT_NOIPOPTIONS or IPSEC_XMIT_TTLEXPIRED.
-+ * Every failure except NOSKB also bumps ixs->stats->tx_dropped.
-+ *
-+ * NOTE(review): ixs->stats is dereferenced here but assigned in
-+ * ipsec_xmit_sanity_check_dev(); presumably that must have run first -
-+ * confirm against the caller.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_sanity_check_skb(struct ipsec_xmit_state *ixs)
-+{
-+ /*
-+ * Return if there is nothing to do. (Does this ever happen?) XXX
-+ */
-+ if (ixs->skb == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_error:ipsec_xmit_sanity_check_skb: "
-+ "Nothing to do!\n" );
-+ return IPSEC_XMIT_NOSKB;
-+ }
-+
-+ /* if skb was cloned (most likely due to a packet sniffer such as
-+ tcpdump being momentarily attached to the interface), make
-+ a copy of our own to modify */
-+ if(skb_cloned(ixs->skb)) {
-+ /* the `if` condition itself is chosen by the preprocessor: with
-+ SKB_COW_NEW skb_cow() returns a status code, otherwise it
-+ returns the (possibly new) skb, which is stored back */
-+ if
-+#ifdef SKB_COW_NEW
-+ (skb_cow(ixs->skb, skb_headroom(ixs->skb)) != 0)
-+#else /* SKB_COW_NEW */
-+ ((ixs->skb = skb_cow(ixs->skb, skb_headroom(ixs->skb))) == NULL)
-+#endif /* SKB_COW_NEW */
-+ {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_error:ipsec_xmit_sanity_check_skb: "
-+ "skb_cow failed to allocate buffer, dropping.\n" );
-+ ixs->stats->tx_dropped++;
-+ return IPSEC_XMIT_ERRSKBALLOC;
-+ }
-+ }
-+
-+ /* fetched only after the copy-on-write step above */
-+ ixs->iph = ip_hdr(ixs->skb);
-+
-+ /* sanity check for IP version as we can't handle IPv6 right now */
-+ if (ixs->iph->version != 4) {
-+ KLIPS_PRINT(debug_tunnel,
-+ "klips_debug:ipsec_xmit_sanity_check_skb: "
-+ "found IP Version %d but cannot process other IP versions than v4.\n",
-+ ixs->iph->version); /* XXX */
-+ ixs->stats->tx_dropped++;
-+ return IPSEC_XMIT_NOIPV6;
-+ }
-+
-+#if IPSEC_DISALLOW_IPOPTIONS
-+ /* a header longer than the fixed struct iphdr means IP options are present */
-+ if ((ixs->iph->ihl << 2) != sizeof (struct iphdr)) {
-+ KLIPS_PRINT(debug_tunnel,
-+ "klips_debug:ipsec_xmit_sanity_check_skb: "
-+ "cannot process IP header options yet. May be mal-formed packet.\n"); /* XXX */
-+ ixs->stats->tx_dropped++;
-+ return IPSEC_XMIT_NOIPOPTIONS;
-+ }
-+#endif /* IPSEC_DISALLOW_IPOPTIONS */
-+
-+#ifndef NET_21
-+ /* NOTE(review): if ttl is an unsigned field, `<= 0` is just `== 0` - confirm */
-+ if (ixs->iph->ttl <= 0) {
-+ /* Tell the sender its packet died... */
-+ ICMP_SEND(ixs->skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, ixs->physdev);
-+
-+ KLIPS_PRINT(debug_tunnel, "klips_debug:ipsec_xmit_sanity_check_skb: "
-+ "TTL=0, too many hops!\n");
-+ ixs->stats->tx_dropped++;
-+ return IPSEC_XMIT_TTLEXPIRED;
-+ }
-+#endif /* !NET_21 */
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+/*
-+ * Prepare the skb for one encapsulation step described by ixs->ipsp.
-+ *
-+ * 1. Reset the per-step sizes (blocksize 8, headroom/tailroom/authlen 0)
-+ *    and derive iphlen and pyldsz from the current IP header.
-+ * 2. Depending on the SA protocol, work out how many bytes must be added
-+ *    in front of the payload (headroom) and behind it (tailroom):
-+ *      AH    - sizeof(struct ahhdr) in front;
-+ *      ESP   - ESP header + IV in front, padding + 2 trailer bytes +
-+ *              authenticator behind;
-+ *      IPIP  - a fresh struct iphdr in front (iphlen reset to its size);
-+ *      IPCOMP- nothing.
-+ * 3. skb_push()/skb_put() that room, move the IP header to the new front
-+ *    of the buffer and update its tot_len.
-+ *
-+ * Returns IPSEC_XMIT_OK, IPSEC_XMIT_ESP_BADALG for an unsupported ESP
-+ * cipher/authenticator, IPSEC_XMIT_BADPROTO for an unsupported SA
-+ * protocol, IPSEC_XMIT_ESP_PUSHPULLERR when the skb lacks head- or
-+ * tailroom, or IPSEC_XMIT_BADLEN when the result exceeds 0xfff0 bytes.
-+ * All failures bump ixs->stats->tx_errors.
-+ *
-+ * Note that the log strings below still name "ipsec_xmit_encap_once".
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_encap_init(struct ipsec_xmit_state *ixs)
-+{
-+ ixs->blocksize = 8;
-+ ixs->headroom = 0;
-+ ixs->tailroom = 0;
-+ ixs->authlen = 0;
-+
-+#ifdef CONFIG_KLIPS_ALG
-+ ixs->ixt_e = NULL;
-+ ixs->ixt_a = NULL;
-+#endif /* CONFIG_KLIPS_ALG */
-+
-+ ixs->iphlen = ixs->iph->ihl << 2;
-+ ixs->pyldsz = ntohs(ixs->iph->tot_len) - ixs->iphlen;
-+ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->ipsp->ips_said, 0, ixs->sa_txt, SATOT_BUF);
-+ /* NOTE(review): the format has four %s; IPS_XFORM_NAME() presumably expands to three arguments - confirm */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_OXFS,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "calling output for <%s%s%s>, SA:%s\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ switch(ixs->ipsp->ips_said.proto) {
-+#ifdef CONFIG_KLIPS_AH
-+ case IPPROTO_AH:
-+ ixs->headroom += sizeof(struct ahhdr);
-+ break;
-+#endif /* CONFIG_KLIPS_AH */
-+#ifdef CONFIG_KLIPS_ESP
-+ case IPPROTO_ESP:
-+ /* cipher selection: the OCF, ALG and error arms form one if/else chain spliced together by the #ifdefs */
-+#ifdef CONFIG_KLIPS_OCF
-+ /*
-+ * this needs cleaning up for sure - DM
-+ */
-+ if (ixs->ipsp->ocf_in_use) {
-+ switch (ixs->ipsp->ips_encalg) {
-+ case ESP_DES:
-+ case ESP_3DES:
-+ ixs->blocksize = 8;
-+ ixs->headroom += ESP_HEADER_LEN + 8 /* ivsize */;
-+ break;
-+ case ESP_AES:
-+ ixs->blocksize = 16;
-+ ixs->headroom += ESP_HEADER_LEN + 16 /* ivsize */;
-+ break;
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+ } else
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+ if ((ixs->ixt_e=ixs->ipsp->ips_alg_enc)) {
-+ ixs->blocksize = ixs->ixt_e->ixt_common.ixt_blocksize;
-+ /* ias_ivlen is divided by 8 here, i.e. treated as a bit count */
-+ ixs->headroom += ESP_HEADER_LEN + ixs->ixt_e->ixt_common.ixt_support.ias_ivlen/8;
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+ {
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+ /* authenticator selection */
-+ /* NOTE(review): with CONFIG_KLIPS_OCF and CONFIG_KLIPS_ALG both set, the
-+ `else` below governs only the single assignment to ixt_a; the
-+ following `if (ixs->ixt_a) ... else switch` then runs for OCF SAs
-+ too, and its switch can reject an authalg the OCF switch accepted
-+ when the matching CONFIG_KLIPS_AUTH_HMAC_* option is off - confirm
-+ whether that is intended */
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ixs->ipsp->ocf_in_use) {
-+ switch (ixs->ipsp->ips_authalg) {
-+ case AH_MD5:
-+ case AH_SHA:
-+ ixs->authlen = AHHMAC_HASHLEN;
-+ break;
-+ case AH_NONE:
-+ break;
-+ }
-+ } else
-+#endif /* CONFIG_KLIPS_OCF */
-+#ifdef CONFIG_KLIPS_ALG
-+
-+ ixs->ixt_a=ixs->ipsp->ips_alg_auth;
-+ /* NOTE(review): this arm adds AHHMAC_HASHLEN to tailroom AND sets
-+ authlen, and authlen is added to tailroom again further down, so
-+ the hash length appears to be counted twice on this path - confirm */
-+ if (ixs->ixt_a) {
-+ ixs->tailroom += AHHMAC_HASHLEN;
-+ ixs->authlen = AHHMAC_HASHLEN;
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+ switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5:
-+ ixs->authlen = AHHMAC_HASHLEN;
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA:
-+ ixs->authlen = AHHMAC_HASHLEN;
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ case AH_NONE:
-+ break;
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_BADALG;
-+ }
-+ /* padding so that payload + 2 trailer bytes is a multiple of the
-+ block size (4 is used when blocksize is 1), plus the 2 trailer bytes */
-+ ixs->tailroom += ixs->blocksize != 1 ?
-+ ((ixs->blocksize - ((ixs->pyldsz + 2) % ixs->blocksize)) % ixs->blocksize) + 2 :
-+ ((4 - ((ixs->pyldsz + 2) % 4)) % 4) + 2;
-+ ixs->tailroom += ixs->authlen;
-+ break;
-+#endif /* !CONFIG_KLIPS_ESP */
-+#ifdef CONFIG_KLIPS_IPIP
-+ case IPPROTO_IPIP:
-+ ixs->headroom += sizeof(struct iphdr);
-+ ixs->iphlen = sizeof(struct iphdr);
-+ break;
-+#endif /* !CONFIG_KLIPS_IPIP */
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ case IPPROTO_COMP:
-+ break;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ default:
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_BADPROTO;
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "pushing %d bytes, putting %d, proto %d.\n",
-+ ixs->headroom, ixs->tailroom, ixs->ipsp->ips_said.proto);
-+ if(skb_headroom(ixs->skb) < ixs->headroom) {
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_xmit_encap_once: "
-+ "tried to skb_push headroom=%d, %d available. This should never happen, please report.\n",
-+ ixs->headroom, skb_headroom(ixs->skb));
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_PUSHPULLERR;
-+ }
-+
-+ ixs->dat = skb_push(ixs->skb, ixs->headroom);
-+ /* computed after the push but before the put below */
-+ ixs->ilen = ixs->skb->len - ixs->tailroom;
-+ if(skb_tailroom(ixs->skb) < ixs->tailroom) {
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_xmit_encap_once: "
-+ "tried to skb_put %d, %d available. This should never happen, please report.\n",
-+ ixs->tailroom, skb_tailroom(ixs->skb));
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESP_PUSHPULLERR;
-+ }
-+ skb_put(ixs->skb, ixs->tailroom);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "head,tailroom: %d,%d before xform.\n",
-+ skb_headroom(ixs->skb), skb_tailroom(ixs->skb));
-+ ixs->len = ixs->skb->len;
-+ /* 0xfff0 == 65520, the limit quoted in the message */
-+ if(ixs->len > 0xfff0) {
-+ printk(KERN_WARNING "klips_error:ipsec_xmit_encap_once: "
-+ "tot_len (%d) > 65520. This should never happen, please report.\n",
-+ ixs->len);
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_BADLEN;
-+ }
-+ /* slide the IP header down to the new start of data; memmove is used
-+ because the source and destination ranges can overlap */
-+ memmove((void *)ixs->dat, (void *)(ixs->dat + ixs->headroom), ixs->iphlen);
-+ ixs->iph = (struct iphdr *)ixs->dat;
-+ ixs->iph->tot_len = htons(ixs->skb->len);
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+/*
-+ * work out which state to proceed to next
-+ */
-+
-+/*
-+ * Pick the next state-machine state from the SA's protocol.
-+ *
-+ * Sets ixs->next_state and returns IPSEC_XMIT_OK for every protocol
-+ * compiled in; anything else counts a tx error and returns
-+ * IPSEC_XMIT_BADPROTO.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_encap_select(struct ipsec_xmit_state *ixs)
-+{
-+	switch (ixs->ipsp->ips_said.proto) {
-+#ifdef CONFIG_KLIPS_ESP
-+	case IPPROTO_ESP:
-+		ixs->next_state = IPSEC_XSM_ESP;
-+		return IPSEC_XMIT_OK;
-+#endif
-+#ifdef CONFIG_KLIPS_AH
-+	case IPPROTO_AH:
-+		ixs->next_state = IPSEC_XSM_AH;
-+		return IPSEC_XMIT_OK;
-+#endif
-+#ifdef CONFIG_KLIPS_IPIP
-+	case IPPROTO_IPIP:
-+		ixs->next_state = IPSEC_XSM_IPIP;
-+		return IPSEC_XMIT_OK;
-+#endif
-+#ifdef CONFIG_KLIPS_IPCOMP
-+	case IPPROTO_COMP:
-+		ixs->next_state = IPSEC_XSM_IPCOMP;
-+		return IPSEC_XMIT_OK;
-+#endif
-+	default:
-+		break;
-+	}
-+
-+	/* protocol not supported by this build */
-+	ixs->stats->tx_errors++;
-+	return IPSEC_XMIT_BADPROTO;
-+}
-+
-+
-+#ifdef CONFIG_KLIPS_ESP
-+
-+/*
-+ * Build the ESP header and trailer in place and encrypt the payload.
-+ *
-+ * Fills in SPI and the pre-incremented sequence number, writes the
-+ * self-describing padding (1, 2, 3, ...), the pad-length byte and the
-+ * next-header byte, and switches the IP protocol to ESP.  Encryption is
-+ * then handed to OCF when the SA uses it, otherwise to the ALG cipher,
-+ * after which a fresh IV is drawn for the SA.
-+ *
-+ * Returns the result of ipsec_ocf_xmit() on the OCF path, IPSEC_XMIT_OK
-+ * on the ALG path, or IPSEC_XMIT_ESP_BADALG when no cipher is available.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_esp(struct ipsec_xmit_state *ixs)
-+{
-+	unsigned char *pad;
-+	int padlen = 0;
-+	int i;
-+
-+	/* ESP header sits directly behind the IP header */
-+	ixs->espp = (struct esphdr *)(ixs->dat + ixs->iphlen);
-+#ifdef NET_21
-+	skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ixs->espp));
-+#endif /* NET_21 */
-+	ixs->espp->esp_spi = ixs->ipsp->ips_said.spi;
-+	ixs->espp->esp_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq));
-+
-+	/* region to encrypt: everything between the pushed headroom and the authenticator */
-+	ixs->idat = ixs->dat + ixs->iphlen + ixs->headroom;
-+	ixs->ilen = ixs->len - (ixs->iphlen + ixs->headroom + ixs->authlen);
-+
-+	/* Self-describing padding */
-+	padlen = ixs->tailroom - 2 - ixs->authlen;
-+	pad = &ixs->dat[ixs->len - ixs->tailroom];
-+	for (i = padlen; i > 0; i--)
-+		pad[i - 1] = i;
-+
-+	/* trailer: pad length, then the protocol being wrapped */
-+	ixs->dat[ixs->len - ixs->authlen - 2] = padlen;
-+	ixs->dat[ixs->len - ixs->authlen - 1] = ixs->iph->protocol;
-+	ixs->iph->protocol = IPPROTO_ESP;
-+
-+#ifdef CONFIG_KLIPS_OCF
-+	if (ixs->ipsp->ocf_in_use)
-+		return(ipsec_ocf_xmit(ixs));
-+#endif
-+
-+#ifdef CONFIG_KLIPS_ALG
-+	if (ixs->ixt_e == NULL) {
-+		ixs->stats->tx_errors++;
-+		return IPSEC_XMIT_ESP_BADALG;
-+	}
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+	if (debug_tunnel & DB_TN_ENCAP) {
-+		dmp("pre-encrypt", ixs->dat, ixs->len);
-+	}
-+#endif
-+
-+	/* copy the SA's IV into the ESP header, then encrypt with it */
-+	memcpy(ixs->espp->esp_iv,
-+	       ixs->ipsp->ips_iv,
-+	       ixs->ipsp->ips_iv_size);
-+	/* the result of the encrypt call is not examined */
-+	(void) ipsec_alg_esp_encrypt(ixs->ipsp,
-+				     ixs->idat, ixs->ilen, ixs->espp->esp_iv,
-+				     IPSEC_ALG_ENCRYPT);
-+
-+	/* draw a new IV for the SA from the PRNG */
-+	prng_bytes(&ipsec_prng,
-+		   (char *)ixs->ipsp->ips_iv,
-+		   ixs->ipsp->ips_iv_size);
-+
-+	return IPSEC_XMIT_OK;
-+#else
-+	return IPSEC_XMIT_ESP_BADALG;
-+#endif /* CONFIG_KLIPS_ALG */
-+}
-+
-+
-+/*
-+ * Compute the ESP authenticator and store it at the end of the packet.
-+ *
-+ * The hash covers the ESP header through the end of the encrypted data
-+ * (len - iphlen - authlen bytes starting at ixs->espp); the first
-+ * ixs->authlen bytes of the result are written at dat[len - authlen].
-+ * The ALG authenticator is used when one is attached to the state,
-+ * otherwise the built-in HMAC-MD5 / HMAC-SHA1 code runs using the inner
-+ * and outer contexts kept in the SA's ips_key_a.
-+ *
-+ * Returns IPSEC_XMIT_OK (also for AH_NONE), or IPSEC_XMIT_AH_BADALG for
-+ * an unsupported algorithm or when called for an OCF-handled SA.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_esp_ah(struct ipsec_xmit_state *ixs)
-+{
-+#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+	__u8 hash[AH_AMAX];
-+	union {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+		MD5_CTX md5;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+		SHA1_CTX sha1;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+	} tctx;
-+#endif /* defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1) */
-+
-+#ifdef CONFIG_KLIPS_OCF
-+	if (ixs->ipsp->ocf_in_use) {
-+		/* we should never be here using OCF */
-+		ixs->stats->tx_errors++;
-+		return IPSEC_XMIT_AH_BADALG;
-+	} else
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+	if (ixs->ixt_a) {
-+		ipsec_alg_sa_esp_hash(ixs->ipsp,
-+				      (caddr_t)ixs->espp, ixs->len - ixs->iphlen - ixs->authlen,
-+				      &(ixs->dat[ixs->len - ixs->authlen]), ixs->authlen);
-+
-+	} else
-+#endif /* CONFIG_KLIPS_ALG */
-+	switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+	case AH_MD5:
-+		dmp("espp", (char*)ixs->espp, ixs->len - ixs->iphlen - ixs->authlen);
-+		/* inner hash: start from the stored inner context, add the data */
-+		tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+		dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, (caddr_t)ixs->espp, ixs->len - ixs->iphlen - ixs->authlen);
-+		dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Final(hash, &tctx.md5);
-+		dmp("ictx hash", (char*)&hash, sizeof(hash));
-+		/* outer hash: start from the stored outer context, add the inner digest */
-+		tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+		dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, hash, AHMD596_ALEN);
-+		dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Final(hash, &tctx.md5);
-+		dmp("octx hash", (char*)&hash, sizeof(hash));
-+		memcpy(&(ixs->dat[ixs->len - ixs->authlen]), hash, ixs->authlen);
-+
-+		/* paranoid: wipe the context and the WHOLE digest buffer
-+		 * (sizeof(*hash) would clear only its first byte) */
-+		memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5));
-+		memset((caddr_t)hash, 0, sizeof(hash));
-+		break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+	case AH_SHA:
-+		/* inner hash */
-+		tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+		SHA1Update(&tctx.sha1, (caddr_t)ixs->espp, ixs->len - ixs->iphlen - ixs->authlen);
-+		SHA1Final(hash, &tctx.sha1);
-+		/* outer hash over the inner digest */
-+		tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+		SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN);
-+		SHA1Final(hash, &tctx.sha1);
-+		memcpy(&(ixs->dat[ixs->len - ixs->authlen]), hash, ixs->authlen);
-+
-+		/* paranoid: wipe the context and the WHOLE digest buffer */
-+		memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1));
-+		memset((caddr_t)hash, 0, sizeof(hash));
-+		break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+	case AH_NONE:
-+		break;
-+	default:
-+		ixs->stats->tx_errors++;
-+		return IPSEC_XMIT_AH_BADALG;
-+	}
-+	return IPSEC_XMIT_OK;
-+}
-+
-+#endif /* CONFIG_KLIPS_ESP */
-+
-+
-+
-+#ifdef CONFIG_KLIPS_AH
-+
-+/*
-+ * Build the AH header in place and compute its authentication data.
-+ *
-+ * The AH header is written directly behind the IP header: SPI,
-+ * pre-incremented sequence number, the wrapped protocol as next-header,
-+ * and a length derived from the pushed headroom.  The IP protocol is
-+ * switched to AH.  For OCF-handled SAs the rest is delegated to
-+ * ipsec_ocf_xmit().
-+ *
-+ * Otherwise an HMAC is computed over, in order: a copy of the IP header
-+ * with tos, frag_off, ttl and check zeroed; the AH header without its
-+ * data field; AHHMAC_HASHLEN zero bytes standing in for the data field;
-+ * and the payload.  The first AHHMAC_HASHLEN bytes of the result become
-+ * ah_data.
-+ *
-+ * Returns IPSEC_XMIT_OK, the result of ipsec_ocf_xmit(), or
-+ * IPSEC_XMIT_AH_BADALG for an unsupported algorithm.
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_ah(struct ipsec_xmit_state *ixs)
-+{
-+	struct iphdr ipo;
-+	struct ahhdr *ahp;
-+#if defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1)
-+	__u8 hash[AH_AMAX];
-+	union {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+		MD5_CTX md5;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+		SHA1_CTX sha1;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+	} tctx;
-+#endif /* defined(CONFIG_KLIPS_AUTH_HMAC_MD5) || defined(CONFIG_KLIPS_AUTH_HMAC_SHA1) */
-+
-+	ahp = (struct ahhdr *)(ixs->dat + ixs->iphlen);
-+#ifdef NET_21
-+	skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ahp));
-+#endif /* NET_21 */
-+	ahp->ah_spi = ixs->ipsp->ips_said.spi;
-+	ahp->ah_rpl = htonl(++(ixs->ipsp->ips_replaywin_lastseq));
-+	ahp->ah_rv = 0;
-+	ahp->ah_nh = ixs->iph->protocol;
-+	/* header length in 32-bit words, minus two */
-+	ahp->ah_hl = (ixs->headroom >> 2) - sizeof(__u64)/sizeof(__u32);
-+	ixs->iph->protocol = IPPROTO_AH;
-+	dmp("ahp", (char*)ahp, sizeof(*ahp));
-+
-+#ifdef CONFIG_KLIPS_OCF
-+	if (ixs->ipsp->ocf_in_use)
-+		return(ipsec_ocf_xmit(ixs));
-+#endif
-+
-+	/* hash a copy of the IP header with these four fields zeroed */
-+	ipo = *ixs->iph;
-+	ipo.tos = 0;
-+	ipo.frag_off = 0;
-+	ipo.ttl = 0;
-+	ipo.check = 0;
-+	dmp("ipo", (char*)&ipo, sizeof(ipo));
-+
-+	switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+	case AH_MD5:
-+		/* inner hash: IP header copy, AH header, zero filler, payload */
-+		tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+		dmp("ictx", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, (unsigned char *)&ipo, sizeof (struct iphdr));
-+		dmp("ictx+ipo", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, (unsigned char *)ahp, ixs->headroom - sizeof(ahp->ah_data));
-+		dmp("ictx+ahp", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, (unsigned char *)zeroes, AHHMAC_HASHLEN);
-+		dmp("ictx+zeroes", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, ixs->dat + ixs->iphlen + ixs->headroom, ixs->len - ixs->iphlen - ixs->headroom);
-+		dmp("ictx+dat", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Final(hash, &tctx.md5);
-+		dmp("ictx hash", (char*)&hash, sizeof(hash));
-+		/* outer hash over the inner digest */
-+		tctx.md5 = ((struct md5_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+		dmp("octx", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Update(&tctx.md5, hash, AHMD596_ALEN);
-+		dmp("octx+hash", (char*)&tctx.md5, sizeof(tctx.md5));
-+		osMD5Final(hash, &tctx.md5);
-+		dmp("octx hash", (char*)&hash, sizeof(hash));
-+
-+		memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN);
-+
-+		/* paranoid: wipe the context and the WHOLE digest buffer
-+		 * (sizeof(*hash) would clear only its first byte) */
-+		memset((caddr_t)&tctx.md5, 0, sizeof(tctx.md5));
-+		memset((caddr_t)hash, 0, sizeof(hash));
-+		break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+	case AH_SHA:
-+		/* inner hash: IP header copy, AH header, zero filler, payload */
-+		tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->ictx;
-+		SHA1Update(&tctx.sha1, (unsigned char *)&ipo, sizeof (struct iphdr));
-+		SHA1Update(&tctx.sha1, (unsigned char *)ahp, ixs->headroom - sizeof(ahp->ah_data));
-+		SHA1Update(&tctx.sha1, (unsigned char *)zeroes, AHHMAC_HASHLEN);
-+		SHA1Update(&tctx.sha1, ixs->dat + ixs->iphlen + ixs->headroom, ixs->len - ixs->iphlen - ixs->headroom);
-+		SHA1Final(hash, &tctx.sha1);
-+		/* outer hash over the inner digest */
-+		tctx.sha1 = ((struct sha1_ctx*)(ixs->ipsp->ips_key_a))->octx;
-+		SHA1Update(&tctx.sha1, hash, AHSHA196_ALEN);
-+		SHA1Final(hash, &tctx.sha1);
-+
-+		memcpy(ahp->ah_data, hash, AHHMAC_HASHLEN);
-+
-+		/* paranoid: wipe the context and the WHOLE digest buffer */
-+		memset((caddr_t)&tctx.sha1, 0, sizeof(tctx.sha1));
-+		memset((caddr_t)hash, 0, sizeof(hash));
-+		break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+	default:
-+		ixs->stats->tx_errors++;
-+		return IPSEC_XMIT_AH_BADALG;
-+	}
-+	return IPSEC_XMIT_OK;
-+}
-+
-+#endif /* CONFIG_KLIPS_AH */
-+
-+
-+#ifdef CONFIG_KLIPS_IPIP
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_ipip(struct ipsec_xmit_state *ixs)
-+{
-+ ixs->iph->version = 4;
-+ switch(sysctl_ipsec_tos) {
-+ case 0:
-+#ifdef NET_21
-+ ixs->iph->tos = ip_hdr(ixs->skb)->tos;
-+#else /* NET_21 */
-+ ixs->iph->tos = ixs->skb->ip_hdr->tos;
-+#endif /* NET_21 */
-+ break;
-+ case 1:
-+ ixs->iph->tos = 0;
-+ break;
-+ default:
-+ break;
-+ }
-+ ixs->iph->ttl = SYSCTL_IPSEC_DEFAULT_TTL;
-+ ixs->iph->frag_off = 0;
-+ ixs->iph->saddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_s))->sin_addr.s_addr;
-+ ixs->iph->daddr = ((struct sockaddr_in*)(ixs->ipsp->ips_addr_d))->sin_addr.s_addr;
-+ ixs->iph->protocol = IPPROTO_IPIP;
-+ ixs->iph->ihl = sizeof(struct iphdr) >> 2;
-+
-+ KLIPS_IP_SELECT_IDENT(ixs->iph, ixs->skb);
-+
-+ ixs->newdst = (__u32)ixs->iph->daddr;
-+ ixs->newsrc = (__u32)ixs->iph->saddr;
-+
-+#ifdef NET_21
-+ skb_set_transport_header(ixs->skb, ipsec_skb_offset(ixs->skb, ip_hdr(ixs->skb)));
-+#endif /* NET_21 */
-+ return IPSEC_XMIT_OK;
-+}
-+
-+#endif /* CONFIG_KLIPS_IPIP */
-+
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_ipcomp(struct ipsec_xmit_state *ixs)
-+{
-+#ifdef CONFIG_KLIPS_DEBUG
-+ unsigned int old_tot_len;
-+#endif
-+ int flags = 0;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ old_tot_len = ntohs(ixs->iph->tot_len);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ ixs->ipsp->ips_comp_ratio_dbytes += ntohs(ixs->iph->tot_len);
-+ ixs->skb = skb_compress(ixs->skb, ixs->ipsp, &flags);
-+
-+#ifdef NET_21
-+ ixs->iph = ip_hdr(ixs->skb);
-+#else /* NET_21 */
-+ ixs->iph = ixs->skb->ip_hdr;
-+#endif /* NET_21 */
-+
-+ ixs->ipsp->ips_comp_ratio_cbytes += ntohs(ixs->iph->tot_len);
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_tunnel & DB_TN_CROUT)
-+ {
-+ if (old_tot_len > ntohs(ixs->iph->tot_len))
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "packet shrunk from %d to %d bytes after compression, cpi=%04x (should be from spi=%08x, spi&0xffff=%04x.\n",
-+ old_tot_len, ntohs(ixs->iph->tot_len),
-+ ntohs(((struct ipcomphdr*)(((char*)ixs->iph) + ((ixs->iph->ihl) << 2)))->ipcomp_cpi),
-+ ntohl(ixs->ipsp->ips_said.spi),
-+ (__u16)(ntohl(ixs->ipsp->ips_said.spi) & 0x0000ffff));
-+ else
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "packet did not compress (flags = %d).\n",
-+ flags);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ return IPSEC_XMIT_OK;
-+}
-+
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+
-+
-+/*
-+ * upon entry to this function, ixs->skb should be setup
-+ * as follows:
-+ *
-+ * data = beginning of IP packet <- differs from ipsec_rcv().
-+ * nh.raw = beginning of IP packet.
-+ * h.raw = data after the IP packet.
-+ *
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_cont(struct ipsec_xmit_state *ixs)
-+{
-+#ifdef NET_21
-+ skb_set_network_header(ixs->skb, ipsec_skb_offset(ixs->skb, ixs->skb->data));
-+#else /* NET_21 */
-+ ixs->skb->ip_hdr = ixs->skb->h.iph = (struct iphdr *) ixs->skb->data;
-+#endif /* NET_21 */
-+ ixs->iph->check = 0;
-+ ixs->iph->check = ip_fast_csum((unsigned char *)ixs->iph, ixs->iph->ihl);
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_once: "
-+ "after <%s%s%s>, SA:%s:\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ixs->iph);
-+
-+ ixs->ipsp->ips_life.ipl_bytes.ipl_count += ixs->len;
-+ ixs->ipsp->ips_life.ipl_bytes.ipl_last = ixs->len;
-+
-+ if(!ixs->ipsp->ips_life.ipl_usetime.ipl_count) {
-+ ixs->ipsp->ips_life.ipl_usetime.ipl_count = jiffies / HZ;
-+ }
-+ ixs->ipsp->ips_life.ipl_usetime.ipl_last = jiffies / HZ;
-+ ixs->ipsp->ips_life.ipl_packets.ipl_count++;
-+
-+ ixs->ipsp = ixs->ipsp->ips_next;
-+
-+ /*
-+ * start again if we have more work to do
-+ */
-+ if (ixs->ipsp)
-+ ixs->next_state = IPSEC_XSM_ENCAP_INIT;
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+
-+/*
-+ * If the IP packet (iph) is a carrying TCP/UDP, then set the encaps
-+ * source and destination ports to those from the TCP/UDP header.
-+ */
-+void ipsec_extract_ports(struct iphdr * iph, struct sockaddr_encap * er)
-+{
-+ struct udphdr *udp;
-+
-+ switch (iph->protocol) {
-+ case IPPROTO_UDP:
-+ case IPPROTO_TCP:
-+ /*
-+ * The ports are at the same offsets in a TCP and UDP
-+ * header so hack it ...
-+ */
-+ udp = (struct udphdr*)(((char*)iph)+(iph->ihl<<2));
-+ er->sen_sport = udp->source;
-+ er->sen_dport = udp->dest;
-+ break;
-+ default:
-+ er->sen_sport = 0;
-+ er->sen_dport = 0;
-+ break;
-+ }
-+}
-+
-+/*
-+ * A TRAP eroute is installed and we want to replace it with a HOLD
-+ * eroute.
-+ */
-+static int create_hold_eroute(struct eroute *origtrap,
-+ struct sk_buff * skb, struct iphdr * iph,
-+ uint32_t eroute_pid)
-+{
-+ struct eroute hold_eroute;
-+ ip_said hold_said;
-+ struct sk_buff *first, *last;
-+ int error;
-+
-+ first = last = NULL;
-+ memset((caddr_t)&hold_eroute, 0, sizeof(hold_eroute));
-+ memset((caddr_t)&hold_said, 0, sizeof(hold_said));
-+
-+ hold_said.proto = IPPROTO_INT;
-+ hold_said.spi = htonl(SPI_HOLD);
-+ hold_said.dst.u.v4.sin_addr.s_addr = INADDR_ANY;
-+
-+ hold_eroute.er_eaddr.sen_len = sizeof(struct sockaddr_encap);
-+ hold_eroute.er_emask.sen_len = sizeof(struct sockaddr_encap);
-+ hold_eroute.er_eaddr.sen_family = AF_ENCAP;
-+ hold_eroute.er_emask.sen_family = AF_ENCAP;
-+ hold_eroute.er_eaddr.sen_type = SENT_IP4;
-+ hold_eroute.er_emask.sen_type = 255;
-+
-+ hold_eroute.er_eaddr.sen_ip_src.s_addr = iph->saddr;
-+ hold_eroute.er_eaddr.sen_ip_dst.s_addr = iph->daddr;
-+ hold_eroute.er_emask.sen_ip_src.s_addr = INADDR_BROADCAST;
-+ hold_eroute.er_emask.sen_ip_dst.s_addr = INADDR_BROADCAST;
-+ hold_eroute.er_emask.sen_sport = 0;
-+ hold_eroute.er_emask.sen_dport = 0;
-+ hold_eroute.er_pid = eroute_pid;
-+ hold_eroute.er_count = 0;
-+ hold_eroute.er_lasttime = jiffies/HZ;
-+
-+ /*
-+ * if it wasn't captured by a wildcard, then don't record it as
-+ * a wildcard.
-+ */
-+ if(origtrap->er_eaddr.sen_proto != 0) {
-+ hold_eroute.er_eaddr.sen_proto = iph->protocol;
-+
-+ if((iph->protocol == IPPROTO_TCP ||
-+ iph->protocol == IPPROTO_UDP) &&
-+ (origtrap->er_eaddr.sen_sport != 0 ||
-+ origtrap->er_eaddr.sen_dport != 0)) {
-+
-+ if(origtrap->er_eaddr.sen_sport != 0)
-+ hold_eroute.er_emask.sen_sport = ~0;
-+
-+ if(origtrap->er_eaddr.sen_dport != 0)
-+ hold_eroute.er_emask.sen_dport = ~0;
-+
-+ ipsec_extract_ports(iph, &hold_eroute.er_eaddr);
-+ }
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_pfkey) {
-+ char buf1[64], buf2[64];
-+ subnettoa(hold_eroute.er_eaddr.sen_ip_src,
-+ hold_eroute.er_emask.sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(hold_eroute.er_eaddr.sen_ip_dst,
-+ hold_eroute.er_emask.sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "calling breakeroute and makeroute for %s:%d->%s:%d %d HOLD eroute.\n",
-+ buf1, ntohs(hold_eroute.er_eaddr.sen_sport),
-+ buf2, ntohs(hold_eroute.er_eaddr.sen_dport),
-+ hold_eroute.er_eaddr.sen_proto);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ if (ipsec_breakroute(&(hold_eroute.er_eaddr), &(hold_eroute.er_emask),
-+ &first, &last)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "HOLD breakeroute found nothing.\n");
-+ } else {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "HOLD breakroute deleted %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u %u\n",
-+ NIPQUAD(hold_eroute.er_eaddr.sen_ip_src),
-+ ntohs(hold_eroute.er_eaddr.sen_sport),
-+ NIPQUAD(hold_eroute.er_eaddr.sen_ip_dst),
-+ ntohs(hold_eroute.er_eaddr.sen_dport),
-+ hold_eroute.er_eaddr.sen_proto);
-+ }
-+ if (first != NULL)
-+ kfree_skb(first);
-+ if (last != NULL)
-+ kfree_skb(last);
-+
-+ error = ipsec_makeroute(&(hold_eroute.er_eaddr),
-+ &(hold_eroute.er_emask),
-+ hold_said, eroute_pid, skb, NULL, NULL);
-+ if (error) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "HOLD makeroute returned %d, failed.\n", error);
-+ } else {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "HOLD makeroute call successful.\n");
-+ }
-+ return (error == 0);
-+}
-+
-+/*
-+ * upon entry to this function, ixs->skb should be setup
-+ * as follows:
-+ *
-+ * data = beginning of IP packet <- differs from ipsec_rcv().
-+ * nh.raw = beginning of IP packet.
-+ * h.raw = data after the IP packet.
-+ *
-+ */
-+enum ipsec_xmit_value
-+ipsec_xmit_init1(struct ipsec_xmit_state *ixs)
-+{
-+ ixs->newdst = ixs->orgdst = ixs->iph->daddr;
-+ ixs->newsrc = ixs->orgsrc = ixs->iph->saddr;
-+ ixs->orgedst = ixs->outgoing_said.dst.u.v4.sin_addr.s_addr;
-+ ixs->iphlen = ixs->iph->ihl << 2;
-+ ixs->pyldsz = ntohs(ixs->iph->tot_len) - ixs->iphlen;
-+ ixs->max_headroom = ixs->max_tailroom = 0;
-+
-+ if (ixs->outgoing_said.proto == IPPROTO_INT) {
-+ switch (ntohl(ixs->outgoing_said.spi)) {
-+ case SPI_DROP:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "shunt SA of DROP or no eroute: dropping.\n");
-+ ixs->stats->tx_dropped++;
-+ break;
-+
-+ case SPI_REJECT:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "shunt SA of REJECT: notifying and dropping.\n");
-+ ICMP_SEND(ixs->skb,
-+ ICMP_DEST_UNREACH,
-+ ICMP_PKT_FILTERED,
-+ 0,
-+ ixs->physdev);
-+ ixs->stats->tx_dropped++;
-+ break;
-+
-+ case SPI_PASS:
-+#ifdef NET_21
-+ ixs->pass = 1;
-+#endif /* NET_21 */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "PASS: calling dev_queue_xmit\n");
-+ return IPSEC_XMIT_PASS;
-+
-+ case SPI_HOLD:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "shunt SA of HOLD: this does not make sense here, dropping.\n");
-+ ixs->stats->tx_dropped++;
-+ break;
-+
-+ case SPI_TRAP:
-+ case SPI_TRAPSUBNET:
-+ {
-+ struct sockaddr_in src, dst;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ char bufsrc[ADDRTOA_BUF], bufdst[ADDRTOA_BUF];
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ /* Signal all listening KMds with a PF_KEY ACQUIRE */
-+
-+ memset(&src, 0, sizeof(src));
-+ memset(&dst, 0, sizeof(dst));
-+ src.sin_family = AF_INET;
-+ dst.sin_family = AF_INET;
-+ src.sin_addr.s_addr = ixs->iph->saddr;
-+ dst.sin_addr.s_addr = ixs->iph->daddr;
-+
-+ ixs->ips.ips_transport_protocol = 0;
-+ src.sin_port = 0;
-+ dst.sin_port = 0;
-+
-+ if(ixs->eroute->er_eaddr.sen_proto != 0) {
-+ ixs->ips.ips_transport_protocol = ixs->iph->protocol;
-+
-+ if(ixs->eroute->er_eaddr.sen_sport != 0) {
-+ src.sin_port =
-+ (ixs->iph->protocol == IPPROTO_UDP
-+ ? ((struct udphdr*) (((caddr_t)ixs->iph) + (ixs->iph->ihl << 2)))->source
-+ : (ixs->iph->protocol == IPPROTO_TCP
-+ ? ((struct tcphdr*)((caddr_t)ixs->iph + (ixs->iph->ihl << 2)))->source
-+ : 0));
-+ }
-+ if(ixs->eroute->er_eaddr.sen_dport != 0) {
-+ dst.sin_port =
-+ (ixs->iph->protocol == IPPROTO_UDP
-+ ? ((struct udphdr*) (((caddr_t)ixs->iph) + (ixs->iph->ihl << 2)))->dest
-+ : (ixs->iph->protocol == IPPROTO_TCP
-+ ? ((struct tcphdr*)((caddr_t)ixs->iph + (ixs->iph->ihl << 2)))->dest
-+ : 0));
-+ }
-+ }
-+
-+ ixs->ips.ips_addr_s = (struct sockaddr*)(&src);
-+ ixs->ips.ips_addr_d = (struct sockaddr*)(&dst);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "SADB_ACQUIRE sent with src=%s:%d, dst=%s:%d, proto=%d.\n",
-+ addrtoa(((struct sockaddr_in*)(ixs->ips.ips_addr_s))->sin_addr, 0, bufsrc, sizeof(bufsrc)) <= ADDRTOA_BUF ? bufsrc : "BAD_ADDR",
-+ ntohs(((struct sockaddr_in*)(ixs->ips.ips_addr_s))->sin_port),
-+ addrtoa(((struct sockaddr_in*)(ixs->ips.ips_addr_d))->sin_addr, 0, bufdst, sizeof(bufdst)) <= ADDRTOA_BUF ? bufdst : "BAD_ADDR",
-+ ntohs(((struct sockaddr_in*)(ixs->ips.ips_addr_d))->sin_port),
-+ ixs->ips.ips_said.proto);
-+
-+ /* increment count of total traps needed */
-+ ipsec_xmit_trap_count++;
-+
-+ if (pfkey_acquire(&ixs->ips) == 0) {
-+
-+ /* note that we succeeded */
-+ ipsec_xmit_trap_sendcount++;
-+
-+ if (ixs->outgoing_said.spi==htonl(SPI_TRAPSUBNET)) {
-+ /*
-+ * The spinlock is to prevent any other
-+ * process from accessing or deleting
-+ * the eroute while we are using and
-+ * updating it.
-+ */
-+ spin_lock_bh(&eroute_lock);
-+ ixs->eroute = ipsec_findroute(&ixs->matcher);
-+ if(ixs->eroute) {
-+ ixs->eroute->er_said.spi = htonl(SPI_HOLD);
-+ ixs->eroute->er_first = ixs->skb;
-+ ixs->skb = NULL;
-+ }
-+ spin_unlock_bh(&eroute_lock);
-+ } else if (create_hold_eroute(ixs->eroute,
-+ ixs->skb,
-+ ixs->iph,
-+ ixs->eroute_pid)) {
-+ ixs->skb = NULL;
-+ }
-+ /* whether or not the above succeeded, we continue */
-+
-+ }
-+ ixs->stats->tx_dropped++;
-+ }
-+ default:
-+ /* XXX what do we do with an unknown shunt spi? */
-+ break;
-+ } /* switch (ntohl(ixs->outgoing_said.spi)) */
-+ return IPSEC_XMIT_STOLEN;
-+ } /* if (ixs->outgoing_said.proto == IPPROTO_INT) */
-+
-+ ixs->ipsp = ipsec_sa_getbyid(&ixs->outgoing_said);
-+ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->outgoing_said, 0, ixs->sa_txt, sizeof(ixs->sa_txt));
-+
-+ if (ixs->ipsp == NULL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle: "
-+ "no ipsec_sa for SA%s: outgoing packet with no SA, dropped.\n",
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ if(ixs->stats) {
-+ ixs->stats->tx_dropped++;
-+ }
-+ return IPSEC_XMIT_SAIDNOTFOUND;
-+ }
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_init2(struct ipsec_xmit_state *ixs)
-+{
-+ enum ipsec_xmit_value bundle_stat = IPSEC_XMIT_OK;
-+ struct ipsec_sa *saved_ipsp;
-+#ifdef CONFIG_KLIPS_ALG
-+ ixs->blocksize = 8;
-+ ixs->ixt_e = NULL;
-+ ixs->ixt_a = NULL;
-+#endif /* CONFIG_KLIPS_ALG */
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "found ipsec_sa -- SA:<%s%s%s> %s\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+
-+ /*
-+ * How much headroom do we need to be able to apply
-+ * all the grouped transforms?
-+ */
-+ saved_ipsp = ixs->ipsp; /* save the head of the ipsec_sa chain */
-+ while (ixs->ipsp) {
-+ if (debug_tunnel & DB_TN_XMIT) {
-+ ixs->sa_len = KLIPS_SATOT(debug_tunnel, &ixs->ipsp->ips_said, 0, ixs->sa_txt, sizeof(ixs->sa_txt));
-+ if(ixs->sa_len == 0) {
-+ strcpy(ixs->sa_txt, "(error)");
-+ }
-+ } else {
-+ *ixs->sa_txt = 0;
-+ ixs->sa_len = 0;
-+ }
-+
-+ /* If it is in larval state, drop the packet, we cannot process yet. */
-+ if(ixs->ipsp->ips_state == K_SADB_SASTATE_LARVAL) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "ipsec_sa in larval state for SA:<%s%s%s> %s, cannot be used yet, dropping packet.\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ if(ixs->stats) {
-+ ixs->stats->tx_errors++;
-+ }
-+ bundle_stat = IPSEC_XMIT_SAIDNOTLIVE;
-+ goto cleanup;
-+ }
-+
-+ if(ixs->ipsp->ips_state == K_SADB_SASTATE_DEAD) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "ipsec_sa in dead state for SA:<%s%s%s> %s, can no longer be used, dropping packet.\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_SAIDNOTLIVE;
-+ goto cleanup;
-+ }
-+
-+ /* If the replay window counter == -1, expire SA, it will roll */
-+ if(ixs->ipsp->ips_replaywin && ixs->ipsp->ips_replaywin_lastseq == -1) {
-+ pfkey_expire(ixs->ipsp, 1);
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "replay window counter rolled for SA:<%s%s%s> %s, packet dropped, expiring SA.\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ ipsec_sa_rm(ixs->ipsp);
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_REPLAYROLLED;
-+ goto cleanup;
-+ }
-+
-+ /*
-+ * if this is the first time we are using this SA, mark start time,
-+ * and offset hard/soft counters by "now" for later checking.
-+ */
-+#if 0
-+ if(ixs->ipsp->ips_life.ipl_usetime.count == 0) {
-+ ixs->ipsp->ips_life.ipl_usetime.count = jiffies;
-+ ixs->ipsp->ips_life.ipl_usetime.hard += jiffies;
-+ ixs->ipsp->ips_life.ipl_usetime.soft += jiffies;
-+ }
-+#endif
-+
-+
-+ if(ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_bytes, "bytes", ixs->sa_txt,
-+ ipsec_life_countbased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_addtime, "addtime",ixs->sa_txt,
-+ ipsec_life_timebased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_usetime, "usetime",ixs->sa_txt,
-+ ipsec_life_timebased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied ||
-+ ipsec_lifetime_check(&ixs->ipsp->ips_life.ipl_packets, "packets",ixs->sa_txt,
-+ ipsec_life_countbased, ipsec_outgoing, ixs->ipsp) == ipsec_life_harddied) {
-+
-+ ipsec_sa_rm(ixs->ipsp);
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_LIFETIMEFAILED;
-+ goto cleanup;
-+ }
-+
-+
-+ ixs->headroom = ixs->tailroom = 0;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "calling room for <%s%s%s>, SA:%s\n",
-+ IPS_XFORM_NAME(ixs->ipsp),
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ switch(ixs->ipsp->ips_said.proto) {
-+#ifdef CONFIG_KLIPS_AH
-+ case IPPROTO_AH:
-+ ixs->headroom += sizeof(struct ahhdr);
-+ break;
-+#endif /* CONFIG_KLIPS_AH */
-+
-+#ifdef CONFIG_KLIPS_ESP
-+ case IPPROTO_ESP:
-+#ifdef CONFIG_KLIPS_OCF
-+ /*
-+ * this needs cleaning up for sure - DM
-+ */
-+ if (ixs->ipsp->ocf_in_use) {
-+ switch (ixs->ipsp->ips_encalg) {
-+ case ESP_DES:
-+ case ESP_3DES:
-+ ixs->blocksize = 8;
-+ ixs->headroom += ESP_HEADER_LEN + 8 /* ivsize */;
-+ break;
-+ case ESP_AES:
-+ ixs->blocksize = 16;
-+ ixs->headroom += ESP_HEADER_LEN + 16 /* ivsize */;
-+ break;
-+ default:
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_ESP_BADALG;
-+ goto cleanup;
-+ }
-+ } else
-+#endif /* CONFIG_KLIPS_OCF */
-+#ifdef CONFIG_KLIPS_ALG
-+ ixs->ixt_e=ixs->ipsp->ips_alg_enc;
-+ if (ixs->ixt_e) {
-+ ixs->blocksize = ixs->ixt_e->ixt_common.ixt_blocksize;
-+ ixs->headroom += ESP_HEADER_LEN + ixs->ixt_e->ixt_common.ixt_support.ias_ivlen/8;
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+ {
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_ESP_BADALG;
-+ goto cleanup;
-+ }
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ixs->ipsp->ocf_in_use) {
-+ switch (ixs->ipsp->ips_authalg) {
-+ case AH_MD5:
-+ case AH_SHA:
-+ ixs->tailroom += AHHMAC_HASHLEN;
-+ break;
-+ case AH_NONE:
-+ break;
-+ }
-+ } else
-+#endif /* CONFIG_KLIPS_OCF */
-+#ifdef CONFIG_KLIPS_ALG
-+ if ((ixs->ixt_a=ixs->ipsp->ips_alg_auth)) {
-+ ixs->tailroom += AHHMAC_HASHLEN;
-+ } else
-+#endif /* CONFIG_KLIPS_ALG */
-+ switch(ixs->ipsp->ips_authalg) {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ case AH_MD5:
-+ ixs->tailroom += AHHMAC_HASHLEN;
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ case AH_SHA:
-+ ixs->tailroom += AHHMAC_HASHLEN;
-+ break;
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ case AH_NONE:
-+ break;
-+ default:
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_AH_BADALG;
-+ goto cleanup;
-+ }
-+ ixs->tailroom += ixs->blocksize != 1 ?
-+ ((ixs->blocksize - ((ixs->pyldsz + 2) % ixs->blocksize)) % ixs->blocksize) + 2 :
-+ ((4 - ((ixs->pyldsz + 2) % 4)) % 4) + 2;
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ if ((ixs->ipsp->ips_natt_type) && (!ixs->natt_type)) {
-+ ixs->natt_type = ixs->ipsp->ips_natt_type;
-+ ixs->natt_sport = ixs->ipsp->ips_natt_sport;
-+ ixs->natt_dport = ixs->ipsp->ips_natt_dport;
-+ switch (ixs->natt_type) {
-+ case ESPINUDP_WITH_NON_IKE:
-+ ixs->natt_head = sizeof(struct udphdr)+(2*sizeof(__u32));
-+ break;
-+
-+ case ESPINUDP_WITH_NON_ESP:
-+ ixs->natt_head = sizeof(struct udphdr);
-+ break;
-+
-+ default:
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT
-+ , "klips_xmit: invalid nat-t type %d"
-+ , ixs->natt_type);
-+ bundle_stat = IPSEC_XMIT_ESPUDP_BADTYPE;
-+ goto cleanup;
-+
-+ break;
-+ }
-+ ixs->tailroom += ixs->natt_head;
-+ }
-+#endif
-+ break;
-+#endif /* CONFIG_KLIPS_ESP */
-+#ifdef CONFIG_KLIPS_IPIP
-+ case IPPROTO_IPIP:
-+ ixs->headroom += sizeof(struct iphdr);
-+ break;
-+#endif /* !CONFIG_KLIPS_IPIP */
-+ case IPPROTO_COMP:
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ /*
-+ We can't predict how much the packet will
-+ shrink without doing the actual compression.
-+ We could do it here, if we were the first
-+ encapsulation in the chain. That might save
-+ us a skb_copy_expand, since we might fit
-+ into the existing skb then. However, this
-+ would be a bit unclean (and this hack has
-+ bit us once), so we better not do it. After
-+ all, the skb_copy_expand is cheap in
-+ comparison to the actual compression.
-+ At least we know the packet will not grow.
-+ */
-+ break;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ default:
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_BADPROTO;
-+ goto cleanup;
-+ }
-+ ixs->ipsp = ixs->ipsp->ips_next;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "Required head,tailroom: %d,%d\n",
-+ ixs->headroom, ixs->tailroom);
-+ ixs->max_headroom += ixs->headroom;
-+ ixs->max_tailroom += ixs->tailroom;
-+ ixs->pyldsz += (ixs->headroom + ixs->tailroom);
-+ }
-+ ixs->ipsp = saved_ipsp; /* restore the head of the ipsec_sa chain */
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "existing head,tailroom: %d,%d before applying xforms with head,tailroom: %d,%d .\n",
-+ skb_headroom(ixs->skb), skb_tailroom(ixs->skb),
-+ ixs->max_headroom, ixs->max_tailroom);
-+
-+ ixs->tot_headroom += ixs->max_headroom;
-+ ixs->tot_tailroom += ixs->max_tailroom;
-+
-+ ixs->mtudiff = ixs->cur_mtu + ixs->tot_headroom + ixs->tot_tailroom - ixs->physmtu;
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "mtu:%d physmtu:%d tothr:%d tottr:%d mtudiff:%d ippkttotlen:%d\n",
-+ ixs->cur_mtu, ixs->physmtu,
-+ ixs->tot_headroom, ixs->tot_tailroom, ixs->mtudiff, ntohs(ixs->iph->tot_len));
-+ if(ixs->cur_mtu == 0 || ixs->mtudiff > 0) {
-+ int newmtu = ixs->physmtu - (ixs->tot_headroom + ((ixs->tot_tailroom + 2) & ~7) + 5);
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_info:ipsec_xmit_encap_bundle_2: "
-+ "dev %s mtu of %d decreased by %d to %d\n",
-+ ixs->dev ? ixs->dev->name : "ifX",
-+ ixs->cur_mtu,
-+ ixs->cur_mtu - newmtu,
-+ newmtu);
-+ ixs->cur_mtu = newmtu;
-+
-+ /* this would seem to adjust the MTU of the route as well */
-+#if 0
-+ ixs->skb->dst->pmtu = ixs->prv->mtu; /* RGB */
-+#endif /* 0 */
-+ }
-+
-+ /*
-+ If the sender is doing PMTU discovery, and the
-+ packet doesn't fit within ixs->prv->mtu, notify him
-+ (unless it was an ICMP packet, or it was not the
-+ zero-offset packet) and send it anyways.
-+
-+ Note: buggy firewall configuration may prevent the
-+ ICMP packet from getting back.
-+ */
-+ if(sysctl_ipsec_icmp
-+ && ixs->cur_mtu < ntohs(ixs->iph->tot_len)
-+ && (ixs->iph->frag_off & __constant_htons(IP_DF)) ) {
-+ int notify = ixs->iph->protocol != IPPROTO_ICMP
-+ && (ixs->iph->frag_off & __constant_htons(IP_OFFSET)) == 0;
-+
-+#ifdef IPSEC_obey_DF
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "fragmentation needed and DF set; %sdropping packet\n",
-+ notify ? "sending ICMP and " : "");
-+ if (notify)
-+ ICMP_SEND(ixs->skb,
-+ ICMP_DEST_UNREACH,
-+ ICMP_FRAG_NEEDED,
-+ ixs->cur_mtu,
-+ ixs->physdev);
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_CANNOTFRAG;
-+ goto cleanup;
-+#else /* IPSEC_obey_DF */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "fragmentation needed and DF set; %spassing packet\n",
-+ notify ? "sending ICMP and " : "");
-+ if (notify)
-+ ICMP_SEND(ixs->skb,
-+ ICMP_DEST_UNREACH,
-+ ICMP_FRAG_NEEDED,
-+ ixs->cur_mtu,
-+ ixs->physdev);
-+#endif /* IPSEC_obey_DF */
-+ }
-+
-+#ifdef MSS_HACK
-+ /*
-+ * If this is a transport mode TCP packet with
-+ * SYN set, determine an effective MSS based on
-+ * AH/ESP overheads determined above.
-+ */
-+ if (ixs->iph->protocol == IPPROTO_TCP
-+ && ixs->outgoing_said.proto != IPPROTO_IPIP) {
-+ struct tcphdr *tcph = ixs->skb->h.th;
-+ if (tcph->syn && !tcph->ack) {
-+ if(!ipsec_adjust_mss(ixs->skb, tcph, ixs->cur_mtu)) {
-+ printk(KERN_WARNING
-+ "klips_warning:ipsec_xmit_encap_bundle_2: "
-+ "ipsec_adjust_mss() failed\n");
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_MSSERR;
-+ goto cleanup;
-+ }
-+ }
-+ }
-+#endif /* MSS_HACK */
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ if ((ixs->natt_type) && (ixs->outgoing_said.proto != IPPROTO_IPIP)) {
-+ /**
-+ * NAT-Traversal and Transport Mode:
-+ * we need to force destination address to sane value
-+ */
-+
-+ struct sockaddr_in *sv4=(struct sockaddr_in *)ixs->ipsp->ips_addr_d;
-+ __u32 natt_d = sv4->sin_addr.s_addr;
-+ struct iphdr *ipp = ixs->iph;
-+
-+ /* set the destination address to what it needs to be for the
-+ * NAT encapsulation.
-+ */
-+ KLIPS_PRINT(debug_tunnel,
-+ "xmit: setting ND=%08x\n", natt_d);
-+ ipp->daddr = natt_d;
-+ ipp->check = 0;
-+ ipp->check = ip_fast_csum((unsigned char *)ipp, ipp->ihl);
-+ }
-+#endif /* CONFIG_IPSEC_NAT_TRAVERSAL */
-+
-+ if(!ixs->hard_header_stripped && ixs->hard_header_len>0) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "allocating %d bytes for hardheader.\n",
-+ ixs->hard_header_len);
-+ if((ixs->saved_header = kmalloc(ixs->hard_header_len, GFP_ATOMIC)) == NULL) {
-+ printk(KERN_WARNING "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "Failed, tried to allocate %d bytes for temp hard_header.\n",
-+ ixs->hard_header_len);
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_ERRMEMALLOC;
-+ goto cleanup;
-+ }
-+ {
-+ int i;
-+ for (i = 0; i < ixs->hard_header_len; i++) {
-+ ixs->saved_header[i] = ixs->skb->data[i];
-+ }
-+ }
-+ if(ixs->skb->len < ixs->hard_header_len) {
-+ printk(KERN_WARNING "klips_error:ipsec_xmit_encap_bundle_2: "
-+ "tried to skb_pull hhlen=%d, %d available. This should never happen, please report.\n",
-+ ixs->hard_header_len, (int)(ixs->skb->len));
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_ESP_PUSHPULLERR;
-+ goto cleanup;
-+ }
-+ skb_pull(ixs->skb, ixs->hard_header_len);
-+ ixs->hard_header_stripped = 1;
-+
-+/* ixs->iph = (struct iphdr *) (ixs->skb->data); */
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "head,tailroom: %d,%d after hard_header stripped.\n",
-+ skb_headroom(ixs->skb), skb_tailroom(ixs->skb));
-+ KLIPS_IP_PRINT(debug_tunnel & DB_TN_CROUT, ixs->iph);
-+ } else {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "hard header already stripped.\n");
-+ }
-+
-+ ixs->ll_headroom = (ixs->hard_header_len + 15) & ~15;
-+
-+ if ((skb_headroom(ixs->skb) >= ixs->max_headroom + 2 * ixs->ll_headroom) &&
-+ (skb_tailroom(ixs->skb) >= ixs->max_tailroom)
-+#ifndef NET_21
-+ && ixs->skb->free
-+#endif /* !NET_21 */
-+ ) {
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "data fits in existing skb\n");
-+ } else {
-+ struct sk_buff* tskb;
-+
-+ if(!ixs->oskb) {
-+ ixs->oskb = ixs->skb;
-+ }
-+
-+ tskb = skb_copy_expand(ixs->skb,
-+ /* The need for 2 * link layer length here remains unexplained...RGB */
-+ ixs->max_headroom + 2 * ixs->ll_headroom,
-+ ixs->max_tailroom,
-+ GFP_ATOMIC);
-+
-+ if(tskb && ixs->skb->sk) {
-+ skb_set_owner_w(tskb, ixs->skb->sk);
-+ }
-+
-+ if(ixs->skb != ixs->oskb) {
-+ ipsec_kfree_skb(ixs->skb);
-+ }
-+ ixs->skb = tskb;
-+ if (!ixs->skb) {
-+ printk(KERN_WARNING
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "Failed, tried to allocate %d head and %d tailroom\n",
-+ ixs->max_headroom, ixs->max_tailroom);
-+ ixs->stats->tx_errors++;
-+ bundle_stat = IPSEC_XMIT_ERRSKBALLOC;
-+ goto cleanup;
-+ }
-+ KLIPS_PRINT(debug_tunnel & DB_TN_CROUT,
-+ "klips_debug:ipsec_xmit_encap_bundle_2: "
-+ "head,tailroom: %d,%d after allocation\n",
-+ skb_headroom(ixs->skb), skb_tailroom(ixs->skb));
-+ }
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_tunnel & DB_TN_ENCAP) {
-+ ipsec_print_ip(ixs->iph);
-+ }
-+#endif
-+
-+cleanup:
-+ return bundle_stat;
-+}
-+
-+void
-+ipsec_xmit_cleanup(struct ipsec_xmit_state*ixs)
-+{
-+ if(ixs->dev) {
-+#if defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE)
-+ netif_wake_queue(ixs->dev);
-+#else /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */
-+ ixs->dev->tbusy = 0;
-+#endif /* defined(HAS_NETIF_QUEUE) || defined (HAVE_NETIF_QUEUE) */
-+ }
-+
-+ if(ixs->saved_header) {
-+ kfree(ixs->saved_header);
-+ ixs->saved_header = NULL;
-+ }
-+ if(ixs->skb) {
-+ dev_kfree_skb(ixs->skb);
-+ ixs->skb=NULL;
-+ }
-+ if(ixs->oskb) {
-+ dev_kfree_skb(ixs->oskb);
-+ ixs->oskb=NULL;
-+ }
-+ if (ixs->ips.ips_ident_s.data) {
-+ kfree(ixs->ips.ips_ident_s.data);
-+ ixs->ips.ips_ident_s.data=NULL;
-+ }
-+ if (ixs->ips.ips_ident_d.data) {
-+ kfree(ixs->ips.ips_ident_d.data);
-+ ixs->ips.ips_ident_d.data=NULL;
-+ }
-+}
-+
-+#ifdef NETDEV_23
-+static inline int ipsec_xmit_send2(struct sk_buff *skb)
-+{
-+#ifdef NETDEV_25 /* 2.6 kernels */
-+ return dst_output(skb);
-+#else
-+ return ip_send(skb);
-+#endif
-+}
-+#endif /* NETDEV_23 */
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+enum ipsec_xmit_value ipsec_nat_encap(struct ipsec_xmit_state *ixs)
-+{
-+ if (ixs->natt_type && ixs->natt_head) {
-+ struct iphdr *ipp = ip_hdr(ixs->skb);
-+ struct udphdr *udp;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_tunnel_start_xmit: "
-+ "encapsuling packet into UDP (NAT-Traversal) (%d %d)\n",
-+ ixs->natt_type, ixs->natt_head);
-+
-+ ixs->iphlen = ipp->ihl << 2;
-+ ipp->tot_len =
-+ htons(ntohs(ipp->tot_len) + ixs->natt_head);
-+ if(skb_tailroom(ixs->skb) < ixs->natt_head) {
-+ printk(KERN_WARNING "klips_error:ipsec_tunnel_start_xmit: "
-+ "tried to skb_put %d, %d available. "
-+ "This should never happen, please report.\n",
-+ ixs->natt_head,
-+ skb_tailroom(ixs->skb));
-+ ixs->stats->tx_errors++;
-+ return IPSEC_XMIT_ESPUDP;
-+ }
-+ skb_put(ixs->skb, ixs->natt_head);
-+
-+ udp = (struct udphdr *)((char *)ipp + ixs->iphlen);
-+
-+ /* move ESP hdr after UDP hdr */
-+ memmove((void *)((char *)udp + ixs->natt_head),
-+ (void *)(udp),
-+ ntohs(ipp->tot_len) - ixs->iphlen - ixs->natt_head);
-+
-+#if 0
-+ /* set IP destination address (matters in transport mode) */
-+ {
-+ struct sockaddr_in *d = (struct sockaddr_in *)ixs->ipsp->ips_addr_d;
-+ ipp->daddr = d->sin_addr.s_addr;
-+ }
-+#endif
-+
-+ /* clear UDP & Non-IKE Markers (if any) */
-+ memset(udp, 0, ixs->natt_head);
-+
-+ /* fill UDP with usefull informations ;-) */
-+ udp->source = htons(ixs->natt_sport);
-+ udp->dest = htons(ixs->natt_dport);
-+ udp->len = htons(ntohs(ipp->tot_len) - ixs->iphlen);
-+
-+ /* set protocol */
-+ ipp->protocol = IPPROTO_UDP;
-+
-+ /* fix IP checksum */
-+ ipp->check = 0;
-+ ipp->check = ip_fast_csum((unsigned char *)ipp, ipp->ihl);
-+ }
-+ return IPSEC_XMIT_OK;
-+}
-+#endif
-+
-+
-+/* avoid forward reference complain on <2.5 */
-+struct flowi;
-+
-+enum ipsec_xmit_value
-+ipsec_xmit_send(struct ipsec_xmit_state*ixs, struct flowi *fl)
-+{
-+ int error;
-+
-+#ifdef NETDEV_25
-+ fl->nl_u.ip4_u.daddr = ip_hdr(ixs->skb)->daddr;
-+ fl->nl_u.ip4_u.saddr = ixs->pass ? 0 : ip_hdr(ixs->skb)->saddr;
-+ fl->nl_u.ip4_u.tos = RT_TOS(ip_hdr(ixs->skb)->tos);
-+ fl->proto = ip_hdr(ixs->skb)->protocol;
-+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24)
-+ error = ip_route_output_key(&ixs->route, &fl);
-+#else
-+ error = ip_route_output_key(&init_net, &ixs->route, fl);
-+#endif
-+ if (error) {
-+
-+#else
-+ /*skb_orphan(ixs->skb);*/
-+ if((error = ip_route_output(&ixs->route,
-+ ip_hdr(ixs->skb)->daddr,
-+ ixs->pass ? 0 : ip_hdr(ixs->skb)->saddr,
-+ RT_TOS(ip_hdr(ixs->skb)->tos),
-+ /* mcr->rgb: should this be 0 instead? */
-+ ixs->physdev->iflink))) {
-+#endif
-+ ixs->stats->tx_errors++;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "ip_route_output failed with error code %d, rt->u.dst.dev=%s, dropped\n",
-+ error,
-+ ixs->route->u.dst.dev->name);
-+ return IPSEC_XMIT_ROUTEERR;
-+ }
-+
-+ if(ixs->dev == ixs->route->u.dst.dev) {
-+ ip_rt_put(ixs->route);
-+ /* This is recursion, drop it. */
-+ ixs->stats->tx_errors++;
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "suspect recursion, dev=rt->u.dst.dev=%s, dropped\n",
-+ ixs->dev->name);
-+ return IPSEC_XMIT_RECURSDETECT;
-+ }
-+
-+ dst_release(ixs->skb->dst);
-+ ixs->skb->dst = &ixs->route->u.dst;
-+ if(ixs->stats) {
-+ ixs->stats->tx_bytes += ixs->skb->len;
-+ }
-+
-+ if(ixs->skb->len < skb_network_header(ixs->skb) - ixs->skb->data) {
-+ if(ixs->stats) {
-+ ixs->stats->tx_errors++;
-+ }
-+ printk(KERN_WARNING
-+ "klips_error:ipsec_xmit_send: "
-+ "tried to __skb_pull nh-data=%ld, %d available. This should never happen, please report.\n",
-+ (unsigned long)(skb_network_header(ixs->skb) - ixs->skb->data),
-+ ixs->skb->len);
-+ return IPSEC_XMIT_PUSHPULLERR;
-+ }
-+ __skb_pull(ixs->skb, skb_network_header(ixs->skb) - ixs->skb->data);
-+ if(!ixs->pass) {
-+ ipsec_nf_reset(ixs->skb);
-+ }
-+
-+ KLIPS_PRINT(debug_tunnel & DB_TN_XMIT,
-+ "klips_debug:ipsec_xmit_send: "
-+ "...done, calling ip_send() on device:%s\n",
-+ ixs->skb->dev ? ixs->skb->dev->name : "NULL");
-+ KLIPS_IP_PRINT(debug_tunnel & DB_TN_XMIT, ip_hdr(ixs->skb));
-+#ifdef NETDEV_23 /* 2.4 kernels */
-+ {
-+ int err;
-+
-+/* XXX huh, we include linux/netfilter_ipv4.h where NF_IP_LOCAL_OUT is defined as 3 */
-+#ifndef NF_IP_LOCAL_OUT
-+#warning I dont understand why NF_IP_LOCAL_OUT is undefined when including linux/netfilter_ipv4.h
-+#define NF_IP_LOCAL_OUT 3
-+#endif
-+ err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, ixs->skb, NULL,
-+ ixs->route->u.dst.dev,
-+ ipsec_xmit_send2);
-+ if(err != NET_XMIT_SUCCESS && err != NET_XMIT_CN) {
-+ if(net_ratelimit())
-+ printk(KERN_ERR
-+ "klips_error:ipsec_xmit_send: "
-+ "ip_send() failed, err=%d\n",
-+ -err);
-+ if(ixs->stats) {
-+ ixs->stats->tx_errors++;
-+ ixs->stats->tx_aborted_errors++;
-+ }
-+ ixs->skb = NULL;
-+ return IPSEC_XMIT_IPSENDFAILURE;
-+ }
-+ }
-+#else /* NETDEV_23 */ /* 2.2 kernels */
-+ ip_send(ixs->skb);
-+#endif /* NETDEV_23 */
-+ if(ixs->stats) {
-+ ixs->stats->tx_packets++;
-+ }
-+
-+ ixs->skb = NULL;
-+
-+ return IPSEC_XMIT_OK;
-+}
-+
-+#ifdef NETDEV_25
-+enum ipsec_xmit_value
-+ipsec_tunnel_send(struct ipsec_xmit_state *ixs)
-+{
-+ struct flowi fl;
-+ memset(&fl, 0, sizeof(fl));
-+
-+ /* new route/dst cache code from James Morris */
-+ ixs->skb->dev = ixs->physdev;
-+ fl.oif = ixs->physdev->iflink;
-+
-+ return ipsec_xmit_send(ixs, &fl);
-+}
-+#else
-+enum ipsec_xmit_value
-+ipsec_tunnel_send(struct ipsec_xmit_state *ixs)
-+{
-+ return ipsec_xmit_send(ixs, NULL);
-+}
-+#endif
-+
-+
-+/*
-+ * here is a state machine to handle encapsulation
-+ * basically we keep getting re-entered until processing is
-+ * complete. For the simple case we step down the states and finish.
-+ * each state is ideally some logical part of the process. If a state
-+ * can pend (ie., require async processing to complete), then this
-+ * should be the part of last action before it returns IPSEC_RCV_PENDING
-+ *
-+ * Any particular action may alter the next_state in ixs to move us to
-+ * a state other than the preferred "next_state", but this is the
-+ * exception and is highlighted when it is done.
-+ *
-+ * prototypes for state action
-+ */
-+
-+struct {
-+ enum ipsec_xmit_value (*action)(struct ipsec_xmit_state *ixs);
-+ int next_state;
-+} xmit_state_table[] = {
-+ [IPSEC_XSM_INIT1] = {ipsec_xmit_init1, IPSEC_XSM_INIT2 },
-+ [IPSEC_XSM_INIT2] = {ipsec_xmit_init2, IPSEC_XSM_ENCAP_INIT },
-+ [IPSEC_XSM_ENCAP_INIT] = {ipsec_xmit_encap_init, IPSEC_XSM_ENCAP_SELECT },
-+ [IPSEC_XSM_ENCAP_SELECT]= {ipsec_xmit_encap_select,IPSEC_XSM_DONE },
-+
-+#ifdef CONFIG_KLIPS_ESP
-+ [IPSEC_XSM_ESP] = {ipsec_xmit_esp, IPSEC_XSM_ESP_AH },
-+ [IPSEC_XSM_ESP_AH] = {ipsec_xmit_esp_ah, IPSEC_XSM_CONT },
-+#endif
-+
-+#ifdef CONFIG_KLIPS_AH
-+ [IPSEC_XSM_AH] = {ipsec_xmit_ah, IPSEC_XSM_CONT },
-+#endif
-+
-+#ifdef CONFIG_KLIPS_IPIP
-+ [IPSEC_XSM_IPIP] = {ipsec_xmit_ipip, IPSEC_XSM_CONT },
-+#endif
-+
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ [IPSEC_XSM_IPCOMP] = {ipsec_xmit_ipcomp, IPSEC_XSM_CONT },
-+#endif
-+
-+ [IPSEC_XSM_CONT] = {ipsec_xmit_cont, IPSEC_XSM_DONE },
-+ [IPSEC_XSM_DONE] = {NULL, IPSEC_XSM_DONE},
-+};
-+
-+
-+
-+void
-+ipsec_xsm(struct ipsec_xmit_state *ixs)
-+{
-+ enum ipsec_xmit_value stat = IPSEC_XMIT_ENCAPFAIL;
-+
-+ if (ixs == NULL) {
-+ KLIPS_PRINT(debug_tunnel, "klips_debug:ipsec_xsm: ixs == NULL.\n");
-+ return;
-+ }
-+
-+ /*
-+ * make sure nothing is removed from underneath us
-+ */
-+ spin_lock_bh(&tdb_lock);
-+
-+ /*
-+ * if we have a valid said, then we must check it here to ensure it
-+ * hasn't gone away while we were waiting for a task to complete
-+ */
-+
-+ if (ixs->ipsp && ipsec_sa_getbyid(&ixs->outgoing_said) == NULL) {
-+ KLIPS_PRINT(debug_tunnel,
-+ "klips_debug:ipsec_xsm: "
-+ "no ipsec_sa for SA:%s: outgoing packet with no SA dropped\n",
-+ ixs->sa_len ? ixs->sa_txt : " (error)");
-+ if (ixs->stats)
-+ ixs->stats->tx_dropped++;
-+
-+ /* drop through and cleanup */
-+ stat = IPSEC_XMIT_SAIDNOTFOUND;
-+ ixs->state = IPSEC_XSM_DONE;
-+ }
-+
-+ while (ixs->state != IPSEC_XSM_DONE) {
-+
-+ ixs->next_state = xmit_state_table[ixs->state].next_state;
-+
-+ stat = xmit_state_table[ixs->state].action(ixs);
-+
-+ if (stat == IPSEC_XMIT_OK) {
-+ /* some functions change the next state, see the state table */
-+ ixs->state = ixs->next_state;
-+ } else if (stat == IPSEC_XMIT_PENDING) {
-+ /*
-+ * things are on hold until we return here in the next/new state
-+ * we check our SA is valid when we return
-+ */
-+ spin_unlock_bh(&tdb_lock);
-+ return;
-+ } else {
-+ /* bad result, force state change to done */
-+ KLIPS_PRINT(debug_tunnel,
-+ "klips_debug:ipsec_xsm: "
-+ "processing completed due to %s.\n",
-+ ipsec_xmit_err(stat));
-+ ixs->state = IPSEC_XSM_DONE;
-+ }
-+ }
-+
-+ /*
-+ * all done with anything needing locks
-+ */
-+ spin_unlock_bh(&tdb_lock);
-+
-+ /* we are done with this SA */
-+ if (ixs->ipsp) {
-+ ipsec_sa_put(ixs->ipsp);
-+ ixs->ipsp = NULL;
-+ }
-+
-+ /*
-+ * let the caller continue with their processing
-+ */
-+ ixs->xsm_complete(ixs, stat);
-+}
-+
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/match586.S Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,357 @@
-+/* match.s -- Pentium-optimized version of longest_match()
-+ * Written for zlib 1.1.2
-+ * Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
-+ *
-+ * This is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License.
-+ */
-+
-+#ifndef NO_UNDERLINE
-+#define match_init _ipcomp_match_init
-+#define longest_match _ipcomp_longest_match
-+#else
-+#define match_init ipcomp_match_init
-+#define longest_match ipcomp_longest_match
-+#endif
-+
-+#define MAX_MATCH (258)
-+#define MIN_MATCH (3)
-+#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
-+#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7)
-+
-+/* stack frame offsets */
-+
-+#define wmask 0 /* local copy of s->wmask */
-+#define window 4 /* local copy of s->window */
-+#define windowbestlen 8 /* s->window + bestlen */
-+#define chainlenscanend 12 /* high word: current chain len */
-+ /* low word: last bytes sought */
-+#define scanstart 16 /* first two bytes of string */
-+#define scanalign 20 /* dword-misalignment of string */
-+#define nicematch 24 /* a good enough match size */
-+#define bestlen 28 /* size of best match so far */
-+#define scan 32 /* ptr to string wanting match */
-+
-+#define LocalVarsSize (36)
-+/* saved ebx 36 */
-+/* saved edi 40 */
-+/* saved esi 44 */
-+/* saved ebp 48 */
-+/* return address 52 */
-+#define deflatestate 56 /* the function arguments */
-+#define curmatch 60
-+
-+/* Offsets for fields in the deflate_state structure. These numbers
-+ * are calculated from the definition of deflate_state, with the
-+ * assumption that the compiler will dword-align the fields. (Thus,
-+ * changing the definition of deflate_state could easily cause this
-+ * program to crash horribly, without so much as a warning at
-+ * compile time. Sigh.)
-+ */
-+#define dsWSize 36
-+#define dsWMask 44
-+#define dsWindow 48
-+#define dsPrev 56
-+#define dsMatchLen 88
-+#define dsPrevMatch 92
-+#define dsStrStart 100
-+#define dsMatchStart 104
-+#define dsLookahead 108
-+#define dsPrevLen 112
-+#define dsMaxChainLen 116
-+#define dsGoodMatch 132
-+#define dsNiceMatch 136
-+
-+
-+.file "match.S"
-+
-+.globl match_init, longest_match
-+
-+.text
-+
-+/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
-+
-+longest_match:
-+
-+/* Save registers that the compiler may be using, and adjust %esp to */
-+/* make room for our stack frame. */
-+
-+ pushl %ebp
-+ pushl %edi
-+ pushl %esi
-+ pushl %ebx
-+ subl $LocalVarsSize, %esp
-+
-+/* Retrieve the function arguments. %ecx will hold cur_match */
-+/* throughout the entire function. %edx will hold the pointer to the */
-+/* deflate_state structure during the function's setup (before */
-+/* entering the main loop). */
-+
-+ movl deflatestate(%esp), %edx
-+ movl curmatch(%esp), %ecx
-+
-+/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */
-+
-+ movl dsNiceMatch(%edx), %eax
-+ movl dsLookahead(%edx), %ebx
-+ cmpl %eax, %ebx
-+ jl LookaheadLess
-+ movl %eax, %ebx
-+LookaheadLess: movl %ebx, nicematch(%esp)
-+
-+/* register Bytef *scan = s->window + s->strstart; */
-+
-+ movl dsWindow(%edx), %esi
-+ movl %esi, window(%esp)
-+ movl dsStrStart(%edx), %ebp
-+ lea (%esi,%ebp), %edi
-+ movl %edi, scan(%esp)
-+
-+/* Determine how many bytes the scan ptr is off from being */
-+/* dword-aligned. */
-+
-+ movl %edi, %eax
-+ negl %eax
-+ andl $3, %eax
-+ movl %eax, scanalign(%esp)
-+
-+/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
-+/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
-+
-+ movl dsWSize(%edx), %eax
-+ subl $MIN_LOOKAHEAD, %eax
-+ subl %eax, %ebp
-+ jg LimitPositive
-+ xorl %ebp, %ebp
-+LimitPositive:
-+
-+/* unsigned chain_length = s->max_chain_length; */
-+/* if (s->prev_length >= s->good_match) { */
-+/* chain_length >>= 2; */
-+/* } */
-+
-+ movl dsPrevLen(%edx), %eax
-+ movl dsGoodMatch(%edx), %ebx
-+ cmpl %ebx, %eax
-+ movl dsMaxChainLen(%edx), %ebx
-+ jl LastMatchGood
-+ shrl $2, %ebx
-+LastMatchGood:
-+
-+/* chainlen is decremented once beforehand so that the function can */
-+/* use the sign flag instead of the zero flag for the exit test. */
-+/* It is then shifted into the high word, to make room for the scanend */
-+/* scanend value, which it will always accompany. */
-+
-+ decl %ebx
-+ shll $16, %ebx
-+
-+/* int best_len = s->prev_length; */
-+
-+ movl dsPrevLen(%edx), %eax
-+ movl %eax, bestlen(%esp)
-+
-+/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
-+
-+ addl %eax, %esi
-+ movl %esi, windowbestlen(%esp)
-+
-+/* register ush scan_start = *(ushf*)scan; */
-+/* register ush scan_end = *(ushf*)(scan+best_len-1); */
-+
-+ movw (%edi), %bx
-+ movw %bx, scanstart(%esp)
-+ movw -1(%edi,%eax), %bx
-+ movl %ebx, chainlenscanend(%esp)
-+
-+/* Posf *prev = s->prev; */
-+/* uInt wmask = s->w_mask; */
-+
-+ movl dsPrev(%edx), %edi
-+ movl dsWMask(%edx), %edx
-+ mov %edx, wmask(%esp)
-+
-+/* Jump into the main loop. */
-+
-+ jmp LoopEntry
-+
-+.balign 16
-+
-+/* do {
-+ * match = s->window + cur_match;
-+ * if (*(ushf*)(match+best_len-1) != scan_end ||
-+ * *(ushf*)match != scan_start) continue;
-+ * [...]
-+ * } while ((cur_match = prev[cur_match & wmask]) > limit
-+ * && --chain_length != 0);
-+ *
-+ * Here is the inner loop of the function. The function will spend the
-+ * majority of its time in this loop, and majority of that time will
-+ * be spent in the first ten instructions.
-+ *
-+ * Within this loop:
-+ * %ebx = chainlenscanend - i.e., ((chainlen << 16) | scanend)
-+ * %ecx = curmatch
-+ * %edx = curmatch & wmask
-+ * %esi = windowbestlen - i.e., (window + bestlen)
-+ * %edi = prev
-+ * %ebp = limit
-+ *
-+ * Two optimization notes on the choice of instructions:
-+ *
-+ * The first instruction uses a 16-bit address, which costs an extra,
-+ * unpairable cycle. This is cheaper than doing a 32-bit access and
-+ * zeroing the high word, due to the 3-cycle misalignment penalty which
-+ * would occur half the time. This also turns out to be cheaper than
-+ * doing two separate 8-bit accesses, as the memory is so rarely in the
-+ * L1 cache.
-+ *
-+ * The window buffer, however, apparently spends a lot of time in the
-+ * cache, and so it is faster to retrieve the word at the end of the
-+ * match string with two 8-bit loads. The instructions that test the
-+ * word at the beginning of the match string, however, are executed
-+ * much less frequently, and there it was cheaper to use 16-bit
-+ * instructions, which avoided the necessity of saving off and
-+ * subsequently reloading one of the other registers.
-+ */
-+LookupLoop:
-+ /* 1 U & V */
-+ movw (%edi,%edx,2), %cx /* 2 U pipe */
-+ movl wmask(%esp), %edx /* 2 V pipe */
-+ cmpl %ebp, %ecx /* 3 U pipe */
-+ jbe LeaveNow /* 3 V pipe */
-+ subl $0x00010000, %ebx /* 4 U pipe */
-+ js LeaveNow /* 4 V pipe */
-+LoopEntry: movb -1(%esi,%ecx), %al /* 5 U pipe */
-+ andl %ecx, %edx /* 5 V pipe */
-+ cmpb %bl, %al /* 6 U pipe */
-+ jnz LookupLoop /* 6 V pipe */
-+ movb (%esi,%ecx), %ah
-+ cmpb %bh, %ah
-+ jnz LookupLoop
-+ movl window(%esp), %eax
-+ movw (%eax,%ecx), %ax
-+ cmpw scanstart(%esp), %ax
-+ jnz LookupLoop
-+
-+/* Store the current value of chainlen. */
-+
-+ movl %ebx, chainlenscanend(%esp)
-+
-+/* Point %edi to the string under scrutiny, and %esi to the string we */
-+/* are hoping to match it up with. In actuality, %esi and %edi are */
-+/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */
-+/* initialized to -(MAX_MATCH_8 - scanalign). */
-+
-+ movl window(%esp), %esi
-+ movl scan(%esp), %edi
-+ addl %ecx, %esi
-+ movl scanalign(%esp), %eax
-+ movl $(-MAX_MATCH_8), %edx
-+ lea MAX_MATCH_8(%edi,%eax), %edi
-+ lea MAX_MATCH_8(%esi,%eax), %esi
-+
-+/* Test the strings for equality, 8 bytes at a time. At the end,
-+ * adjust %edx so that it is offset to the exact byte that mismatched.
-+ *
-+ * We already know at this point that the first three bytes of the
-+ * strings match each other, and they can be safely passed over before
-+ * starting the compare loop. So what this code does is skip over 0-3
-+ * bytes, as much as necessary in order to dword-align the %edi
-+ * pointer. (%esi will still be misaligned three times out of four.)
-+ *
-+ * It should be confessed that this loop usually does not represent
-+ * much of the total running time. Replacing it with a more
-+ * straightforward "rep cmpsb" would not drastically degrade
-+ * performance.
-+ */
-+LoopCmps:
-+ movl (%esi,%edx), %eax
-+ movl (%edi,%edx), %ebx
-+ xorl %ebx, %eax
-+ jnz LeaveLoopCmps
-+ movl 4(%esi,%edx), %eax
-+ movl 4(%edi,%edx), %ebx
-+ xorl %ebx, %eax
-+ jnz LeaveLoopCmps4
-+ addl $8, %edx
-+ jnz LoopCmps
-+ jmp LenMaximum
-+LeaveLoopCmps4: addl $4, %edx
-+LeaveLoopCmps: testl $0x0000FFFF, %eax
-+ jnz LenLower
-+ addl $2, %edx
-+ shrl $16, %eax
-+LenLower: subb $1, %al
-+ adcl $0, %edx
-+
-+/* Calculate the length of the match. If it is longer than MAX_MATCH, */
-+/* then automatically accept it as the best possible match and leave. */
-+
-+ lea (%edi,%edx), %eax
-+ movl scan(%esp), %edi
-+ subl %edi, %eax
-+ cmpl $MAX_MATCH, %eax
-+ jge LenMaximum
-+
-+/* If the length of the match is not longer than the best match we */
-+/* have so far, then forget it and return to the lookup loop. */
-+
-+ movl deflatestate(%esp), %edx
-+ movl bestlen(%esp), %ebx
-+ cmpl %ebx, %eax
-+ jg LongerMatch
-+ movl chainlenscanend(%esp), %ebx
-+ movl windowbestlen(%esp), %esi
-+ movl dsPrev(%edx), %edi
-+ movl wmask(%esp), %edx
-+ andl %ecx, %edx
-+ jmp LookupLoop
-+
-+/* s->match_start = cur_match; */
-+/* best_len = len; */
-+/* if (len >= nice_match) break; */
-+/* scan_end = *(ushf*)(scan+best_len-1); */
-+
-+LongerMatch: movl nicematch(%esp), %ebx
-+ movl %eax, bestlen(%esp)
-+ movl %ecx, dsMatchStart(%edx)
-+ cmpl %ebx, %eax
-+ jge LeaveNow
-+ movl window(%esp), %esi
-+ addl %eax, %esi
-+ movl %esi, windowbestlen(%esp)
-+ movl chainlenscanend(%esp), %ebx
-+ movw -1(%edi,%eax), %bx
-+ movl dsPrev(%edx), %edi
-+ movl %ebx, chainlenscanend(%esp)
-+ movl wmask(%esp), %edx
-+ andl %ecx, %edx
-+ jmp LookupLoop
-+
-+/* Accept the current string, with the maximum possible length. */
-+
-+LenMaximum: movl deflatestate(%esp), %edx
-+ movl $MAX_MATCH, bestlen(%esp)
-+ movl %ecx, dsMatchStart(%edx)
-+
-+/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
-+/* return s->lookahead; */
-+
-+LeaveNow:
-+ movl deflatestate(%esp), %edx
-+ movl bestlen(%esp), %ebx
-+ movl dsLookahead(%edx), %eax
-+ cmpl %eax, %ebx
-+ jg LookaheadRet
-+ movl %ebx, %eax
-+LookaheadRet:
-+
-+/* Restore the stack and return from whence we came. */
-+
-+ addl $LocalVarsSize, %esp
-+ popl %ebx
-+ popl %esi
-+ popl %edi
-+ popl %ebp
-+match_init: ret
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/match686.S Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,330 @@
-+/* match.s -- Pentium-Pro-optimized version of longest_match()
-+ * Written for zlib 1.1.2
-+ * Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
-+ *
-+ * This is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License.
-+ */
-+
-+#ifndef NO_UNDERLINE
-+#define match_init _ipcomp_match_init
-+#define longest_match _ipcomp_longest_match
-+#else
-+#define match_init ipcomp_match_init
-+#define longest_match ipcomp_longest_match
-+#endif
-+
-+#define MAX_MATCH (258)
-+#define MIN_MATCH (3)
-+#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
-+#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7)
-+
-+/* stack frame offsets */
-+
-+#define chainlenwmask 0 /* high word: current chain len */
-+ /* low word: s->wmask */
-+#define window 4 /* local copy of s->window */
-+#define windowbestlen 8 /* s->window + bestlen */
-+#define scanstart 16 /* first two bytes of string */
-+#define scanend 12 /* last two bytes of string */
-+#define scanalign 20 /* dword-misalignment of string */
-+#define nicematch 24 /* a good enough match size */
-+#define bestlen 28 /* size of best match so far */
-+#define scan 32 /* ptr to string wanting match */
-+
-+#define LocalVarsSize (36)
-+/* saved ebx 36 */
-+/* saved edi 40 */
-+/* saved esi 44 */
-+/* saved ebp 48 */
-+/* return address 52 */
-+#define deflatestate 56 /* the function arguments */
-+#define curmatch 60
-+
-+/* Offsets for fields in the deflate_state structure. These numbers
-+ * are calculated from the definition of deflate_state, with the
-+ * assumption that the compiler will dword-align the fields. (Thus,
-+ * changing the definition of deflate_state could easily cause this
-+ * program to crash horribly, without so much as a warning at
-+ * compile time. Sigh.)
-+ */
-+#define dsWSize 36
-+#define dsWMask 44
-+#define dsWindow 48
-+#define dsPrev 56
-+#define dsMatchLen 88
-+#define dsPrevMatch 92
-+#define dsStrStart 100
-+#define dsMatchStart 104
-+#define dsLookahead 108
-+#define dsPrevLen 112
-+#define dsMaxChainLen 116
-+#define dsGoodMatch 132
-+#define dsNiceMatch 136
-+
-+
-+.file "match.S"
-+
-+.globl match_init, longest_match
-+
-+.text
-+
-+/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
-+
-+longest_match:
-+
-+/* Save registers that the compiler may be using, and adjust %esp to */
-+/* make room for our stack frame. */
-+
-+ pushl %ebp
-+ pushl %edi
-+ pushl %esi
-+ pushl %ebx
-+ subl $LocalVarsSize, %esp
-+
-+/* Retrieve the function arguments. %ecx will hold cur_match */
-+/* throughout the entire function. %edx will hold the pointer to the */
-+/* deflate_state structure during the function's setup (before */
-+/* entering the main loop). */
-+
-+ movl deflatestate(%esp), %edx
-+ movl curmatch(%esp), %ecx
-+
-+/* uInt wmask = s->w_mask; */
-+/* unsigned chain_length = s->max_chain_length; */
-+/* if (s->prev_length >= s->good_match) { */
-+/* chain_length >>= 2; */
-+/* } */
-+
-+ movl dsPrevLen(%edx), %eax
-+ movl dsGoodMatch(%edx), %ebx
-+ cmpl %ebx, %eax
-+ movl dsWMask(%edx), %eax
-+ movl dsMaxChainLen(%edx), %ebx
-+ jl LastMatchGood
-+ shrl $2, %ebx
-+LastMatchGood:
-+
-+/* chainlen is decremented once beforehand so that the function can */
-+/* use the sign flag instead of the zero flag for the exit test. */
-+/* It is then shifted into the high word, to make room for the wmask */
-+/* value, which it will always accompany. */
-+
-+ decl %ebx
-+ shll $16, %ebx
-+ orl %eax, %ebx
-+ movl %ebx, chainlenwmask(%esp)
-+
-+/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */
-+
-+ movl dsNiceMatch(%edx), %eax
-+ movl dsLookahead(%edx), %ebx
-+ cmpl %eax, %ebx
-+ jl LookaheadLess
-+ movl %eax, %ebx
-+LookaheadLess: movl %ebx, nicematch(%esp)
-+
-+/* register Bytef *scan = s->window + s->strstart; */
-+
-+ movl dsWindow(%edx), %esi
-+ movl %esi, window(%esp)
-+ movl dsStrStart(%edx), %ebp
-+ lea (%esi,%ebp), %edi
-+ movl %edi, scan(%esp)
-+
-+/* Determine how many bytes the scan ptr is off from being */
-+/* dword-aligned. */
-+
-+ movl %edi, %eax
-+ negl %eax
-+ andl $3, %eax
-+ movl %eax, scanalign(%esp)
-+
-+/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
-+/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
-+
-+ movl dsWSize(%edx), %eax
-+ subl $MIN_LOOKAHEAD, %eax
-+ subl %eax, %ebp
-+ jg LimitPositive
-+ xorl %ebp, %ebp
-+LimitPositive:
-+
-+/* int best_len = s->prev_length; */
-+
-+ movl dsPrevLen(%edx), %eax
-+ movl %eax, bestlen(%esp)
-+
-+/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
-+
-+ addl %eax, %esi
-+ movl %esi, windowbestlen(%esp)
-+
-+/* register ush scan_start = *(ushf*)scan; */
-+/* register ush scan_end = *(ushf*)(scan+best_len-1); */
-+/* Posf *prev = s->prev; */
-+
-+ movzwl (%edi), %ebx
-+ movl %ebx, scanstart(%esp)
-+ movzwl -1(%edi,%eax), %ebx
-+ movl %ebx, scanend(%esp)
-+ movl dsPrev(%edx), %edi
-+
-+/* Jump into the main loop. */
-+
-+ movl chainlenwmask(%esp), %edx
-+ jmp LoopEntry
-+
-+.balign 16
-+
-+/* do {
-+ * match = s->window + cur_match;
-+ * if (*(ushf*)(match+best_len-1) != scan_end ||
-+ * *(ushf*)match != scan_start) continue;
-+ * [...]
-+ * } while ((cur_match = prev[cur_match & wmask]) > limit
-+ * && --chain_length != 0);
-+ *
-+ * Here is the inner loop of the function. The function will spend the
-+ * majority of its time in this loop, and majority of that time will
-+ * be spent in the first ten instructions.
-+ *
-+ * Within this loop:
-+ * %ebx = scanend
-+ * %ecx = curmatch
-+ * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
-+ * %esi = windowbestlen - i.e., (window + bestlen)
-+ * %edi = prev
-+ * %ebp = limit
-+ */
-+LookupLoop:
-+ andl %edx, %ecx
-+ movzwl (%edi,%ecx,2), %ecx
-+ cmpl %ebp, %ecx
-+ jbe LeaveNow
-+ subl $0x00010000, %edx
-+ js LeaveNow
-+LoopEntry: movzwl -1(%esi,%ecx), %eax
-+ cmpl %ebx, %eax
-+ jnz LookupLoop
-+ movl window(%esp), %eax
-+ movzwl (%eax,%ecx), %eax
-+ cmpl scanstart(%esp), %eax
-+ jnz LookupLoop
-+
-+/* Store the current value of chainlen. */
-+
-+ movl %edx, chainlenwmask(%esp)
-+
-+/* Point %edi to the string under scrutiny, and %esi to the string we */
-+/* are hoping to match it up with. In actuality, %esi and %edi are */
-+/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */
-+/* initialized to -(MAX_MATCH_8 - scanalign). */
-+
-+ movl window(%esp), %esi
-+ movl scan(%esp), %edi
-+ addl %ecx, %esi
-+ movl scanalign(%esp), %eax
-+ movl $(-MAX_MATCH_8), %edx
-+ lea MAX_MATCH_8(%edi,%eax), %edi
-+ lea MAX_MATCH_8(%esi,%eax), %esi
-+
-+/* Test the strings for equality, 8 bytes at a time. At the end,
-+ * adjust %edx so that it is offset to the exact byte that mismatched.
-+ *
-+ * We already know at this point that the first three bytes of the
-+ * strings match each other, and they can be safely passed over before
-+ * starting the compare loop. So what this code does is skip over 0-3
-+ * bytes, as much as necessary in order to dword-align the %edi
-+ * pointer. (%esi will still be misaligned three times out of four.)
-+ *
-+ * It should be confessed that this loop usually does not represent
-+ * much of the total running time. Replacing it with a more
-+ * straightforward "rep cmpsb" would not drastically degrade
-+ * performance.
-+ */
-+LoopCmps:
-+ movl (%esi,%edx), %eax
-+ xorl (%edi,%edx), %eax
-+ jnz LeaveLoopCmps
-+ movl 4(%esi,%edx), %eax
-+ xorl 4(%edi,%edx), %eax
-+ jnz LeaveLoopCmps4
-+ addl $8, %edx
-+ jnz LoopCmps
-+ jmp LenMaximum
-+LeaveLoopCmps4: addl $4, %edx
-+LeaveLoopCmps: testl $0x0000FFFF, %eax
-+ jnz LenLower
-+ addl $2, %edx
-+ shrl $16, %eax
-+LenLower: subb $1, %al
-+ adcl $0, %edx
-+
-+/* Calculate the length of the match. If it is longer than MAX_MATCH, */
-+/* then automatically accept it as the best possible match and leave. */
-+
-+ lea (%edi,%edx), %eax
-+ movl scan(%esp), %edi
-+ subl %edi, %eax
-+ cmpl $MAX_MATCH, %eax
-+ jge LenMaximum
-+
-+/* If the length of the match is not longer than the best match we */
-+/* have so far, then forget it and return to the lookup loop. */
-+
-+ movl deflatestate(%esp), %edx
-+ movl bestlen(%esp), %ebx
-+ cmpl %ebx, %eax
-+ jg LongerMatch
-+ movl windowbestlen(%esp), %esi
-+ movl dsPrev(%edx), %edi
-+ movl scanend(%esp), %ebx
-+ movl chainlenwmask(%esp), %edx
-+ jmp LookupLoop
-+
-+/* s->match_start = cur_match; */
-+/* best_len = len; */
-+/* if (len >= nice_match) break; */
-+/* scan_end = *(ushf*)(scan+best_len-1); */
-+
-+LongerMatch: movl nicematch(%esp), %ebx
-+ movl %eax, bestlen(%esp)
-+ movl %ecx, dsMatchStart(%edx)
-+ cmpl %ebx, %eax
-+ jge LeaveNow
-+ movl window(%esp), %esi
-+ addl %eax, %esi
-+ movl %esi, windowbestlen(%esp)
-+ movzwl -1(%edi,%eax), %ebx
-+ movl dsPrev(%edx), %edi
-+ movl %ebx, scanend(%esp)
-+ movl chainlenwmask(%esp), %edx
-+ jmp LookupLoop
-+
-+/* Accept the current string, with the maximum possible length. */
-+
-+LenMaximum: movl deflatestate(%esp), %edx
-+ movl $MAX_MATCH, bestlen(%esp)
-+ movl %ecx, dsMatchStart(%edx)
-+
-+/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
-+/* return s->lookahead; */
-+
-+LeaveNow:
-+ movl deflatestate(%esp), %edx
-+ movl bestlen(%esp), %ebx
-+ movl dsLookahead(%edx), %eax
-+ cmpl %eax, %ebx
-+ jg LookaheadRet
-+ movl %ebx, %eax
-+LookaheadRet:
-+
-+/* Restore the stack and return from whence we came. */
-+
-+ addl $LocalVarsSize, %esp
-+ popl %ebx
-+ popl %esi
-+ popl %edi
-+ popl %ebp
-+match_init: ret
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1587 @@
-+/*
-+ * @(#) RFC2367 PF_KEYv2 Key management API domain socket I/F
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+/*
-+ * Template from /usr/src/linux-2.0.36/net/unix/af_unix.c.
-+ * Hints from /usr/src/linux-2.0.36/net/ipv4/udp.c.
-+ */
-+
-+#define __NO_VERSION__
-+#include <linux/module.h>
-+#include <linux/version.h>
-+#ifndef AUTOCONF_INCLUDED
-+# include <linux/config.h>
-+#endif
-+#include <linux/kernel.h>
-+
-+#include "openswan/ipsec_param.h"
-+
-+#include <linux/major.h>
-+#include <linux/signal.h>
-+#include <linux/sched.h>
-+#include <linux/errno.h>
-+#include <linux/string.h>
-+#include <linux/stat.h>
-+#include <linux/socket.h>
-+#include <linux/un.h>
-+#include <linux/fcntl.h>
-+#include <linux/termios.h>
-+#include <linux/socket.h>
-+#include <linux/sockios.h>
-+#include <linux/net.h> /* struct socket */
-+#include <linux/in.h>
-+#include <linux/fs.h>
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#ifdef CONFIG_X86
-+# include <asm/segment.h>
-+#endif
-+#include <linux/skbuff.h>
-+#include <linux/netdevice.h>
-+#include <net/sock.h> /* struct sock */
-+#include <net/protocol.h>
-+/* #include <net/tcp.h> */
-+#include <net/af_unix.h>
-+#ifdef CONFIG_PROC_FS
-+# include <linux/proc_fs.h>
-+#endif /* CONFIG_PROC_FS */
-+#ifdef HAVE_SEQ_FILE
-+# include <linux/seq_file.h>
-+#endif
-+
-+#include <linux/types.h>
-+
-+#include <openswan.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_kern24.h"
-+#include "openswan/ipsec_sysctl.h"
-+
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+#if 0
-+#ifndef SOCKOPS_WRAPPED
-+#define SOCKOPS_WRAPPED(name) name
-+#endif /* SOCKOPS_WRAPPED */
-+#endif
-+
-+extern struct proto_ops SOCKOPS_WRAPPED(pfkey_ops);
-+
-+#ifdef NET_26
-+static rwlock_t pfkey_sock_lock = RW_LOCK_UNLOCKED;
-+HLIST_HEAD(pfkey_sock_list);
-+static DECLARE_WAIT_QUEUE_HEAD(pfkey_sock_wait);
-+static atomic_t pfkey_sock_users = ATOMIC_INIT(0);
-+#else
-+struct sock *pfkey_sock_list = NULL;
-+#endif
-+
-+struct supported_list *pfkey_supported_list[K_SADB_SATYPE_MAX+1];
-+
-+struct socket_list *pfkey_open_sockets = NULL;
-+struct socket_list *pfkey_registered_sockets[K_SADB_SATYPE_MAX+1];
-+
-+int pfkey_msg_interp(struct sock *, struct sadb_msg *);
-+
-+#ifdef NET_26_24_SKALLOC
-+DEBUG_NO_STATIC int pfkey_create(struct net *net, struct socket *sock, int protocol);
-+#else
-+DEBUG_NO_STATIC int pfkey_create(struct socket *sock, int protocol);
-+#endif
-+DEBUG_NO_STATIC int pfkey_shutdown(struct socket *sock, int mode);
-+DEBUG_NO_STATIC int pfkey_release(struct socket *sock);
-+
-+#ifdef NET_26
-+DEBUG_NO_STATIC int pfkey_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len);
-+DEBUG_NO_STATIC int pfkey_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg
-+ , size_t size, int flags);
-+#else
-+DEBUG_NO_STATIC int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm);
-+DEBUG_NO_STATIC int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags, struct scm_cookie *scm);
-+#endif
-+
-+struct net_proto_family pfkey_family_ops = {
-+ .owner = THIS_MODULE,
-+ .family = PF_KEY,
-+ .create = pfkey_create
-+};
-+
-+struct proto_ops SOCKOPS_WRAPPED(pfkey_ops) = {
-+ owner: THIS_MODULE,
-+ family: PF_KEY,
-+ release: pfkey_release,
-+ bind: sock_no_bind,
-+ connect: sock_no_connect,
-+ socketpair: sock_no_socketpair,
-+ accept: sock_no_accept,
-+ getname: sock_no_getname,
-+ poll: datagram_poll,
-+ ioctl: sock_no_ioctl,
-+ listen: sock_no_listen,
-+ shutdown: pfkey_shutdown,
-+ setsockopt: sock_no_setsockopt,
-+ getsockopt: sock_no_getsockopt,
-+ sendmsg: pfkey_sendmsg,
-+ recvmsg: pfkey_recvmsg,
-+ mmap: sock_no_mmap,
-+};
-+
-+#include <linux/smp_lock.h>
-+SOCKOPS_WRAP(pfkey, PF_KEY);
-+
-+#ifdef NET_26
-+static void pfkey_sock_list_grab(void)
-+{
-+ write_lock_bh(&pfkey_sock_lock);
-+
-+ if (atomic_read(&pfkey_sock_users)) {
-+ DECLARE_WAITQUEUE(wait, current);
-+
-+ add_wait_queue_exclusive(&pfkey_sock_wait, &wait);
-+ for(;;) {
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ if (atomic_read(&pfkey_sock_users) == 0)
-+ break;
-+ write_unlock_bh(&pfkey_sock_lock);
-+ schedule();
-+ write_lock_bh(&pfkey_sock_lock);
-+ }
-+
-+ __set_current_state(TASK_RUNNING);
-+ remove_wait_queue(&pfkey_sock_wait, &wait);
-+ }
-+}
-+
-+static __inline__ void pfkey_sock_list_ungrab(void)
-+{
-+ write_unlock_bh(&pfkey_sock_lock);
-+ wake_up(&pfkey_sock_wait);
-+}
-+
-+static __inline__ void pfkey_lock_sock_list(void)
-+{
-+ /* read_lock() synchronizes us to pfkey_table_grab */
-+
-+ read_lock(&pfkey_sock_lock);
-+ atomic_inc(&pfkey_sock_users);
-+ read_unlock(&pfkey_sock_lock);
-+}
-+
-+static __inline__ void pfkey_unlock_sock_list(void)
-+{
-+ if (atomic_dec_and_test(&pfkey_sock_users))
-+ wake_up(&pfkey_sock_wait);
-+}
-+#endif
-+
-+int
-+pfkey_list_remove_socket(struct socket *socketp, struct socket_list **sockets)
-+{
-+ struct socket_list *socket_listp,*prev;
-+
-+ if(!socketp) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_socket: "
-+ "NULL socketp handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ if(!sockets) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_socket: "
-+ "NULL sockets list handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ socket_listp = *sockets;
-+ prev = NULL;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_socket: "
-+ "removing sock=0p%p\n",
-+ socketp);
-+
-+ while(socket_listp != NULL) {
-+ if(socket_listp->socketp == socketp) {
-+ if(prev != NULL) {
-+ prev->next = socket_listp->next;
-+ } else {
-+ *sockets = socket_listp->next;
-+ }
-+
-+ kfree((void*)socket_listp);
-+
-+ break;
-+ }
-+ prev = socket_listp;
-+ socket_listp = socket_listp->next;
-+ }
-+
-+ return 0;
-+}
-+
-+int
-+pfkey_list_insert_socket(struct socket *socketp, struct socket_list **sockets)
-+{
-+ struct socket_list *socket_listp;
-+
-+ if(!socketp) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_socket: "
-+ "NULL socketp handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ if(!sockets) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_socket: "
-+ "NULL sockets list handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_socket: "
-+ "allocating %lu bytes for socketp=0p%p\n",
-+ (unsigned long) sizeof(struct socket_list),
-+ socketp);
-+
-+ if((socket_listp = (struct socket_list *)kmalloc(sizeof(struct socket_list), GFP_KERNEL)) == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_socket: "
-+ "memory allocation error.\n");
-+ return -ENOMEM;
-+ }
-+
-+ socket_listp->socketp = socketp;
-+ socket_listp->next = *sockets;
-+ *sockets = socket_listp;
-+
-+ return 0;
-+}
-+
-+int
-+pfkey_list_remove_supported(struct ipsec_alg_supported *supported, struct supported_list **supported_list)
-+{
-+ struct supported_list *supported_listp = *supported_list, *prev = NULL;
-+
-+ if(!supported) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_supported: "
-+ "NULL supported handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ if(!supported_list) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_supported: "
-+ "NULL supported_list handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_remove_supported: "
-+ "removing supported=0p%p\n",
-+ supported);
-+
-+ while(supported_listp != NULL) {
-+ if(supported_listp->supportedp == supported) {
-+ if(prev != NULL) {
-+ prev->next = supported_listp->next;
-+ } else {
-+ *supported_list = supported_listp->next;
-+ }
-+
-+ kfree((void*)supported_listp);
-+
-+ break;
-+ }
-+ prev = supported_listp;
-+ supported_listp = supported_listp->next;
-+ }
-+
-+ return 0;
-+}
-+
-+int
-+pfkey_list_insert_supported(struct ipsec_alg_supported *supported
-+ , struct supported_list **supported_list)
-+{
-+ struct supported_list *supported_listp;
-+
-+ if(!supported) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_supported: "
-+ "NULL supported handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ if(!supported_list) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_supported: "
-+ "NULL supported_list handed in, failed.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_supported: "
-+ "allocating %lu bytes for incoming, supported=0p%p, supported_list=0p%p\n",
-+ (unsigned long) sizeof(struct supported_list),
-+ supported,
-+ supported_list);
-+
-+ supported_listp = (struct supported_list *)kmalloc(sizeof(struct supported_list), GFP_KERNEL);
-+
-+ if(supported_listp == NULL)
-+ {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_supported: "
-+ "memory allocation error.\n");
-+ return -ENOMEM;
-+ }
-+
-+ supported_listp->supportedp = supported;
-+ supported_listp->next = *supported_list;
-+ *supported_list = supported_listp;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_list_insert_supported: "
-+ "outgoing, supported=0p%p, supported_list=0p%p\n",
-+ supported,
-+ supported_list);
-+
-+ return 0;
-+}
-+
-+#ifdef NET_26
-+DEBUG_NO_STATIC void
-+pfkey_insert_socket(struct sock *sk)
-+{
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_insert_socket: "
-+ "sk=0p%p\n",
-+ sk);
-+ pfkey_sock_list_grab();
-+ sk_add_node(sk, &pfkey_sock_list);
-+ pfkey_sock_list_ungrab();
-+}
-+
-+DEBUG_NO_STATIC void
-+pfkey_remove_socket(struct sock *sk)
-+{
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_remove_socket: 0p%p\n", sk);
-+ pfkey_sock_list_grab();
-+ sk_del_node_init(sk);
-+ pfkey_sock_list_ungrab();
-+ return;
-+}
-+#else
-+
-+DEBUG_NO_STATIC void
-+pfkey_insert_socket(struct sock *sk)
-+{
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_insert_socket: "
-+ "sk=0p%p\n",
-+ sk);
-+ cli();
-+ sk->next=pfkey_sock_list;
-+ pfkey_sock_list=sk;
-+ sti();
-+}
-+DEBUG_NO_STATIC void
-+pfkey_remove_socket(struct sock *sk)
-+{
-+ struct sock **s;
-+
-+ s = NULL;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_remove_socket: .\n");
-+
-+ cli();
-+ s=&pfkey_sock_list;
-+
-+ while(*s!=NULL) {
-+ if(*s==sk) {
-+ *s=sk->next;
-+ sk->next=NULL;
-+ sti();
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_remove_socket: "
-+ "succeeded.\n");
-+ return;
-+ }
-+ s=&((*s)->next);
-+ }
-+ sti();
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_remove_socket: "
-+ "not found.\n");
-+ return;
-+}
-+#endif
-+
-+DEBUG_NO_STATIC void
-+pfkey_destroy_socket(struct sock *sk)
-+{
-+ struct sk_buff *skb;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: 0p%p\n",sk);
-+ pfkey_remove_socket(sk);
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: "
-+ "pfkey_remove_socket called, sk=0p%p\n",sk);
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: "
-+ "sk(0p%p)->(&0p%p)receive_queue.{next=0p%p,prev=0p%p}.\n",
-+ sk,
-+ &(sk->sk_receive_queue),
-+ sk->sk_receive_queue.next,
-+ sk->sk_receive_queue.prev);
-+
-+ while(sk && ((skb=skb_dequeue(&(sk->sk_receive_queue)))!=NULL)) {
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_pfkey && sysctl_ipsec_debug_verbose) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: "
-+ "skb=0p%p dequeued.\n", skb);
-+ printk(KERN_INFO "klips_debug:pfkey_destroy_socket: "
-+ "pfkey_skb contents:");
-+ printk(" next:0p%p", skb->next);
-+ printk(" prev:0p%p", skb->prev);
-+ printk(" sk:0p%p", skb->sk);
-+ printk(" dev:0p%p", skb->dev);
-+ if(skb->dev) {
-+ if(skb->dev->name) {
-+ printk(" dev->name:%s", skb->dev->name);
-+ } else {
-+ printk(" dev->name:NULL?");
-+ }
-+ } else {
-+ printk(" dev:NULL");
-+ }
-+ printk(" h:0p%p", skb_transport_header(skb));
-+ printk(" nh:0p%p", skb_network_header(skb));
-+ printk(" mac:0p%p", skb_mac_header(skb));
-+ printk(" dst:0p%p", skb->dst);
-+ if(sysctl_ipsec_debug_verbose) {
-+ int i;
-+
-+ printk(" cb");
-+ for(i=0; i<48; i++) {
-+ printk(":%2x", skb->cb[i]);
-+ }
-+ }
-+ printk(" len:%d", skb->len);
-+ printk(" csum:%d", skb->csum);
-+#ifndef NETDEV_23
-+ printk(" used:%d", skb->used);
-+ printk(" is_clone:%d", skb->is_clone);
-+#endif /* NETDEV_23 */
-+ printk(" cloned:%d", skb->cloned);
-+ printk(" pkt_type:%d", skb->pkt_type);
-+ printk(" ip_summed:%d", skb->ip_summed);
-+ printk(" priority:%d", skb->priority);
-+ printk(" protocol:%d", skb->protocol);
-+#ifdef HAVE_SOCK_SECURITY
-+ printk(" security:%d", skb->security);
-+#endif
-+ printk(" truesize:%d", skb->truesize);
-+ printk(" head:0p%p", skb->head);
-+ printk(" data:0p%p", skb->data);
-+ printk(" tail:0p%p", skb_tail_pointer(skb));
-+ printk(" end:0p%p", skb_end_pointer(skb));
-+ if(sysctl_ipsec_debug_verbose) {
-+ unsigned char* i;
-+ printk(" data");
-+ for(i = skb->head; i < skb_end_pointer(skb); i++) {
-+ printk(":%2x", (unsigned char)(*(i)));
-+ }
-+ }
-+ printk(" destructor:0p%p", skb->destructor);
-+ printk("\n");
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: "
-+ "skb=0p%p freed.\n",
-+ skb);
-+ ipsec_kfree_skb(skb);
-+ }
-+
-+#ifdef NET_26
-+ sock_set_flag(sk, SOCK_DEAD);
-+#else
-+ sk->dead = 1;
-+#endif
-+ sk_free(sk);
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_destroy_socket: destroyed.\n");
-+}
-+
-+int
-+pfkey_upmsg(struct socket *sock, struct sadb_msg *pfkey_msg)
-+{
-+ struct sock *sk;
-+
-+ if(sock == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "NULL socket passed in.\n");
-+ return -EINVAL;
-+ }
-+
-+ if(pfkey_msg == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "NULL pfkey_msg passed in.\n");
-+ return -EINVAL;
-+ }
-+
-+ sk = sock->sk;
-+ return pfkey_upmsgsk(sk, pfkey_msg);
-+}
-+int
-+pfkey_upmsgsk(struct sock *sk, struct sadb_msg *pfkey_msg)
-+{
-+ int error = 0;
-+ struct sk_buff * skb = NULL;
-+
-+ if(sk == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "NULL sock passed in.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "allocating %d bytes...\n",
-+ (int)(pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN));
-+ if(!(skb = alloc_skb(pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN, GFP_ATOMIC) )) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "no buffers left to send up a message.\n");
-+ return -ENOBUFS;
-+ }
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "...allocated at 0p%p.\n",
-+ skb);
-+
-+ skb->dev = NULL;
-+
-+ if(skb_tailroom(skb) < pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN) {
-+ printk(KERN_WARNING "klips_error:pfkey_upmsg: "
-+ "tried to skb_put %ld, %d available. This should never happen, please report.\n",
-+ (unsigned long int)pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN,
-+ skb_tailroom(skb));
-+ ipsec_kfree_skb(skb);
-+ return -ENOBUFS;
-+ }
-+ skb_set_transport_header(skb, ipsec_skb_offset(skb, skb_put(skb, pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN)));
-+ memcpy(skb_transport_header(skb), pfkey_msg, pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN);
-+
-+ if((error = sock_queue_rcv_skb(sk, skb)) < 0) {
-+ skb->sk=NULL;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_upmsg: "
-+ "error=%d calling sock_queue_rcv_skb with skb=0p%p.\n",
-+ error,
-+ skb);
-+ ipsec_kfree_skb(skb);
-+ return error;
-+ }
-+ return error;
-+}
-+
-+#if defined(NET_26_12_SKALLOC) || defined(NET_26_24_SKALLOC)
-+
-+static struct proto key_proto = {
-+ .name = "KEY",
-+ .owner = THIS_MODULE,
-+ .obj_size = sizeof(struct sock),
-+
-+};
-+#endif
-+#ifdef NET_26_24_SKALLOC
-+DEBUG_NO_STATIC int
-+pfkey_create(struct net *net, struct socket *sock, int protocol)
-+#else
-+DEBUG_NO_STATIC int
-+pfkey_create(struct socket *sock, int protocol)
-+#endif
-+{
-+ struct sock *sk;
-+
-+ if(sock == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "socket NULL.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "sock=0p%p type:%d state:%d flags:%ld protocol:%d\n",
-+ sock,
-+ sock->type,
-+ (unsigned int)(sock->state),
-+ sock->flags, protocol);
-+
-+ if(sock->type != SOCK_RAW) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "only SOCK_RAW supported.\n");
-+ return -ESOCKTNOSUPPORT;
-+ }
-+
-+ if(protocol != PF_KEY_V2) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "protocol not PF_KEY_V2.\n");
-+ return -EPROTONOSUPPORT;
-+ }
-+
-+ if((current->uid != 0)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "must be root to open pfkey sockets.\n");
-+ return -EACCES;
-+ }
-+
-+ sock->state = SS_UNCONNECTED;
-+
-+ KLIPS_INC_USE;
-+
-+#ifdef NET_26
-+#ifdef NET_26_24_SKALLOC
-+ sk=(struct sock *)sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto);
-+#else
-+#ifdef NET_26_12_SKALLOC
-+ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1);
-+#else
-+ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, 1, NULL);
-+#endif
-+#endif
-+#else
-+ /* 2.4 interface */
-+ sk=(struct sock *)sk_alloc(PF_KEY, GFP_KERNEL, 1);
-+#endif
-+
-+ if(sk == NULL)
-+ {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "Out of memory trying to allocate.\n");
-+ KLIPS_DEC_USE;
-+ return -ENOMEM;
-+ }
-+
-+ sock_init_data(sock, sk);
-+
-+ sk->sk_destruct = NULL;
-+ sk->sk_reuse = 1;
-+ sock->ops = &SOCKOPS_WRAPPED(pfkey_ops);
-+
-+ sk->sk_family = PF_KEY;
-+/* sk->num = protocol; */
-+ sk->sk_protocol = protocol;
-+ key_pid(sk) = current->pid;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "sock->fasync_list=0p%p sk->sleep=0p%p.\n",
-+ sock->fasync_list,
-+ sk->sk_sleep);
-+
-+ pfkey_insert_socket(sk);
-+ pfkey_list_insert_socket(sock, &pfkey_open_sockets);
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_create: "
-+ "Socket sock=0p%p sk=0p%p initialised.\n", sock, sk);
-+ return 0;
-+}
-+
-+DEBUG_NO_STATIC int
-+#ifdef NETDEV_23
-+pfkey_release(struct socket *sock)
-+#else /* NETDEV_23 */
-+pfkey_release(struct socket *sock, struct socket *peersock)
-+#endif /* NETDEV_23 */
-+{
-+ struct sock *sk;
-+ int i;
-+
-+ if(sock==NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_release: "
-+ "No socket attached.\n");
-+ return 0; /* -EINVAL; */
-+ }
-+
-+ sk=sock->sk;
-+
-+ /* May not have data attached */
-+ if(sk==NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_release: "
-+ "No sk attached to sock=0p%p.\n", sock);
-+ return 0; /* -EINVAL; */
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_release: "
-+ "sock=0p%p sk=0p%p\n", sock, sk);
-+
-+ if(sock_flag(sk, SOCK_DEAD))
-+ if(sk->sk_state_change) {
-+ sk->sk_state_change(sk);
-+ }
-+
-+ sock->sk = NULL;
-+
-+ /* Try to flush out this socket. Throw out buffers at least */
-+ pfkey_destroy_socket(sk);
-+ pfkey_list_remove_socket(sock, &pfkey_open_sockets);
-+ for(i = K_SADB_SATYPE_UNSPEC; i <= K_SADB_SATYPE_MAX; i++) {
-+ pfkey_list_remove_socket(sock, &(pfkey_registered_sockets[i]));
-+ }
-+
-+ KLIPS_DEC_USE;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_release: "
-+ "succeeded.\n");
-+
-+ return 0;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_shutdown(struct socket *sock, int mode)
-+{
-+ struct sock *sk;
-+
-+ if(sock == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_shutdown: "
-+ "NULL socket passed in.\n");
-+ return -EINVAL;
-+ }
-+
-+ sk=sock->sk;
-+
-+ if(sk == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_shutdown: "
-+ "No sock attached to socket.\n");
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_shutdown: "
-+ "mode=%x.\n", mode);
-+ mode++;
-+
-+ if(mode&SEND_SHUTDOWN) {
-+ sk->sk_shutdown|=SEND_SHUTDOWN;
-+ sk->sk_state_change(sk);
-+ }
-+
-+ if(mode&RCV_SHUTDOWN) {
-+ sk->sk_shutdown|=RCV_SHUTDOWN;
-+ sk->sk_state_change(sk);
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Send PF_KEY data down.
-+ */
-+
-+DEBUG_NO_STATIC int
-+#ifdef NET_26
-+pfkey_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
-+#else
-+pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm)
-+#endif
-+{
-+ struct sock *sk;
-+ int error = 0;
-+ struct sadb_msg *pfkey_msg = NULL, *pfkey_reply = NULL;
-+
-+ if(sock == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "Null socket passed in.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ sk = sock->sk;
-+
-+ if(sk == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "Null sock passed in.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(msg == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "Null msghdr passed in.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: .\n");
-+ if(sk->sk_err) {
-+ error = sock_error(sk);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "sk->err is non-zero, returns %d.\n",
-+ error);
-+ SENDERR(-error);
-+ }
-+
-+ if((current->uid != 0)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "must be root to send messages to pfkey sockets.\n");
-+ SENDERR(EACCES);
-+ }
-+
-+ if(msg->msg_control)
-+ {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "can't set flags or set msg_control.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(sk->sk_shutdown & SEND_SHUTDOWN) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "shutdown.\n");
-+ send_sig(SIGPIPE, current, 0);
-+ SENDERR(EPIPE);
-+ }
-+
-+ if(len < sizeof(struct sadb_msg)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "bogus msg len of %d, too small.\n", (int)len);
-+ SENDERR(EMSGSIZE);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "allocating %d bytes for downward message.\n",
-+ (int)len);
-+ if((pfkey_msg = (struct sadb_msg*)kmalloc(len, GFP_KERNEL)) == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "memory allocation error.\n");
-+ SENDERR(ENOBUFS);
-+ }
-+
-+ memcpy_fromiovec((void *)pfkey_msg, msg->msg_iov, len);
-+
-+ if(pfkey_msg->sadb_msg_version != PF_KEY_V2) {
-+ KLIPS_PRINT(1 || debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "not PF_KEY_V2 msg, found %d, should be %d.\n",
-+ pfkey_msg->sadb_msg_version,
-+ PF_KEY_V2);
-+ kfree((void*)pfkey_msg);
-+ return -EINVAL;
-+ }
-+
-+ if(len != pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "bogus msg len of %d, not %d byte aligned.\n",
-+ (int)len, (int)IPSEC_PFKEYv2_ALIGN);
-+ SENDERR(EMSGSIZE);
-+ }
-+
-+ if(pfkey_msg->sadb_msg_reserved) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "reserved field must be zero, set to %d.\n",
-+ pfkey_msg->sadb_msg_reserved);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if((pfkey_msg->sadb_msg_type > K_SADB_MAX) || (!pfkey_msg->sadb_msg_type)){
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "msg type too large or small:%d.\n",
-+ pfkey_msg->sadb_msg_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "msg sent for parsing.\n");
-+
-+ if((error = pfkey_msg_interp(sk, pfkey_msg))) {
-+ struct socket_list *pfkey_socketsp;
-+
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: "
-+ "pfkey_msg_parse returns %d.\n",
-+ error);
-+
-+ if((pfkey_reply = (struct sadb_msg*)kmalloc(sizeof(struct sadb_msg), GFP_KERNEL)) == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "memory allocation error.\n");
-+ SENDERR(ENOBUFS);
-+ }
-+ memcpy((void*)pfkey_reply, (void*)pfkey_msg, sizeof(struct sadb_msg));
-+ pfkey_reply->sadb_msg_errno = -error;
-+ pfkey_reply->sadb_msg_len = sizeof(struct sadb_msg) / IPSEC_PFKEYv2_ALIGN;
-+
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ int error_upmsg = 0;
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: "
-+ "sending up error=%d message=0p%p to socket=0p%p.\n",
-+ error,
-+ pfkey_reply,
-+ pfkey_socketsp->socketp);
-+ if((error_upmsg = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: "
-+ "sending up error message to socket=0p%p failed with error=%d.\n",
-+ pfkey_socketsp->socketp,
-+ error_upmsg);
-+ /* pfkey_msg_free(&pfkey_reply); */
-+ /* SENDERR(-error); */
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_sendmsg: "
-+ "sending up error message to socket=0p%p succeeded.\n",
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ pfkey_msg_free(&pfkey_reply);
-+
-+ SENDERR(-error);
-+ }
-+
-+ errlab:
-+ if (pfkey_msg) {
-+ kfree((void*)pfkey_msg);
-+ }
-+
-+ if(error) {
-+ return error;
-+ } else {
-+ return len;
-+ }
-+}
-+
-+/*
-+ * Receive PF_KEY data up.
-+ */
-+
-+DEBUG_NO_STATIC int
-+#ifdef NET_26
-+pfkey_recvmsg(struct kiocb *kiocb
-+ , struct socket *sock
-+ , struct msghdr *msg
-+ , size_t size
-+ , int flags)
-+#else
-+pfkey_recvmsg(struct socket *sock
-+ , struct msghdr *msg
-+ , int size, int flags
-+ , struct scm_cookie *scm)
-+#endif
-+{
-+ struct sock *sk;
-+ int noblock = flags & MSG_DONTWAIT;
-+ struct sk_buff *skb;
-+ int error;
-+
-+ if(sock == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_recvmsg: "
-+ "Null socket passed in.\n");
-+ return -EINVAL;
-+ }
-+
-+ sk = sock->sk;
-+
-+ if(sk == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_recvmsg: "
-+ "Null sock passed in for sock=0p%p.\n", sock);
-+ return -EINVAL;
-+ }
-+
-+ if(msg == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_recvmsg: "
-+ "Null msghdr passed in for sock=0p%p, sk=0p%p.\n",
-+ sock, sk);
-+ return -EINVAL;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+ "klips_debug:pfkey_recvmsg: sock=0p%p sk=0p%p msg=0p%p size=%d.\n",
-+ sock, sk, msg, (int)size);
-+ if(flags & ~MSG_PEEK) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "flags (%d) other than MSG_PEEK not supported.\n",
-+ flags);
-+ return -EOPNOTSUPP;
-+ }
-+
-+ msg->msg_namelen = 0; /* sizeof(*ska); */
-+
-+ if(sk->sk_err) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sendmsg: "
-+ "sk->sk_err=%d.\n", sk->sk_err);
-+ return sock_error(sk);
-+ }
-+
-+ if((skb = skb_recv_datagram(sk, flags, noblock, &error) ) == NULL) {
-+ return error;
-+ }
-+
-+ if(size > skb->len) {
-+ size = skb->len;
-+ }
-+ else if(size <skb->len) {
-+ msg->msg_flags |= MSG_TRUNC;
-+ }
-+
-+ skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
-+#ifdef HAVE_KERNEL_TSTAMP
-+ sk->sk_stamp = skb->tstamp;
-+#elif defined(HAVE_TSTAMP)
-+ sk->sk_stamp.tv_sec = skb->tstamp.off_sec;
-+ sk->sk_stamp.tv_usec = skb->tstamp.off_usec;
-+#else
-+ sk->sk_stamp=skb->stamp;
-+#endif
-+
-+ skb_free_datagram(sk, skb);
-+ return size;
-+}
-+
-+#ifdef CONFIG_PROC_FS
-+#ifndef PROC_FS_2325
-+DEBUG_NO_STATIC
-+#endif /* PROC_FS_2325 */
-+int
-+pfkey_get_info(char *buffer, char **start, off_t offset, int length
-+#ifndef PROC_NO_DUMMY
-+, int dummy
-+#endif /* !PROC_NO_DUMMY */
-+#ifdef PROC_EOF_DATA
-+, int *eof
-+, void *data
-+#endif
-+)
-+{
-+ const int max_content = length > 0? length-1 : 0; /* limit of useful snprintf output */
-+#ifdef NET_26
-+ struct hlist_node *node;
-+#endif
-+ off_t begin=0;
-+ int len=0;
-+ struct sock *sk;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(!sysctl_ipsec_debug_verbose) {
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ len += ipsec_snprintf(buffer, length,
-+ " sock pid socket next prev e n p sndbf Flags Type St\n");
-+#ifdef CONFIG_KLIPS_DEBUG
-+ } else {
-+ len += ipsec_snprintf(buffer, length,
-+ " sock pid d sleep socket next prev e r z n p sndbf stamp Flags Type St\n");
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ sk_for_each(sk, node, &pfkey_sock_list) {
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(!sysctl_ipsec_debug_verbose) {
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "%8p %5d %8p %d %d %5d %08lX %8X %2X\n",
-+ sk,
-+ key_pid(sk),
-+ sk->sk_socket,
-+ sk->sk_err,
-+ sk->sk_protocol,
-+ sk->sk_sndbuf,
-+ sk->sk_socket->flags,
-+ sk->sk_socket->type,
-+ sk->sk_socket->state);
-+#ifdef CONFIG_KLIPS_DEBUG
-+ } else {
-+ struct timeval t;
-+ grab_socket_timeval(t, *sk);
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ "%8p %5d %d %8p %8p %d %d %d %d %5d %d.%06d %08lX %8X %2X\n",
-+ sk,
-+ key_pid(sk),
-+ sock_flag(sk, SOCK_DEAD),
-+ sk->sk_sleep,
-+ sk->sk_socket,
-+ sk->sk_err,
-+ sk->sk_reuse,
-+#ifdef HAVE_SOCK_ZAPPED
-+ sock_flag(sk, SOCK_ZAPPED),
-+#else
-+ sk->sk_zapped,
-+#endif
-+ sk->sk_protocol,
-+ sk->sk_sndbuf,
-+ (unsigned int)t.tv_sec,
-+ (unsigned int)t.tv_usec,
-+ sk->sk_socket->flags,
-+ sk->sk_socket->type,
-+ sk->sk_socket->state);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loop */
-+ len = max_content; /* truncate crap */
-+ break;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of what we've generated */
-+
-+ if (pos <= offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+ }
-+
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ return len - (offset - begin);
-+}
-+
-+#ifndef PROC_FS_2325
-+DEBUG_NO_STATIC
-+#endif /* PROC_FS_2325 */
-+int
-+pfkey_supported_get_info(char *buffer, char **start, off_t offset, int length
-+#ifndef PROC_NO_DUMMY
-+, int dummy
-+#endif /* !PROC_NO_DUMMY */
-+#ifdef PROC_EOF_DATA
-+, int *eof
-+, void *data
-+#endif
-+)
-+{
-+ /* limit of useful snprintf output */
-+ const int max_content = length > 0? length-1 : 0;
-+ off_t begin=0;
-+ int len=0;
-+ int satype;
-+ struct supported_list *ps;
-+
-+ len += ipsec_snprintf(buffer, length,
-+ "satype exttype alg_id ivlen minbits maxbits name\n");
-+
-+ for(satype = K_SADB_SATYPE_UNSPEC; satype <= K_SADB_SATYPE_MAX; satype++) {
-+ ps = pfkey_supported_list[satype];
-+ while(ps) {
-+ struct ipsec_alg_supported *alg = ps->supportedp;
-+ const char *n = alg->ias_name;
-+ if(n == NULL) n = "unknown";
-+
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ " %2d %2d %2d %3d %3d %3d %20s\n",
-+ satype,
-+ alg->ias_exttype,
-+ alg->ias_id,
-+ alg->ias_ivlen,
-+ alg->ias_keyminbits,
-+ alg->ias_keymaxbits,
-+ n);
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loop */
-+ len = max_content; /* truncate crap */
-+ break;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of what we've generated */
-+
-+ if (pos <= offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+
-+ ps = ps->next;
-+ }
-+ }
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ return len - (offset - begin);
-+}
-+
-+#ifndef PROC_FS_2325
-+DEBUG_NO_STATIC
-+#endif /* PROC_FS_2325 */
-+int
-+pfkey_registered_get_info(char *buffer, char **start, off_t offset, int length
-+#ifndef PROC_NO_DUMMY
-+, int dummy
-+#endif /* !PROC_NO_DUMMY */
-+#ifdef PROC_EOF_DATA
-+, int *eof
-+, void *data
-+#endif
-+)
-+{
-+ const int max_content = length > 0? length-1 : 0; /* limit of useful snprintf output */
-+ off_t begin=0;
-+ int len=0;
-+ int satype;
-+ struct socket_list *pfkey_sockets;
-+
-+ len += ipsec_snprintf(buffer, length,
-+ "satype socket pid sk\n");
-+
-+ for(satype = K_SADB_SATYPE_UNSPEC; satype <= K_SADB_SATYPE_MAX; satype++) {
-+ pfkey_sockets = pfkey_registered_sockets[satype];
-+ while(pfkey_sockets) {
-+ len += ipsec_snprintf(buffer+len, length-len,
-+ " %2d %8p %5d %8p\n",
-+ satype,
-+ pfkey_sockets->socketp,
-+ key_pid(pfkey_sockets->socketp->sk),
-+ pfkey_sockets->socketp->sk);
-+
-+ if (len >= max_content) {
-+ /* we've done all that can fit -- stop loop (could stop two) */
-+ len = max_content; /* truncate crap */
-+ break;
-+ } else {
-+ const off_t pos = begin + len; /* file position of end of what we've generated */
-+
-+ if (pos <= offset) {
-+ /* all is before first interesting character:
-+ * discard, but note where we are.
-+ */
-+ len = 0;
-+ begin = pos;
-+ }
-+ }
-+
-+ pfkey_sockets = pfkey_sockets->next;
-+ }
-+ }
-+ *start = buffer + (offset - begin); /* Start of wanted data */
-+ return len - (offset - begin);
-+}
-+
-+#ifndef PROC_FS_2325
-+struct proc_dir_entry proc_net_pfkey =
-+{
-+ 0,
-+ 6, "pf_key",
-+ S_IFREG | S_IRUGO, 1, 0, 0,
-+ 0, &proc_net_inode_operations,
-+ pfkey_get_info
-+};
-+struct proc_dir_entry proc_net_pfkey_supported =
-+{
-+ 0,
-+ 16, "pf_key_supported",
-+ S_IFREG | S_IRUGO, 1, 0, 0,
-+ 0, &proc_net_inode_operations,
-+ pfkey_supported_get_info
-+};
-+struct proc_dir_entry proc_net_pfkey_registered =
-+{
-+ 0,
-+ 17, "pf_key_registered",
-+ S_IFREG | S_IRUGO, 1, 0, 0,
-+ 0, &proc_net_inode_operations,
-+ pfkey_registered_get_info
-+};
-+#endif /* !PROC_FS_2325 */
-+#endif /* CONFIG_PROC_FS */
-+
-+DEBUG_NO_STATIC int
-+supported_add_all(int satype, struct ipsec_alg_supported supported[], int size)
-+{
-+ int i;
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:init_pfkey: "
-+ "sizeof(supported_init_<satype=%d>)[%d]/sizeof(struct ipsec_alg_supported)[%d]=%d.\n",
-+ satype,
-+ size,
-+ (int)sizeof(struct ipsec_alg_supported),
-+ (int)(size/sizeof(struct ipsec_alg_supported)));
-+
-+ for(i = 0; i < size / sizeof(struct ipsec_alg_supported); i++) {
-+
-+ const char *n = supported[i].ias_name;
-+ if(n == NULL) n="unknown";
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:init_pfkey: "
-+ "i=%d inserting satype=%d exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d name=%s.\n",
-+ i,
-+ satype,
-+ supported[i].ias_exttype,
-+ supported[i].ias_id,
-+ supported[i].ias_ivlen,
-+ supported[i].ias_keyminbits,
-+ supported[i].ias_keymaxbits,
-+ n);
-+
-+ error |= pfkey_list_insert_supported(&(supported[i]),
-+ &(pfkey_supported_list[satype]));
-+ }
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+supported_remove_all(int satype)
-+{
-+ int error = 0;
-+ struct ipsec_alg_supported*supportedp;
-+
-+ while(pfkey_supported_list[satype]) {
-+ const char *n;
-+ supportedp = pfkey_supported_list[satype]->supportedp;
-+
-+ n = supportedp->ias_name;
-+ if(n == NULL) n="unknown";
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:init_pfkey: "
-+ "removing satype=%d exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d name=%s.\n",
-+ satype,
-+ supportedp->ias_exttype,
-+ supportedp->ias_id,
-+ supportedp->ias_ivlen,
-+ supportedp->ias_keyminbits,
-+ supportedp->ias_keymaxbits, n);
-+
-+ error |= pfkey_list_remove_supported(supportedp,
-+ &(pfkey_supported_list[satype]));
-+ }
-+ return error;
-+}
-+
-+int
-+pfkey_init(void)
-+{
-+ int error = 0;
-+ int i;
-+#ifdef HAVE_PROC_DIR_ENTRY
-+ struct proc_dir_entry* entry;
-+#endif
-+
-+
-+ static struct ipsec_alg_supported supported_init_ah[] = {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ {K_SADB_EXT_SUPPORTED_AUTH, K_SADB_AALG_MD5HMAC, 0, 128, 128},
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ {K_SADB_EXT_SUPPORTED_AUTH, K_SADB_AALG_SHA1HMAC, 0, 160, 160}
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+ };
-+ static struct ipsec_alg_supported supported_init_esp[] = {
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
-+ {K_SADB_EXT_SUPPORTED_AUTH, K_SADB_AALG_MD5HMAC, 0, 128, 128},
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
-+#ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
-+ {K_SADB_EXT_SUPPORTED_AUTH, K_SADB_AALG_SHA1HMAC, 0, 160, 160},
-+#endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
-+#ifdef CONFIG_KLIPS_ENC_3DES
-+ {K_SADB_EXT_SUPPORTED_ENCRYPT, K_SADB_EALG_3DESCBC, 64, 168, 168},
-+#endif /* CONFIG_KLIPS_ENC_3DES */
-+ };
-+ static struct ipsec_alg_supported supported_init_ipip[] = {
-+ {K_SADB_EXT_SUPPORTED_ENCRYPT, K_SADB_X_TALG_IPv4_in_IPv4, 0, 32, 32}
-+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+ , {K_SADB_EXT_SUPPORTED_ENCRYPT, K_SADB_X_TALG_IPv6_in_IPv4, 0, 128, 32}
-+ , {K_SADB_EXT_SUPPORTED_ENCRYPT, K_SADB_X_TALG_IPv4_in_IPv6, 0, 32, 128}
-+ , {K_SADB_EXT_SUPPORTED_ENCRYPT, K_SADB_X_TALG_IPv6_in_IPv6, 0, 128, 128}
-+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
-+ };
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ static struct ipsec_alg_supported supported_init_ipcomp[] = {
-+ {K_SADB_EXT_SUPPORTED_ENCRYPT, SADB_X_CALG_DEFLATE, 0, 1, 1}
-+ };
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+
-+#if 0
-+ printk(KERN_INFO
-+ "klips_info:pfkey_init: "
-+ "FreeS/WAN: initialising PF_KEYv2 domain sockets.\n");
-+#endif
-+
-+ for(i = K_SADB_SATYPE_UNSPEC; i <= K_SADB_SATYPE_MAX; i++) {
-+ pfkey_registered_sockets[i] = NULL;
-+ pfkey_supported_list[i] = NULL;
-+ }
-+
-+ error |= supported_add_all(K_SADB_SATYPE_AH, supported_init_ah, sizeof(supported_init_ah));
-+ error |= supported_add_all(K_SADB_SATYPE_ESP, supported_init_esp, sizeof(supported_init_esp));
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ error |= supported_add_all(K_SADB_X_SATYPE_COMP, supported_init_ipcomp, sizeof(supported_init_ipcomp));
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ error |= supported_add_all(K_SADB_X_SATYPE_IPIP, supported_init_ipip, sizeof(supported_init_ipip));
-+
-+ error |= sock_register(&pfkey_family_ops);
-+
-+#ifdef CONFIG_PROC_FS
-+# ifndef PROC_FS_2325
-+# ifdef PROC_FS_21
-+ error |= proc_register(proc_net, &proc_net_pfkey);
-+ error |= proc_register(proc_net, &proc_net_pfkey_supported);
-+ error |= proc_register(proc_net, &proc_net_pfkey_registered);
-+# else /* PROC_FS_21 */
-+ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey);
-+ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey_supported);
-+ error |= proc_register_dynamic(&proc_net, &proc_net_pfkey_registered);
-+# endif /* PROC_FS_21 */
-+# else /* !PROC_FS_2325 */
-+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
-+ proc_net_create ("pf_key", 0, pfkey_get_info);
-+ proc_net_create ("pf_key_supported", 0, pfkey_supported_get_info);
-+ proc_net_create ("pf_key_registered", 0, pfkey_registered_get_info);
-+# else
-+ entry = create_proc_entry ("pf_key", 0, init_net.proc_net);
-+ entry->read_proc = pfkey_get_info;
-+ entry = create_proc_entry ("pf_key_supported", 0, init_net.proc_net);
-+ entry->read_proc = pfkey_supported_get_info;
-+ entry = create_proc_entry ("pf_key_registered", 0, init_net.proc_net);
-+ entry->read_proc = pfkey_registered_get_info;
-+# endif
-+# endif /* !PROC_FS_2325 */
-+#endif /* CONFIG_PROC_FS */
-+
-+ return error;
-+}
-+
-+int
-+pfkey_cleanup(void)
-+{
-+ int error = 0;
-+
-+ printk(KERN_INFO "klips_info:pfkey_cleanup: "
-+ "shutting down PF_KEY domain sockets.\n");
-+#ifdef VOID_SOCK_UNREGISTER
-+ sock_unregister(PF_KEY);
-+#else
-+ error |= sock_unregister(PF_KEY);
-+#endif
-+
-+ error |= supported_remove_all(K_SADB_SATYPE_AH);
-+ error |= supported_remove_all(K_SADB_SATYPE_ESP);
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ error |= supported_remove_all(K_SADB_X_SATYPE_COMP);
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ error |= supported_remove_all(K_SADB_X_SATYPE_IPIP);
-+
-+#ifdef CONFIG_PROC_FS
-+# ifndef PROC_FS_2325
-+ if (proc_net_unregister(proc_net_pfkey.low_ino) != 0)
-+ printk("klips_debug:pfkey_cleanup: "
-+ "cannot unregister /proc/net/pf_key\n");
-+ if (proc_net_unregister(proc_net_pfkey_supported.low_ino) != 0)
-+ printk("klips_debug:pfkey_cleanup: "
-+ "cannot unregister /proc/net/pf_key_supported\n");
-+ if (proc_net_unregister(proc_net_pfkey_registered.low_ino) != 0)
-+ printk("klips_debug:pfkey_cleanup: "
-+ "cannot unregister /proc/net/pf_key_registered\n");
-+# else /* !PROC_FS_2325 */
-+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
-+ proc_net_remove ("pf_key");
-+ proc_net_remove ("pf_key_supported");
-+ proc_net_remove ("pf_key_registered");
-+# else
-+ proc_net_remove (&init_net, "pf_key");
-+ proc_net_remove (&init_net, "pf_key_supported");
-+ proc_net_remove (&init_net, "pf_key_registered");
-+# endif
-+
-+# endif /* !PROC_FS_2325 */
-+#endif /* CONFIG_PROC_FS */
-+
-+ /* other module unloading cleanup happens here */
-+ return error;
-+}
-+
-+#ifdef MODULE
-+#if 0
-+int
-+init_module(void)
-+{
-+ pfkey_init();
-+ return 0;
-+}
-+
-+void
-+cleanup_module(void)
-+{
-+ pfkey_cleanup();
-+}
-+#endif /* 0 */
-+#else /* MODULE */
-+struct net_protocol;
-+void pfkey_proto_init(struct net_protocol *pro)
-+{
-+ pfkey_init();
-+}
-+#endif /* MODULE */
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_build.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1452 @@
-+/*
-+ * RFC2367 PF_KEYv2 Key management API message parser
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: pfkey_v2_build.c,v 1.53 2005/11/09 00:30:37 mcr Exp $
-+ */
-+
-+/*
-+ * Template from klips/net/ipsec/ipsec/ipsec_parser.c.
-+ */
-+
-+char pfkey_v2_build_c_version[] = "$Id: pfkey_v2_build.c,v 1.53 2005/11/09 00:30:37 mcr Exp $";
-+
-+/*
-+ * Some ugly stuff to allow consistent debugging code for use in the
-+ * kernel and in user space
-+*/
-+
-+#if defined(__KERNEL__) && defined(linux)
-+
-+# include <linux/kernel.h> /* for printk */
-+
-+# include "openswan/ipsec_kversion.h" /* for malloc switch */
-+# ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+# else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+# endif /* MALLOC_SLAB */
-+# include <linux/errno.h> /* error codes */
-+# include <linux/types.h> /* size_t */
-+# include <linux/interrupt.h> /* mark_bh */
-+
-+# include <linux/netdevice.h> /* struct device, and other headers */
-+# include <linux/etherdevice.h> /* eth_type_trans */
-+# include <linux/ip.h> /* struct iphdr */
-+# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+# include <linux/ipv6.h> /* struct ipv6hdr */
-+# endif /* if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
-+
-+# define MALLOC(size) kmalloc(size, GFP_ATOMIC)
-+# define FREE(obj) kfree(obj)
-+# include <openswan.h>
-+#else /* __KERNEL__ */
-+
-+# include <sys/types.h>
-+# include <sys/errno.h>
-+# include <netinet/in.h>
-+# include <stdlib.h>
-+# include <stdio.h>
-+# include <string.h> /* memset */
-+
-+# include <openswan.h>
-+
-+#endif /* __KERNEL__ */
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#ifdef __KERNEL__
-+#include "openswan/radij.h" /* rd_nodes */
-+#include "openswan/ipsec_encap.h" /* sockaddr_encap */
-+#endif /* __KERNEL__ */
-+
-+
-+#include "openswan/ipsec_sa.h" /* IPSEC_SAREF_NULL, IPSEC_SA_REF_TABLE_IDX_WIDTH */
-+#include "openswan/pfkey_debug.h"
-+
-+
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+void
-+pfkey_extensions_init(struct sadb_ext *extensions[K_SADB_EXT_MAX + 1])
-+{
-+ int i;
-+
-+ for (i = 0; i != K_SADB_EXT_MAX + 1; i++) {
-+ extensions[i] = NULL;
-+ }
-+}
-+
-+void
-+pfkey_extensions_free(struct sadb_ext *extensions[K_SADB_EXT_MAX + 1])
-+{
-+ int i;
-+
-+ if(!extensions) {
-+ return;
-+ }
-+
-+ if(extensions[0]) {
-+ memset(extensions[0], 0, sizeof(struct sadb_msg));
-+ FREE(extensions[0]);
-+ extensions[0] = NULL;
-+ }
-+
-+ for (i = 1; i != K_SADB_EXT_MAX + 1; i++) {
-+ if(extensions[i]) {
-+ memset(extensions[i], 0, extensions[i]->sadb_ext_len * IPSEC_PFKEYv2_ALIGN);
-+ FREE(extensions[i]);
-+ extensions[i] = NULL;
-+ }
-+ }
-+}
-+
-+void
-+pfkey_msg_free(struct sadb_msg **pfkey_msg)
-+{
-+ if(*pfkey_msg) {
-+ memset(*pfkey_msg, 0, (*pfkey_msg)->sadb_msg_len * IPSEC_PFKEYv2_ALIGN);
-+ FREE(*pfkey_msg);
-+ *pfkey_msg = NULL;
-+ }
-+}
-+
-+/* Default extension builders taken from the KLIPS code */
-+
-+int
-+pfkey_msg_hdr_build(struct sadb_ext** pfkey_ext,
-+ uint8_t msg_type,
-+ uint8_t satype,
-+ uint8_t msg_errno,
-+ uint32_t seq,
-+ uint32_t pid)
-+{
-+ int error = 0;
-+ struct sadb_msg *pfkey_msg = (struct sadb_msg *)*pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_msg_hdr_build:\n");
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_msg_hdr_build: "
-+ "on_entry &pfkey_ext=0p%p pfkey_ext=0p%p *pfkey_ext=0p%p.\n",
-+ &pfkey_ext,
-+ pfkey_ext,
-+ *pfkey_ext);
-+ /* sanity checks... */
-+ if(pfkey_msg) {
-+ ERROR("pfkey_msg_hdr_build: "
-+ "why is pfkey_msg already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!msg_type) {
-+ ERROR("pfkey_msg_hdr_build: "
-+ "msg type not set, must be non-zero..\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(msg_type > K_SADB_MAX) {
-+ ERROR("pfkey_msg_hdr_build: "
-+ "msg type too large:%d.\n",
-+ msg_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(satype > K_SADB_SATYPE_MAX) {
-+ ERROR("pfkey_msg_hdr_build: "
-+ "satype %d > max %d\n",
-+ satype, SADB_SATYPE_MAX);
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_msg = (struct sadb_msg*)MALLOC(sizeof(struct sadb_msg));
-+ *pfkey_ext = (struct sadb_ext*)pfkey_msg;
-+
-+ if(pfkey_msg == NULL) {
-+ ERROR("pfkey_msg_hdr_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_msg, 0, sizeof(struct sadb_msg));
-+
-+ pfkey_msg->sadb_msg_len = sizeof(struct sadb_msg) / IPSEC_PFKEYv2_ALIGN;
-+
-+ pfkey_msg->sadb_msg_type = msg_type;
-+ pfkey_msg->sadb_msg_satype = satype;
-+
-+ pfkey_msg->sadb_msg_version = PF_KEY_V2;
-+ pfkey_msg->sadb_msg_errno = msg_errno;
-+ pfkey_msg->sadb_msg_reserved = 0;
-+ pfkey_msg->sadb_msg_seq = seq;
-+ pfkey_msg->sadb_msg_pid = pid;
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_msg_hdr_build: "
-+ "on_exit &pfkey_ext=0p%p pfkey_ext=0p%p *pfkey_ext=0p%p.\n",
-+ &pfkey_ext,
-+ pfkey_ext,
-+ *pfkey_ext);
-+errlab:
-+ return error;
-+}
-+
-+
-+int
-+pfkey_sa_builds(struct sadb_ext **pfkey_ext,
-+ struct sadb_builds sab)
-+{
-+ int error = 0;
-+ struct k_sadb_sa *pfkey_sa = (struct k_sadb_sa *)*pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "spi=%08x replay=%d sa_state=%d auth=%d encrypt=%d flags=%d\n",
-+ ntohl(sab.sa_base.sadb_sa_spi), /* in network order */
-+ sab.sa_base.sadb_sa_replay,
-+ sab.sa_base.sadb_sa_state,
-+ sab.sa_base.sadb_sa_auth,
-+ sab.sa_base.sadb_sa_encrypt,
-+ sab.sa_base.sadb_sa_flags);
-+ /* sanity checks... */
-+ if(pfkey_sa) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "why is pfkey_sa already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(sab.sa_base.sadb_sa_exttype != SADB_EXT_SA &&
-+ sab.sa_base.sadb_sa_exttype != K_SADB_X_EXT_SA2) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "invalid exttype=%d.\n",
-+ sab.sa_base.sadb_sa_exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(sab.sa_base.sadb_sa_replay > 64) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "replay window size: %d -- must be 0 <= size <= 64\n",
-+ sab.sa_base.sadb_sa_replay);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(sab.sa_base.sadb_sa_auth > SADB_AALG_MAX) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "auth=%d > SADB_AALG_MAX=%d.\n",
-+ sab.sa_base.sadb_sa_auth,
-+ SADB_AALG_MAX);
-+ SENDERR(EINVAL);
-+ }
-+
-+#if K_SADB_EALG_MAX < 255
-+ if(sab.sa_base.sadb_sa_encrypt > K_SADB_EALG_MAX) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "encrypt=%d > K_SADB_EALG_MAX=%d.\n",
-+ sab.sa_base.sadb_sa_encrypt,
-+ K_SADB_EALG_MAX);
-+ SENDERR(EINVAL);
-+ }
-+#endif
-+
-+ if(sab.sa_base.sadb_sa_state > K_SADB_SASTATE_MAX) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "sa_state=%d exceeds MAX=%d.\n",
-+ sab.sa_base.sadb_sa_state,
-+ K_SADB_SASTATE_MAX);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(sab.sa_base.sadb_sa_state == K_SADB_SASTATE_DEAD) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "sa_state=%d is DEAD=%d is not allowed.\n",
-+ sab.sa_base.sadb_sa_state,
-+ K_SADB_SASTATE_DEAD);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if((IPSEC_SAREF_NULL != sab.sa_base.sadb_x_sa_ref) && (sab.sa_base.sadb_x_sa_ref >= (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH))) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "SAref=%d must be (SAref == IPSEC_SAREF_NULL(%d) || SAref < IPSEC_SA_REF_TABLE_NUM_ENTRIES(%d)).\n",
-+ sab.sa_base.sadb_x_sa_ref,
-+ IPSEC_SAREF_NULL,
-+ IPSEC_SA_REF_TABLE_NUM_ENTRIES);
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_sa = (struct k_sadb_sa*)MALLOC(sizeof(struct k_sadb_sa));
-+ *pfkey_ext = (struct sadb_ext*)pfkey_sa;
-+
-+ if(pfkey_sa == NULL) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sa_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_sa, 0, sizeof(struct k_sadb_sa));
-+
-+ *pfkey_sa = sab.sa_base;
-+ pfkey_sa->sadb_sa_len = sizeof(*pfkey_sa) / IPSEC_PFKEYv2_ALIGN;
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_sa_build(struct sadb_ext ** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t spi,
-+ uint8_t replay_window,
-+ uint8_t sa_state,
-+ uint8_t auth,
-+ uint8_t encrypt,
-+ uint32_t flags)
-+{
-+ struct sadb_builds sab;
-+
-+ memset(&sab, 0, sizeof(sab));
-+ sab.sa_base.sadb_sa_exttype = exttype;
-+ sab.sa_base.sadb_sa_spi = spi;
-+ sab.sa_base.sadb_sa_replay = replay_window;
-+ sab.sa_base.sadb_sa_state = sa_state;
-+ sab.sa_base.sadb_sa_auth = auth;
-+ sab.sa_base.sadb_sa_encrypt = encrypt;
-+ sab.sa_base.sadb_sa_flags = flags;
-+ sab.sa_base.sadb_x_sa_ref = IPSEC_SAREF_NULL;
-+
-+ return pfkey_sa_builds(pfkey_ext, sab);
-+}
-+
-+int
-+pfkey_lifetime_build(struct sadb_ext ** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t allocations,
-+ uint64_t bytes,
-+ uint64_t addtime,
-+ uint64_t usetime,
-+ uint32_t packets)
-+{
-+ int error = 0;
-+ struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)*pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_lifetime_build:\n");
-+ /* sanity checks... */
-+ if(pfkey_lifetime) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_lifetime_build: "
-+ "why is pfkey_lifetime already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(exttype != SADB_EXT_LIFETIME_CURRENT &&
-+ exttype != SADB_EXT_LIFETIME_HARD &&
-+ exttype != SADB_EXT_LIFETIME_SOFT) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_lifetime_build: "
-+ "invalid exttype=%d.\n",
-+ exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_lifetime = (struct sadb_lifetime*)MALLOC(sizeof(struct sadb_lifetime));
-+ *pfkey_ext = (struct sadb_ext*) pfkey_lifetime;
-+
-+ if(pfkey_lifetime == NULL) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_lifetime_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_lifetime, 0, sizeof(struct sadb_lifetime));
-+
-+ pfkey_lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime) / IPSEC_PFKEYv2_ALIGN;
-+ pfkey_lifetime->sadb_lifetime_exttype = exttype;
-+ pfkey_lifetime->sadb_lifetime_allocations = allocations;
-+ pfkey_lifetime->sadb_lifetime_bytes = bytes;
-+ pfkey_lifetime->sadb_lifetime_addtime = addtime;
-+ pfkey_lifetime->sadb_lifetime_usetime = usetime;
-+#ifdef NOT_YET
-+ /* XXX it is defined in struct sadb_lifetime, but not found?? */
-+ pfkey_lifetime->sadb_x_lifetime_packets = packets;
-+#endif
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_address_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint8_t proto,
-+ uint8_t prefixlen,
-+ struct sockaddr* address)
-+{
-+ int error = 0;
-+ int saddr_len = 0;
-+ char ipaddr_txt[ADDRTOT_BUF + 6/*extra for port number*/];
-+ struct sadb_address *pfkey_address = (struct sadb_address *)*pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_address_build: "
-+ "exttype=%d proto=%d prefixlen=%d\n",
-+ exttype,
-+ proto,
-+ prefixlen);
-+ /* sanity checks... */
-+ if(pfkey_address) {
-+ ERROR("pfkey_address_build: "
-+ "why is pfkey_address already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if (!address) {
-+ ERROR("pfkey_address_build: " "address is NULL\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(exttype) {
-+ case SADB_EXT_ADDRESS_SRC:
-+ case SADB_EXT_ADDRESS_DST:
-+ case SADB_EXT_ADDRESS_PROXY:
-+ case K_SADB_X_EXT_ADDRESS_DST2:
-+ case K_SADB_X_EXT_ADDRESS_SRC_FLOW:
-+ case K_SADB_X_EXT_ADDRESS_DST_FLOW:
-+ case K_SADB_X_EXT_ADDRESS_SRC_MASK:
-+ case K_SADB_X_EXT_ADDRESS_DST_MASK:
-+#ifdef NAT_TRAVERSAL
-+ case K_SADB_X_EXT_NAT_T_OA:
-+#endif
-+ break;
-+ default:
-+ ERROR("pfkey_address_build: "
-+ "unrecognised ext_type=%d.\n",
-+ exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(address->sa_family) {
-+ case AF_INET:
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_address_build: "
-+ "found address family AF_INET.\n");
-+ saddr_len = sizeof(struct sockaddr_in);
-+ sprintf(ipaddr_txt, "%d.%d.%d.%d:%d"
-+ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 0) & 0xFF
-+ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 8) & 0xFF
-+ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 16) & 0xFF
-+ , (((struct sockaddr_in*)address)->sin_addr.s_addr >> 24) & 0xFF
-+ , ntohs(((struct sockaddr_in*)address)->sin_port));
-+ break;
-+ case AF_INET6:
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_address_build: "
-+ "found address family AF_INET6.\n");
-+ saddr_len = sizeof(struct sockaddr_in6);
-+ sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x-%x"
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[0])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[1])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[2])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[3])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[4])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[5])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[6])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_addr.s6_addr16[7])
-+ , ntohs(((struct sockaddr_in6*)address)->sin6_port));
-+ break;
-+ default:
-+ ERROR("pfkey_address_build: "
-+ "address->sa_family=%d not supported.\n",
-+ address->sa_family);
-+ SENDERR(EPFNOSUPPORT);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_address_build: "
-+ "found address=%s.\n",
-+ ipaddr_txt);
-+ if(prefixlen != 0) {
-+ ERROR("pfkey_address_build: "
-+ "address prefixes not supported yet.\n");
-+ SENDERR(EAFNOSUPPORT); /* not supported yet */
-+ }
-+
-+ /* allocate some memory for the extension */
-+ pfkey_address = (struct sadb_address*)
-+ MALLOC(ALIGN_N(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN));
-+ *pfkey_ext = (struct sadb_ext*)pfkey_address;
-+
-+ if(pfkey_address == NULL ) {
-+ ERROR("pfkey_lifetime_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_address,
-+ 0,
-+ ALIGN_N(sizeof(struct sadb_address) + saddr_len,
-+ IPSEC_PFKEYv2_ALIGN));
-+
-+ pfkey_address->sadb_address_len = DIVUP(sizeof(struct sadb_address) + saddr_len,
-+ IPSEC_PFKEYv2_ALIGN);
-+
-+ pfkey_address->sadb_address_exttype = exttype;
-+ pfkey_address->sadb_address_proto = proto;
-+ pfkey_address->sadb_address_prefixlen = prefixlen;
-+ pfkey_address->sadb_address_reserved = 0;
-+
-+ memcpy((char*)pfkey_address + sizeof(struct sadb_address),
-+ address,
-+ saddr_len);
-+
-+#if 0
-+ for(i = 0; i < sizeof(struct sockaddr_in) - offsetof(struct sockaddr_in, sin_zero); i++) {
-+ pfkey_address_s_ska.sin_zero[i] = 0;
-+ }
-+#endif
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_address_build: "
-+ "successful created len: %d.\n", pfkey_address->sadb_address_len);
-+
-+ errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_key_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint16_t key_bits,
-+ unsigned char * key)
-+{
-+ int error = 0;
-+ struct sadb_key *pfkey_key = (struct sadb_key *)*pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_key_build:\n");
-+ /* sanity checks... */
-+ if(pfkey_key) {
-+ ERROR("pfkey_key_build: "
-+ "why is pfkey_key already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!key_bits) {
-+ ERROR("pfkey_key_build: "
-+ "key_bits is zero, it must be non-zero.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if( !((exttype == SADB_EXT_KEY_AUTH) || (exttype == SADB_EXT_KEY_ENCRYPT))) {
-+ ERROR("pfkey_key_build: "
-+ "unsupported extension type=%d.\n",
-+ exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_key = (struct sadb_key*)
-+ MALLOC(sizeof(struct sadb_key) +
-+ DIVUP(key_bits, 64) * IPSEC_PFKEYv2_ALIGN);
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_key;
-+
-+ if(pfkey_key == NULL) {
-+ ERROR("pfkey_key_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_key,
-+ 0,
-+ sizeof(struct sadb_key) +
-+ DIVUP(key_bits, 64) * IPSEC_PFKEYv2_ALIGN);
-+
-+ pfkey_key->sadb_key_len = DIVUP(sizeof(struct sadb_key) * IPSEC_PFKEYv2_ALIGN + key_bits,
-+ 64);
-+ pfkey_key->sadb_key_exttype = exttype;
-+ pfkey_key->sadb_key_bits = key_bits;
-+ pfkey_key->sadb_key_reserved = 0;
-+ memcpy((char*)pfkey_key + sizeof(struct sadb_key),
-+ key,
-+ DIVUP(key_bits, 8));
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_ident_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint16_t ident_type,
-+ uint64_t ident_id,
-+ uint8_t ident_len,
-+ char* ident_string)
-+{
-+ int error = 0;
-+ struct sadb_ident *pfkey_ident = (struct sadb_ident *)*pfkey_ext;
-+ int data_len = ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_ident_build:\n");
-+ /* sanity checks... */
-+ if(pfkey_ident) {
-+ ERROR("pfkey_ident_build: "
-+ "why is pfkey_ident already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if( ! ((exttype == SADB_EXT_IDENTITY_SRC) ||
-+ (exttype == SADB_EXT_IDENTITY_DST))) {
-+ ERROR("pfkey_ident_build: "
-+ "unsupported extension type=%d.\n",
-+ exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if((ident_type == SADB_IDENTTYPE_RESERVED)) {
-+ ERROR("pfkey_ident_build: "
-+ "ident_type must be non-zero.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(ident_type > SADB_IDENTTYPE_MAX) {
-+ ERROR("pfkey_ident_build: "
-+ "identtype=%d out of range.\n",
-+ ident_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(((ident_type == SADB_IDENTTYPE_PREFIX) ||
-+ (ident_type == SADB_IDENTTYPE_FQDN)) &&
-+ !ident_string) {
-+ ERROR("pfkey_ident_build: "
-+ "string required to allocate size of extension.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+#if 0
-+ if((ident_type == SADB_IDENTTYPE_USERFQDN) ) {
-+ }
-+#endif
-+
-+ pfkey_ident = (struct sadb_ident*)
-+ MALLOC(ident_len * IPSEC_PFKEYv2_ALIGN);
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_ident;
-+
-+ if(pfkey_ident == NULL) {
-+ ERROR("pfkey_ident_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_ident, 0, ident_len * IPSEC_PFKEYv2_ALIGN);
-+
-+ pfkey_ident->sadb_ident_len = ident_len;
-+ pfkey_ident->sadb_ident_exttype = exttype;
-+ pfkey_ident->sadb_ident_type = ident_type;
-+ pfkey_ident->sadb_ident_reserved = 0;
-+ pfkey_ident->sadb_ident_id = ident_id;
-+ memcpy((char*)pfkey_ident + sizeof(struct sadb_ident),
-+ ident_string,
-+ data_len);
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_sens_build(struct sadb_ext** pfkey_ext,
-+ uint32_t dpd,
-+ uint8_t sens_level,
-+ uint8_t sens_len,
-+ uint64_t* sens_bitmap,
-+ uint8_t integ_level,
-+ uint8_t integ_len,
-+ uint64_t* integ_bitmap)
-+{
-+ int error = 0;
-+ struct sadb_sens *pfkey_sens = (struct sadb_sens *)*pfkey_ext;
-+ int i;
-+ uint64_t* bitmap;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sens_build:\n");
-+ /* sanity checks... */
-+ if(pfkey_sens) {
-+ ERROR("pfkey_sens_build: "
-+ "why is pfkey_sens already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_sens_build: "
-+ "Sorry, I can't build exttype=%d yet.\n",
-+ (*pfkey_ext)->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+
-+ pfkey_sens = (struct sadb_sens*)
-+ MALLOC(sizeof(struct sadb_sens) +
-+ (sens_len + integ_len) * sizeof(uint64_t));
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_sens;
-+
-+ if(pfkey_sens == NULL) {
-+ ERROR("pfkey_sens_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_sens,
-+ 0,
-+ sizeof(struct sadb_sens) +
-+ (sens_len + integ_len) * sizeof(uint64_t));
-+
-+ pfkey_sens->sadb_sens_len = (sizeof(struct sadb_sens) +
-+ (sens_len + integ_len) * sizeof(uint64_t)) / IPSEC_PFKEYv2_ALIGN;
-+ pfkey_sens->sadb_sens_exttype = SADB_EXT_SENSITIVITY;
-+ pfkey_sens->sadb_sens_dpd = dpd;
-+ pfkey_sens->sadb_sens_sens_level = sens_level;
-+ pfkey_sens->sadb_sens_sens_len = sens_len;
-+ pfkey_sens->sadb_sens_integ_level = integ_level;
-+ pfkey_sens->sadb_sens_integ_len = integ_len;
-+ pfkey_sens->sadb_sens_reserved = 0;
-+
-+ bitmap = (uint64_t*)((char*)pfkey_ext + sizeof(struct sadb_sens));
-+ for(i = 0; i < sens_len; i++) {
-+ *bitmap = sens_bitmap[i];
-+ bitmap++;
-+ }
-+ for(i = 0; i < integ_len; i++) {
-+ *bitmap = integ_bitmap[i];
-+ bitmap++;
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_prop_build(struct sadb_ext** pfkey_ext,
-+ uint8_t replay,
-+ unsigned int comb_num,
-+ struct sadb_comb* comb)
-+{
-+ int error = 0;
-+ int i;
-+ struct sadb_prop *pfkey_prop = (struct sadb_prop *)*pfkey_ext;
-+ struct sadb_comb *combp;
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_prop_build:\n");
-+ /* sanity checks... */
-+ if(pfkey_prop) {
-+ ERROR("pfkey_prop_build: "
-+ "why is pfkey_prop already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_prop = (struct sadb_prop*)
-+ MALLOC(sizeof(struct sadb_prop) +
-+ comb_num * sizeof(struct sadb_comb));
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_prop;
-+
-+ if(pfkey_prop == NULL) {
-+ ERROR("pfkey_prop_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_prop,
-+ 0,
-+ sizeof(struct sadb_prop) +
-+ comb_num * sizeof(struct sadb_comb));
-+
-+ pfkey_prop->sadb_prop_len = (sizeof(struct sadb_prop) +
-+ comb_num * sizeof(struct sadb_comb)) / IPSEC_PFKEYv2_ALIGN;
-+
-+ pfkey_prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
-+ pfkey_prop->sadb_prop_replay = replay;
-+
-+ for(i=0; i<3; i++) {
-+ pfkey_prop->sadb_prop_reserved[i] = 0;
-+ }
-+
-+ combp = (struct sadb_comb*)((char*)*pfkey_ext + sizeof(struct sadb_prop));
-+ for(i = 0; i < comb_num; i++) {
-+ memcpy (combp, &(comb[i]), sizeof(struct sadb_comb));
-+ combp++;
-+ }
-+
-+#if 0
-+ uint8_t sadb_comb_auth;
-+ uint8_t sadb_comb_encrypt;
-+ uint16_t sadb_comb_flags;
-+ uint16_t sadb_comb_auth_minbits;
-+ uint16_t sadb_comb_auth_maxbits;
-+ uint16_t sadb_comb_encrypt_minbits;
-+ uint16_t sadb_comb_encrypt_maxbits;
-+ uint32_t sadb_comb_reserved;
-+ uint32_t sadb_comb_soft_allocations;
-+ uint32_t sadb_comb_hard_allocations;
-+ uint64_t sadb_comb_soft_bytes;
-+ uint64_t sadb_comb_hard_bytes;
-+ uint64_t sadb_comb_soft_addtime;
-+ uint64_t sadb_comb_hard_addtime;
-+ uint64_t sadb_comb_soft_usetime;
-+ uint64_t sadb_comb_hard_usetime;
-+ uint32_t sadb_comb_soft_packets;
-+ uint32_t sadb_comb_hard_packets;
-+#endif
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_supported_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ unsigned int alg_num,
-+ struct sadb_alg* alg)
-+{
-+ int error = 0;
-+ unsigned int i;
-+ struct sadb_supported *pfkey_supported = (struct sadb_supported *)*pfkey_ext;
-+ struct sadb_alg *pfkey_alg;
-+
-+ /* sanity checks... */
-+ if(pfkey_supported) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_supported_build: "
-+ "why is pfkey_supported already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if( !((exttype == SADB_EXT_SUPPORTED_AUTH) || (exttype == SADB_EXT_SUPPORTED_ENCRYPT))) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_supported_build: "
-+ "unsupported extension type=%d.\n",
-+ exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ pfkey_supported = (struct sadb_supported*)
-+ MALLOC(sizeof(struct sadb_supported) +
-+ alg_num *
-+ sizeof(struct sadb_alg));
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_supported;
-+
-+ if(pfkey_supported == NULL) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_supported_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_supported,
-+ 0,
-+ sizeof(struct sadb_supported) +
-+ alg_num *
-+ sizeof(struct sadb_alg));
-+
-+ pfkey_supported->sadb_supported_len = (sizeof(struct sadb_supported) +
-+ alg_num *
-+ sizeof(struct sadb_alg)) /
-+ IPSEC_PFKEYv2_ALIGN;
-+ pfkey_supported->sadb_supported_exttype = exttype;
-+ pfkey_supported->sadb_supported_reserved = 0;
-+
-+ pfkey_alg = (struct sadb_alg*)((char*)pfkey_supported + sizeof(struct sadb_supported));
-+ for(i = 0; i < alg_num; i++) {
-+ memcpy (pfkey_alg, &(alg[i]), sizeof(struct sadb_alg));
-+ pfkey_alg->sadb_alg_reserved = 0;
-+ pfkey_alg++;
-+ }
-+
-+#if 0
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_supported_build: "
-+ "Sorry, I can't build exttype=%d yet.\n",
-+ (*pfkey_ext)->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+
-+ uint8_t sadb_alg_id;
-+ uint8_t sadb_alg_ivlen;
-+ uint16_t sadb_alg_minbits;
-+ uint16_t sadb_alg_maxbits;
-+ uint16_t sadb_alg_reserved;
-+#endif
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_spirange_build(struct sadb_ext** pfkey_ext,
-+ uint16_t exttype,
-+ uint32_t min, /* in network order */
-+ uint32_t max) /* in network order */
-+{
-+ int error = 0;
-+ struct sadb_spirange *pfkey_spirange = (struct sadb_spirange *)*pfkey_ext;
-+
-+ /* sanity checks... */
-+ if(pfkey_spirange) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_spirange_build: "
-+ "why is pfkey_spirange already pointing to something?\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(ntohl(max) < ntohl(min)) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_spirange_build: "
-+ "minspi=%08x must be < maxspi=%08x.\n",
-+ ntohl(min),
-+ ntohl(max));
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(ntohl(min) <= 255) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_spirange_build: "
-+ "minspi=%08x must be > 255.\n",
-+ ntohl(min));
-+ SENDERR(EEXIST);
-+ }
-+
-+ pfkey_spirange = (struct sadb_spirange*)
-+ MALLOC(sizeof(struct sadb_spirange));
-+
-+ *pfkey_ext = (struct sadb_ext*)pfkey_spirange;
-+
-+ if(pfkey_spirange == NULL) {
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_spirange_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+ memset(pfkey_spirange,
-+ 0,
-+ sizeof(struct sadb_spirange));
-+
-+ pfkey_spirange->sadb_spirange_len = sizeof(struct sadb_spirange) / IPSEC_PFKEYv2_ALIGN;
-+
-+ pfkey_spirange->sadb_spirange_exttype = SADB_EXT_SPIRANGE;
-+ pfkey_spirange->sadb_spirange_min = min;
-+ pfkey_spirange->sadb_spirange_max = max;
-+ pfkey_spirange->sadb_spirange_reserved = 0;
-+ errlab:
-+ return error;
-+}
-+
-+/*
-+ * pfkey_x_kmprivate_build - placeholder builder for the X-KMPRIVATE extension.
-+ *
-+ * pfkey_ext: caller's extension slot; it must hold NULL on entry.
-+ *
-+ * Building this extension is not supported yet: every call leaves through
-+ * SENDERR(EINVAL), so the allocation code at the end is currently
-+ * unreachable and is kept only for when the extension gets implemented.
-+ *
-+ * Returns the local 'error' value as left by SENDERR().
-+ */
-+int
-+pfkey_x_kmprivate_build(struct sadb_ext** pfkey_ext)
-+{
-+	int error = 0;
-+	struct sadb_x_kmprivate *pfkey_x_kmprivate = (struct sadb_x_kmprivate *)*pfkey_ext;
-+
-+	/* sanity checks... */
-+	if(pfkey_x_kmprivate) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_kmprivate_build: "
-+			  "why is pfkey_x_kmprivate already pointing to something?\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/*
-+	 * Once the check above has passed, pfkey_x_kmprivate and *pfkey_ext
-+	 * are both NULL, so neither may be dereferenced here; report the
-+	 * extension type by its constant instead of reading it through the
-+	 * empty slot.
-+	 */
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_kmprivate_build: "
-+		  "Sorry, I can't build exttype=%d yet.\n",
-+		  K_SADB_X_EXT_KMPRIVATE);
-+	SENDERR(EINVAL); /* don't process these yet */
-+
-+	/* not reached until the SENDERR() above is removed */
-+	pfkey_x_kmprivate = (struct sadb_x_kmprivate*)
-+		MALLOC(sizeof(struct sadb_x_kmprivate));
-+
-+	*pfkey_ext = (struct sadb_ext*)pfkey_x_kmprivate;
-+
-+	if(pfkey_x_kmprivate == NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_kmprivate_build: "
-+			  "memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+	memset(pfkey_x_kmprivate,
-+	       0,
-+	       sizeof(struct sadb_x_kmprivate));
-+
-+	pfkey_x_kmprivate->sadb_x_kmprivate_len =
-+		sizeof(struct sadb_x_kmprivate) / IPSEC_PFKEYv2_ALIGN;
-+
-+	pfkey_x_kmprivate->sadb_x_kmprivate_exttype = K_SADB_X_EXT_KMPRIVATE;
-+	pfkey_x_kmprivate->sadb_x_kmprivate_reserved = 0;
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_x_satype_build - allocate and fill an X-SATYPE2 extension.
-+ *
-+ * pfkey_ext: caller's extension slot; it has to hold NULL on entry and
-+ *            receives the result of MALLOC() (NULL on allocation failure).
-+ * satype:    SA type to record; 0 and values above K_SADB_SATYPE_MAX are
-+ *            rejected.
-+ *
-+ * Returns the local 'error' value: 0 unless one of the SENDERR() paths
-+ * (EINVAL for bad arguments, ENOMEM for allocation failure) was taken.
-+ */
-+int
-+pfkey_x_satype_build(struct sadb_ext** pfkey_ext,
-+		     uint8_t satype)
-+{
-+	struct sadb_x_satype *ext = (struct sadb_x_satype *)*pfkey_ext;
-+	int error = 0;
-+	int idx;
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_satype_build:\n");
-+
-+	/* the slot must still be empty */
-+	if(ext != NULL) {
-+		ERROR("pfkey_x_satype_build: "
-+		      "why is pfkey_x_satype already pointing to something?\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* the SA type must lie in 1..K_SADB_SATYPE_MAX */
-+	if(satype == 0) {
-+		ERROR("pfkey_x_satype_build: "
-+		      "SA type not set, must be non-zero.\n");
-+		SENDERR(EINVAL);
-+	}
-+	if(satype > K_SADB_SATYPE_MAX) {
-+		ERROR("pfkey_x_satype_build: "
-+		      "satype %d > max %d\n",
-+		      satype, K_SADB_SATYPE_MAX);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* publish the allocation result to the caller before checking it */
-+	ext = (struct sadb_x_satype*)MALLOC(sizeof(*ext));
-+	*pfkey_ext = (struct sadb_ext*)ext;
-+	if(ext == NULL) {
-+		ERROR("pfkey_x_satype_build: "
-+		      "memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	memset(ext, 0, sizeof(*ext));
-+	ext->sadb_x_satype_exttype = K_SADB_X_EXT_SATYPE2;
-+	ext->sadb_x_satype_len = sizeof(*ext) / IPSEC_PFKEYv2_ALIGN;
-+	ext->sadb_x_satype_satype = satype;
-+	for(idx = 2; idx >= 0; idx--) {
-+		ext->sadb_x_satype_reserved[idx] = 0;
-+	}
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_x_debug_build - allocate an X-DEBUG extension carrying debug values.
-+ *
-+ * pfkey_ext: caller's extension slot; must hold NULL on entry and receives
-+ *            the MALLOC() result (NULL on allocation failure).
-+ * tunnel .. verbose: twelve values copied unchanged into the matching
-+ *            sadb_x_debug_* members.
-+ *
-+ * Returns 'error': 0 unless SENDERR(EINVAL) (slot already in use) or
-+ * SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int
-+pfkey_x_debug_build(struct sadb_ext** pfkey_ext,
-+		    uint32_t tunnel,
-+		    uint32_t netlink,
-+		    uint32_t xform,
-+		    uint32_t eroute,
-+		    uint32_t spi,
-+		    uint32_t radij,
-+		    uint32_t esp,
-+		    uint32_t ah,
-+		    uint32_t rcv,
-+		    uint32_t pfkey,
-+		    uint32_t ipcomp,
-+		    uint32_t verbose)
-+{
-+	struct sadb_x_debug *dbg = (struct sadb_x_debug *)*pfkey_ext;
-+	int error = 0;
-+	int n;
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_debug_build:\n");
-+
-+	/* the slot must still be empty */
-+	if(dbg != NULL) {
-+		ERROR("pfkey_x_debug_build: "
-+		      "why is pfkey_x_debug already pointing to something?\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_debug_build: "
-+		  "tunnel=%x netlink=%x xform=%x eroute=%x spi=%x radij=%x esp=%x ah=%x rcv=%x pfkey=%x ipcomp=%x verbose=%x?\n",
-+		  tunnel, netlink, xform, eroute, spi, radij, esp, ah, rcv, pfkey, ipcomp, verbose);
-+
-+	/* publish the allocation result to the caller before checking it */
-+	dbg = (struct sadb_x_debug*)MALLOC(sizeof(*dbg));
-+	*pfkey_ext = (struct sadb_ext*)dbg;
-+	if(dbg == NULL) {
-+		ERROR("pfkey_x_debug_build: "
-+		      "memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	/* the block is not zero-filled; members are assigned one by one */
-+	dbg->sadb_x_debug_exttype = K_SADB_X_EXT_DEBUG;
-+	dbg->sadb_x_debug_len = sizeof(*dbg) / IPSEC_PFKEYv2_ALIGN;
-+
-+	dbg->sadb_x_debug_tunnel  = tunnel;
-+	dbg->sadb_x_debug_netlink = netlink;
-+	dbg->sadb_x_debug_xform   = xform;
-+	dbg->sadb_x_debug_eroute  = eroute;
-+	dbg->sadb_x_debug_spi     = spi;
-+	dbg->sadb_x_debug_radij   = radij;
-+	dbg->sadb_x_debug_esp     = esp;
-+	dbg->sadb_x_debug_ah      = ah;
-+	dbg->sadb_x_debug_rcv     = rcv;
-+	dbg->sadb_x_debug_pfkey   = pfkey;
-+	dbg->sadb_x_debug_ipcomp  = ipcomp;
-+	dbg->sadb_x_debug_verbose = verbose;
-+
-+	for(n = 3; n >= 0; n--) {
-+		dbg->sadb_x_debug_reserved[n] = 0;
-+	}
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_x_nat_t_type_build - allocate and fill an X-NAT-T-type extension.
-+ *
-+ * pfkey_ext: caller's extension slot; must hold NULL on entry and receives
-+ *            the MALLOC() result (NULL on allocation failure).
-+ * type:      value stored in sadb_x_nat_t_type_type.
-+ *
-+ * Returns 'error': 0 unless SENDERR(EINVAL) (slot already in use) or
-+ * SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int
-+pfkey_x_nat_t_type_build(struct sadb_ext** pfkey_ext,
-+			 uint8_t type)
-+{
-+	struct sadb_x_nat_t_type *natt = (struct sadb_x_nat_t_type *)*pfkey_ext;
-+	int error = 0;
-+	int k;
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_nat_t_type_build:\n");
-+
-+	/* the slot must still be empty */
-+	if(natt != NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_nat_t_type_build: "
-+			  "why is pfkey_x_nat_t_type already pointing to something?\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_nat_t_type_build: "
-+		  "type=%d\n", type);
-+
-+	/* publish the allocation result to the caller before checking it */
-+	natt = (struct sadb_x_nat_t_type*)MALLOC(sizeof(*natt));
-+	*pfkey_ext = (struct sadb_ext*)natt;
-+	if(natt == NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_nat_t_type_build: "
-+			  "memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	natt->sadb_x_nat_t_type_exttype = K_SADB_X_EXT_NAT_T_TYPE;
-+	natt->sadb_x_nat_t_type_len = sizeof(*natt) / IPSEC_PFKEYv2_ALIGN;
-+	natt->sadb_x_nat_t_type_type = type;
-+	for(k = 2; k >= 0; k--) {
-+		natt->sadb_x_nat_t_type_reserved[k] = 0;
-+	}
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_x_nat_t_port_build - allocate and fill a NAT-T port extension.
-+ *
-+ * pfkey_ext: caller's extension slot; must hold NULL on entry and receives
-+ *            the MALLOC() result (NULL on allocation failure).
-+ * exttype:   K_SADB_X_EXT_NAT_T_SPORT or K_SADB_X_EXT_NAT_T_DPORT; any
-+ *            other value is rejected.
-+ * port:      value stored in sadb_x_nat_t_port_port.
-+ *
-+ * Returns 'error': 0 unless SENDERR(EINVAL) (slot in use / bad exttype) or
-+ * SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int
-+pfkey_x_nat_t_port_build(struct sadb_ext** pfkey_ext,
-+			 uint16_t exttype,
-+			 uint16_t port)
-+{
-+	struct sadb_x_nat_t_port *natport = (struct sadb_x_nat_t_port *)*pfkey_ext;
-+	int error = 0;
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_nat_t_port_build:\n");
-+
-+	/* the slot must still be empty */
-+	if(natport != NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_nat_t_port_build: "
-+			  "why is pfkey_x_nat_t_port already pointing to something?\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* only the source-port and destination-port extension types are built */
-+	if(exttype != K_SADB_X_EXT_NAT_T_SPORT &&
-+	   exttype != K_SADB_X_EXT_NAT_T_DPORT) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_nat_t_port_build: "
-+			  "unrecognised ext_type=%d.\n",
-+			  exttype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,
-+		  "pfkey_x_nat_t_port_build: "
-+		  "ext=%d, port=%d\n", exttype, port);
-+
-+	/* publish the allocation result to the caller before checking it */
-+	natport = (struct sadb_x_nat_t_port*)MALLOC(sizeof(*natport));
-+	*pfkey_ext = (struct sadb_ext*)natport;
-+	if(natport == NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_BUILD,
-+			  "pfkey_x_nat_t_port_build: "
-+			  "memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	natport->sadb_x_nat_t_port_exttype = exttype;
-+	natport->sadb_x_nat_t_port_len = sizeof(*natport) / IPSEC_PFKEYv2_ALIGN;
-+	natport->sadb_x_nat_t_port_port = port;
-+	natport->sadb_x_nat_t_port_reserved = 0;
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_x_protocol_build - allocate and fill an X-ext-protocol extension.
-+ *
-+ * pfkey_ext: caller's extension slot; must hold NULL on entry.  It is only
-+ *            written after the allocation has succeeded.
-+ * protocol:  value stored in sadb_protocol_proto.
-+ *
-+ * Returns 'error': 0 unless SENDERR(EINVAL) (slot already in use) or
-+ * SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int pfkey_x_protocol_build(struct sadb_ext **pfkey_ext,
-+			   uint8_t protocol)
-+{
-+	struct sadb_protocol *proto_ext = (struct sadb_protocol *)*pfkey_ext;
-+	int error = 0;
-+
-+	DEBUGGING(PF_KEY_DEBUG_BUILD,"pfkey_x_protocol_build: protocol=%u\n", protocol);
-+
-+	/* the slot must still be empty */
-+	if (proto_ext) {
-+		ERROR("pfkey_x_protocol_build: bogus protocol pointer\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	proto_ext = (struct sadb_protocol*)MALLOC(sizeof(*proto_ext));
-+	if (!proto_ext) {
-+		ERROR("pfkey_build: memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	proto_ext->sadb_protocol_exttype = K_SADB_X_EXT_PROTOCOL;
-+	proto_ext->sadb_protocol_len = sizeof(*proto_ext) / IPSEC_PFKEYv2_ALIGN;
-+	proto_ext->sadb_protocol_proto = protocol;
-+	proto_ext->sadb_protocol_flags = 0;
-+	proto_ext->sadb_protocol_reserved2 = 0;
-+	*pfkey_ext = (struct sadb_ext *)proto_ext;
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_outif_build - allocate and fill an X-plumbif extension.
-+ *
-+ * pfkey_ext: caller's extension slot; overwritten with the new extension
-+ *            once the allocation has succeeded (its previous content is
-+ *            not checked).
-+ * outif:     value stored in sadb_x_outif_ifnum.
-+ *
-+ * Returns 'error': 0 unless SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int pfkey_outif_build(struct sadb_ext **pfkey_ext,
-+		      uint16_t outif)
-+{
-+	struct sadb_x_plumbif *plumb;
-+	int error = 0;
-+
-+	plumb = (struct sadb_x_plumbif*)MALLOC(sizeof(*plumb));
-+	if (!plumb) {
-+		ERROR("pfkey_build: memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	plumb->sadb_x_outif_exttype = K_SADB_X_EXT_PLUMBIF;
-+	plumb->sadb_x_outif_len = IPSEC_PFKEYv2_WORDS(sizeof(*plumb));
-+	plumb->sadb_x_outif_ifnum = outif;
-+	*pfkey_ext = (struct sadb_ext *)plumb;
-+
-+ errlab:
-+	return error;
-+}
-+
-+
-+/*
-+ * pfkey_saref_build - allocate and fill an X-saref extension.
-+ *
-+ * pfkey_ext: caller's extension slot; overwritten with the new extension
-+ *            once the allocation has succeeded (its previous content is
-+ *            not checked).
-+ * in:        stored in sadb_x_saref_me.
-+ * out:       stored in sadb_x_saref_him.
-+ *
-+ * Returns 'error': 0 unless SENDERR(ENOMEM) (allocation failed) was taken.
-+ */
-+int pfkey_saref_build(struct sadb_ext **pfkey_ext,
-+		      IPsecSAref_t in, IPsecSAref_t out)
-+{
-+	int error = 0;
-+	struct sadb_x_saref* s;
-+
-+	/* +4 because sadb_x_saref is not a multiple of 8 bytes */
-+
-+	if ((s = (struct sadb_x_saref*)MALLOC(sizeof(*s)+4)) == 0) {
-+		ERROR("pfkey_build: memory allocation failed\n");
-+		SENDERR(ENOMEM);
-+	}
-+
-+	/*
-+	 * Zero the whole allocation, padding included: the four trailing
-+	 * bytes are never assigned below, and an extension is later copied
-+	 * by its sadb_ext_len, so they must not carry stale heap contents.
-+	 */
-+	memset(s, 0, sizeof(*s)+4);
-+	*pfkey_ext = (struct sadb_ext *)s;
-+
-+	s->sadb_x_saref_len = IPSEC_PFKEYv2_WORDS(sizeof(*s));
-+	s->sadb_x_saref_exttype = K_SADB_X_EXT_SAREF;
-+	s->sadb_x_saref_me = in;
-+	s->sadb_x_saref_him = out;
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * Table of extension builder functions with K_SADB_EXT_MAX + 1 slots.
-+ * It is compiled only when I_DONT_THINK_THIS_WILL_BE_USEFUL is defined to
-+ * a non-zero value.
-+ * NOTE(review): the slots look like they are indexed by extension type
-+ * number -- confirm.  The declared entry type takes (struct sadb_msg*,
-+ * struct sadb_ext*), which does not match the builders defined in this
-+ * file (they take struct sadb_ext** plus per-extension arguments), and the
-+ * last entry names pfkey_x_ext_debug_build while the debug builder in this
-+ * file is called pfkey_x_debug_build -- presumably stale; verify before
-+ * enabling.
-+ */
-+#if defined(I_DONT_THINK_THIS_WILL_BE_USEFUL) && I_DONT_THINK_THIS_WILL_BE_USEFUL
-+int (*ext_default_builders[K_SADB_EXT_MAX +1])(struct sadb_msg*, struct sadb_ext*)
-+ =
-+{
-+ NULL, /* pfkey_msg_build, */
-+ pfkey_sa_build,
-+ pfkey_lifetime_build,
-+ pfkey_lifetime_build,
-+ pfkey_lifetime_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_key_build,
-+ pfkey_key_build,
-+ pfkey_ident_build,
-+ pfkey_ident_build,
-+ pfkey_sens_build,
-+ pfkey_prop_build,
-+ pfkey_supported_build,
-+ pfkey_supported_build,
-+ pfkey_spirange_build,
-+ pfkey_x_kmprivate_build,
-+ pfkey_x_satype_build,
-+ pfkey_sa_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_address_build,
-+ pfkey_x_ext_debug_build
-+};
-+#endif
-+/*
-+ * pfkey_msg_build - assemble one contiguous PF_KEY message.
-+ *
-+ * pfkey_msg:  out; receives the newly MALLOC()ed message buffer.
-+ * extensions: array indexed by extension type; slot 0 must be set and is
-+ *             copied as the struct sadb_msg header, slots
-+ *             1..K_SADB_EXT_MAX are optional extensions that are appended
-+ *             in index order.
-+ * dir:        handed to pfkey_permitted_extension(),
-+ *             pfkey_extensions_missing() and pfkey_msg_parse()
-+ *             (presumably selects the inbound/outbound rule set -- confirm).
-+ *
-+ * Returns 'error': 0 if nothing failed, otherwise whatever SENDERR() or
-+ * pfkey_msg_parse() left in it.
-+ * NOTE(review): when a failure happens after the allocation, *pfkey_msg is
-+ * not released here; it looks like the caller is expected to free it --
-+ * confirm against the callers.
-+ */
-+int
-+pfkey_msg_build(struct sadb_msg **pfkey_msg, struct sadb_ext *extensions[], int dir)
-+{
-+ int error = 0;
-+ unsigned ext;
-+ unsigned total_size;
-+ struct sadb_ext *pfkey_ext;
-+ pfkey_ext_track extensions_seen = 0;
-+#ifndef __KERNEL__
-+ struct sadb_ext *extensions_check[K_SADB_EXT_MAX + 1]; /* output array for the pfkey_msg_parse() call at the end */
-+#endif
-+
-+ if(!extensions[0]) {
-+ ERROR("pfkey_msg_build: "
-+ "extensions[0] must be specified (struct sadb_msg).\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* figure out the total size for all the requested extensions;
-+  * total_size counts units of IPSEC_PFKEYv2_ALIGN bytes (it is multiplied
-+  * by IPSEC_PFKEYv2_ALIGN for the allocation below) */
-+ total_size = IPSEC_PFKEYv2_WORDS(sizeof(struct sadb_msg));
-+ for(ext = 1; ext <= K_SADB_EXT_MAX; ext++) {
-+ if(extensions[ext]) {
-+ total_size += (extensions[ext])->sadb_ext_len;
-+ }
-+ }
-+
-+ /* allocate that much space */
-+ *pfkey_msg = (struct sadb_msg*)MALLOC(total_size * IPSEC_PFKEYv2_ALIGN);
-+ if(*pfkey_msg == NULL) {
-+ ERROR("pfkey_msg_build: "
-+ "memory allocation failed\n");
-+ SENDERR(ENOMEM);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_msg_build: "
-+ "pfkey_msg=0p%p allocated %lu bytes, &(extensions[0])=0p%p\n",
-+ *pfkey_msg,
-+ (unsigned long)(total_size * IPSEC_PFKEYv2_ALIGN),
-+ &(extensions[0]));
-+
-+ memcpy(*pfkey_msg,
-+ extensions[0],
-+ sizeof(struct sadb_msg));
-+ (*pfkey_msg)->sadb_msg_len = total_size; /* replaces the length copied from extensions[0] */
-+ (*pfkey_msg)->sadb_msg_reserved = 0;
-+ extensions_seen = 1 ; /* presumably bit 0, i.e. the header slot, counts as seen -- confirm */
-+
-+ /*
-+ * point pfkey_ext to immediately after the space for the header,
-+ * i.e. at the first extension location.
-+ */
-+ pfkey_ext = (struct sadb_ext*)(((char*)(*pfkey_msg)) + sizeof(struct sadb_msg));
-+
-+ for(ext = 1; ext <= K_SADB_EXT_MAX; ext++) {
-+ /* copy from extension[ext] to buffer */
-+ if(extensions[ext]) {
-+ /* Is this type of extension permitted for this type of message? */
-+ if(!pfkey_permitted_extension(dir,(*pfkey_msg)->sadb_msg_type,ext)) {
-+ ERROR("ext type %d not permitted for %d/%d (build)\n",
-+ ext,
-+ dir,(*pfkey_msg)->sadb_msg_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_BUILD,
-+ "pfkey_msg_build: "
-+ "copying %lu bytes from extensions[%u] (type=%d)\n",
-+ (unsigned long)(extensions[ext]->sadb_ext_len * IPSEC_PFKEYv2_ALIGN),
-+ ext,
-+ extensions[ext]->sadb_ext_type);
-+
-+ {
-+ char *pfkey_ext_c = (char *)pfkey_ext;
-+
-+ pfkey_ext_c += (extensions[ext])->sadb_ext_len * IPSEC_PFKEYv2_ALIGN; /* position just past this extension */
-+
-+#if 0
-+ printf("memcpy(%p,%p,%d) -> %p %p:%p\n", pfkey_ext,
-+ extensions[ext],
-+ (extensions[ext])->sadb_ext_len * IPSEC_PFKEYv2_ALIGN,
-+ pfkey_ext_c, (*pfkey_msg), (char *)(*pfkey_msg)+(total_size*IPSEC_PFKEYv2_ALIGN));
-+#endif
-+ memcpy(pfkey_ext,
-+ extensions[ext],
-+ (extensions[ext])->sadb_ext_len * IPSEC_PFKEYv2_ALIGN);
-+ pfkey_ext = (struct sadb_ext *)pfkey_ext_c; /* advance the write position */
-+ }
-+
-+ /* Mark that we have seen this extension */
-+ pfkey_mark_extension(ext,&extensions_seen);
-+ }
-+ }
-+
-+ if(pfkey_extensions_missing(dir,(*pfkey_msg)->sadb_msg_type,extensions_seen)) {
-+ ERROR("required extensions missing. seen=%08llx\n", (unsigned long long)extensions_seen);
-+ SENDERR(EINVAL);
-+ }
-+
-+#ifndef __KERNEL__
-+/*
-+ * this is silly, there is no need to reparse the message that we just built.
-+ *
-+ */
-+ if((error = pfkey_msg_parse(*pfkey_msg,NULL,extensions_check, dir))) {
-+ ERROR("pfkey_msg_build: "
-+ "Trouble parsing newly built pfkey message, error=%d.\n",
-+ error);
-+ SENDERR(-error); /* NOTE(review): sign is flipped here, presumably because SENDERR() negates its argument -- confirm */
-+ }
-+#endif
-+
-+errlab:
-+
-+ return error;
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_debug.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,144 @@
-+/*
-+ * @(#) pfkey version 2 debugging messages
-+ *
-+ * Copyright (C) 2001 Richard Guy Briggs <rgb@openswan.org>
-+ * and Michael Richardson <mcr@openswan.org>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: pfkey_v2_debug.c,v 1.11 2005/04/06 17:45:16 mcr Exp $
-+ *
-+ */
-+
-+#ifdef __KERNEL__
-+
-+# include <linux/kernel.h> /* for printk */
-+
-+# include "openswan/ipsec_kversion.h" /* for malloc switch */
-+# ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+# else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+# endif /* MALLOC_SLAB */
-+# include <linux/errno.h> /* error codes */
-+# include <linux/types.h> /* size_t */
-+# include <linux/interrupt.h> /* mark_bh */
-+
-+# include <linux/netdevice.h> /* struct device, and other headers */
-+# include <linux/etherdevice.h> /* eth_type_trans */
-+extern int debug_pfkey;
-+
-+#else /* __KERNEL__ */
-+
-+#if defined(macintosh) || (defined(__MACH__) && defined(__APPLE__))
-+# include <sys/types.h>
-+#else
-+# include <sys/types.h>
-+# include <linux/types.h>
-+# include <linux/errno.h>
-+#endif
-+
-+#endif /* __KERNEL__ */
-+
-+#include "openswan.h"
-+#include "openswan/pfkeyv2.h"
-+#include "openswan/pfkey.h"
-+
-+/*
-+ * This file provides ASCII translations of PF_KEY magic numbers.
-+ *
-+ */
-+/*
-+ * Names of the PF_KEY extension types.  The trailing comment on each entry
-+ * gives the constant and the number the entry is meant for; the table is
-+ * indexed by extension type in pfkey_v2_sadb_ext_string().
-+ */
-+static char *pfkey_sadb_ext_strings[]={
-+ "reserved", /* K_SADB_EXT_RESERVED 0 */
-+ "security-association", /* K_SADB_EXT_SA 1 */
-+ "lifetime-current", /* K_SADB_EXT_LIFETIME_CURRENT 2 */
-+ "lifetime-hard", /* K_SADB_EXT_LIFETIME_HARD 3 */
-+ "lifetime-soft", /* K_SADB_EXT_LIFETIME_SOFT 4 */
-+ "source-address", /* K_SADB_EXT_ADDRESS_SRC 5 */
-+ "destination-address", /* K_SADB_EXT_ADDRESS_DST 6 */
-+ "proxy-address", /* K_SADB_EXT_ADDRESS_PROXY 7 */
-+ "authentication-key", /* K_SADB_EXT_KEY_AUTH 8 */
-+ "cipher-key", /* K_SADB_EXT_KEY_ENCRYPT 9 */
-+ "source-identity", /* K_SADB_EXT_IDENTITY_SRC 10 */
-+ "destination-identity", /* K_SADB_EXT_IDENTITY_DST 11 */
-+ "sensitivity-label", /* K_SADB_EXT_SENSITIVITY 12 */
-+ "proposal", /* K_SADB_EXT_PROPOSAL 13 */
-+ "supported-auth", /* K_SADB_EXT_SUPPORTED_AUTH 14 */
-+ "supported-cipher", /* K_SADB_EXT_SUPPORTED_ENCRYPT 15 */
-+ "spi-range", /* K_SADB_EXT_SPIRANGE 16 */
-+ "X-kmpprivate", /* K_SADB_X_EXT_KMPRIVATE 17 */
-+ "X-satype2", /* K_SADB_X_EXT_SATYPE2 18 */
-+ "X-security-association", /* K_SADB_X_EXT_SA2 19 */
-+ "X-destination-address2", /* K_SADB_X_EXT_ADDRESS_DST2 20 */
-+ "X-source-flow-address", /* K_SADB_X_EXT_ADDRESS_SRC_FLOW 21 */
-+ "X-dest-flow-address", /* K_SADB_X_EXT_ADDRESS_DST_FLOW 22 */
-+ "X-source-mask", /* K_SADB_X_EXT_ADDRESS_SRC_MASK 23 */
-+ "X-dest-mask", /* K_SADB_X_EXT_ADDRESS_DST_MASK 24 */
-+ "X-set-debug", /* K_SADB_X_EXT_DEBUG 25 */
-+ /* NAT_TRAVERSAL */
-+ "X-ext-protocol", /* K_SADB_X_EXT_PROTOCOL 26 */
-+ "X-NAT-T-type", /* K_SADB_X_EXT_NAT_T_TYPE 27 */
-+ "X-NAT-T-sport", /* K_SADB_X_EXT_NAT_T_SPORT 28 */
-+ "X-NAT-T-dport", /* K_SADB_X_EXT_NAT_T_DPORT 29 */
-+ "X-NAT-T-OA", /* K_SADB_X_EXT_NAT_T_OA 30 */
-+ "X-plumbif", /* K_SADB_X_EXT_PLUMBIF 31 */
-+ "X-saref", /* K_SADB_X_EXT_SAREF 32 */
-+};
-+
-+/*
-+ * pfkey_v2_sadb_ext_string - printable name of a PF_KEY extension type.
-+ *
-+ * ext: extension type number.
-+ *
-+ * Returns the pfkey_sadb_ext_strings[] entry for ext, or "unknown-ext"
-+ * when ext is negative, above K_SADB_EXT_MAX, or beyond the end of the
-+ * table.  The result is a string constant and must not be freed.
-+ */
-+const char *
-+pfkey_v2_sadb_ext_string(int ext)
-+{
-+	/* number of names actually present, so the index can never overrun */
-+	const int nnames = (int)(sizeof(pfkey_sadb_ext_strings) /
-+				 sizeof(pfkey_sadb_ext_strings[0]));
-+
-+	/* ext is signed: reject negative values before indexing */
-+	if(ext >= 0 && ext <= K_SADB_EXT_MAX && ext < nnames) {
-+		return pfkey_sadb_ext_strings[ext];
-+	} else {
-+		return "unknown-ext";
-+	}
-+}
-+
-+
-+/*
-+ * Names of the PF_KEY message types, one entry per message type number in
-+ * the order given by the trailing comments.  The last three entries cover
-+ * the message types that follow X_DEBUG in the per-message-type extension
-+ * bitmaps (NAT-T new mapping, plumbif, unplumbif), so that every type up
-+ * to K_SADB_MAX has a name.
-+ * NOTE(review): the K_-prefixed constant names in the last three comments
-+ * are assumed from the naming of the other entries -- confirm against
-+ * pfkeyv2.h.
-+ */
-+static char *pfkey_sadb_type_strings[]={
-+	"reserved",			/* K_SADB_RESERVED */
-+	"getspi",			/* K_SADB_GETSPI */
-+	"update",			/* K_SADB_UPDATE */
-+	"add",				/* K_SADB_ADD */
-+	"delete",			/* K_SADB_DELETE */
-+	"get",				/* K_SADB_GET */
-+	"acquire",			/* K_SADB_ACQUIRE */
-+	"register",			/* K_SADB_REGISTER */
-+	"expire",			/* K_SADB_EXPIRE */
-+	"flush",			/* K_SADB_FLUSH */
-+	"dump",				/* K_SADB_DUMP */
-+	"x-promisc",			/* K_SADB_X_PROMISC */
-+	"x-pchange",			/* K_SADB_X_PCHANGE */
-+	"x-groupsa",			/* K_SADB_X_GRPSA */
-+	"x-addflow(eroute)",		/* K_SADB_X_ADDFLOW */
-+	"x-delflow(eroute)",		/* K_SADB_X_DELFLOW */
-+	"x-debug",			/* K_SADB_X_DEBUG */
-+	"x-nat-t-new-mapping",		/* K_SADB_X_NAT_T_NEW_MAPPING */
-+	"x-plumbif",			/* K_SADB_X_PLUMBIF */
-+	"x-unplumbif",			/* K_SADB_X_UNPLUMBIF */
-+};
-+
-+/*
-+ * pfkey_v2_sadb_type_string - printable name of a PF_KEY message type.
-+ *
-+ * sadb_type: message type number.
-+ *
-+ * Returns the pfkey_sadb_type_strings[] entry for sadb_type, or
-+ * "unknown-sadb-type" when sadb_type is negative, above K_SADB_MAX, or
-+ * beyond the end of the table.  The result is a string constant and must
-+ * not be freed.
-+ */
-+const char *
-+pfkey_v2_sadb_type_string(int sadb_type)
-+{
-+	/*
-+	 * Bound the index by the number of names actually present as well as
-+	 * by K_SADB_MAX, so a table shorter than K_SADB_MAX + 1 entries is
-+	 * never read past its end.
-+	 */
-+	const int nnames = (int)(sizeof(pfkey_sadb_type_strings) /
-+				 sizeof(pfkey_sadb_type_strings[0]));
-+
-+	/* sadb_type is signed: reject negative values before indexing */
-+	if(sadb_type >= 0 && sadb_type <= K_SADB_MAX && sadb_type < nnames) {
-+		return pfkey_sadb_type_strings[sadb_type];
-+	} else {
-+		return "unknown-sadb-type";
-+	}
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_ext_bits.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,814 @@
-+/*
-+ * RFC2367 PF_KEYv2 Key management API message parser
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: pfkey_v2_ext_bits.c,v 1.22 2005/05/11 01:45:31 mcr Exp $
-+ */
-+
-+/*
-+ * Template from klips/net/ipsec/ipsec/ipsec_parse.c.
-+ */
-+
-+char pfkey_v2_ext_bits_c_version[] = "$Id: pfkey_v2_ext_bits.c,v 1.22 2005/05/11 01:45:31 mcr Exp $"; /* RCS $Id$ string of this file, defined as a non-static global */
-+
-+/*
-+ * Some ugly stuff to allow consistent debugging code for use in the
-+ * kernel and in user space
-+*/
-+
-+#if defined(__KERNEL__) && defined(linux)
-+
-+# include <linux/kernel.h> /* for printk */
-+
-+# include "openswan/ipsec_kversion.h" /* for malloc switch */
-+# ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+# else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+# endif /* MALLOC_SLAB */
-+# include <linux/errno.h> /* error codes */
-+# include <linux/types.h> /* size_t */
-+# include <linux/interrupt.h> /* mark_bh */
-+
-+# include <linux/netdevice.h> /* struct device, and other headers */
-+# include <linux/etherdevice.h> /* eth_type_trans */
-+# include <linux/ip.h> /* struct iphdr */
-+# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+# include <linux/ipv6.h>
-+# endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
-+
-+#else /* __KERNEL__ */
-+
-+# include <sys/types.h>
-+# include <sys/errno.h>
-+# include <stdio.h>
-+#endif
-+
-+#include <openswan.h>
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/pfkey_debug.h"
-+
-+
-+pfkey_ext_track extensions_bitmaps[2/*in/out*/][2/*perm/req*/][K_SADB_MAX+1]={
-+
-+/* INBOUND EXTENSIONS */
-+{
-+
-+/* PERMITTED IN */
-+{
-+/* K_SADB_RESERVED */
-+0
-+,
-+/* SADB_GETSPI */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_SPIRANGE
-+,
-+/* SADB_UPDATE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_ADD */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_TYPE
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_OA
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+| 1ULL<<K_SADB_X_EXT_SAREF
-+,
-+/* SADB_DELETE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_GET */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_ACQUIRE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+,
-+/* K_SADB_REGISTER */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* K_SADB_EXPIRE */
-+0
-+,
-+/* K_SADB_FLUSH */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* K_SADB_DUMP */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* SADB_X_PROMISC */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_PCHANGE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_GRPSA */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_ADDFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<K_SADB_X_EXT_PROTOCOL
-+,
-+/* SADB_X_DELFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<K_SADB_X_EXT_PROTOCOL
-+,
-+/* SADB_X_DEBUG */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_DEBUG
-+,
-+/* SADB_X_NAT_T_NEW_MAPPING */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_X_PLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+,
-+/* SADB_X_UNPLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+},
-+
-+/* REQUIRED IN */
-+{
-+/* K_SADB_RESERVED */
-+0
-+,
-+/* K_SADB_GETSPI */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_EXT_SPIRANGE
-+,
-+/* K_SADB_UPDATE */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_SA
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+/*| 1ULL<<K_SADB_EXT_KEY_AUTH*/
-+/*| 1ULL<<K_SADB_EXT_KEY_ENCRYPT*/
-+,
-+/* K_SADB_ADD */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_SA
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+/*| 1ULL<<K_SADB_EXT_KEY_AUTH*/
-+/*| 1ULL<<K_SADB_EXT_KEY_ENCRYPT*/
-+,
-+/* K_SADB_DELETE */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_SA
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+,
-+/* K_SADB_GET */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_SA
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+,
-+/* K_SADB_ACQUIRE */
-+1ULL<<K_SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_EXT_ADDRESS_SRC
-+| 1ULL<<K_SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_EXT_PROPOSAL
-+,
-+/* K_SADB_REGISTER */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* K_SADB_EXPIRE */
-+0
-+,
-+/* K_SADB_FLUSH */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* K_SADB_DUMP */
-+1ULL<<K_SADB_EXT_RESERVED
-+,
-+/* SADB_X_PROMISC */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_PCHANGE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_GRPSA */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+/*| 1ULL<<K_SADB_X_EXT_SATYPE2*/
-+/*| 1ULL<<K_SADB_X_EXT_SA2*/
-+/*| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2*/
-+,
-+/* SADB_X_ADDFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+,
-+/* SADB_X_DELFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+/*| 1ULL<<SADB_EXT_SA*/
-+#if 0 /* SADB_X_CLREROUTE doesn't need all these... */
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+#endif
-+,
-+/* SADB_X_DEBUG */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_DEBUG
-+,
-+/* SADB_X_NAT_T_NEW_MAPPING */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_X_PLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+,
-+/* SADB_X_UNPLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+}
-+
-+},
-+
-+/* OUTBOUND EXTENSIONS */
-+{
-+
-+/* PERMITTED OUT */
-+{
-+/* K_SADB_RESERVED */
-+0
-+,
-+/* SADB_GETSPI */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_UPDATE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_ADD */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_TYPE
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_OA
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+| 1ULL<<K_SADB_X_EXT_SAREF
-+,
-+/* SADB_DELETE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_GET */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_TYPE
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_OA
-+,
-+/* SADB_ACQUIRE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+,
-+/* SADB_REGISTER */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+,
-+/* SADB_EXPIRE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_FLUSH */
-+1ULL<<SADB_EXT_RESERVED
-+,
-+/* SADB_DUMP */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<K_SADB_X_EXT_NAT_T_TYPE
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_OA
-+,
-+/* SADB_X_PROMISC */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_PCHANGE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_GRPSA */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_ADDFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+| 1ULL<<K_SADB_X_EXT_PROTOCOL
-+,
-+/* SADB_X_DELFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+| 1ULL<<K_SADB_X_EXT_PROTOCOL
-+,
-+/* SADB_X_DEBUG */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_DEBUG
-+,
-+/* SADB_X_NAT_T_NEW_MAPPING */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_X_PLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+,
-+/* SADB_X_UNPLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+},
-+
-+/* REQUIRED OUT */
-+{
-+/* K_SADB_RESERVED */
-+0
-+,
-+/* SADB_GETSPI */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_UPDATE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_ADD */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_DELETE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_GET */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+/* | 1ULL<<SADB_EXT_KEY_AUTH */
-+/* | 1ULL<<SADB_EXT_KEY_ENCRYPT */
-+,
-+/* SADB_ACQUIRE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_PROPOSAL
-+,
-+/* SADB_REGISTER */
-+1ULL<<SADB_EXT_RESERVED
-+/* | 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+ | 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT */
-+,
-+/* SADB_EXPIRE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+/* | 1ULL<<SADB_EXT_LIFETIME_HARD
-+ | 1ULL<<SADB_EXT_LIFETIME_SOFT */
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_FLUSH */
-+1ULL<<SADB_EXT_RESERVED
-+,
-+/* SADB_DUMP */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+,
-+/* SADB_X_PROMISC */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_PCHANGE */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_LIFETIME_CURRENT
-+| 1ULL<<SADB_EXT_LIFETIME_HARD
-+| 1ULL<<SADB_EXT_LIFETIME_SOFT
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<SADB_EXT_ADDRESS_PROXY
-+| 1ULL<<SADB_EXT_KEY_AUTH
-+| 1ULL<<SADB_EXT_KEY_ENCRYPT
-+| 1ULL<<SADB_EXT_IDENTITY_SRC
-+| 1ULL<<SADB_EXT_IDENTITY_DST
-+| 1ULL<<SADB_EXT_SENSITIVITY
-+| 1ULL<<SADB_EXT_PROPOSAL
-+| 1ULL<<SADB_EXT_SUPPORTED_AUTH
-+| 1ULL<<SADB_EXT_SUPPORTED_ENCRYPT
-+| 1ULL<<SADB_EXT_SPIRANGE
-+| 1ULL<<K_SADB_X_EXT_KMPRIVATE
-+| 1ULL<<K_SADB_X_EXT_SATYPE2
-+| 1ULL<<K_SADB_X_EXT_SA2
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST2
-+,
-+/* SADB_X_GRPSA */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+,
-+/* SADB_X_ADDFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+,
-+/* SADB_X_DELFLOW */
-+1ULL<<SADB_EXT_RESERVED
-+/*| 1ULL<<SADB_EXT_SA*/
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_FLOW
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_SRC_MASK
-+| 1ULL<<K_SADB_X_EXT_ADDRESS_DST_MASK
-+,
-+/* SADB_X_DEBUG */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_DEBUG
-+,
-+/* SADB_X_NAT_T_NEW_MAPPING */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<SADB_EXT_SA
-+| 1ULL<<SADB_EXT_ADDRESS_SRC
-+| 1ULL<<SADB_EXT_ADDRESS_DST
-+| 1ULL<<K_SADB_X_EXT_NAT_T_SPORT
-+| 1ULL<<K_SADB_X_EXT_NAT_T_DPORT
-+,
-+/* SADB_X_PLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+,
-+/* SADB_X_UNPLUMBIF */
-+1ULL<<SADB_EXT_RESERVED
-+| 1ULL<<K_SADB_X_EXT_PLUMBIF
-+}
-+}
-+};
-+
-+int pfkey_required_extension(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ enum sadb_extension_t exttype)
-+{
-+ return (extensions_bitmaps[inout][EXT_BITS_REQ][sadb_operation] & (1ULL<<exttype)) != 0;
-+}
-+
-+int pfkey_permitted_extension(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ enum sadb_extension_t exttype)
-+{
-+ return (extensions_bitmaps[inout][EXT_BITS_PERM][sadb_operation] & (1ULL<<exttype)) != 0;
-+}
-+
-+
-+int pfkey_extensions_missing(enum pfkey_ext_required inout,
-+ enum sadb_msg_t sadb_operation,
-+ pfkey_ext_track extensions_seen)
-+
-+{
-+ pfkey_ext_track req = extensions_bitmaps[inout][EXT_BITS_REQ][sadb_operation];
-+
-+ if((extensions_seen & req) != req) {
-+ ERROR("extensions for op: %d seen: %08llx required %08llx, missing: %08llx\n",
-+ sadb_operation,
-+ (unsigned long long)extensions_seen,
-+ (unsigned long long)req,
-+ (unsigned long long)req & ~(req & extensions_seen));
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_ext_process.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,880 @@
-+/*
-+ * @(#) RFC2367 PF_KEYv2 Key management API message parser
-+ * Copyright (C) 1998-2003 Richard Guy Briggs.
-+ * Copyright (C) 2004-2006 Michael Richardson <mcr@xelerance.com>
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+/*
-+ * Template from klips/net/ipsec/ipsec/ipsec_netlink.c.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+
-+#include <openswan.h>
-+
-+#include <klips-crypto/des.h>
-+
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+#ifdef NET_21
-+# include <linux/in6.h>
-+# define IS_MYADDR RTN_LOCAL
-+#endif
-+
-+#include <net/ip.h>
-+#ifdef NETLINK_SOCK
-+# include <linux/netlink.h>
-+#else
-+# include <net/netlink.h>
-+#endif
-+
-+#include <linux/random.h> /* get_random_bytes() */
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipcomp.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+
-+#ifdef CONFIG_KLIPS_OCF
-+#include "ipsec_ocf.h"
-+#endif
-+
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+/* returns 0 on success */
-+int
-+pfkey_sa_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ struct k_sadb_sa *k_pfkey_sa = (struct k_sadb_sa *)pfkey_ext;
-+ struct sadb_sa *pfkey_sa = (struct sadb_sa *)pfkey_ext;
-+ int error = 0;
-+ struct ipsec_sa* ipsp;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sa_process: .\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sa_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_ext->sadb_ext_type) {
-+ case K_SADB_EXT_SA:
-+ ipsp = extr->ips;
-+ break;
-+ case K_SADB_X_EXT_SA2:
-+ if(extr->ips2 == NULL) {
-+ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */
-+ }
-+ if(extr->ips2 == NULL) {
-+ SENDERR(-error);
-+ }
-+ ipsp = extr->ips2;
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sa_process: "
-+ "invalid exttype=%d.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+ ipsp->ips_said.spi = pfkey_sa->sadb_sa_spi;
-+ ipsp->ips_replaywin = pfkey_sa->sadb_sa_replay;
-+ ipsp->ips_state = pfkey_sa->sadb_sa_state;
-+ ipsp->ips_flags = pfkey_sa->sadb_sa_flags;
-+ ipsp->ips_replaywin_lastseq = ipsp->ips_replaywin_bitmap = 0;
-+
-+ if(k_pfkey_sa->sadb_sa_len > sizeof(struct sadb_sa)/IPSEC_PFKEYv2_ALIGN) {
-+ ipsp->ips_ref = k_pfkey_sa->sadb_x_sa_ref;
-+ }
-+
-+ switch(ipsp->ips_said.proto) {
-+ case IPPROTO_AH:
-+ ipsp->ips_authalg = pfkey_sa->sadb_sa_auth;
-+ ipsp->ips_encalg = K_SADB_EALG_NONE;
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, 0))
-+ break;
-+#endif
-+ break;
-+ case IPPROTO_ESP:
-+ ipsp->ips_authalg = pfkey_sa->sadb_sa_auth;
-+ ipsp->ips_encalg = pfkey_sa->sadb_sa_encrypt;
-+#ifdef CONFIG_KLIPS_OCF
-+ if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, ipsp->ips_encalg))
-+ break;
-+#endif
-+#ifdef CONFIG_KLIPS_ALG
-+ ipsec_alg_sa_init(ipsp);
-+#endif
-+ break;
-+ case IPPROTO_IPIP:
-+ ipsp->ips_authalg = AH_NONE;
-+ ipsp->ips_encalg = ESP_NONE;
-+ break;
-+#ifdef CONFIG_KLIPS_IPCOMP
-+ case IPPROTO_COMP:
-+ ipsp->ips_authalg = AH_NONE;
-+ ipsp->ips_encalg = pfkey_sa->sadb_sa_encrypt;
-+ break;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+ case IPPROTO_INT:
-+ ipsp->ips_authalg = AH_NONE;
-+ ipsp->ips_encalg = ESP_NONE;
-+ break;
-+ case 0:
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sa_process: "
-+ "unknown proto=%d.\n",
-+ ipsp->ips_said.proto);
-+ SENDERR(EINVAL);
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_lifetime_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)pfkey_ext;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_lifetime_process: .\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_lifetime_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_lifetime->sadb_lifetime_exttype) {
-+ case K_SADB_EXT_LIFETIME_CURRENT:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_lifetime_process: "
-+ "lifetime_current not supported yet.\n");
-+ SENDERR(EINVAL);
-+ break;
-+ case K_SADB_EXT_LIFETIME_HARD:
-+ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_allocations,
-+ pfkey_lifetime->sadb_lifetime_allocations);
-+
-+ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_bytes,
-+ pfkey_lifetime->sadb_lifetime_bytes);
-+
-+ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_addtime,
-+ pfkey_lifetime->sadb_lifetime_addtime);
-+
-+ ipsec_lifetime_update_hard(&extr->ips->ips_life.ipl_usetime,
-+ pfkey_lifetime->sadb_lifetime_usetime);
-+
-+ break;
-+
-+ case K_SADB_EXT_LIFETIME_SOFT:
-+ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_allocations,
-+ pfkey_lifetime->sadb_lifetime_allocations);
-+
-+ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_bytes,
-+ pfkey_lifetime->sadb_lifetime_bytes);
-+
-+ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_addtime,
-+ pfkey_lifetime->sadb_lifetime_addtime);
-+
-+ ipsec_lifetime_update_soft(&extr->ips->ips_life.ipl_usetime,
-+ pfkey_lifetime->sadb_lifetime_usetime);
-+
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_lifetime_process: "
-+ "invalid exttype=%d.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL);
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_address_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ int saddr_len = 0;
-+ char ipaddr_txt[ADDRTOA_BUF];
-+ unsigned char **sap;
-+ unsigned short * portp = 0;
-+ struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext;
-+ struct sockaddr* s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address));
-+ struct ipsec_sa* ipsp;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process:\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(s->sa_family) {
-+ case AF_INET:
-+ saddr_len = sizeof(struct sockaddr_in);
-+ if (debug_pfkey)
-+ addrtoa(((struct sockaddr_in*)s)->sin_addr, 0, ipaddr_txt, sizeof(ipaddr_txt));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found address family=%d, AF_INET, %s.\n",
-+ s->sa_family,
-+ ipaddr_txt);
-+ break;
-+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+ case AF_INET6:
-+ saddr_len = sizeof(struct sockaddr_in6);
-+ break;
-+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "s->sa_family=%d not supported.\n",
-+ s->sa_family);
-+ SENDERR(EPFNOSUPPORT);
-+ }
-+
-+ switch(pfkey_address->sadb_address_exttype) {
-+ case K_SADB_EXT_ADDRESS_SRC:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found src address.\n");
-+ sap = (unsigned char **)&(extr->ips->ips_addr_s);
-+ extr->ips->ips_addr_s_size = saddr_len;
-+ break;
-+ case K_SADB_EXT_ADDRESS_DST:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found dst address.\n");
-+ sap = (unsigned char **)&(extr->ips->ips_addr_d);
-+ extr->ips->ips_addr_d_size = saddr_len;
-+ break;
-+ case K_SADB_EXT_ADDRESS_PROXY:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found proxy address.\n");
-+ sap = (unsigned char **)&(extr->ips->ips_addr_p);
-+ extr->ips->ips_addr_p_size = saddr_len;
-+ break;
-+ case K_SADB_X_EXT_ADDRESS_DST2:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found 2nd dst address.\n");
-+ if(extr->ips2 == NULL) {
-+ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */
-+ }
-+ if(extr->ips2 == NULL) {
-+ SENDERR(-error);
-+ }
-+ sap = (unsigned char **)&(extr->ips2->ips_addr_d);
-+ extr->ips2->ips_addr_d_size = saddr_len;
-+ break;
-+ case K_SADB_X_EXT_ADDRESS_SRC_FLOW:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found src flow address.\n");
-+ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) {
-+ SENDERR(ENOMEM);
-+ }
-+ sap = (unsigned char **)&(extr->eroute->er_eaddr.sen_ip_src);
-+ portp = &(extr->eroute->er_eaddr.sen_sport);
-+ break;
-+ case K_SADB_X_EXT_ADDRESS_DST_FLOW:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found dst flow address.\n");
-+ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) {
-+ SENDERR(ENOMEM);
-+ }
-+ sap = (unsigned char **)&(extr->eroute->er_eaddr.sen_ip_dst);
-+ portp = &(extr->eroute->er_eaddr.sen_dport);
-+ break;
-+ case K_SADB_X_EXT_ADDRESS_SRC_MASK:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found src mask address.\n");
-+ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) {
-+ SENDERR(ENOMEM);
-+ }
-+ sap = (unsigned char **)&(extr->eroute->er_emask.sen_ip_src);
-+ portp = &(extr->eroute->er_emask.sen_sport);
-+ break;
-+ case K_SADB_X_EXT_ADDRESS_DST_MASK:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found dst mask address.\n");
-+ if(pfkey_alloc_eroute(&(extr->eroute)) == ENOMEM) {
-+ SENDERR(ENOMEM);
-+ }
-+ sap = (unsigned char **)&(extr->eroute->er_emask.sen_ip_dst);
-+ portp = &(extr->eroute->er_emask.sen_dport);
-+ break;
-+#ifdef NAT_TRAVERSAL
-+ case K_SADB_X_EXT_NAT_T_OA:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "found NAT-OA address.\n");
-+ sap = (unsigned char **)&(extr->ips->ips_natt_oa);
-+ extr->ips->ips_natt_oa_size = saddr_len;
-+ break;
-+#endif
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "unrecognised ext_type=%d.\n",
-+ pfkey_address->sadb_address_exttype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_address->sadb_address_exttype) {
-+ case K_SADB_EXT_ADDRESS_SRC:
-+ case K_SADB_EXT_ADDRESS_DST:
-+ case K_SADB_EXT_ADDRESS_PROXY:
-+ case K_SADB_X_EXT_ADDRESS_DST2:
-+#ifdef NAT_TRAVERSAL
-+ case K_SADB_X_EXT_NAT_T_OA:
-+#endif
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "allocating %d bytes for saddr.\n",
-+ saddr_len);
-+ if(!(*sap = kmalloc(saddr_len, GFP_KERNEL))) {
-+ SENDERR(ENOMEM);
-+ }
-+ memcpy(*sap, s, saddr_len);
-+ break;
-+ default:
-+ if(s->sa_family != AF_INET) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "s->sa_family=%d not supported.\n",
-+ s->sa_family);
-+ SENDERR(EPFNOSUPPORT);
-+ }
-+ {
-+ *(struct in_addr *)sap = ((struct sockaddr_in *)s)->sin_addr;
-+ }
-+
-+ if (portp != 0)
-+ *portp = ((struct sockaddr_in*)s)->sin_port;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(extr->eroute) {
-+ char buf1[64], buf2[64];
-+ if (debug_pfkey) {
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_src,
-+ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_dst,
-+ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_parse: "
-+ "extr->eroute set to %s:%d->%s:%d\n",
-+ buf1,
-+ ntohs(extr->eroute->er_eaddr.sen_sport),
-+ buf2,
-+ ntohs(extr->eroute->er_eaddr.sen_dport));
-+ }
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ }
-+
-+ ipsp = extr->ips;
-+ switch(pfkey_address->sadb_address_exttype) {
-+ case K_SADB_X_EXT_ADDRESS_DST2:
-+ ipsp = extr->ips2;
-+ case K_SADB_EXT_ADDRESS_DST:
-+ if(s->sa_family == AF_INET) {
-+ ipsp->ips_said.dst.u.v4.sin_addr.s_addr = ((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr.s_addr;
-+ ipsp->ips_said.dst.u.v4.sin_family = AF_INET;
-+ if (debug_pfkey)
-+ addrtoa(((struct sockaddr_in*)(ipsp->ips_addr_d))->sin_addr,
-+ 0,
-+ ipaddr_txt,
-+ sizeof(ipaddr_txt));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "ips_said.dst set to %s.\n",
-+ ipaddr_txt);
-+ } else {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: "
-+ "uh, ips_said.dst doesn't do address family=%d yet, said will be invalid.\n",
-+ s->sa_family);
-+ }
-+ default:
-+ break;
-+ }
-+
-+ /* XXX check if port!=0 */
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_address_process: successful.\n");
-+ errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_key_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: .\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_key->sadb_key_exttype) {
-+ case K_SADB_EXT_KEY_AUTH:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "allocating %d bytes for authkey.\n",
-+ DIVUP(pfkey_key->sadb_key_bits, 8));
-+ if(!(extr->ips->ips_key_a = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "memory allocation error.\n");
-+ SENDERR(ENOMEM);
-+ }
-+ extr->ips->ips_key_bits_a = pfkey_key->sadb_key_bits;
-+ extr->ips->ips_key_a_size = DIVUP(pfkey_key->sadb_key_bits, 8);
-+ memcpy(extr->ips->ips_key_a,
-+ (char*)pfkey_key + sizeof(struct sadb_key),
-+ extr->ips->ips_key_a_size);
-+ break;
-+ case K_SADB_EXT_KEY_ENCRYPT: /* Key(s) */
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "allocating %d bytes for enckey.\n",
-+ DIVUP(pfkey_key->sadb_key_bits, 8));
-+ if(!(extr->ips->ips_key_e = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "memory allocation error.\n");
-+ SENDERR(ENOMEM);
-+ }
-+ extr->ips->ips_key_bits_e = pfkey_key->sadb_key_bits;
-+ extr->ips->ips_key_e_size = DIVUP(pfkey_key->sadb_key_bits, 8);
-+ memcpy(extr->ips->ips_key_e,
-+ (char*)pfkey_key + sizeof(struct sadb_key),
-+ extr->ips->ips_key_e_size);
-+ break;
-+ default:
-+ SENDERR(EINVAL);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_key_process: "
-+ "success.\n");
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_ident_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_ident *pfkey_ident = (struct sadb_ident *)pfkey_ext;
-+ int data_len;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_ident_process: .\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_ident_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_ident->sadb_ident_exttype) {
-+ case K_SADB_EXT_IDENTITY_SRC:
-+ data_len = pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+
-+ extr->ips->ips_ident_s.type = pfkey_ident->sadb_ident_type;
-+ extr->ips->ips_ident_s.id = pfkey_ident->sadb_ident_id;
-+ extr->ips->ips_ident_s.len = pfkey_ident->sadb_ident_len;
-+ if(data_len) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_ident_process: "
-+ "allocating %d bytes for ident_s.\n",
-+ data_len);
-+ if(!(extr->ips->ips_ident_s.data
-+ = kmalloc(data_len, GFP_KERNEL))) {
-+ SENDERR(ENOMEM);
-+ }
-+ memcpy(extr->ips->ips_ident_s.data,
-+ (char*)pfkey_ident + sizeof(struct sadb_ident),
-+ data_len);
-+ } else {
-+ extr->ips->ips_ident_s.data = NULL;
-+ }
-+ break;
-+ case K_SADB_EXT_IDENTITY_DST: /* Identity(ies) */
-+ data_len = pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - sizeof(struct sadb_ident);
-+
-+ extr->ips->ips_ident_d.type = pfkey_ident->sadb_ident_type;
-+ extr->ips->ips_ident_d.id = pfkey_ident->sadb_ident_id;
-+ extr->ips->ips_ident_d.len = pfkey_ident->sadb_ident_len;
-+ if(data_len) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_ident_process: "
-+ "allocating %d bytes for ident_d.\n",
-+ data_len);
-+ if(!(extr->ips->ips_ident_d.data
-+ = kmalloc(data_len, GFP_KERNEL))) {
-+ SENDERR(ENOMEM);
-+ }
-+ memcpy(extr->ips->ips_ident_d.data,
-+ (char*)pfkey_ident + sizeof(struct sadb_ident),
-+ data_len);
-+ } else {
-+ extr->ips->ips_ident_d.data = NULL;
-+ }
-+ break;
-+ default:
-+ SENDERR(EINVAL);
-+ }
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_sens_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_sens_process: "
-+ "Sorry, I can't process exttype=%d yet.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+ errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_prop_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_prop_process: "
-+ "Sorry, I can't process exttype=%d yet.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+
-+ errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_supported_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_supported_process: "
-+ "Sorry, I can't process exttype=%d yet.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_spirange_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_spirange_process: .\n");
-+/* errlab: */
-+ return error;
-+}
-+
-+int
-+pfkey_x_kmprivate_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_kmprivate_process: "
-+ "Sorry, I can't process exttype=%d yet.\n",
-+ pfkey_ext->sadb_ext_type);
-+ SENDERR(EINVAL); /* don't process these yet */
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_x_satype_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_x_satype *pfkey_x_satype = (struct sadb_x_satype *)pfkey_ext;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "pfkey_x_satype_process: .\n");
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "pfkey_x_satype_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(extr->ips2 == NULL) {
-+ extr->ips2 = ipsec_sa_alloc(&error); /* pass error var by pointer */
-+ }
-+ if(extr->ips2 == NULL) {
-+ SENDERR(-error);
-+ }
-+ if(!(extr->ips2->ips_said.proto = satype2proto(pfkey_x_satype->sadb_x_satype_satype))) {
-+ KLIPS_ERROR(debug_pfkey,
-+ "pfkey_x_satype_process: "
-+ "proto lookup from satype=%d failed.\n",
-+ pfkey_x_satype->sadb_x_satype_satype);
-+ SENDERR(EINVAL);
-+ }
-+ KLIPS_PRINT(debug_pfkey,
-+ "pfkey_x_satype_process: "
-+ "protocol==%d decoded from satype==%d(%s).\n",
-+ extr->ips2->ips_said.proto,
-+ pfkey_x_satype->sadb_x_satype_satype,
-+ satype2name(pfkey_x_satype->sadb_x_satype_satype));
-+
-+errlab:
-+ return error;
-+}
-+
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+int
-+pfkey_x_nat_t_type_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_x_nat_t_type *pfkey_x_nat_t_type = (struct sadb_x_nat_t_type *)pfkey_ext;
-+
-+ if(!pfkey_x_nat_t_type) {
-+ printk("klips_debug:pfkey_x_nat_t_type_process: "
-+ "null pointer passed in\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_nat_t_type_process: %d.\n",
-+ pfkey_x_nat_t_type->sadb_x_nat_t_type_type);
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_nat_t_type_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_x_nat_t_type->sadb_x_nat_t_type_type) {
-+ case ESPINUDP_WITH_NON_IKE: /* with Non-IKE (older version) */
-+ case ESPINUDP_WITH_NON_ESP: /* with Non-ESP */
-+
-+ extr->ips->ips_natt_type = pfkey_x_nat_t_type->sadb_x_nat_t_type_type;
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_nat_t_type_process: "
-+ "unknown type %d.\n",
-+ pfkey_x_nat_t_type->sadb_x_nat_t_type_type);
-+ SENDERR(EINVAL);
-+ break;
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+int
-+pfkey_x_nat_t_port_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct sadb_x_nat_t_port *pfkey_x_nat_t_port = (struct sadb_x_nat_t_port *)pfkey_ext;
-+
-+ if(!pfkey_x_nat_t_port) {
-+ printk("klips_debug:pfkey_x_nat_t_port_process: "
-+ "null pointer passed in\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_nat_t_port_process: %d/%d.\n",
-+ pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype,
-+ pfkey_x_nat_t_port->sadb_x_nat_t_port_port);
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_nat_t_type_process: "
-+ "extr or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype) {
-+ case K_SADB_X_EXT_NAT_T_SPORT:
-+ extr->ips->ips_natt_sport = pfkey_x_nat_t_port->sadb_x_nat_t_port_port;
-+ break;
-+ case K_SADB_X_EXT_NAT_T_DPORT:
-+ extr->ips->ips_natt_dport = pfkey_x_nat_t_port->sadb_x_nat_t_port_port;
-+ break;
-+ default:
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_nat_t_port_process: "
-+ "unknown exttype %d.\n",
-+ pfkey_x_nat_t_port->sadb_x_nat_t_port_exttype);
-+ SENDERR(EINVAL);
-+ break;
-+ }
-+
-+errlab:
-+ return error;
-+}
-+#endif
-+
-+/*
-+ * Apply an SADB_X_DEBUG extension to the KLIPS debug masks.
-+ *
-+ * The most significant bit of sadb_x_debug_netlink selects the operation:
-+ * if it is set, the bit is stripped and each mask carried by the extension
-+ * is OR-ed into the matching debug variable ("set"); if it is clear, each
-+ * mask is AND-ed into it ("unset").
-+ *
-+ * pfkey_ext: the extension, viewed as a struct sadb_x_debug
-+ * extr:      not used by this function
-+ *
-+ * Returns 0 on success, -EINVAL if pfkey_ext is NULL or if the code was
-+ * built without CONFIG_KLIPS_DEBUG.
-+ */
-+int
-+pfkey_x_debug_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+	int error = 0;
-+	struct sadb_x_debug *pfkey_x_debug = (struct sadb_x_debug *)pfkey_ext;
-+
-+	if(!pfkey_x_debug) {
-+		printk("klips_debug:pfkey_x_debug_process: "
-+		       "null pointer passed in\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_x_debug_process: .\n");
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+	/* top bit of the netlink mask set => "set" request, clear => "unset" */
-+	if(pfkey_x_debug->sadb_x_debug_netlink >>
-+	   (sizeof(pfkey_x_debug->sadb_x_debug_netlink) * 8 - 1)) {
-+		/*
-+		 * Strip the selector bit before merging.  The constant is
-+		 * unsigned so that shifting into the top bit is well defined
-+		 * (assumes the field is no wider than unsigned int -- TODO
-+		 * confirm against struct sadb_x_debug).
-+		 */
-+		pfkey_x_debug->sadb_x_debug_netlink &=
-+			~(1U << (sizeof(pfkey_x_debug->sadb_x_debug_netlink) * 8 -1));
-+		debug_tunnel  |= pfkey_x_debug->sadb_x_debug_tunnel;
-+		debug_netlink |= pfkey_x_debug->sadb_x_debug_netlink;
-+		debug_xform   |= pfkey_x_debug->sadb_x_debug_xform;
-+		debug_eroute  |= pfkey_x_debug->sadb_x_debug_eroute;
-+		debug_spi     |= pfkey_x_debug->sadb_x_debug_spi;
-+		debug_radij   |= pfkey_x_debug->sadb_x_debug_radij;
-+		debug_esp     |= pfkey_x_debug->sadb_x_debug_esp;
-+		debug_ah      |= pfkey_x_debug->sadb_x_debug_ah;
-+		debug_rcv     |= pfkey_x_debug->sadb_x_debug_rcv;
-+		debug_pfkey   |= pfkey_x_debug->sadb_x_debug_pfkey;
-+#ifdef CONFIG_KLIPS_IPCOMP
-+		sysctl_ipsec_debug_ipcomp  |= pfkey_x_debug->sadb_x_debug_ipcomp;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+		sysctl_ipsec_debug_verbose |= pfkey_x_debug->sadb_x_debug_verbose;
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_x_debug_process: "
-+			    "set\n");
-+	} else {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_x_debug_process: "
-+			    "unset\n");
-+		debug_tunnel  &= pfkey_x_debug->sadb_x_debug_tunnel;
-+		debug_netlink &= pfkey_x_debug->sadb_x_debug_netlink;
-+		debug_xform   &= pfkey_x_debug->sadb_x_debug_xform;
-+		debug_eroute  &= pfkey_x_debug->sadb_x_debug_eroute;
-+		debug_spi     &= pfkey_x_debug->sadb_x_debug_spi;
-+		debug_radij   &= pfkey_x_debug->sadb_x_debug_radij;
-+		debug_esp     &= pfkey_x_debug->sadb_x_debug_esp;
-+		debug_ah      &= pfkey_x_debug->sadb_x_debug_ah;
-+		debug_rcv     &= pfkey_x_debug->sadb_x_debug_rcv;
-+		debug_pfkey   &= pfkey_x_debug->sadb_x_debug_pfkey;
-+#ifdef CONFIG_KLIPS_IPCOMP
-+		sysctl_ipsec_debug_ipcomp  &= pfkey_x_debug->sadb_x_debug_ipcomp;
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+		sysctl_ipsec_debug_verbose &= pfkey_x_debug->sadb_x_debug_verbose;
-+	}
-+#else /* CONFIG_KLIPS_DEBUG */
-+	printk("klips_debug:pfkey_x_debug_process: "
-+	       "debugging not enabled\n");
-+	SENDERR(EINVAL);
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_parse.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1597 @@
-+/*
-+ * RFC2367 PF_KEYv2 Key management API message parser
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: pfkey_v2_parse.c,v 1.65 2005/04/06 17:46:05 mcr Exp $
-+ */
-+
-+/*
-+ * Template from klips/net/ipsec/ipsec/ipsec_parser.c.
-+ */
-+
-+char pfkey_v2_parse_c_version[] = "$Id: pfkey_v2_parse.c,v 1.65 2005/04/06 17:46:05 mcr Exp $";
-+
-+/*
-+ * Some ugly stuff to allow consistent debugging code for use in the
-+ * kernel and in user space
-+*/
-+
-+#ifdef __KERNEL__
-+
-+# include <linux/kernel.h> /* for printk */
-+
-+#include "openswan/ipsec_kversion.h" /* for malloc switch */
-+
-+# ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+# else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+# endif /* MALLOC_SLAB */
-+# include <linux/errno.h> /* error codes */
-+# include <linux/types.h> /* size_t */
-+# include <linux/interrupt.h> /* mark_bh */
-+
-+# include <linux/netdevice.h> /* struct device, and other headers */
-+# include <linux/etherdevice.h> /* eth_type_trans */
-+# include <linux/ip.h> /* struct iphdr */
-+# if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+# include <linux/ipv6.h> /* struct ipv6hdr */
-+# endif /* if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
-+extern int debug_pfkey;
-+
-+# include <openswan.h>
-+
-+#include "openswan/ipsec_encap.h"
-+
-+#else /* __KERNEL__ */
-+
-+# include <sys/types.h>
-+# include <sys/errno.h>
-+# include <stdio.h>
-+
-+# include <openswan.h>
-+# include "constants.h"
-+
-+#endif /* __KERNEL__ */
-+
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_sa.h" /* IPSEC_SAREF_NULL, IPSEC_SA_REF_TABLE_IDX_WIDTH */
-+
-+/*
-+ * how to handle debugging for pfkey.
-+ */
-+#include <openswan/pfkey_debug.h>
-+
-+/*
-+ * Debug level of the parser library; starts out as PF_KEY_DEBUG_PARSE_NONE.
-+ * NOTE(review): presumably consulted by the DEBUGGING() macro from
-+ * pfkey_debug.h -- that header is not visible here, confirm.
-+ */
-+unsigned int pfkey_lib_debug = PF_KEY_DEBUG_PARSE_NONE;
-+/* printf-like hooks for debug and error output; not assigned in this file section */
-+int (*pfkey_debug_func)(const char *message, ...) PRINTF_LIKE(1);
-+int (*pfkey_error_func)(const char *message, ...) PRINTF_LIKE(1);
-+
-+
-+/*
-+ * Bail out of the current function: store the negated error code in the
-+ * local variable "error" and jump to the local label "errlab".  Any function
-+ * using this macro must therefore declare both.
-+ */
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+/*
-+ * One row of the SA-type translation table: a protocol identifier, the
-+ * PF_KEY SA type it corresponds to, and a printable name.
-+ */
-+struct satype_tbl {
-+	uint8_t proto;
-+	uint8_t satype;
-+	char* name;
-+};
-+
-+/*
-+ * Translation table.  In the kernel the protocol column holds IPPROTO_*
-+ * values, in user space SA_* values.  The all-zero "UNKNOWN" row terminates
-+ * the table and doubles as the result for failed lookups.
-+ *
-+ * The storage class is written first: placing "static" after the struct
-+ * body ("} static satype_tbl[]") is an obsolescent declaration form.
-+ */
-+static struct satype_tbl satype_tbl[] = {
-+#ifdef __KERNEL__
-+	{ IPPROTO_ESP,	K_SADB_SATYPE_ESP,	"ESP"  },
-+	{ IPPROTO_AH,	K_SADB_SATYPE_AH,	"AH"   },
-+	{ IPPROTO_IPIP,	K_SADB_X_SATYPE_IPIP,	"IPIP" },
-+#ifdef CONFIG_KLIPS_IPCOMP
-+	{ IPPROTO_COMP,	K_SADB_X_SATYPE_COMP,	"COMP" },
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+	{ IPPROTO_INT,	K_SADB_X_SATYPE_INT,	"INT" },
-+#else /* __KERNEL__ */
-+	{ SA_ESP,	K_SADB_SATYPE_ESP,	"ESP"  },
-+	{ SA_AH,	K_SADB_SATYPE_AH,	"AH"   },
-+	{ SA_IPIP,	K_SADB_X_SATYPE_IPIP,	"IPIP" },
-+	{ SA_COMP,	K_SADB_X_SATYPE_COMP,	"COMP" },
-+	{ SA_INT,	K_SADB_X_SATYPE_INT,	"INT" },
-+#endif /* __KERNEL__ */
-+	{ 0,		0,			"UNKNOWN" }
-+};
-+
-+/*
-+ * Map a PF_KEY SA type to the protocol stored beside it in satype_tbl.
-+ * A type that is not listed (or 0) yields the proto of the terminating row.
-+ */
-+uint8_t
-+satype2proto(uint8_t satype)
-+{
-+	const struct satype_tbl *entry;
-+
-+	/* walk until the terminator (satype == 0), leaving early on a match */
-+	for(entry = satype_tbl; entry->satype != 0; entry++) {
-+		if(entry->satype == satype) {
-+			break;
-+		}
-+	}
-+	return entry->proto;
-+}
-+
-+/*
-+ * Map a protocol identifier to the PF_KEY SA type stored beside it in
-+ * satype_tbl.  A protocol that is not listed (or 0) yields the satype of
-+ * the terminating row.
-+ */
-+uint8_t
-+proto2satype(uint8_t proto)
-+{
-+	int idx;
-+
-+	/* the row with proto == 0 ends the table */
-+	for(idx = 0; satype_tbl[idx].proto != 0; idx++) {
-+		if(satype_tbl[idx].proto == proto) {
-+			break;
-+		}
-+	}
-+	return satype_tbl[idx].satype;
-+}
-+
-+/*
-+ * Return the printable name for a PF_KEY SA type.  Types missing from
-+ * satype_tbl (and 0) get the name of the terminating row.
-+ */
-+char*
-+satype2name(uint8_t satype)
-+{
-+	struct satype_tbl *row = satype_tbl;
-+
-+	/* advance while the row is neither the terminator nor the match */
-+	while(row->satype != 0 && row->satype != satype) {
-+		row++;
-+	}
-+	return row->name;
-+}
-+
-+/*
-+ * Return the printable name for a protocol identifier.  Protocols missing
-+ * from satype_tbl (and 0) get the name of the terminating row.
-+ */
-+char*
-+proto2name(uint8_t proto)
-+{
-+	struct satype_tbl *row;
-+
-+	for(row = satype_tbl; row->proto != 0; row++) {
-+		if(row->proto == proto) {
-+			/* found the requested protocol */
-+			break;
-+		}
-+	}
-+	return row->name;
-+}
-+
-+/* Default extension parsers taken from the KLIPS code */
-+
-+/*
-+ * Validate an SA extension (K_SADB_EXT_SA or K_SADB_X_EXT_SA2).
-+ *
-+ * Two layouts are accepted: struct sadb_sa, and the longer struct k_sadb_sa
-+ * which additionally carries sadb_x_sa_ref.  The SA reference is examined
-+ * (and normalised in place) only when the extension is long enough to
-+ * contain it; with the short layout the field lies outside the extension
-+ * and must not be read.
-+ *
-+ * Returns 0 if the extension is acceptable, -EINVAL otherwise.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_sa_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	int has_saref;
-+	struct k_sadb_sa *pfkey_sa = (struct k_sadb_sa *)pfkey_ext;
-+
-+	/* sanity checks... */
-+	if(!pfkey_sa) {
-+		ERROR("pfkey_sa_parse: "
-+		      "NULL pointer passed in.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* the length must match one of the two known layouts exactly */
-+	if(pfkey_sa->sadb_sa_len !=sizeof(struct k_sadb_sa)/IPSEC_PFKEYv2_ALIGN
-+	   && pfkey_sa->sadb_sa_len!=sizeof(struct sadb_sa)/IPSEC_PFKEYv2_ALIGN) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"length wrong pfkey_sa->sadb_sa_len=%d sizeof(struct k_sadb_sa)=%d.\n",
-+			pfkey_sa->sadb_sa_len,
-+			(int)sizeof(struct k_sadb_sa));
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* sadb_x_sa_ref is only present in the longer layout */
-+	has_saref = (pfkey_sa->sadb_sa_len > sizeof(struct sadb_sa)/IPSEC_PFKEYv2_ALIGN);
-+
-+#if K_SADB_EALG_MAX < 255
-+	if(pfkey_sa->sadb_sa_encrypt > K_SADB_EALG_MAX) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"pfkey_sa->sadb_sa_encrypt=%d > K_SADB_EALG_MAX=%d.\n",
-+			pfkey_sa->sadb_sa_encrypt,
-+			K_SADB_EALG_MAX);
-+		SENDERR(EINVAL);
-+	}
-+#endif
-+
-+#if K_SADB_AALG_MAX < 255
-+	if(pfkey_sa->sadb_sa_auth > K_SADB_AALG_MAX) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"pfkey_sa->sadb_sa_auth=%d > K_SADB_AALG_MAX=%d.\n",
-+			pfkey_sa->sadb_sa_auth,
-+			K_SADB_AALG_MAX);
-+		SENDERR(EINVAL);
-+	}
-+#endif
-+
-+#if K_SADB_SASTATE_MAX < 255
-+	if(pfkey_sa->sadb_sa_state > K_SADB_SASTATE_MAX) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"state=%d exceeds MAX=%d.\n",
-+			pfkey_sa->sadb_sa_state,
-+			K_SADB_SASTATE_MAX);
-+		SENDERR(EINVAL);
-+	}
-+#endif
-+
-+	if(pfkey_sa->sadb_sa_state == K_SADB_SASTATE_DEAD) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"state=%d is DEAD=%d.\n",
-+			pfkey_sa->sadb_sa_state,
-+			K_SADB_SASTATE_DEAD);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_sa->sadb_sa_replay > 64) {
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"replay window size: %d -- must be 0 <= size <= 64\n",
-+			pfkey_sa->sadb_sa_replay);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(! ((pfkey_sa->sadb_sa_exttype == K_SADB_EXT_SA) ||
-+	      (pfkey_sa->sadb_sa_exttype == K_SADB_X_EXT_SA2)))
-+	{
-+		ERROR(
-+			"pfkey_sa_parse: "
-+			"unknown exttype=%d, expecting K_SADB_EXT_SA=%d or K_SADB_X_EXT_SA2=%d.\n",
-+			pfkey_sa->sadb_sa_exttype,
-+			K_SADB_EXT_SA,
-+			K_SADB_X_EXT_SA2);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(has_saref) {
-+		/* IPSEC_SAREF_NULL and its bitwise complement both mean "none" */
-+		if(pfkey_sa->sadb_x_sa_ref == IPSEC_SAREF_NULL ||
-+		   pfkey_sa->sadb_x_sa_ref == ~(IPSEC_SAREF_NULL))
-+		{
-+			pfkey_sa->sadb_x_sa_ref = IPSEC_SAREF_NULL;
-+		}
-+
-+		/* a real reference must index into the SA reference table */
-+		if((IPSEC_SAREF_NULL != pfkey_sa->sadb_x_sa_ref)
-+		   && (pfkey_sa->sadb_x_sa_ref >= (1 << IPSEC_SA_REF_TABLE_IDX_WIDTH)))
-+		{
-+			ERROR(
-+				"pfkey_sa_parse: "
-+				"SAref=%d must be (SAref == IPSEC_SAREF_NULL(%d) || SAref < IPSEC_SA_REF_TABLE_NUM_ENTRIES(%d)).\n",
-+				pfkey_sa->sadb_x_sa_ref,
-+				IPSEC_SAREF_NULL,
-+				IPSEC_SA_REF_TABLE_NUM_ENTRIES);
-+			SENDERR(EINVAL);
-+		}
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+		  "pfkey_sa_parse: "
-+		  "successfully found len=%d exttype=%d(%s) spi=%08lx replay=%d state=%d auth=%d encrypt=%d flags=%d ref=%d.\n",
-+		  pfkey_sa->sadb_sa_len,
-+		  pfkey_sa->sadb_sa_exttype,
-+		  pfkey_v2_sadb_ext_string(pfkey_sa->sadb_sa_exttype),
-+		  (long unsigned int)ntohl(pfkey_sa->sadb_sa_spi),
-+		  pfkey_sa->sadb_sa_replay,
-+		  pfkey_sa->sadb_sa_state,
-+		  pfkey_sa->sadb_sa_auth,
-+		  pfkey_sa->sadb_sa_encrypt,
-+		  pfkey_sa->sadb_sa_flags,
-+		  /* report "no reference" when the field is absent */
-+		  has_saref ? pfkey_sa->sadb_x_sa_ref : IPSEC_SAREF_NULL);
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * Check a lifetime extension: the pointer must be non-NULL, the length must
-+ * be exactly one struct sadb_lifetime, and the extension type must be one of
-+ * LIFETIME_HARD, LIFETIME_SOFT or LIFETIME_CURRENT.
-+ *
-+ * Returns 0 when the extension passes, -EINVAL when it does not.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_lifetime_parse(struct sadb_ext *pfkey_ext)
-+{
-+	struct sadb_lifetime *pfkey_lifetime = (struct sadb_lifetime *)pfkey_ext;
-+	const size_t want_len = sizeof(struct sadb_lifetime) / IPSEC_PFKEYv2_ALIGN;
-+	int error = 0;
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+		  "pfkey_lifetime_parse:enter\n");
-+
-+	/* reject a missing extension outright */
-+	if(pfkey_lifetime == NULL) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_lifetime_parse: "
-+			  "NULL pointer passed in.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* length is expressed in PF_KEY alignment units */
-+	if(pfkey_lifetime->sadb_lifetime_len != want_len) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_lifetime_parse: "
-+			  "length wrong pfkey_lifetime->sadb_lifetime_len=%d sizeof(struct sadb_lifetime)=%d.\n",
-+			  pfkey_lifetime->sadb_lifetime_len,
-+			  (int)sizeof(struct sadb_lifetime));
-+		SENDERR(EINVAL);
-+	}
-+
-+	switch(pfkey_lifetime->sadb_lifetime_exttype) {
-+	case K_SADB_EXT_LIFETIME_HARD:
-+	case K_SADB_EXT_LIFETIME_SOFT:
-+	case K_SADB_EXT_LIFETIME_CURRENT:
-+		/* the only lifetime flavours this parser handles */
-+		break;
-+	default:
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_lifetime_parse: "
-+			  "unexpected ext_type=%d.\n",
-+			  pfkey_lifetime->sadb_lifetime_exttype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+		  "pfkey_lifetime_parse: "
-+		  "life_type=%d(%s) alloc=%u bytes=%u add=%u use=%u.\n",
-+		  pfkey_lifetime->sadb_lifetime_exttype,
-+		  pfkey_v2_sadb_ext_string(pfkey_lifetime->sadb_lifetime_exttype),
-+		  pfkey_lifetime->sadb_lifetime_allocations,
-+		  (unsigned)pfkey_lifetime->sadb_lifetime_bytes,
-+		  (unsigned)pfkey_lifetime->sadb_lifetime_addtime,
-+		  (unsigned)pfkey_lifetime->sadb_lifetime_usetime);
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Validate an address extension and, at PF_KEY_DEBUG_PARSE_STRUCT level,
-+ * log the address it carries.
-+ *
-+ * The sockaddr is taken to start immediately after the struct sadb_address
-+ * header.  Only AF_INET and AF_INET6 are accepted; the extension length must
-+ * equal the header plus the family's sockaddr size, rounded up to the PF_KEY
-+ * alignment; the reserved field must be zero; prefix lengths are rejected.
-+ *
-+ * Returns 0 on success, or
-+ *   -EINVAL        NULL pointer, bad length, or non-zero reserved field
-+ *   -ENODEV        unexpected extension type
-+ *   -EPFNOSUPPORT  address family other than AF_INET / AF_INET6
-+ *   -EAFNOSUPPORT  non-zero prefix length
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_address_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	int saddr_len = 0;
-+	struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext;
-+	struct sockaddr* s;
-+	char ipaddr_txt[ADDRTOT_BUF];
-+
-+	/* sanity checks... */
-+	if(!pfkey_address) {
-+		ERROR(
-+			"pfkey_address_parse: "
-+			"NULL pointer passed in.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* only derive the sockaddr pointer once the header pointer is known good */
-+	s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address));
-+
-+	/* coarse lower bound; the exact per-family length is checked below */
-+	if(pfkey_address->sadb_address_len <
-+	   (sizeof(struct sadb_address) + sizeof(struct sockaddr))/
-+	   IPSEC_PFKEYv2_ALIGN) {
-+		ERROR("pfkey_address_parse: "
-+		      "size wrong 1 ext_len=%d, adr_ext_len=%d, saddr_len=%d.\n",
-+		      pfkey_address->sadb_address_len,
-+		      (int)sizeof(struct sadb_address),
-+		      (int)sizeof(struct sockaddr));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_address->sadb_address_reserved) {
-+		ERROR("pfkey_address_parse: "
-+		      "res=%d, must be zero.\n",
-+		      pfkey_address->sadb_address_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	switch(pfkey_address->sadb_address_exttype) {
-+	case K_SADB_EXT_ADDRESS_SRC:
-+	case K_SADB_EXT_ADDRESS_DST:
-+	case K_SADB_EXT_ADDRESS_PROXY:
-+	case K_SADB_X_EXT_ADDRESS_DST2:
-+	case K_SADB_X_EXT_ADDRESS_SRC_FLOW:
-+	case K_SADB_X_EXT_ADDRESS_DST_FLOW:
-+	case K_SADB_X_EXT_ADDRESS_SRC_MASK:
-+	case K_SADB_X_EXT_ADDRESS_DST_MASK:
-+#ifdef NAT_TRAVERSAL
-+	case K_SADB_X_EXT_NAT_T_OA:
-+#endif
-+		break;
-+	default:
-+		ERROR(
-+			"pfkey_address_parse: "
-+			"unexpected ext_type=%d.\n",
-+			pfkey_address->sadb_address_exttype);
-+		SENDERR(ENODEV);
-+	}
-+
-+	switch(s->sa_family) {
-+	case AF_INET:
-+		saddr_len = sizeof(struct sockaddr_in);
-+		/* print the four address bytes in the order they are stored */
-+		sprintf(ipaddr_txt, "%d.%d.%d.%d"
-+			, (((struct sockaddr_in*)s)->sin_addr.s_addr >>  0) & 0xFF
-+			, (((struct sockaddr_in*)s)->sin_addr.s_addr >>  8) & 0xFF
-+			, (((struct sockaddr_in*)s)->sin_addr.s_addr >> 16) & 0xFF
-+			, (((struct sockaddr_in*)s)->sin_addr.s_addr >> 24) & 0xFF);
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+			  "pfkey_address_parse: "
-+			  "found exttype=%u(%s) family=%d(AF_INET) address=%s proto=%u port=%u.\n",
-+			  pfkey_address->sadb_address_exttype,
-+			  pfkey_v2_sadb_ext_string(pfkey_address->sadb_address_exttype),
-+			  s->sa_family,
-+			  ipaddr_txt,
-+			  pfkey_address->sadb_address_proto,
-+			  ntohs(((struct sockaddr_in*)s)->sin_port));
-+		break;
-+	case AF_INET6:
-+		saddr_len = sizeof(struct sockaddr_in6);
-+		sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x"
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[0])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[1])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[2])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[3])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[4])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[5])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[6])
-+			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[7]));
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+			  "pfkey_address_parse: "
-+			  "found exttype=%u(%s) family=%d(AF_INET6) address=%s proto=%u port=%u.\n",
-+			  pfkey_address->sadb_address_exttype,
-+			  pfkey_v2_sadb_ext_string(pfkey_address->sadb_address_exttype),
-+			  s->sa_family,
-+			  ipaddr_txt,
-+			  pfkey_address->sadb_address_proto,
-+			  /* convert to host order, as the AF_INET branch does */
-+			  ntohs(((struct sockaddr_in6*)s)->sin6_port));
-+		break;
-+	default:
-+		ERROR(
-+			"pfkey_address_parse: "
-+			"s->sa_family=%d not supported.\n",
-+			s->sa_family);
-+		SENDERR(EPFNOSUPPORT);
-+	}
-+
-+	/* exact length check now that the sockaddr size is known */
-+	if(pfkey_address->sadb_address_len !=
-+	   DIVUP(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN)) {
-+		ERROR(
-+			"pfkey_address_parse: "
-+			"size wrong 2 ext_len=%d, adr_ext_len=%d, saddr_len=%d.\n",
-+			pfkey_address->sadb_address_len,
-+			(int)sizeof(struct sadb_address),
-+			saddr_len);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_address->sadb_address_prefixlen != 0) {
-+		ERROR(
-+			"pfkey_address_parse: "
-+			"address prefixes not supported yet.\n");
-+		SENDERR(EAFNOSUPPORT); /* not supported yet */
-+	}
-+
-+	/* XXX check if port!=0 */
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+		  "pfkey_address_parse: successful.\n");
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * Check a key extension (KEY_AUTH or KEY_ENCRYPT).
-+ *
-+ * Verifies, in this order: non-NULL pointer, minimum length, non-zero key
-+ * size in bits, agreement between the key size and the extension length,
-+ * zero reserved field, and extension type.
-+ *
-+ * Returns 0 if every check passes, -EINVAL at the first one that fails.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_key_parse(struct sadb_ext *pfkey_ext)
-+{
-+	struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext;
-+	int error = 0;
-+
-+	if(pfkey_key == NULL) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"NULL pointer passed in.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* must hold at least the fixed header */
-+	if(pfkey_key->sadb_key_len < sizeof(struct sadb_key) / IPSEC_PFKEYv2_ALIGN) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"size wrong ext_len=%d, key_ext_len=%d.\n",
-+			pfkey_key->sadb_key_len,
-+			(int)sizeof(struct sadb_key));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_key->sadb_key_bits == 0) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"key length set to zero, must be non-zero.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* header bits plus key bits, rounded up to PF_KEY units, must equal the length */
-+	if(DIVUP(sizeof(struct sadb_key) * OCTETBITS + pfkey_key->sadb_key_bits,
-+		 PFKEYBITS) != pfkey_key->sadb_key_len) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"key length=%d does not agree with extension length=%d.\n",
-+			pfkey_key->sadb_key_bits,
-+			pfkey_key->sadb_key_len);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_key->sadb_key_reserved != 0) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"res=%d, must be zero.\n",
-+			pfkey_key->sadb_key_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* neither of the two key extension types */
-+	if((pfkey_key->sadb_key_exttype != K_SADB_EXT_KEY_AUTH) &&
-+	   (pfkey_key->sadb_key_exttype != K_SADB_EXT_KEY_ENCRYPT)) {
-+		ERROR(
-+			"pfkey_key_parse: "
-+			"expecting extension type AUTH or ENCRYPT, got %d.\n",
-+			pfkey_key->sadb_key_exttype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+		  "pfkey_key_parse: "
-+		  "success, found len=%d exttype=%d(%s) bits=%d reserved=%d.\n",
-+		  pfkey_key->sadb_key_len,
-+		  pfkey_key->sadb_key_exttype,
-+		  pfkey_v2_sadb_ext_string(pfkey_key->sadb_key_exttype),
-+		  pfkey_key->sadb_key_bits,
-+		  pfkey_key->sadb_key_reserved);
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Validate an identity extension (IDENTITY_SRC or IDENTITY_DST).
-+ *
-+ * Checks: non-NULL pointer, minimum length, identity type range, zero
-+ * reserved field, a zero final byte when a string follows the header, and
-+ * the extension type.
-+ *
-+ * Returns 0 if the extension is acceptable, -EINVAL otherwise.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_ident_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	struct sadb_ident *pfkey_ident = (struct sadb_ident *)pfkey_ext;
-+
-+	/* sanity checks... */
-+	if(!pfkey_ident) {
-+		/* same guard as the sa/address/key parsers */
-+		ERROR(
-+			"pfkey_ident_parse: "
-+			"NULL pointer passed in.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_ident->sadb_ident_len < sizeof(struct sadb_ident) / IPSEC_PFKEYv2_ALIGN) {
-+		ERROR(
-+			"pfkey_ident_parse: "
-+			"size wrong ext_len=%d, key_ext_len=%d.\n",
-+			pfkey_ident->sadb_ident_len,
-+			(int)sizeof(struct sadb_ident));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_ident->sadb_ident_type > K_SADB_IDENTTYPE_MAX) {
-+		ERROR(
-+			"pfkey_ident_parse: "
-+			"ident_type=%d out of range, must be less than %d.\n",
-+			pfkey_ident->sadb_ident_type,
-+			K_SADB_IDENTTYPE_MAX);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_ident->sadb_ident_reserved) {
-+		ERROR(
-+			"pfkey_ident_parse: "
-+			"res=%d, must be zero.\n",
-+			pfkey_ident->sadb_ident_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* string terminator/padding must be zero */
-+	if(pfkey_ident->sadb_ident_len > sizeof(struct sadb_ident) / IPSEC_PFKEYv2_ALIGN) {
-+		/* last byte of the extension; unsigned so that %02x prints two digits */
-+		unsigned char last = *((unsigned char*)pfkey_ident +
-+				       pfkey_ident->sadb_ident_len * IPSEC_PFKEYv2_ALIGN - 1);
-+
-+		if(last) {
-+			ERROR(
-+				"pfkey_ident_parse: "
-+				"string padding must be zero, last is 0x%02x.\n",
-+				last);
-+			SENDERR(EINVAL);
-+		}
-+	}
-+
-+	if( ! ((pfkey_ident->sadb_ident_exttype == K_SADB_EXT_IDENTITY_SRC) ||
-+	       (pfkey_ident->sadb_ident_exttype == K_SADB_EXT_IDENTITY_DST))) {
-+		ERROR(
-+			"pfkey_ident_parse: "
-+			"expecting extension type IDENTITY_SRC or IDENTITY_DST, got %d.\n",
-+			pfkey_ident->sadb_ident_exttype);
-+		SENDERR(EINVAL);
-+	}
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Minimal check of a sensitivity extension: only the length is verified.
-+ * The contents are not parsed; a debug message says so and the extension is
-+ * accepted (the rejection is compiled out with #if 0).
-+ *
-+ * Returns 0, or -EINVAL if the extension is shorter than struct sadb_sens.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_sens_parse(struct sadb_ext *pfkey_ext)
-+{
-+	struct sadb_sens *pfkey_sens = (struct sadb_sens *)pfkey_ext;
-+	const size_t min_len = sizeof(struct sadb_sens) / IPSEC_PFKEYv2_ALIGN;
-+	int error = 0;
-+
-+	/* too short to hold the fixed header */
-+	if(pfkey_sens->sadb_sens_len < min_len) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_sens_parse: "
-+			  "size wrong ext_len=%d, key_ext_len=%d.\n",
-+			  pfkey_sens->sadb_sens_len,
-+			  (int)sizeof(struct sadb_sens));
-+		SENDERR(EINVAL);
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+		  "pfkey_sens_parse: "
-+		  "Sorry, I can't parse exttype=%d yet.\n",
-+		  pfkey_ext->sadb_ext_type);
-+#if 0
-+	SENDERR(EINVAL); /* don't process these yet */
-+#endif
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Validate a proposal extension and every combination it contains.
-+ *
-+ * Header checks: the length must cover struct sadb_prop plus a whole number
-+ * of struct sadb_comb entries, the replay window must not exceed 64, and the
-+ * three reserved bytes must be zero.
-+ *
-+ * Per combination: algorithm ids must be in range; when an algorithm is
-+ * given its min/max key bits must be non-zero with min <= max, and when it
-+ * is absent both must be zero; each soft limit must not exceed a non-zero
-+ * hard limit.
-+ *
-+ * Returns 0 if everything is acceptable, -EINVAL at the first violation.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_prop_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	int i, num_comb;
-+	struct sadb_prop *pfkey_prop = (struct sadb_prop *)pfkey_ext;
-+	struct k_sadb_comb *k_pfkey_comb = (struct k_sadb_comb *)((char*)pfkey_ext + sizeof(struct sadb_prop));
-+	/*
-+	 * Cursor over the combinations.  It lives outside the loop so that the
-+	 * increment at the bottom of the loop actually moves on to the next
-+	 * entry; it steps by sizeof(struct sadb_comb), the same unit used to
-+	 * compute num_comb.
-+	 */
-+	struct sadb_comb *pfkey_comb = (struct sadb_comb *)k_pfkey_comb;
-+
-+	/* sanity checks... */
-+	if((pfkey_prop->sadb_prop_len < sizeof(struct sadb_prop) / IPSEC_PFKEYv2_ALIGN) ||
-+	   (((pfkey_prop->sadb_prop_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_prop)) % sizeof(struct sadb_comb))) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_prop_parse: "
-+			  "size wrong ext_len=%d, prop_ext_len=%d comb_ext_len=%d.\n",
-+			  pfkey_prop->sadb_prop_len,
-+			  (int)sizeof(struct sadb_prop),
-+			  (int)sizeof(struct sadb_comb));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_prop->sadb_prop_replay > 64) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_prop_parse: "
-+			  "replay window size: %d -- must be 0 <= size <= 64\n",
-+			  pfkey_prop->sadb_prop_replay);
-+		SENDERR(EINVAL);
-+	}
-+
-+	for(i=0; i<3; i++) {
-+		if(pfkey_prop->sadb_prop_reserved[i]) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "res[%d]=%d, must be zero.\n",
-+				  i, pfkey_prop->sadb_prop_reserved[i]);
-+			SENDERR(EINVAL);
-+		}
-+	}
-+
-+	/* number of combinations that follow the header */
-+	num_comb = ((pfkey_prop->sadb_prop_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_prop)) / sizeof(struct sadb_comb);
-+
-+	for(i = 0; i < num_comb; i++) {
-+		if(pfkey_comb->sadb_comb_auth > K_SADB_AALG_MAX) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_auth=%d > K_SADB_AALG_MAX=%d.\n",
-+				  i,
-+				  pfkey_comb->sadb_comb_auth,
-+				  K_SADB_AALG_MAX);
-+			SENDERR(EINVAL);
-+		}
-+
-+		if(pfkey_comb->sadb_comb_auth) {
-+			/* an auth algorithm needs a sane, non-empty key size range */
-+			if(!pfkey_comb->sadb_comb_auth_minbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_auth_minbits=0, fatal.\n",
-+					  i);
-+				SENDERR(EINVAL);
-+			}
-+			if(!pfkey_comb->sadb_comb_auth_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_auth_maxbits=0, fatal.\n",
-+					  i);
-+				SENDERR(EINVAL);
-+			}
-+			if(pfkey_comb->sadb_comb_auth_minbits > pfkey_comb->sadb_comb_auth_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_auth_minbits=%d > maxbits=%d, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_auth_minbits,
-+					  pfkey_comb->sadb_comb_auth_maxbits);
-+				SENDERR(EINVAL);
-+			}
-+		} else {
-+			/* no auth algorithm: both bounds must be zero */
-+			if(pfkey_comb->sadb_comb_auth_minbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_auth_minbits=%d != 0, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_auth_minbits);
-+				SENDERR(EINVAL);
-+			}
-+			if(pfkey_comb->sadb_comb_auth_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_auth_maxbits=%d != 0, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_auth_maxbits);
-+				SENDERR(EINVAL);
-+			}
-+		}
-+
-+#if K_SADB_EALG_MAX < 255
-+		if(pfkey_comb->sadb_comb_encrypt > K_SADB_EALG_MAX) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_encrypt=%d > K_SADB_EALG_MAX=%d.\n",
-+				  i,
-+				  pfkey_comb->sadb_comb_encrypt,
-+				  K_SADB_EALG_MAX);
-+			SENDERR(EINVAL);
-+		}
-+#endif
-+
-+		if(pfkey_comb->sadb_comb_encrypt) {
-+			/* an encryption algorithm needs a sane, non-empty key size range */
-+			if(!pfkey_comb->sadb_comb_encrypt_minbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_encrypt_minbits=0, fatal.\n",
-+					  i);
-+				SENDERR(EINVAL);
-+			}
-+			if(!pfkey_comb->sadb_comb_encrypt_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_encrypt_maxbits=0, fatal.\n",
-+					  i);
-+				SENDERR(EINVAL);
-+			}
-+			if(pfkey_comb->sadb_comb_encrypt_minbits > pfkey_comb->sadb_comb_encrypt_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_encrypt_minbits=%d > maxbits=%d, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_encrypt_minbits,
-+					  pfkey_comb->sadb_comb_encrypt_maxbits);
-+				SENDERR(EINVAL);
-+			}
-+		} else {
-+			/* no encryption algorithm: both bounds must be zero */
-+			if(pfkey_comb->sadb_comb_encrypt_minbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_encrypt_minbits=%d != 0, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_encrypt_minbits);
-+				SENDERR(EINVAL);
-+			}
-+			if(pfkey_comb->sadb_comb_encrypt_maxbits) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_prop_parse: "
-+					  "pfkey_comb[%d]->sadb_comb_encrypt_maxbits=%d != 0, fatal.\n",
-+					  i,
-+					  pfkey_comb->sadb_comb_encrypt_maxbits);
-+				SENDERR(EINVAL);
-+			}
-+		}
-+
-+		/* XXX do sanity check on flags */
-+
-+		/* a hard limit of 0 disables the soft-vs-hard comparison */
-+		if(pfkey_comb->sadb_comb_hard_allocations && pfkey_comb->sadb_comb_soft_allocations > pfkey_comb->sadb_comb_hard_allocations) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_soft_allocations=%d > hard_allocations=%d, fatal.\n",
-+				  i,
-+				  pfkey_comb->sadb_comb_soft_allocations,
-+				  pfkey_comb->sadb_comb_hard_allocations);
-+			SENDERR(EINVAL);
-+		}
-+
-+		if(pfkey_comb->sadb_comb_hard_bytes && pfkey_comb->sadb_comb_soft_bytes > pfkey_comb->sadb_comb_hard_bytes) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_soft_bytes=%Ld > hard_bytes=%Ld, fatal.\n",
-+				  i,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_soft_bytes,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_hard_bytes);
-+			SENDERR(EINVAL);
-+		}
-+
-+		if(pfkey_comb->sadb_comb_hard_addtime && pfkey_comb->sadb_comb_soft_addtime > pfkey_comb->sadb_comb_hard_addtime) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_soft_addtime=%Ld > hard_addtime=%Ld, fatal.\n",
-+				  i,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_soft_addtime,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_hard_addtime);
-+			SENDERR(EINVAL);
-+		}
-+
-+		if(pfkey_comb->sadb_comb_hard_usetime && pfkey_comb->sadb_comb_soft_usetime > pfkey_comb->sadb_comb_hard_usetime) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_comb_soft_usetime=%Ld > hard_usetime=%Ld, fatal.\n",
-+				  i,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_soft_usetime,
-+				  (unsigned long long int)pfkey_comb->sadb_comb_hard_usetime);
-+			SENDERR(EINVAL);
-+		}
-+
-+#ifdef COMB_PACKETS
-+		/*
-+		 * NOTE(review): this section is only built with COMB_PACKETS and
-+		 * could not be checked here; keep the k_ view on the same entry
-+		 * as the cursor so the logged values match the tested ones.
-+		 */
-+		k_pfkey_comb = (struct k_sadb_comb *)pfkey_comb;
-+		if(pfkey_comb->sadb_x_comb_hard_packets && pfkey_comb->sadb_x_comb_soft_packets > pfkey_comb->sadb_x_comb_hard_packets) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_prop_parse: "
-+				  "pfkey_comb[%d]->sadb_x_comb_soft_packets=%d > hard_packets=%d, fatal.\n",
-+				  i,
-+				  k_pfkey_comb->sadb_x_comb_soft_packets,
-+				  k_pfkey_comb->sadb_x_comb_hard_packets);
-+			SENDERR(EINVAL);
-+		}
-+#endif
-+
-+		/* move on to the next combination */
-+		pfkey_comb++;
-+	}
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Validate a "supported algorithms" extension and each algorithm
-+ * description that follows its header.
-+ *
-+ * The length must cover struct sadb_supported plus a whole number of
-+ * struct sadb_alg entries and the reserved field must be zero.  For every
-+ * entry the reserved field must be zero and the algorithm id must be within
-+ * the range that applies to the extension type (auth or encrypt); any other
-+ * extension type is rejected as soon as an entry is examined.
-+ *
-+ * Returns 0 if acceptable, -EINVAL otherwise.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_supported_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	unsigned int i, num_alg;
-+	struct sadb_supported *pfkey_supported = (struct sadb_supported *)pfkey_ext;
-+	struct sadb_alg *pfkey_alg = (struct sadb_alg*)((char*)pfkey_ext + sizeof(struct sadb_supported));
-+
-+	/* sanity checks... */
-+	if((pfkey_supported->sadb_supported_len <
-+	   sizeof(struct sadb_supported) / IPSEC_PFKEYv2_ALIGN) ||
-+	   (((pfkey_supported->sadb_supported_len * IPSEC_PFKEYv2_ALIGN) -
-+	     sizeof(struct sadb_supported)) % sizeof(struct sadb_alg))) {
-+
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_supported_parse: "
-+			  "size wrong ext_len=%d, supported_ext_len=%d alg_ext_len=%d.\n",
-+			  pfkey_supported->sadb_supported_len,
-+			  (int)sizeof(struct sadb_supported),
-+			  (int)sizeof(struct sadb_alg));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_supported->sadb_supported_reserved) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_supported_parse: "
-+			  "res=%d, must be zero.\n",
-+			  pfkey_supported->sadb_supported_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* number of algorithm descriptions that follow the header */
-+	num_alg = ((pfkey_supported->sadb_supported_len * IPSEC_PFKEYv2_ALIGN) - sizeof(struct sadb_supported)) / sizeof(struct sadb_alg);
-+
-+	for(i = 0; i < num_alg; i++) {
-+		/* process algo description */
-+		if(pfkey_alg->sadb_alg_reserved) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_supported_parse: "
-+				  "alg[%d], id=%d, ivlen=%d, minbits=%d, maxbits=%d, res=%d, must be zero.\n",
-+				  i,
-+				  pfkey_alg->sadb_alg_id,
-+				  pfkey_alg->sadb_alg_ivlen,
-+				  pfkey_alg->sadb_alg_minbits,
-+				  pfkey_alg->sadb_alg_maxbits,
-+				  pfkey_alg->sadb_alg_reserved);
-+			SENDERR(EINVAL);
-+		}
-+
-+		/* XXX can alg_id auth/enc be determined from info given?
-+		   Yes, but OpenBSD's method does not interoperate with rfc2367.
-+		   rgb, 2000-04-06 */
-+
-+		switch(pfkey_supported->sadb_supported_exttype) {
-+		case K_SADB_EXT_SUPPORTED_AUTH:
-+			if(pfkey_alg->sadb_alg_id > K_SADB_AALG_MAX) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_supported_parse: "
-+					  "alg[%d], alg_id=%d > K_SADB_AALG_MAX=%d, fatal.\n",
-+					  i,
-+					  pfkey_alg->sadb_alg_id,
-+					  K_SADB_AALG_MAX);
-+				SENDERR(EINVAL);
-+			}
-+			break;
-+		case SADB_EXT_SUPPORTED_ENCRYPT:
-+#if K_SADB_EALG_MAX < 255
-+			if(pfkey_alg->sadb_alg_id > K_SADB_EALG_MAX) {
-+				DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+					  "pfkey_supported_parse: "
-+					  "alg[%d], alg_id=%d > K_SADB_EALG_MAX=%d, fatal.\n",
-+					  i,
-+					  pfkey_alg->sadb_alg_id,
-+					  K_SADB_EALG_MAX);
-+				SENDERR(EINVAL);
-+			}
-+#endif
-+			break;
-+		default:
-+			/* neither an auth nor an encrypt list: report the real cause */
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_supported_parse: "
-+				  "alg[%d], unexpected exttype=%d, fatal.\n",
-+				  i,
-+				  pfkey_supported->sadb_supported_exttype);
-+			SENDERR(EINVAL);
-+		}
-+		pfkey_alg++;
-+	}
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * Validate an SPI range extension.
-+ *
-+ * The length must be exactly one struct sadb_spirange, the reserved field
-+ * must be zero, max must not be below min, and min must be above 255.  The
-+ * range fields are compared after ntohl(), i.e. they are treated as being
-+ * in network byte order.
-+ *
-+ * Returns 0 if acceptable, -EINVAL for length/reserved/ordering problems,
-+ * -EEXIST when min is 255 or less.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_spirange_parse(struct sadb_ext *pfkey_ext)
-+{
-+	int error = 0;
-+	struct sadb_spirange *pfkey_spirange = (struct sadb_spirange *)pfkey_ext;
-+
-+	/* sanity checks... */
-+	if(pfkey_spirange->sadb_spirange_len !=
-+	   sizeof(struct sadb_spirange) / IPSEC_PFKEYv2_ALIGN) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_spirange_parse: "
-+			  "size wrong ext_len=%d, key_ext_len=%d.\n",
-+			  pfkey_spirange->sadb_spirange_len,
-+			  (int)sizeof(struct sadb_spirange));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_spirange->sadb_spirange_reserved) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_spirange_parse: "
-+			  "reserved=%d must be set to zero.\n",
-+			  pfkey_spirange->sadb_spirange_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(ntohl(pfkey_spirange->sadb_spirange_max) < ntohl(pfkey_spirange->sadb_spirange_min)) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_spirange_parse: "
-+			  "minspi=%08x must be < maxspi=%08x.\n",
-+			  ntohl(pfkey_spirange->sadb_spirange_min),
-+			  ntohl(pfkey_spirange->sadb_spirange_max));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(ntohl(pfkey_spirange->sadb_spirange_min) <= 255) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_spirange_parse: "
-+			  "minspi=%08x must be > 255.\n",
-+			  ntohl(pfkey_spirange->sadb_spirange_min));
-+		SENDERR(EEXIST);
-+	}
-+
-+	/* log min/max in host order, matching the checks and messages above */
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+		  "pfkey_spirange_parse: "
-+		  "ext_len=%u ext_type=%u(%s) min=%u max=%u res=%u.\n",
-+		  pfkey_spirange->sadb_spirange_len,
-+		  pfkey_spirange->sadb_spirange_exttype,
-+		  pfkey_v2_sadb_ext_string(pfkey_spirange->sadb_spirange_exttype),
-+		  (unsigned int)ntohl(pfkey_spirange->sadb_spirange_min),
-+		  (unsigned int)ntohl(pfkey_spirange->sadb_spirange_max),
-+		  pfkey_spirange->sadb_spirange_reserved);
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * Handle a key-management-private extension.  The length and reserved field
-+ * are checked, but the extension itself is not supported: the function
-+ * always ends by returning -EINVAL.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_kmprivate_parse(struct sadb_ext *pfkey_ext)
-+{
-+	struct sadb_x_kmprivate *pfkey_x_kmprivate = (struct sadb_x_kmprivate *)pfkey_ext;
-+	const size_t min_len = sizeof(struct sadb_x_kmprivate) / IPSEC_PFKEYv2_ALIGN;
-+	int error = 0;
-+
-+	/* too short to hold the fixed header */
-+	if(pfkey_x_kmprivate->sadb_x_kmprivate_len < min_len) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_kmprivate_parse: "
-+			  "size wrong ext_len=%d, key_ext_len=%d.\n",
-+			  pfkey_x_kmprivate->sadb_x_kmprivate_len,
-+			  (int)sizeof(struct sadb_x_kmprivate));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_x_kmprivate->sadb_x_kmprivate_reserved != 0) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_kmprivate_parse: "
-+			  "reserved=%d must be set to zero.\n",
-+			  pfkey_x_kmprivate->sadb_x_kmprivate_reserved);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* well-formed, but nothing here knows how to interpret it */
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+		  "pfkey_x_kmprivate_parse: "
-+		  "Sorry, I can't parse exttype=%d yet.\n",
-+		  pfkey_ext->sadb_ext_type);
-+	SENDERR(EINVAL); /* don't process these yet */
-+
-+errlab:
-+	return error;
-+}
-+
-+/*
-+ * Check an X_SATYPE extension: exact length, a non-zero SA type that does
-+ * not exceed K_SADB_SATYPE_MAX and that satype2proto() can translate, and
-+ * three zero reserved bytes.
-+ *
-+ * Returns 0 if the extension passes, -EINVAL at the first failed check.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_satype_parse(struct sadb_ext *pfkey_ext)
-+{
-+	struct sadb_x_satype *pfkey_x_satype = (struct sadb_x_satype *)pfkey_ext;
-+	const size_t want_len = sizeof(struct sadb_x_satype) / IPSEC_PFKEYv2_ALIGN;
-+	int error = 0;
-+	int idx;
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+		  "pfkey_x_satype_parse: enter\n");
-+
-+	if(pfkey_x_satype->sadb_x_satype_len != want_len) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_satype_parse: "
-+			  "size wrong ext_len=%d, key_ext_len=%d.\n",
-+			  pfkey_x_satype->sadb_x_satype_len,
-+			  (int)sizeof(struct sadb_x_satype));
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_x_satype->sadb_x_satype_satype == 0) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_satype_parse: "
-+			  "satype is zero, must be non-zero.\n");
-+		SENDERR(EINVAL);
-+	}
-+
-+	if(pfkey_x_satype->sadb_x_satype_satype > K_SADB_SATYPE_MAX) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_satype_parse: "
-+			  "satype %d > max %d, invalid.\n",
-+			  pfkey_x_satype->sadb_x_satype_satype, K_SADB_SATYPE_MAX);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* a zero protocol means the SA type is not in the translation table */
-+	if(satype2proto(pfkey_x_satype->sadb_x_satype_satype) == 0) {
-+		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+			  "pfkey_x_satype_parse: "
-+			  "proto lookup from satype=%d failed.\n",
-+			  pfkey_x_satype->sadb_x_satype_satype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	idx = 0;
-+	while(idx < 3) {
-+		if(pfkey_x_satype->sadb_x_satype_reserved[idx] != 0) {
-+			DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+				  "pfkey_x_satype_parse: "
-+				  "reserved[%d]=%d must be set to zero.\n",
-+				  idx, pfkey_x_satype->sadb_x_satype_reserved[idx]);
-+			SENDERR(EINVAL);
-+		}
-+		idx++;
-+	}
-+
-+	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+		  "pfkey_x_satype_parse: "
-+		  "len=%u ext=%u(%s) satype=%u(%s) res=%u,%u,%u.\n",
-+		  pfkey_x_satype->sadb_x_satype_len,
-+		  pfkey_x_satype->sadb_x_satype_exttype,
-+		  pfkey_v2_sadb_ext_string(pfkey_x_satype->sadb_x_satype_exttype),
-+		  pfkey_x_satype->sadb_x_satype_satype,
-+		  satype2name(pfkey_x_satype->sadb_x_satype_satype),
-+		  pfkey_x_satype->sadb_x_satype_reserved[0],
-+		  pfkey_x_satype->sadb_x_satype_reserved[1],
-+		  pfkey_x_satype->sadb_x_satype_reserved[2]);
-+
-+errlab:
-+	return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_debug_parse(struct sadb_ext *pfkey_ext)
-+{
-+ int error = 0;
-+ int i;
-+ struct sadb_x_debug *pfkey_x_debug = (struct sadb_x_debug *)pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+ "pfkey_x_debug_parse: enter\n");
-+ /* sanity checks... */
-+ if(pfkey_x_debug->sadb_x_debug_len !=
-+ sizeof(struct sadb_x_debug) / IPSEC_PFKEYv2_ALIGN) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_x_debug_parse: "
-+ "size wrong ext_len=%d, key_ext_len=%d.\n",
-+ pfkey_x_debug->sadb_x_debug_len,
-+ (int)sizeof(struct sadb_x_debug));
-+ SENDERR(EINVAL);
-+ }
-+
-+ for(i = 0; i < 4; i++) {
-+ if(pfkey_x_debug->sadb_x_debug_reserved[i]) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_x_debug_parse: "
-+ "reserved[%d]=%d must be set to zero.\n",
-+ i, pfkey_x_debug->sadb_x_debug_reserved[i]);
-+ SENDERR(EINVAL);
-+ }
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_protocol_parse(struct sadb_ext *pfkey_ext)
-+{
-+ int error = 0;
-+ struct sadb_protocol *p = (struct sadb_protocol *)pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_x_protocol_parse:\n");
-+ /* sanity checks... */
-+
-+ if (p->sadb_protocol_len != sizeof(*p)/IPSEC_PFKEYv2_ALIGN) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_x_protocol_parse: size wrong ext_len=%d, key_ext_len=%d.\n",
-+ p->sadb_protocol_len, (int)sizeof(*p));
-+ SENDERR(EINVAL);
-+ }
-+
-+ if (p->sadb_protocol_reserved2 != 0) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_protocol_parse: res=%d, must be zero.\n",
-+ p->sadb_protocol_reserved2);
-+ SENDERR(EINVAL);
-+ }
-+
-+ errlab:
-+ return error;
-+}
-+
-+#ifdef NAT_TRAVERSAL
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_nat_t_type_parse(struct sadb_ext *pfkey_ext)
-+{
-+ return 0;
-+}
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_nat_t_port_parse(struct sadb_ext *pfkey_ext)
-+{
-+ return 0;
-+}
-+#endif
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_outif_parse(struct sadb_ext *pfkey_ext)
-+{
-+ int error = 0;
-+ struct sadb_x_plumbif *p = (struct sadb_x_plumbif *)pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_x_outif_parse:\n");
-+ /* sanity checks... */
-+
-+ if (p->sadb_x_outif_len != IPSEC_PFKEYv2_WORDS(sizeof(*p))) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_x_outif_parse: size wrong ext_len=%d, key_ext_len=%d.\n",
-+ p->sadb_x_outif_len, (int)sizeof(*p));
-+ SENDERR(EINVAL);
-+ }
-+
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_ext_saref_parse(struct sadb_ext *pfkey_ext)
-+{
-+ int error = 0;
-+ struct sadb_x_saref *p = (struct sadb_x_saref *)pfkey_ext;
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_x_saref_parse:\n");
-+ /* sanity checks... */
-+
-+ if (p->sadb_x_saref_len != IPSEC_PFKEYv2_WORDS(sizeof(*p))) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_x_saref_parse: size wrong ext_len=%d, key_ext_len=%d.\n",
-+ p->sadb_x_saref_len, (int)sizeof(*p));
-+ SENDERR(EINVAL);
-+ }
-+
-+ errlab:
-+ return error;
-+}
-+
-+
-+#define DEFINEPARSER(NAME) static struct pf_key_ext_parsers_def NAME##_def={NAME, #NAME};
-+
-+DEFINEPARSER(pfkey_sa_parse);
-+DEFINEPARSER(pfkey_lifetime_parse);
-+DEFINEPARSER(pfkey_address_parse);
-+DEFINEPARSER(pfkey_key_parse);
-+DEFINEPARSER(pfkey_ident_parse);
-+DEFINEPARSER(pfkey_sens_parse);
-+DEFINEPARSER(pfkey_prop_parse);
-+DEFINEPARSER(pfkey_supported_parse);
-+DEFINEPARSER(pfkey_spirange_parse);
-+DEFINEPARSER(pfkey_x_kmprivate_parse);
-+DEFINEPARSER(pfkey_x_satype_parse);
-+DEFINEPARSER(pfkey_x_ext_debug_parse);
-+DEFINEPARSER(pfkey_x_ext_protocol_parse);
-+#ifdef NAT_TRAVERSAL
-+DEFINEPARSER(pfkey_x_ext_nat_t_type_parse);
-+DEFINEPARSER(pfkey_x_ext_nat_t_port_parse);
-+#endif
-+DEFINEPARSER(pfkey_x_ext_outif_parse);
-+DEFINEPARSER(pfkey_x_ext_saref_parse);
-+
-+struct pf_key_ext_parsers_def *ext_default_parsers[]=
-+{
-+ NULL, /* pfkey_msg_parse, */
-+ &pfkey_sa_parse_def,
-+ &pfkey_lifetime_parse_def,
-+ &pfkey_lifetime_parse_def,
-+ &pfkey_lifetime_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_key_parse_def,
-+ &pfkey_key_parse_def,
-+ &pfkey_ident_parse_def,
-+ &pfkey_ident_parse_def,
-+ &pfkey_sens_parse_def,
-+ &pfkey_prop_parse_def,
-+ &pfkey_supported_parse_def,
-+ &pfkey_supported_parse_def,
-+ &pfkey_spirange_parse_def,
-+ &pfkey_x_kmprivate_parse_def,
-+ &pfkey_x_satype_parse_def,
-+ &pfkey_sa_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_address_parse_def,
-+ &pfkey_x_ext_debug_parse_def,
-+ &pfkey_x_ext_protocol_parse_def,
-+#ifdef NAT_TRAVERSAL
-+ &pfkey_x_ext_nat_t_type_parse_def,
-+ &pfkey_x_ext_nat_t_port_parse_def,
-+ &pfkey_x_ext_nat_t_port_parse_def,
-+ &pfkey_address_parse_def,
-+#else
-+ NULL,NULL,NULL,NULL,
-+#endif
-+ &pfkey_x_ext_outif_parse_def,
-+ &pfkey_x_ext_saref_parse_def,
-+};
-+
-+int
-+pfkey_msg_parse(struct sadb_msg *pfkey_msg,
-+ struct pf_key_ext_parsers_def *ext_parsers[],
-+ struct sadb_ext *extensions[],
-+ int dir)
-+{
-+ int error = 0;
-+ int remain;
-+ struct sadb_ext *pfkey_ext;
-+ pfkey_ext_track extensions_seen = 0;
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+ "pfkey_msg_parse: "
-+ "parsing message ver=%d, type=%d(%s), errno=%d, satype=%d(%s), len=%d, res=%d, seq=%d, pid=%d.\n",
-+ pfkey_msg->sadb_msg_version,
-+ pfkey_msg->sadb_msg_type,
-+ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type),
-+ pfkey_msg->sadb_msg_errno,
-+ pfkey_msg->sadb_msg_satype,
-+ satype2name(pfkey_msg->sadb_msg_satype),
-+ pfkey_msg->sadb_msg_len,
-+ pfkey_msg->sadb_msg_reserved,
-+ pfkey_msg->sadb_msg_seq,
-+ pfkey_msg->sadb_msg_pid);
-+
-+ if(ext_parsers == NULL) ext_parsers = ext_default_parsers;
-+
-+ pfkey_extensions_init(extensions);
-+
-+ remain = pfkey_msg->sadb_msg_len;
-+ remain -= IPSEC_PFKEYv2_WORDS(sizeof(struct sadb_msg));
-+
-+ pfkey_ext = (struct sadb_ext*)((char*)pfkey_msg +
-+ sizeof(struct sadb_msg));
-+
-+ extensions[0] = (struct sadb_ext *) pfkey_msg;
-+
-+
-+ if(pfkey_msg->sadb_msg_version != PF_KEY_V2) {
-+ ERROR("pfkey_msg_parse: "
-+ "not PF_KEY_V2 msg, found %d, should be %d.\n",
-+ pfkey_msg->sadb_msg_version,
-+ PF_KEY_V2);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!pfkey_msg->sadb_msg_type) {
-+ ERROR("pfkey_msg_parse: "
-+ "msg type not set, must be non-zero..\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(pfkey_msg->sadb_msg_type > K_SADB_MAX) {
-+ ERROR("pfkey_msg_parse: "
-+ "msg type=%d > max=%d.\n",
-+ pfkey_msg->sadb_msg_type,
-+ K_SADB_MAX);
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_msg->sadb_msg_type) {
-+ case K_SADB_GETSPI:
-+ case K_SADB_UPDATE:
-+ case K_SADB_ADD:
-+ case K_SADB_DELETE:
-+ case K_SADB_GET:
-+ case K_SADB_X_GRPSA:
-+ case K_SADB_X_ADDFLOW:
-+ if(!satype2proto(pfkey_msg->sadb_msg_satype)) {
-+ ERROR("pfkey_msg_parse: "
-+ "satype %d conversion to proto failed for msg_type %d (%s).\n",
-+ pfkey_msg->sadb_msg_satype,
-+ pfkey_msg->sadb_msg_type,
-+ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type));
-+ SENDERR(EINVAL);
-+ } else {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "satype %d(%s) conversion to proto gives %d for msg_type %d(%s).\n",
-+ pfkey_msg->sadb_msg_satype,
-+ satype2name(pfkey_msg->sadb_msg_satype),
-+ satype2proto(pfkey_msg->sadb_msg_satype),
-+ pfkey_msg->sadb_msg_type,
-+ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type));
-+ }
-+ case K_SADB_ACQUIRE:
-+ case K_SADB_REGISTER:
-+ case K_SADB_EXPIRE:
-+ if(!pfkey_msg->sadb_msg_satype) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "satype is zero, must be non-zero for msg_type %d(%s).\n",
-+ pfkey_msg->sadb_msg_type,
-+ pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type));
-+ SENDERR(EINVAL);
-+ }
-+ default:
-+ break;
-+ }
-+
-+ /* errno must not be set in downward messages */
-+ /* this is not entirely true... a response to an ACQUIRE could return an error */
-+ if((dir == EXT_BITS_IN) && (pfkey_msg->sadb_msg_type != K_SADB_ACQUIRE) && pfkey_msg->sadb_msg_errno) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "errno set to %d.\n",
-+ pfkey_msg->sadb_msg_errno);
-+ SENDERR(EINVAL);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+ "pfkey_msg_parse: "
-+ "remain=%d\n",
-+ remain
-+ );
-+
-+ extensions_seen = 1;
-+
-+ while( (remain * IPSEC_PFKEYv2_ALIGN) >= sizeof(struct sadb_ext) ) {
-+ /* Is there enough message left to support another extension header? */
-+ if(remain < pfkey_ext->sadb_ext_len) {
-+ ERROR("pfkey_msg_parse: "
-+ "remain %d less than ext len %d.\n",
-+ remain, pfkey_ext->sadb_ext_len);
-+ SENDERR(EINVAL);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+ "pfkey_msg_parse: "
-+ "parsing ext type=%d(%s) remain=%d.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type),
-+ remain);
-+
-+ /* Is the extension header type valid? */
-+ if((pfkey_ext->sadb_ext_type > K_SADB_EXT_MAX) || (!pfkey_ext->sadb_ext_type)) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "ext type %d(%s) invalid, K_SADB_EXT_MAX=%d.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type),
-+ K_SADB_EXT_MAX);
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* Have we already seen this type of extension? */
-+ if(extensions[pfkey_ext->sadb_ext_type] != NULL)
-+ {
-+ ERROR("pfkey_msg_parse: "
-+ "ext type %d(%s) already seen.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type));
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* Do I even know about this type of extension? */
-+ if(ext_parsers[pfkey_ext->sadb_ext_type]==NULL) {
-+ ERROR("pfkey_msg_parse: "
-+ "ext type %d(%s) unknown, ignoring.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type));
-+ goto next_ext;
-+ }
-+
-+ /* Is this type of extension permitted for this type of message? */
-+ if(!pfkey_permitted_extension(dir,pfkey_msg->sadb_msg_type,pfkey_ext->sadb_ext_type)) {
-+ ERROR("ext type %d(%s) not permitted (parse)\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type));
-+ SENDERR(EINVAL);
-+ }
-+
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
-+ "pfkey_msg_parse: "
-+ "remain=%d ext_type=%d(%s) ext_len=%d parsing ext 0p%p with parser %s.\n",
-+ remain,
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type),
-+ pfkey_ext->sadb_ext_len,
-+ pfkey_ext,
-+ ext_parsers[pfkey_ext->sadb_ext_type]->parser_name);
-+
-+ /* Parse the extension */
-+ if((error =
-+ (*ext_parsers[pfkey_ext->sadb_ext_type]->parser)(pfkey_ext))) {
-+ ERROR("pfkey_msg_parse: "
-+ "extension parsing for type %d(%s) failed with error %d.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type),
-+ error);
-+ SENDERR(-error);
-+ }
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
-+ "pfkey_msg_parse: "
-+ "Extension %d(%s) parsed.\n",
-+ pfkey_ext->sadb_ext_type,
-+ pfkey_v2_sadb_ext_string(pfkey_ext->sadb_ext_type));
-+
-+ /* Mark that we have seen this extension and remember the header location */
-+ extensions[pfkey_ext->sadb_ext_type] = pfkey_ext;
-+ pfkey_mark_extension(pfkey_ext->sadb_ext_type,&extensions_seen);
-+
-+ next_ext:
-+ /* Calculate how much message remains */
-+ remain -= pfkey_ext->sadb_ext_len;
-+
-+ if(!remain) {
-+ break;
-+ }
-+ /* Find the next extension header */
-+ pfkey_ext = (struct sadb_ext*)((char*)pfkey_ext +
-+ pfkey_ext->sadb_ext_len * IPSEC_PFKEYv2_ALIGN);
-+ }
-+
-+ if(remain) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "unexpected remainder of %d.\n",
-+ remain);
-+ /* why is there still something remaining? */
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* don't check further if it is an error return message since it
-+ may not have a body */
-+ if(pfkey_msg->sadb_msg_errno) {
-+ SENDERR(-error);
-+ }
-+
-+ if(pfkey_extensions_missing(dir,pfkey_msg->sadb_msg_type,extensions_seen)) {
-+ ERROR("required extensions missing.seen=%08llx.\n",(unsigned long long)extensions_seen);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if((dir == EXT_BITS_IN) && (pfkey_msg->sadb_msg_type == K_SADB_X_DELFLOW)
-+ && ((extensions_seen & K_SADB_X_EXT_ADDRESS_DELFLOW)
-+ != K_SADB_X_EXT_ADDRESS_DELFLOW)
-+ && (((extensions_seen & (1<<SADB_EXT_SA)) != (1<<SADB_EXT_SA))
-+ || ((((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_flags
-+ & SADB_X_SAFLAGS_CLEARFLOW)
-+ != SADB_X_SAFLAGS_CLEARFLOW))) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "required SADB_X_DELFLOW extensions missing: either %16llx must be present or %16llx must be present with SADB_X_SAFLAGS_CLEARFLOW set.\n",
-+ (unsigned long long)K_SADB_X_EXT_ADDRESS_DELFLOW
-+ - (extensions_seen & K_SADB_X_EXT_ADDRESS_DELFLOW),
-+ (unsigned long long)(1<<SADB_EXT_SA) - (extensions_seen & (1<<SADB_EXT_SA)));
-+ SENDERR(EINVAL);
-+ }
-+
-+ switch(pfkey_msg->sadb_msg_type) {
-+ case K_SADB_ADD:
-+ case K_SADB_UPDATE:
-+ /* check maturity */
-+ if(((struct sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state !=
-+ K_SADB_SASTATE_MATURE) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "state=%d for add or update should be MATURE=%d.\n",
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_state,
-+ K_SADB_SASTATE_MATURE);
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* check AH and ESP */
-+ switch(((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype) {
-+ case SADB_SATYPE_AH:
-+ if(!(((struct k_sadb_sa*)extensions[SADB_EXT_SA]) &&
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_auth !=
-+ SADB_AALG_NONE)) {
-+ ERROR("pfkey_msg_parse: "
-+ "auth alg is zero, must be non-zero for AH SAs.\n");
-+ SENDERR(EINVAL);
-+ }
-+ if(((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt !=
-+ SADB_EALG_NONE) {
-+ ERROR("pfkey_msg_parse: "
-+ "AH handed encalg=%d, must be zero.\n",
-+ ((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt);
-+ SENDERR(EINVAL);
-+ }
-+ break;
-+ case SADB_SATYPE_ESP:
-+ if(!(((struct k_sadb_sa*)extensions[SADB_EXT_SA]) &&
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt !=
-+ SADB_EALG_NONE)) {
-+ ERROR("pfkey_msg_parse: "
-+ "encrypt alg=%d is zero, must be non-zero for ESP=%d SAs.\n",
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt,
-+ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype);
-+ SENDERR(EINVAL);
-+ }
-+ if((((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_encrypt ==
-+ SADB_EALG_NULL) &&
-+ (((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth ==
-+ SADB_AALG_NONE) ) {
-+ ERROR("pfkey_msg_parse: "
-+ "ESP handed encNULL+authNONE, illegal combination.\n");
-+ SENDERR(EINVAL);
-+ }
-+ break;
-+ case K_SADB_X_SATYPE_COMP:
-+ if(!(((struct k_sadb_sa*)extensions[SADB_EXT_SA]) &&
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt !=
-+ SADB_EALG_NONE)) {
-+ ERROR("pfkey_msg_parse: "
-+ "encrypt alg=%d is zero, must be non-zero for COMP=%d SAs.\n",
-+ ((struct k_sadb_sa*)extensions[SADB_EXT_SA])->sadb_sa_encrypt,
-+ ((struct sadb_msg*)extensions[SADB_EXT_RESERVED])->sadb_msg_satype);
-+ SENDERR(EINVAL);
-+ }
-+ if(((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth !=
-+ SADB_AALG_NONE) {
-+ ERROR("pfkey_msg_parse: "
-+ "COMP handed auth=%d, must be zero.\n",
-+ ((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_auth);
-+ SENDERR(EINVAL);
-+ }
-+ break;
-+ default:
-+ break;
-+ }
-+ if(ntohl(((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_spi) <= 255) {
-+ DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
-+ "pfkey_msg_parse: "
-+ "spi=%08x must be > 255.\n",
-+ ntohl(((struct k_sadb_sa*)(extensions[SADB_EXT_SA]))->sadb_sa_spi));
-+ SENDERR(EINVAL);
-+ }
-+ default:
-+ break;
-+ }
-+
-+errlab:
-+ return error;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/pfkey_v2_parser.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,3063 @@
-+/*
-+ * @(#) RFC2367 PF_KEYv2 Key management API message parser
-+ * Copyright (C) 1999, 2000, 2001 Richard Guy Briggs <rgb@freeswan.org>
-+ *
-+ * OCF support written by David McCullough <dmccullough@cyberguard.com>
-+ * Copyright (C) 2004-2005 Intel Corporation. All Rights Reserved.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ */
-+
-+/*
-+ * Template from klips/net/ipsec/ipsec/ipsec_netlink.c.
-+ */
-+
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+
-+#include <openswan.h>
-+
-+#include <klips-crypto/des.h>
-+
-+#ifdef SPINLOCK
-+# ifdef SPINLOCK_23
-+# include <linux/spinlock.h> /* *lock* */
-+# else /* SPINLOCK_23 */
-+# include <asm/spinlock.h> /* *lock* */
-+# endif /* SPINLOCK_23 */
-+#endif /* SPINLOCK */
-+#ifdef NET_21
-+# include <net/route.h> /* inet_addr_type */
-+# include <linux/in6.h>
-+# define IS_MYADDR RTN_LOCAL
-+#endif
-+
-+#include <net/ip.h>
-+#ifdef NETLINK_SOCK
-+# include <linux/netlink.h>
-+#else
-+# include <net/netlink.h>
-+#endif
-+
-+#include <linux/random.h> /* get_random_bytes() */
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_sa.h"
-+
-+#include "openswan/ipsec_radij.h"
-+#include "openswan/ipsec_xform.h"
-+#include "openswan/ipsec_ah.h"
-+#include "openswan/ipsec_esp.h"
-+#include "openswan/ipsec_tunnel.h"
-+#include "openswan/ipsec_mast.h"
-+#include "openswan/ipsec_rcv.h"
-+#include "openswan/ipcomp.h"
-+
-+#include <openswan/pfkeyv2.h>
-+#include <openswan/pfkey.h>
-+
-+#include "openswan/ipsec_proto.h"
-+#include "openswan/ipsec_alg.h"
-+
-+#include "openswan/ipsec_kern24.h"
-+
-+#include "ipsec_ocf.h"
-+
-+#define SENDERR(_x) do { error = -(_x); goto errlab; } while (0)
-+
-+struct sklist_t {
-+ struct socket *sk;
-+ struct sklist_t* next;
-+} pfkey_sklist_head, *pfkey_sklist, *pfkey_sklist_prev;
-+
-+__u32 pfkey_msg_seq = 0;
-+
-+
-+#if 0
-+#define DUMP_SAID dump_said(&extr->ips->ips_said, __LINE__)
-+#define DUMP_SAID2 dump_said(&extr.ips->ips_said, __LINE__)
-+static void dump_said(ip_said *s, int line)
-+{
-+ char msa[SATOT_BUF];
-+ size_t msa_len;
-+
-+ msa_len = satot(s, 0, msa, sizeof(msa));
-+
-+ printk("line: %d msa: %s\n", line, msa);
-+}
-+#endif
-+
-+
-+int
-+pfkey_alloc_eroute(struct eroute** eroute)
-+{
-+ int error = 0;
-+ if(*eroute) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_alloc_eroute: "
-+ "eroute struct already allocated\n");
-+ SENDERR(EEXIST);
-+ }
-+
-+ if((*eroute = kmalloc(sizeof(**eroute), GFP_ATOMIC) ) == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_alloc_eroute: "
-+ "memory allocation error\n");
-+ SENDERR(ENOMEM);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_alloc_eroute: "
-+ "allocating %lu bytes for an eroute at 0p%p\n",
-+ (unsigned long) sizeof(**eroute), *eroute);
-+
-+ memset((caddr_t)*eroute, 0, sizeof(**eroute));
-+ (*eroute)->er_eaddr.sen_len =
-+ (*eroute)->er_emask.sen_len = sizeof(struct sockaddr_encap);
-+ (*eroute)->er_eaddr.sen_family =
-+ (*eroute)->er_emask.sen_family = AF_ENCAP;
-+ (*eroute)->er_eaddr.sen_type = SENT_IP4;
-+ (*eroute)->er_emask.sen_type = 255;
-+ (*eroute)->er_pid = 0;
-+ (*eroute)->er_count = 0;
-+ (*eroute)->er_lasttime = jiffies/HZ;
-+
-+ errlab:
-+ return(error);
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_protocol_process(struct sadb_ext *pfkey_ext,
-+ struct pfkey_extracted_data *extr)
-+{
-+ int error = 0;
-+ struct sadb_protocol * p = (struct sadb_protocol *)pfkey_ext;
-+
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_protocol_process: %p\n", extr);
-+
-+ if (extr == 0) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_protocol_process:"
-+ "extr is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+ if (extr->eroute == 0) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_protocol_process:"
-+ "extr->eroute is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ extr->eroute->er_eaddr.sen_proto = p->sadb_protocol_proto;
-+ extr->eroute->er_emask.sen_proto = p->sadb_protocol_proto ? ~0:0;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_protocol_process: protocol = %d.\n",
-+ p->sadb_protocol_proto);
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_ipsec_sa_init(struct ipsec_sa *ipsp)
-+{
-+ int rc;
-+ KLIPS_PRINT(debug_pfkey, "Calling SA_INIT\n");
-+ rc = ipsec_sa_init(ipsp);
-+ return rc;
-+}
-+
-+int
-+pfkey_safe_build(int error, struct sadb_ext *extensions[K_SADB_MAX+1])
-+{
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build: "
-+ "error=%d\n",
-+ error);
-+ if (!error) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build:"
-+ "success.\n");
-+ return 1;
-+ } else {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_safe_build:"
-+ "caught error %d\n",
-+ error);
-+ pfkey_extensions_free(extensions);
-+ return 0;
-+ }
-+}
-+
-+
-+DEBUG_NO_STATIC int
-+pfkey_getspi_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ ipsec_spi_t minspi = htonl(256), maxspi = htonl(-1L);
-+ int found_avail = 0;
-+ struct ipsec_sa *ipsq;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ if(extr == NULL || extr->ips == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: "
-+ "error, extr or extr->ipsec_sa pointer NULL\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(extensions[K_SADB_EXT_SPIRANGE]) {
-+ minspi = ((struct sadb_spirange *)extensions[K_SADB_EXT_SPIRANGE])->sadb_spirange_min;
-+ maxspi = ((struct sadb_spirange *)extensions[K_SADB_EXT_SPIRANGE])->sadb_spirange_max;
-+ }
-+
-+ if(maxspi == minspi) {
-+ extr->ips->ips_said.spi = maxspi;
-+ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if(ipsq != NULL) {
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+ ipsec_sa_put(ipsq);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: "
-+ "EMT_GETSPI found an old ipsec_sa for SA: %s, delete it first.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(EEXIST);
-+ } else {
-+ found_avail = 1;
-+ }
-+ } else {
-+ int i = 0;
-+ __u32 rand_val;
-+ __u32 spi_diff;
-+ while( ( i < (spi_diff = (ntohl(maxspi) - ntohl(minspi)))) && !found_avail ) {
-+ prng_bytes(&ipsec_prng, (char *) &(rand_val),
-+ ( (spi_diff < (2^8)) ? 1 :
-+ ( (spi_diff < (2^16)) ? 2 :
-+ ( (spi_diff < (2^24)) ? 3 :
-+ 4 ) ) ) );
-+ extr->ips->ips_said.spi = htonl(ntohl(minspi) +
-+ (rand_val %
-+ (spi_diff + 1)));
-+ i++;
-+ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if(ipsq == NULL) {
-+ found_avail = 1;
-+ } else {
-+ ipsec_sa_put(ipsq);
-+ }
-+ }
-+ }
-+
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ if (!found_avail) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: "
-+ "found an old ipsec_sa for SA: %s, delete it first.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(EEXIST);
-+ }
-+
-+ if(ip_chk_addr((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == IS_MYADDR) {
-+ extr->ips->ips_flags |= EMT_INBOUND;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: "
-+ "existing ipsec_sa not found (this is good) for SA: %s, %s-bound, allocating.\n",
-+ sa_len ? sa : " (error)",
-+ extr->ips->ips_flags & EMT_INBOUND ? "in" : "out");
-+
-+ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/
-+ extr->ips->ips_rcvif = NULL;
-+ extr->ips->ips_life.ipl_addtime.ipl_count = jiffies/HZ;
-+
-+ extr->ips->ips_state = K_SADB_SASTATE_LARVAL;
-+
-+ if(!extr->ips->ips_life.ipl_allocations.ipl_count) {
-+ extr->ips->ips_life.ipl_allocations.ipl_count += 1;
-+ }
-+
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_GETSPI,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ 0,
-+ K_SADB_SASTATE_LARVAL,
-+ 0,
-+ 0,
-+ 0),
-+ extensions_reply)
-+
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply) )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: "
-+ "failed to build the getspi reply message extensions\n");
-+ goto errlab;
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: "
-+ "failed to build the getspi reply message\n");
-+ SENDERR(-error);
-+ }
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: "
-+ "sending up getspi reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: "
-+ "sending up getspi reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ if((error = ipsec_sa_add(extr->ips))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_getspi_parse: "
-+ "failed to add the larval SA=%s with error=%d.\n",
-+ sa_len ? sa : " (error)",
-+ error);
-+ SENDERR(-error);
-+ }
-+ extr->ips = NULL;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_getspi_parse: "
-+ "successful for SA: %s\n",
-+ sa_len ? sa : " (error)");
-+
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_update_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct ipsec_sa* ipsq;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ struct ipsec_sa *nat_t_ips_saved = NULL;
-+#endif
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ if(((struct sadb_sa*)extensions[K_SADB_EXT_SA])->sadb_sa_state != K_SADB_SASTATE_MATURE) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "error, sa_state=%d must be MATURE=%d\n",
-+ ((struct sadb_sa*)extensions[K_SADB_EXT_SA])->sadb_sa_state,
-+ K_SADB_SASTATE_MATURE);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(extr == NULL || extr->ips == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "error, extr or extr->ips pointer NULL\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ spin_lock_bh(&tdb_lock);
-+
-+ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if (ipsq == NULL) {
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "reserved ipsec_sa for SA: %s not found. Call K_SADB_GETSPI first or call K_SADB_ADD instead.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(ENOENT);
-+ }
-+
-+ if(ip_chk_addr((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == IS_MYADDR) {
-+ extr->ips->ips_flags |= EMT_INBOUND;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "existing ipsec_sa found (this is good) for SA: %s, %s-bound, updating.\n",
-+ sa_len ? sa : " (error)",
-+ extr->ips->ips_flags & EMT_INBOUND ? "in" : "out");
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ if (extr->ips->ips_natt_sport || extr->ips->ips_natt_dport) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: only updating NAT-T ports "
-+ "(%u:%u -> %u:%u)\n",
-+ ipsq->ips_natt_sport, ipsq->ips_natt_dport,
-+ extr->ips->ips_natt_sport, extr->ips->ips_natt_dport);
-+
-+ if (extr->ips->ips_natt_sport) {
-+ ipsq->ips_natt_sport = extr->ips->ips_natt_sport;
-+ if (ipsq->ips_addr_s->sa_family == AF_INET) {
-+ ((struct sockaddr_in *)(ipsq->ips_addr_s))->sin_port = htons(extr->ips->ips_natt_sport);
-+ }
-+ }
-+
-+ if (extr->ips->ips_natt_dport) {
-+ ipsq->ips_natt_dport = extr->ips->ips_natt_dport;
-+ if (ipsq->ips_addr_d->sa_family == AF_INET) {
-+ ((struct sockaddr_in *)(ipsq->ips_addr_d))->sin_port = htons(extr->ips->ips_natt_dport);
-+ }
-+ }
-+
-+ nat_t_ips_saved = extr->ips;
-+ extr->ips = ipsq;
-+ }
-+ else
-+#endif
-+ {
-+ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/
-+ extr->ips->ips_rcvif = NULL;
-+ if ((error = pfkey_ipsec_sa_init(extr->ips))) {
-+ ipsec_sa_put(ipsq);
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "not successful for SA: %s, deleting.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(-error);
-+ }
-+
-+ extr->ips->ips_life.ipl_addtime.ipl_count = ipsq->ips_life.ipl_addtime.ipl_count;
-+
-+ /* this will call delchain-equivalent if refcount=>0 */
-+ ipsec_sa_put(ipsq);
-+ }
-+
-+ spin_unlock_bh(&tdb_lock);
-+
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_UPDATE,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ /* The 3 lifetime extentions should only be sent if non-zero. */
-+ && (extensions[K_SADB_EXT_LIFETIME_HARD]
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_HARD],
-+ K_SADB_EXT_LIFETIME_HARD,
-+ extr->ips->ips_life.ipl_allocations.ipl_hard,
-+ extr->ips->ips_life.ipl_bytes.ipl_hard,
-+ extr->ips->ips_life.ipl_addtime.ipl_hard,
-+ extr->ips->ips_life.ipl_usetime.ipl_hard,
-+ extr->ips->ips_life.ipl_packets.ipl_hard),
-+ extensions_reply) : 1)
-+ && (extensions[K_SADB_EXT_LIFETIME_SOFT]
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_SOFT],
-+ K_SADB_EXT_LIFETIME_SOFT,
-+ extr->ips->ips_life.ipl_allocations.ipl_count,
-+ extr->ips->ips_life.ipl_bytes.ipl_count,
-+ extr->ips->ips_life.ipl_addtime.ipl_count,
-+ extr->ips->ips_life.ipl_usetime.ipl_count,
-+ extr->ips->ips_life.ipl_packets.ipl_count),
-+ extensions_reply) : 1)
-+ && (extr->ips->ips_life.ipl_allocations.ipl_count
-+ || extr->ips->ips_life.ipl_bytes.ipl_count
-+ || extr->ips->ips_life.ipl_addtime.ipl_count
-+ || extr->ips->ips_life.ipl_usetime.ipl_count
-+ || extr->ips->ips_life.ipl_packets.ipl_count
-+
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_CURRENT],
-+ K_SADB_EXT_LIFETIME_CURRENT,
-+ extr->ips->ips_life.ipl_allocations.ipl_count,
-+ extr->ips->ips_life.ipl_bytes.ipl_count,
-+ extr->ips->ips_life.ipl_addtime.ipl_count,
-+ extr->ips->ips_life.ipl_usetime.ipl_count,
-+ extr->ips->ips_life.ipl_packets.ipl_count),
-+ extensions_reply) : 1)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply)
-+ && (extr->ips->ips_ident_s.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_SRC],
-+ K_SADB_EXT_IDENTITY_SRC,
-+ extr->ips->ips_ident_s.type,
-+ extr->ips->ips_ident_s.id,
-+ extr->ips->ips_ident_s.len,
-+ extr->ips->ips_ident_s.data),
-+ extensions_reply) : 1)
-+ && (extr->ips->ips_ident_d.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_DST],
-+ K_SADB_EXT_IDENTITY_DST,
-+ extr->ips->ips_ident_d.type,
-+ extr->ips->ips_ident_d.id,
-+ extr->ips->ips_ident_d.len,
-+ extr->ips->ips_ident_d.data),
-+ extensions_reply) : 1)
-+#if 0
-+ /* FIXME: This won't work yet because I have not finished
-+ it. */
-+ && (extr->ips->ips_sens_
-+ ? pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[K_SADB_EXT_SENSITIVITY],
-+ extr->ips->ips_sens_dpd,
-+ extr->ips->ips_sens_sens_level,
-+ extr->ips->ips_sens_sens_len,
-+ extr->ips->ips_sens_sens_bitmap,
-+ extr->ips->ips_sens_integ_level,
-+ extr->ips->ips_sens_integ_len,
-+ extr->ips->ips_sens_integ_bitmap),
-+ extensions_reply) : 1)
-+#endif
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: "
-+ "failed to build the update reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: "
-+ "failed to build the update reply message\n");
-+ SENDERR(-error);
-+ }
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: "
-+ "sending up update reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: "
-+ "sending up update reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ if (nat_t_ips_saved) {
-+ /**
-+ * As we _really_ update existing SA, we keep tdbq and need to delete
-+ * parsed ips (nat_t_ips_saved, was extr->ips).
-+ *
-+ * goto errlab with extr->ips = nat_t_ips_saved will free it.
-+ */
-+
-+ extr->ips = nat_t_ips_saved;
-+
-+ error = 0;
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse (NAT-T ports): "
-+ "successful for SA: %s\n",
-+ sa_len ? sa : " (error)");
-+
-+ goto errlab;
-+ }
-+#endif
-+
-+ if((error = ipsec_sa_add(extr->ips))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_update_parse: "
-+ "failed to update the mature SA=%s with error=%d.\n",
-+ sa_len ? sa : " (error)",
-+ error);
-+ SENDERR(-error);
-+ }
-+ extr->ips = NULL;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_update_parse: "
-+ "successful for SA: %s\n",
-+ sa_len ? sa : " (error)");
-+
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+/*
-+ * pfkey_add_parse - handle an ADD request: install the SA that was parsed
-+ * into extr->ips and broadcast a K_SADB_ADD reply.
-+ *
-+ * sk:         requesting socket; not referenced in this function body.
-+ * extensions: parsed request. Slot K_SADB_EXT_RESERVED is read as the
-+ *             struct sadb_msg header and slot K_SADB_EXT_SA as a
-+ *             struct sadb_sa. Both are dereferenced without a NULL check
-+ *             here - presumably the caller guarantees them; TODO confirm.
-+ * extr:       extracted data; extr->ips is the candidate SA. On the success
-+ *             path it is passed to ipsec_sa_add() and extr->ips is cleared.
-+ *
-+ * Returns the value left in 'error' (0 on the success path). Failure paths
-+ * leave through SENDERR(), which is defined outside this excerpt and
-+ * presumably stores the code in 'error' and jumps to errlab - confirm the
-+ * sign convention against its definition.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_add_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct ipsec_sa* ipsq;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ /* Only an SA whose requested state is MATURE may be added. */
-+ if(((struct sadb_sa*)extensions[K_SADB_EXT_SA])->sadb_sa_state != K_SADB_SASTATE_MATURE) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: "
-+ "error, sa_state=%d must be MATURE=%d\n",
-+ ((struct sadb_sa*)extensions[K_SADB_EXT_SA])->sadb_sa_state,
-+ K_SADB_SASTATE_MATURE);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: "
-+ "extr or extr->ips pointer NULL\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* Textual form of the SA id, used only in the log messages below. */
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ /* Refuse to add over an existing SA with the same id. The put() drops
-+ * the reference that the lookup presumably took. */
-+ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if(ipsq != NULL) {
-+ ipsec_sa_put(ipsq);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: "
-+ "found an old ipsec_sa for SA%s, delete it first.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(EEXIST);
-+ }
-+
-+ /* Flag the SA as inbound when ip_chk_addr() reports its destination
-+ * address as IS_MYADDR. */
-+ if(ip_chk_addr((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == IS_MYADDR) {
-+ extr->ips->ips_flags |= EMT_INBOUND;
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: "
-+ "existing ipsec_sa not found (this is good) for SA%s, %s-bound, allocating.\n",
-+ sa_len ? sa : " (error)",
-+ extr->ips->ips_flags & EMT_INBOUND ? "in" : "out");
-+
-+ /* XXX extr->ips->ips_rcvif = &(enc_softc[em->em_if].enc_if);*/
-+ extr->ips->ips_rcvif = NULL;
-+
-+ if ((error = ipsec_sa_init(extr->ips))) {
-+ KLIPS_ERROR(debug_pfkey,
-+ "pfkey_add_parse: "
-+ "not successful for SA: %s, deleting.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(-error);
-+ }
-+
-+ /* Adopt the SAref values supplied with the request, but only where the
-+ * SA does not already carry one. */
-+ if(extr->sarefme!=IPSEC_SAREF_NULL
-+ && extr->ips->ips_ref==IPSEC_SAREF_NULL) {
-+ extr->ips->ips_ref=extr->sarefme;
-+ }
-+
-+ if(extr->sarefhim!=IPSEC_SAREF_NULL
-+ && extr->ips->ips_refhim==IPSEC_SAREF_NULL) {
-+ extr->ips->ips_refhim=extr->sarefhim;
-+ }
-+
-+ /* attach it to the SAref table */
-+ if((error = ipsec_sa_intern(extr->ips)) != 0) {
-+ KLIPS_ERROR(debug_pfkey,
-+ "pfkey_add_parse: "
-+ "failed to intern SA as SAref#%lu\n"
-+ , (unsigned long)extr->ips->ips_ref);
-+ SENDERR(-error);
-+ }
-+
-+ /* Record the add time in seconds (jiffies / HZ) and make sure the
-+ * allocation counter is non-zero. */
-+ extr->ips->ips_life.ipl_addtime.ipl_count = jiffies / HZ;
-+ if(!extr->ips->ips_life.ipl_allocations.ipl_count) {
-+ extr->ips->ips_life.ipl_allocations.ipl_count += 1;
-+ }
-+
-+ /* Build the reply extensions in one short-circuit chain: header, SA and
-+ * SAref always; the optional ones only under the conditions noted
-+ * below. The body runs when any step of the chain yields false. */
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_ADD,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_saref_build(&extensions_reply[K_SADB_X_EXT_SAREF],
-+ extr->ips->ips_ref,
-+ extr->ips->ips_refhim),
-+ extensions_reply)
-+ /* The hard and soft lifetime extensions are echoed only when the
-+ * request itself carried the corresponding extension. */
-+ && (extensions[K_SADB_EXT_LIFETIME_HARD]
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_HARD],
-+ K_SADB_EXT_LIFETIME_HARD,
-+ extr->ips->ips_life.ipl_allocations.ipl_hard,
-+ extr->ips->ips_life.ipl_bytes.ipl_hard,
-+ extr->ips->ips_life.ipl_addtime.ipl_hard,
-+ extr->ips->ips_life.ipl_usetime.ipl_hard,
-+ extr->ips->ips_life.ipl_packets.ipl_hard),
-+ extensions_reply) : 1)
-+ && (extensions[K_SADB_EXT_LIFETIME_SOFT]
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_SOFT],
-+ K_SADB_EXT_LIFETIME_SOFT,
-+ extr->ips->ips_life.ipl_allocations.ipl_soft,
-+ extr->ips->ips_life.ipl_bytes.ipl_soft,
-+ extr->ips->ips_life.ipl_addtime.ipl_soft,
-+ extr->ips->ips_life.ipl_usetime.ipl_soft,
-+ extr->ips->ips_life.ipl_packets.ipl_soft),
-+ extensions_reply) : 1)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply)
-+ /* Identity extensions are included only when identity data is set. */
-+ && (extr->ips->ips_ident_s.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_SRC],
-+ K_SADB_EXT_IDENTITY_SRC,
-+ extr->ips->ips_ident_s.type,
-+ extr->ips->ips_ident_s.id,
-+ extr->ips->ips_ident_s.len,
-+ extr->ips->ips_ident_s.data),
-+ extensions_reply) : 1)
-+ && (extr->ips->ips_ident_d.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_DST],
-+ K_SADB_EXT_IDENTITY_DST,
-+ extr->ips->ips_ident_d.type,
-+ extr->ips->ips_ident_d.id,
-+ extr->ips->ips_ident_d.len,
-+ extr->ips->ips_ident_d.data),
-+ extensions_reply) : 1)
-+#if 0
-+ /* FIXME: This won't work yet because I have not finished
-+ it. */
-+ && (extr->ips->ips_sens_
-+ ? pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[K_SADB_EXT_SENSITIVITY],
-+ extr->ips->ips_sens_dpd,
-+ extr->ips->ips_sens_sens_level,
-+ extr->ips->ips_sens_sens_len,
-+ extr->ips->ips_sens_sens_bitmap,
-+ extr->ips->ips_sens_integ_level,
-+ extr->ips->ips_sens_integ_len,
-+ extr->ips->ips_sens_integ_bitmap),
-+ extensions_reply) : 1)
-+#endif
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: "
-+ "failed to build the add reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: "
-+ "failed to build the add reply message\n");
-+ SENDERR(-error);
-+ }
-+ /* Send the reply to every socket on pfkey_open_sockets; the first
-+ * delivery failure aborts the whole request. */
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: "
-+ "sending up add reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: "
-+ "sending up add reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ /* An outif other than 0 or -1 selects a mast device for this SA. */
-+ if(extr->outif != 0 && extr->outif != -1) {
-+ extr->ips->ips_out = ipsec_mast_get_device(extr->outif);
-+ extr->ips->ips_transport_direct = ipsec_mast_is_transport(extr->outif);
-+ }
-+
-+ /* NOTE(review): the reply above has already been sent when this insert
-+ * runs, so listeners may see an ADD reply for an SA whose insertion
-+ * then fails - confirm this ordering is intended. */
-+ if((error = ipsec_sa_add(extr->ips))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_add_parse: "
-+ "failed to add the mature SA=%s with error=%d.\n",
-+ sa_len ? sa : " (error)",
-+ error);
-+ SENDERR(-error);
-+ }
-+ /* Release this function's hold on the SA and clear extr->ips;
-+ * presumably so the caller does not free an SA that is now in the
-+ * table - verify against the caller. */
-+ ipsec_sa_put(extr->ips);
-+ extr->ips = NULL;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_add_parse: "
-+ "successful for SA: %s\n",
-+ sa_len ? sa : " (error)");
-+
-+ /* Common exit: release the built reply and the reply extension array. */
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+/*
-+ * pfkey_delete_parse - handle a DELETE request: remove the SA identified by
-+ * extr->ips->ips_said and broadcast a K_SADB_DELETE reply.
-+ *
-+ * sk:         requesting socket; not referenced in this function body.
-+ * extensions: parsed request; slot K_SADB_EXT_RESERVED is read as the
-+ *             struct sadb_msg header (satype, seq, pid) without a NULL
-+ *             check - presumably guaranteed by the caller; TODO confirm.
-+ * extr:       extracted data; extr->ips supplies the SA id to look up and
-+ *             the spi/addresses echoed in the reply.
-+ *
-+ * Returns the value left in 'error' (0 on the success path). Failure paths
-+ * leave through SENDERR(), defined outside this excerpt; it presumably sets
-+ * 'error' and jumps to errlab.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_delete_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ struct ipsec_sa *ipsp;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ int error = 0;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+ IPsecSAref_t ref;
-+ struct sadb_builds sab;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_delete_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_delete_parse: "
-+ "extr or extr->ips pointer NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* Textual form of the SA id, used only in the log messages below. */
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ /* Lookup and removal both happen with tdb_lock held. */
-+ spin_lock_bh(&tdb_lock);
-+
-+ ipsp = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if (ipsp == NULL) {
-+ /* Release the lock before leaving through SENDERR(). */
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_delete_parse: "
-+ "ipsec_sa not found for SA:%s, could not delete.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(ESRCH);
-+ }
-+
-+ /* remove it from SAref tables */
-+ /* The SAref is copied first so it can still be reported in the reply
-+ * once the SA has been released. */
-+ ref = ipsp->ips_ref;
-+ ipsec_sa_untern(ipsp);
-+ ipsec_sa_rm(ipsp);
-+
-+ /* this will call delchain-equivalent if refcount -> 0
-+ * noting that get() above, added to ref count */
-+ ipsec_sa_put(ipsp);
-+ spin_unlock_bh(&tdb_lock);
-+
-+ /* Describe the deleted SA for the reply: only the spi (taken from the
-+ * request's extr->ips) and the saved SAref are set; every other field
-+ * is zero. */
-+ memset(&sab, 0, sizeof(sab));
-+ sab.sa_base.sadb_sa_exttype = K_SADB_EXT_SA;
-+ sab.sa_base.sadb_sa_spi = extr->ips->ips_said.spi;
-+ sab.sa_base.sadb_sa_replay = 0;
-+ sab.sa_base.sadb_sa_state = 0;
-+ sab.sa_base.sadb_sa_auth = 0;
-+ sab.sa_base.sadb_sa_encrypt = 0;
-+ sab.sa_base.sadb_sa_flags = 0;
-+ sab.sa_base.sadb_x_sa_ref = ref;
-+
-+ /* Build header, SA and both address extensions in one short-circuit
-+ * chain. The SA has already been removed by this point, so a failure
-+ * from here on reports an error although the delete took effect. */
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_DELETE,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_builds(&extensions_reply[K_SADB_EXT_SA], sab),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply)
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: "
-+ "failed to build the delete reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: "
-+ "failed to build the delete reply message\n");
-+ SENDERR(-error);
-+ }
-+ /* Send the reply to every socket on pfkey_open_sockets; the first
-+ * delivery failure stops the loop. */
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: "
-+ "sending up delete reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_delete_parse: "
-+ "sending up delete reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ /* Common exit: release the built reply and the reply extension array. */
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+/*
-+ * pfkey_get_parse - handle a GET request: look up the SA identified by
-+ * extr->ips->ips_said and send a K_SADB_GET reply to the requesting socket
-+ * only (sk->sk_socket).
-+ *
-+ * sk:         requesting socket; the reply is delivered to sk->sk_socket.
-+ * extensions: parsed request; slot K_SADB_EXT_RESERVED is read as the
-+ *             struct sadb_msg header (satype, seq, pid) without a NULL
-+ *             check - presumably guaranteed by the caller; TODO confirm.
-+ * extr:       extracted data; extr->ips supplies the SA id to look up.
-+ *
-+ * Returns the value left in 'error' (0 on the success path). Failure paths
-+ * leave through SENDERR(), defined outside this excerpt; it presumably sets
-+ * 'error' and jumps to errlab.
-+ *
-+ * NOTE(review): only the three lifetime extensions are filled from the SA
-+ * found in the table (ipsp). The SA, address and identity extensions are
-+ * filled from extr->ips, i.e. from the request as parsed. Confirm whether
-+ * those should come from ipsp as well.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_get_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct ipsec_sa *ipsp;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_get_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ if(!extr || !extr->ips) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_get_parse: "
-+ "extr or extr->ips pointer NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* Textual form of the SA id, used only in the log messages below. */
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ /* tdb_lock is held from the lookup until all reply extensions have
-+ * been built from the SA. */
-+ spin_lock_bh(&tdb_lock);
-+
-+ ipsp = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if (ipsp == NULL) {
-+ /* Release the lock before leaving through SENDERR(). */
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: "
-+ "ipsec_sa not found for SA=%s, could not get.\n",
-+ sa_len ? sa : " (error)");
-+ SENDERR(ESRCH);
-+ }
-+
-+ /* Build every reply extension in one short-circuit chain; the body
-+ * runs when any step of the chain yields false. */
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_GET,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ /* Each of the 3 lifetime extensions (current, hard, soft) is sent
-+ * only if at least one of its five values is non-zero. */
-+ && (ipsp->ips_life.ipl_allocations.ipl_count
-+ || ipsp->ips_life.ipl_bytes.ipl_count
-+ || ipsp->ips_life.ipl_addtime.ipl_count
-+ || ipsp->ips_life.ipl_usetime.ipl_count
-+ || ipsp->ips_life.ipl_packets.ipl_count
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_CURRENT],
-+ K_SADB_EXT_LIFETIME_CURRENT,
-+ ipsp->ips_life.ipl_allocations.ipl_count,
-+ ipsp->ips_life.ipl_bytes.ipl_count,
-+ ipsp->ips_life.ipl_addtime.ipl_count,
-+ ipsp->ips_life.ipl_usetime.ipl_count,
-+ ipsp->ips_life.ipl_packets.ipl_count),
-+ extensions_reply) : 1)
-+ && (ipsp->ips_life.ipl_allocations.ipl_hard
-+ || ipsp->ips_life.ipl_bytes.ipl_hard
-+ || ipsp->ips_life.ipl_addtime.ipl_hard
-+ || ipsp->ips_life.ipl_usetime.ipl_hard
-+ || ipsp->ips_life.ipl_packets.ipl_hard
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_HARD],
-+ K_SADB_EXT_LIFETIME_HARD,
-+ ipsp->ips_life.ipl_allocations.ipl_hard,
-+ ipsp->ips_life.ipl_bytes.ipl_hard,
-+ ipsp->ips_life.ipl_addtime.ipl_hard,
-+ ipsp->ips_life.ipl_usetime.ipl_hard,
-+ ipsp->ips_life.ipl_packets.ipl_hard),
-+ extensions_reply) : 1)
-+ && (ipsp->ips_life.ipl_allocations.ipl_soft
-+ || ipsp->ips_life.ipl_bytes.ipl_soft
-+ || ipsp->ips_life.ipl_addtime.ipl_soft
-+ || ipsp->ips_life.ipl_usetime.ipl_soft
-+ || ipsp->ips_life.ipl_packets.ipl_soft
-+ ? pfkey_safe_build(error = pfkey_lifetime_build(&extensions_reply[K_SADB_EXT_LIFETIME_SOFT],
-+ K_SADB_EXT_LIFETIME_SOFT,
-+ ipsp->ips_life.ipl_allocations.ipl_soft,
-+ ipsp->ips_life.ipl_bytes.ipl_soft,
-+ ipsp->ips_life.ipl_addtime.ipl_soft,
-+ ipsp->ips_life.ipl_usetime.ipl_soft,
-+ ipsp->ips_life.ipl_packets.ipl_soft),
-+ extensions_reply) : 1)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply)
-+ /* The proxy address is included only when one is set. */
-+ && (extr->ips->ips_addr_p
-+ ? pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_PROXY],
-+ K_SADB_EXT_ADDRESS_PROXY,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_p),
-+ extensions_reply) : 1)
-+#if 0
-+ /* FIXME: This won't work yet because the keys are not
-+ stored directly in the ipsec_sa. They are stored as
-+ contexts. */
-+ && (extr->ips->ips_key_a_size
-+ ? pfkey_safe_build(error = pfkey_key_build(&extensions_reply[K_SADB_EXT_KEY_AUTH],
-+ K_SADB_EXT_KEY_AUTH,
-+ extr->ips->ips_key_a_size * 8,
-+ extr->ips->ips_key_a),
-+ extensions_reply) : 1)
-+ /* FIXME: This won't work yet because the keys are not
-+ stored directly in the ipsec_sa. They are stored as
-+ key schedules. */
-+ && (extr->ips->ips_key_e_size
-+ ? pfkey_safe_build(error = pfkey_key_build(&extensions_reply[K_SADB_EXT_KEY_ENCRYPT],
-+ K_SADB_EXT_KEY_ENCRYPT,
-+ extr->ips->ips_key_e_size * 8,
-+ extr->ips->ips_key_e),
-+ extensions_reply) : 1)
-+#endif
-+ /* Identity extensions are included only when identity data is set. */
-+ && (extr->ips->ips_ident_s.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_SRC],
-+ K_SADB_EXT_IDENTITY_SRC,
-+ extr->ips->ips_ident_s.type,
-+ extr->ips->ips_ident_s.id,
-+ extr->ips->ips_ident_s.len,
-+ extr->ips->ips_ident_s.data),
-+ extensions_reply) : 1)
-+ && (extr->ips->ips_ident_d.data
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions_reply[K_SADB_EXT_IDENTITY_DST],
-+ K_SADB_EXT_IDENTITY_DST,
-+ extr->ips->ips_ident_d.type,
-+ extr->ips->ips_ident_d.id,
-+ extr->ips->ips_ident_d.len,
-+ extr->ips->ips_ident_d.data),
-+ extensions_reply) : 1)
-+#if 0
-+ /* FIXME: This won't work yet because I have not finished
-+ it. */
-+ && (extr->ips->ips_sens_
-+ ? pfkey_safe_build(error = pfkey_sens_build(&extensions_reply[K_SADB_EXT_SENSITIVITY],
-+ extr->ips->ips_sens_dpd,
-+ extr->ips->ips_sens_sens_level,
-+ extr->ips->ips_sens_sens_len,
-+ extr->ips->ips_sens_sens_bitmap,
-+ extr->ips->ips_sens_integ_level,
-+ extr->ips->ips_sens_integ_len,
-+ extr->ips->ips_sens_integ_bitmap),
-+ extensions_reply) : 1)
-+#endif
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: "
-+ "failed to build the get reply message extensions\n");
-+ /* Failure path: release the SA and the lock before SENDERR(). */
-+ ipsec_sa_put(ipsp);
-+ spin_unlock_bh(&tdb_lock);
-+ SENDERR(-error);
-+ }
-+
-+ /* Success path: the SA is no longer needed once the extensions exist. */
-+ ipsec_sa_put(ipsp);
-+ spin_unlock_bh(&tdb_lock);
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: "
-+ "failed to build the get reply message\n");
-+ SENDERR(-error);
-+ }
-+
-+ /* The reply goes to the requesting socket only; this function does not
-+ * iterate over pfkey_open_sockets. */
-+ if((error = pfkey_upmsg(sk->sk_socket, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: "
-+ "failed to send the get reply message\n");
-+ SENDERR(-error);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_get_parse: "
-+ "succeeded in sending get reply message.\n");
-+
-+ /* Common exit: release the built reply and the reply extension array. */
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+/*
-+ * pfkey_acquire_parse - relay an ACQUIRE message, unchanged, to every socket
-+ * registered for the message's SA type.
-+ *
-+ * sk:         requesting socket; not referenced here.
-+ * extensions: parsed message; slot K_SADB_EXT_RESERVED holds the
-+ *             struct sadb_msg that is forwarded as-is.
-+ * extr:       extracted data; not referenced here.
-+ *
-+ * Returns the value left in 'error' (0 when every delivery succeeded).
-+ * Failures leave through SENDERR(), which is defined elsewhere in the file.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_acquire_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+	struct sadb_msg *request = (struct sadb_msg *)extensions[K_SADB_EXT_RESERVED];
-+	uint8_t satype = request->sadb_msg_satype;
-+	struct socket_list *listener;
-+	int error = 0;
-+
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_acquire_parse: .\n");
-+
-+	/* XXX I don't know if we want an upper bound, since userspace may
-+	   want to register itself for an satype > K_SADB_SATYPE_MAX. */
-+	if(satype == 0 || satype > K_SADB_SATYPE_MAX) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_acquire_parse: "
-+			    "SATYPE=%d invalid.\n",
-+			    satype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* Nobody listening for this SA type: nothing can be relayed. */
-+	if(pfkey_registered_sockets[satype] == NULL) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: "
-+			    "no sockets registered for SAtype=%d(%s).\n",
-+			    satype,
-+			    satype2name(satype));
-+		SENDERR(EPROTONOSUPPORT);
-+	}
-+
-+	/* Walk the registration list; the first failed delivery aborts. */
-+	listener = pfkey_registered_sockets[satype];
-+	while(listener != NULL) {
-+		error = pfkey_upmsg(listener->socketp, request);
-+		if(error) {
-+			KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: "
-+				    "sending up acquire reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+				    satype,
-+				    satype2name(satype),
-+				    listener->socketp,
-+				    error);
-+			SENDERR(-error);
-+		}
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire_parse: "
-+			    "sending up acquire reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+			    satype,
-+			    satype2name(satype),
-+			    listener->socketp);
-+		listener = listener->next;
-+	}
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_register_parse - put the requesting socket on the registration list
-+ * for the message's SA type, then send the REGISTER reply.
-+ *
-+ * sk:         requesting socket; sk->sk_socket is what gets registered.
-+ * extensions: parsed message; slot K_SADB_EXT_RESERVED holds the
-+ *             struct sadb_msg header.
-+ * extr:       extracted data; not referenced here.
-+ *
-+ * Returns the result of pfkey_register_reply(), or the code set by
-+ * SENDERR() (defined elsewhere in the file) when the SA type is rejected.
-+ * The return value of pfkey_list_insert_socket() only controls the debug
-+ * message; it is not propagated.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_register_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+	struct sadb_msg *request = (struct sadb_msg *)extensions[K_SADB_EXT_RESERVED];
-+	uint8_t satype = request->sadb_msg_satype;
-+	int error = 0;
-+
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_register_parse: .\n");
-+
-+	/* XXX I don't know if we want an upper bound, since userspace may
-+	   want to register itself for an satype > K_SADB_SATYPE_MAX. */
-+	if(satype == 0 || satype > K_SADB_SATYPE_MAX) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_parse: "
-+			    "SATYPE=%d invalid.\n",
-+			    satype);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* A zero result from the insert is what gets logged as success. */
-+	if(pfkey_list_insert_socket(sk->sk_socket,
-+				    &pfkey_registered_sockets[satype]) == 0) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_parse: "
-+			    "SATYPE=%02d(%s) successfully registered by KMd (pid=%d).\n",
-+			    satype,
-+			    satype2name(satype),
-+			    key_pid(sk));
-+	}
-+
-+	/* send up register msg with supported SATYPE algos */
-+	error = pfkey_register_reply(satype, request);
-+
-+ errlab:
-+	return error;
-+}
-+
-+/*
-+ * pfkey_register_reply - send a K_SADB_REGISTER message listing the supported
-+ * authentication and encryption algorithms to every socket registered for
-+ * 'satype'.
-+ *
-+ * satype:   SA type whose algorithm list is reported. Must lie in
-+ *           1..K_SADB_SATYPE_MAX; it is a signed int and indexes two arrays,
-+ *           so negative values are rejected as well as 0 and too-large ones.
-+ * sadb_msg: request being answered, or NULL. When NULL the reply uses
-+ *           ++pfkey_msg_seq as sequence number and current->pid as pid;
-+ *           otherwise both are copied from the request.
-+ *
-+ * Returns the value left in 'error' (0 when every delivery succeeded).
-+ * Failures leave through SENDERR(), which is defined elsewhere in the file.
-+ */
-+int
-+pfkey_register_reply(int satype, struct sadb_msg *sadb_msg)
-+{
-+	struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+	struct sadb_msg *pfkey_reply = NULL;
-+	struct socket_list *pfkey_socketsp;
-+	struct supported_list *pfkey_supported_listp;
-+	unsigned int alg_num_a = 0, alg_num_e = 0;
-+	struct sadb_alg *alg_a = NULL, *alg_e = NULL, *alg_ap = NULL, *alg_ep = NULL;
-+	int error = 0;
-+
-+	pfkey_extensions_init(extensions_reply);
-+
-+	/* satype is signed: reject negatives too, or the array lookups below
-+	 * would read out of bounds. */
-+	if((satype <= 0) || (satype > K_SADB_SATYPE_MAX)) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+			    "SAtype=%d unspecified or unknown.\n",
-+			    satype);
-+		SENDERR(EINVAL);
-+	}
-+	if(!(pfkey_registered_sockets[satype])) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+			    "no sockets registered for SAtype=%d(%s).\n",
-+			    satype,
-+			    satype2name(satype));
-+		SENDERR(EPROTONOSUPPORT);
-+	}
-+
-+	/* send up register msg with supported SATYPE algos */
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_register_reply: "
-+		    "pfkey_supported_list[%d]=0p%p\n",
-+		    satype,
-+		    pfkey_supported_list[satype]);
-+
-+	/* Pass 1: count the auth and encrypt entries to size the arrays. */
-+	for(pfkey_supported_listp = pfkey_supported_list[satype];
-+	    pfkey_supported_listp;
-+	    pfkey_supported_listp = pfkey_supported_listp->next) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_reply: "
-+			    "checking supported=0p%p\n",
-+			    pfkey_supported_listp);
-+		if(pfkey_supported_listp->supportedp->ias_exttype == K_SADB_EXT_SUPPORTED_AUTH) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "adding auth alg.\n");
-+			alg_num_a++;
-+		}
-+		if(pfkey_supported_listp->supportedp->ias_exttype == K_SADB_EXT_SUPPORTED_ENCRYPT) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "adding encrypt alg.\n");
-+			alg_num_e++;
-+		}
-+	}
-+
-+	if(alg_num_a) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_reply: "
-+			    "allocating %lu bytes for auth algs.\n",
-+			    (unsigned long) (alg_num_a * sizeof(struct sadb_alg)));
-+		if((alg_a = kmalloc(alg_num_a * sizeof(struct sadb_alg), GFP_ATOMIC) ) == NULL) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "auth alg memory allocation error\n");
-+			SENDERR(ENOMEM);
-+		}
-+		alg_ap = alg_a;
-+	}
-+
-+	if(alg_num_e) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_reply: "
-+			    "allocating %lu bytes for enc algs.\n",
-+			    (unsigned long) (alg_num_e * sizeof(struct sadb_alg)));
-+		if((alg_e = kmalloc(alg_num_e * sizeof(struct sadb_alg), GFP_ATOMIC) ) == NULL) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "enc alg memory allocation error\n");
-+			SENDERR(ENOMEM);
-+		}
-+		alg_ep = alg_e;
-+	}
-+
-+	/* Pass 2: copy each entry into the array matching its type. The
-+	 * alg_num_* guards keep the write cursors from being used when no
-+	 * array of that type was allocated. */
-+	for(pfkey_supported_listp = pfkey_supported_list[satype];
-+	    pfkey_supported_listp;
-+	    pfkey_supported_listp = pfkey_supported_listp->next) {
-+		if(alg_num_a
-+		   && pfkey_supported_listp->supportedp->ias_exttype == K_SADB_EXT_SUPPORTED_AUTH) {
-+			alg_ap->sadb_alg_id = pfkey_supported_listp->supportedp->ias_id;
-+			alg_ap->sadb_alg_ivlen = pfkey_supported_listp->supportedp->ias_ivlen;
-+			alg_ap->sadb_alg_minbits = pfkey_supported_listp->supportedp->ias_keyminbits;
-+			alg_ap->sadb_alg_maxbits = pfkey_supported_listp->supportedp->ias_keymaxbits;
-+			alg_ap->sadb_alg_reserved = 0;
-+			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "adding auth=0p%p\n",
-+				    alg_ap);
-+			alg_ap++;
-+		}
-+		if(alg_num_e
-+		   && pfkey_supported_listp->supportedp->ias_exttype == K_SADB_EXT_SUPPORTED_ENCRYPT) {
-+			alg_ep->sadb_alg_id = pfkey_supported_listp->supportedp->ias_id;
-+			alg_ep->sadb_alg_ivlen = pfkey_supported_listp->supportedp->ias_ivlen;
-+			alg_ep->sadb_alg_minbits = pfkey_supported_listp->supportedp->ias_keyminbits;
-+			alg_ep->sadb_alg_maxbits = pfkey_supported_listp->supportedp->ias_keymaxbits;
-+			alg_ep->sadb_alg_reserved = 0;
-+			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
-+				    "klips_debug:pfkey_register_reply: "
-+				    "adding encrypt=0p%p\n",
-+				    alg_ep);
-+			alg_ep++;
-+		}
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_register_reply: "
-+			    "found satype=%d(%s) exttype=%d id=%d ivlen=%d minbits=%d maxbits=%d.\n",
-+			    satype,
-+			    satype2name(satype),
-+			    pfkey_supported_listp->supportedp->ias_exttype,
-+			    pfkey_supported_listp->supportedp->ias_id,
-+			    pfkey_supported_listp->supportedp->ias_ivlen,
-+			    pfkey_supported_listp->supportedp->ias_keyminbits,
-+			    pfkey_supported_listp->supportedp->ias_keymaxbits);
-+	}
-+
-+	/* Header always; each SUPPORTED extension only when it has entries. */
-+	if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+							  K_SADB_REGISTER,
-+							  satype,
-+							  0,
-+							  sadb_msg? sadb_msg->sadb_msg_seq : ++pfkey_msg_seq,
-+							  sadb_msg? sadb_msg->sadb_msg_pid: current->pid),
-+			      extensions_reply) &&
-+	     (alg_num_a ? pfkey_safe_build(error = pfkey_supported_build(&extensions_reply[K_SADB_EXT_SUPPORTED_AUTH],
-+									 K_SADB_EXT_SUPPORTED_AUTH,
-+									 alg_num_a,
-+									 alg_a),
-+					   extensions_reply) : 1) &&
-+	     (alg_num_e ? pfkey_safe_build(error = pfkey_supported_build(&extensions_reply[K_SADB_EXT_SUPPORTED_ENCRYPT],
-+									 K_SADB_EXT_SUPPORTED_ENCRYPT,
-+									 alg_num_e,
-+									 alg_e),
-+					   extensions_reply) : 1))) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+			    "failed to build the register message extensions_reply\n");
-+		SENDERR(-error);
-+	}
-+
-+	if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+			    "failed to build the register message\n");
-+		SENDERR(-error);
-+	}
-+	/* this should go to all registered sockets for that satype only */
-+	for(pfkey_socketsp = pfkey_registered_sockets[satype];
-+	    pfkey_socketsp;
-+	    pfkey_socketsp = pfkey_socketsp->next) {
-+		if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+			/* Log text corrected: this path sends a register
-+			 * message, not an acquire message. */
-+			KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+				    "sending up register message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+				    satype,
-+				    satype2name(satype),
-+				    pfkey_socketsp->socketp,
-+				    error);
-+			SENDERR(-error);
-+		}
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_register_reply: "
-+			    "sending up register message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+			    satype,
-+			    satype2name(satype),
-+			    pfkey_socketsp->socketp);
-+	}
-+
-+ errlab:
-+	/* kfree(NULL) is a no-op, so no guards are needed here. */
-+	kfree(alg_a);
-+	kfree(alg_e);
-+
-+	if (pfkey_reply) {
-+		pfkey_msg_free(&pfkey_reply);
-+	}
-+	pfkey_extensions_free(extensions_reply);
-+	return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_expire_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct socket_list *pfkey_socketsp;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_expire_parse: .\n");
-+
-+ if(pfkey_open_sockets) {
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire_parse: "
-+ "sending up expire reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire_parse: "
-+ "sending up expire reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+ }
-+
-+ errlab:
-+ return error;
-+}
-+
-+
-+/*
-+ *
-+ * flush all SAs from the table
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_flush_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+ uint8_t proto = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_flush_parse: "
-+ "flushing type %d SAs\n",
-+ satype);
-+
-+ if(satype && !(proto = satype2proto(satype))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_flush_parse: "
-+ "satype %d lookup failed.\n",
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if ((error = ipsec_sadb_cleanup(proto))) {
-+ SENDERR(-error);
-+ }
-+
-+ if(pfkey_open_sockets) {
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_flush_parse: "
-+ "sending up flush reply message for satype=%d(%s) (proto=%d) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ proto,
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_flush_parse: "
-+ "sending up flush reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+ }
-+
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_dump_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_dump_parse: .\n");
-+
-+ SENDERR(ENOSYS);
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_promisc_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_promisc_parse: .\n");
-+
-+ SENDERR(ENOSYS);
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_pchange_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_pchange_parse: .\n");
-+
-+ SENDERR(ENOSYS);
-+ errlab:
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_grpsa_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ struct ipsec_sa *ips1p, *ips2p, *ipsp;
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+ char sa1[SATOT_BUF], sa2[SATOT_BUF];
-+ size_t sa_len1, sa_len2 = 0;
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ if(extr == NULL || extr->ips == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "extr or extr->ips is NULL, fatal.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ sa_len1 = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa1, sizeof(sa1));
-+ if(extr->ips2 != NULL) {
-+ sa_len2 = KLIPS_SATOT(debug_pfkey, &extr->ips2->ips_said, 0, sa2, sizeof(sa2));
-+ }
-+
-+ spin_lock_bh(&tdb_lock);
-+
-+ ips1p = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if(ips1p == NULL) {
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_ERROR(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "reserved ipsec_sa for SA1: %s not found. Call K_SADB_ADD/UPDATE first.\n",
-+ sa_len1 ? sa1 : " (error)");
-+ SENDERR(ENOENT);
-+ }
-+
-+ if(extr->ips2) { /* GRPSA */
-+
-+ /* group ips2p to be after ips1p */
-+
-+ ips2p = ipsec_sa_getbyid(&(extr->ips2->ips_said));
-+ if(ips2p == NULL) {
-+ ipsec_sa_put(ips1p);
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "reserved ipsec_sa for SA2: %s not found. Call K_SADB_ADD/UPDATE first.\n",
-+ sa_len2 ? sa2 : " (error)");
-+ SENDERR(ENOENT);
-+ }
-+
-+ /* userspace puts things in inner to outer order */
-+ if(ips2p->ips_flags & EMT_INBOUND) {
-+ struct ipsec_sa *t;
-+
-+ /* exchange ips and ips2 */
-+ t = ips1p;
-+ ips1p = ips2p;
-+ ips2p = t;
-+ }
-+
-+ /* Is ips1p already linked? */
-+ if(ips1p->ips_next) {
-+ ipsec_sa_put(ips1p);
-+ ipsec_sa_put(ips2p);
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_ERROR(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "ipsec_sa for SA: %s is already linked.\n",
-+ sa_len1 ? sa1 : " (error)");
-+ SENDERR(EEXIST);
-+ }
-+
-+ /* Is extr->ips already linked to extr->ips2? */
-+ ipsp = ips2p;
-+ while(ipsp) {
-+ if(ipsp == ips1p) {
-+ ipsec_sa_put(ips1p);
-+ ipsec_sa_put(ips2p);
-+ spin_unlock_bh(&tdb_lock);
-+ KLIPS_ERROR(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "ipsec_sa for SA: %s is already linked to %s.\n",
-+ sa_len1 ? sa1 : " (error)",
-+ sa_len2 ? sa2 : " (error)");
-+ SENDERR(EEXIST);
-+ }
-+ ipsp = ipsp->ips_next;
-+ }
-+
-+ /* link 'em */
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "linking ipsec_sa SA: %s with %s.\n",
-+ sa_len1 ? sa1 : " (error)",
-+ sa_len2 ? sa2 : " (error)");
-+ ips1p->ips_next = ips2p;
-+ } else { /* UNGRPSA */
-+ while(ips1p) {
-+ struct ipsec_sa *ipsn;
-+
-+ /* take the reference to next */
-+ ipsn = ips1p->ips_next;
-+ ips1p->ips_next = NULL;
-+
-+ /* drop reference to current */
-+ ipsec_sa_put(ips1p);
-+
-+ ips1p = ipsn;
-+ }
-+
-+ /* note: we have dropped reference to ips1p, and
-+ * it is now NULL
-+ */
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_grpsa_parse: "
-+ "unlinking ipsec_sa SA: %s.\n",
-+ sa_len1 ? sa1 : " (error)");
-+ }
-+
-+ spin_unlock_bh(&tdb_lock);
-+
-+ /* MCR: not only is this ugly to read, and impossible
-+ * to debug through, but it's also really inefficient.
-+ * XXX simplify me.
-+ */
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_X_GRPSA,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply)
-+ && (extr->ips2
-+ ? (pfkey_safe_build(error = pfkey_x_satype_build(&extensions_reply[K_SADB_X_EXT_SATYPE2],
-+ ((struct sadb_x_satype*)extensions[K_SADB_X_EXT_SATYPE2])->sadb_x_satype_satype
-+ /* proto2satype(extr->ips2->ips_said.proto) */),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_X_EXT_SA2],
-+ K_SADB_X_EXT_SA2,
-+ extr->ips2->ips_said.spi,
-+ extr->ips2->ips_replaywin,
-+ extr->ips2->ips_state,
-+ extr->ips2->ips_authalg,
-+ extr->ips2->ips_encalg,
-+ extr->ips2->ips_flags),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_DST2],
-+ K_SADB_X_EXT_ADDRESS_DST2,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips2->ips_addr_d),
-+ extensions_reply) ) : 1 )
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: "
-+ "failed to build the x_grpsa reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: "
-+ "failed to build the x_grpsa reply message\n");
-+ SENDERR(-error);
-+ }
-+
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: "
-+ "sending up x_grpsa reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: "
-+ "sending up x_grpsa reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_grpsa_parse: "
-+ "succeeded in sending x_grpsa reply message.\n");
-+
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_addflow_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ char buf1[64], buf2[64];
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+ ip_address srcflow, dstflow, srcmask, dstmask;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ memset((caddr_t)&srcflow, 0, sizeof(srcflow));
-+ memset((caddr_t)&dstflow, 0, sizeof(dstflow));
-+ memset((caddr_t)&srcmask, 0, sizeof(srcmask));
-+ memset((caddr_t)&dstmask, 0, sizeof(dstmask));
-+
-+ if(!extr || !(extr->ips) || !(extr->eroute)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "missing extr, ipsec_sa or eroute data.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ srcflow.u.v4.sin_family = AF_INET;
-+ dstflow.u.v4.sin_family = AF_INET;
-+ srcmask.u.v4.sin_family = AF_INET;
-+ dstmask.u.v4.sin_family = AF_INET;
-+ srcflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_src;
-+ dstflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_dst;
-+ srcmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_src;
-+ dstmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_dst;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_pfkey) {
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_src,
-+ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_dst,
-+ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "calling breakeroute and/or makeroute for %s->%s\n",
-+ buf1, buf2);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ if(extr->ips->ips_flags & SADB_X_SAFLAGS_INFLOW) {
-+/* if(ip_chk_addr((unsigned long)extr->ips->ips_said.dst.u.v4.sin_addr.s_addr) == IS_MYADDR) */
-+ struct ipsec_sa *ipsp, *ipsq;
-+ char sa[SATOT_BUF];
-+ size_t sa_len;
-+
-+ ipsq = ipsec_sa_getbyid(&(extr->ips->ips_said));
-+ if(ipsq == NULL) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "ipsec_sa not found, cannot set incoming policy.\n");
-+ SENDERR(ENOENT);
-+ }
-+
-+ ipsp = ipsq;
-+ while(ipsp && ipsp->ips_said.proto != IPPROTO_IPIP) {
-+ ipsp = ipsp->ips_next;
-+ }
-+
-+ if(ipsp == NULL) {
-+ ipsec_sa_put(ipsq);
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "SA chain does not have an IPIP SA, cannot set incoming policy.\n");
-+ SENDERR(ENOENT);
-+ }
-+
-+ sa_len = KLIPS_SATOT(debug_pfkey, &extr->ips->ips_said, 0, sa, sizeof(sa));
-+
-+ ipsp->ips_flags |= SADB_X_SAFLAGS_INFLOW;
-+ ipsp->ips_flow_s = srcflow;
-+ ipsp->ips_flow_d = dstflow;
-+ ipsp->ips_mask_s = srcmask;
-+ ipsp->ips_mask_d = dstmask;
-+
-+ ipsec_sa_put(ipsq);
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "inbound eroute, setting incoming policy information in IPIP ipsec_sa for SA: %s.\n",
-+ sa_len ? sa : " (error)");
-+ } else {
-+ struct sk_buff *first = NULL, *last = NULL;
-+
-+ if(extr->ips->ips_flags & SADB_X_SAFLAGS_REPLACEFLOW) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "REPLACEFLOW flag set, calling breakeroute.\n");
-+ if ((error = ipsec_breakroute(&(extr->eroute->er_eaddr),
-+ &(extr->eroute->er_emask),
-+ &first, &last))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "breakeroute returned %d. first=0p%p, last=0p%p\n",
-+ error,
-+ first,
-+ last);
-+ if(first != NULL) {
-+ ipsec_kfree_skb(first);
-+ }
-+ if(last != NULL) {
-+ ipsec_kfree_skb(last);
-+ }
-+ SENDERR(-error);
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "calling makeroute.\n");
-+
-+ if ((error = ipsec_makeroute(&(extr->eroute->er_eaddr),
-+ &(extr->eroute->er_emask),
-+ extr->ips->ips_said,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid,
-+ NULL,
-+ &(extr->ips->ips_ident_s),
-+ &(extr->ips->ips_ident_d)))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "makeroute returned %d.\n", error);
-+ SENDERR(-error);
-+ }
-+ if(first != NULL) {
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "first=0p%p HOLD packet re-injected.\n",
-+ first);
-+ dst_output(first);
-+ }
-+ if(last != NULL) {
-+ KLIPS_PRINT(debug_eroute,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "last=0p%p HOLD packet re-injected.\n",
-+ last);
-+ dst_output(last);
-+ }
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "makeroute call successful.\n");
-+
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_X_ADDFLOW,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ && (extensions[K_SADB_EXT_ADDRESS_SRC]
-+ ? pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_s),
-+ extensions_reply) : 1)
-+ && (extensions[K_SADB_EXT_ADDRESS_DST]
-+ ? pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ extr->ips->ips_addr_d),
-+ extensions_reply) : 1)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_SRC_FLOW],
-+ K_SADB_X_EXT_ADDRESS_SRC_FLOW,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&srcflow),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_DST_FLOW],
-+ K_SADB_X_EXT_ADDRESS_DST_FLOW,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&dstflow),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_SRC_MASK],
-+ K_SADB_X_EXT_ADDRESS_SRC_MASK,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&srcmask),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_DST_MASK],
-+ K_SADB_X_EXT_ADDRESS_DST_MASK,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&dstmask),
-+ extensions_reply)
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: "
-+ "failed to build the x_addflow reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: "
-+ "failed to build the x_addflow reply message\n");
-+ SENDERR(-error);
-+ }
-+
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: "
-+ "sending up x_addflow reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_addflow_parse: "
-+ "sending up x_addflow reply message for satype=%d(%s) (proto=%d) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ extr->ips->ips_said.proto,
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_addflow_parse: "
-+ "extr->ips cleaned up and freed.\n");
-+
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_delflow_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ char buf1[64], buf2[64];
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ struct sadb_ext *extensions_reply[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_reply = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ uint8_t satype = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_satype;
-+ ip_address srcflow, dstflow, srcmask, dstmask;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: .\n");
-+
-+ pfkey_extensions_init(extensions_reply);
-+
-+ memset((caddr_t)&srcflow, 0, sizeof(srcflow));
-+ memset((caddr_t)&dstflow, 0, sizeof(dstflow));
-+ memset((caddr_t)&srcmask, 0, sizeof(srcmask));
-+ memset((caddr_t)&dstmask, 0, sizeof(dstmask));
-+
-+ if(!extr || !(extr->ips)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "extr, or extr->ips is NULL, fatal\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(extr->ips->ips_flags & SADB_X_SAFLAGS_CLEARFLOW) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "CLEARFLOW flag set, calling cleareroutes.\n");
-+ if ((error = ipsec_cleareroutes())) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "cleareroutes returned %d.\n", error);
-+ SENDERR(-error);
-+ }
-+ } else {
-+ struct sk_buff *first = NULL, *last = NULL;
-+
-+ if(!(extr->eroute)) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "extr->eroute is NULL, fatal.\n");
-+ SENDERR(EINVAL);
-+ }
-+
-+ srcflow.u.v4.sin_family = AF_INET;
-+ dstflow.u.v4.sin_family = AF_INET;
-+ srcmask.u.v4.sin_family = AF_INET;
-+ dstmask.u.v4.sin_family = AF_INET;
-+ srcflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_src;
-+ dstflow.u.v4.sin_addr = extr->eroute->er_eaddr.sen_ip_dst;
-+ srcmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_src;
-+ dstmask.u.v4.sin_addr = extr->eroute->er_emask.sen_ip_dst;
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if (debug_pfkey) {
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_src,
-+ extr->eroute->er_emask.sen_ip_src, 0, buf1, sizeof(buf1));
-+ subnettoa(extr->eroute->er_eaddr.sen_ip_dst,
-+ extr->eroute->er_emask.sen_ip_dst, 0, buf2, sizeof(buf2));
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "calling breakeroute for %s->%s\n",
-+ buf1, buf2);
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ error = ipsec_breakroute(&(extr->eroute->er_eaddr),
-+ &(extr->eroute->er_emask),
-+ &first, &last);
-+ if(error) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "breakeroute returned %d. first=0p%p, last=0p%p\n",
-+ error,
-+ first,
-+ last);
-+ }
-+ if(first != NULL) {
-+ ipsec_kfree_skb(first);
-+ }
-+ if(last != NULL) {
-+ ipsec_kfree_skb(last);
-+ }
-+ if(error) {
-+ SENDERR(-error);
-+ }
-+ }
-+
-+ if(!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions_reply[0],
-+ K_SADB_X_DELFLOW,
-+ satype,
-+ 0,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_seq,
-+ ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED])->sadb_msg_pid),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions_reply[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ extr->ips->ips_said.spi,
-+ extr->ips->ips_replaywin,
-+ extr->ips->ips_state,
-+ extr->ips->ips_authalg,
-+ extr->ips->ips_encalg,
-+ extr->ips->ips_flags),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_SRC_FLOW],
-+ K_SADB_X_EXT_ADDRESS_SRC_FLOW,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&srcflow),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_DST_FLOW],
-+ K_SADB_X_EXT_ADDRESS_DST_FLOW,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&dstflow),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_SRC_MASK],
-+ K_SADB_X_EXT_ADDRESS_SRC_MASK,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&srcmask),
-+ extensions_reply)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions_reply[K_SADB_X_EXT_ADDRESS_DST_MASK],
-+ K_SADB_X_EXT_ADDRESS_DST_MASK,
-+ 0, /*extr->ips->ips_said.proto,*/
-+ 0,
-+ (struct sockaddr*)&dstmask),
-+ extensions_reply)
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: "
-+ "failed to build the x_delflow reply message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if((error = pfkey_msg_build(&pfkey_reply, extensions_reply, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: "
-+ "failed to build the x_delflow reply message\n");
-+ SENDERR(-error);
-+ }
-+
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_reply))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: "
-+ "sending up x_delflow reply message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_delflow_parse: "
-+ "sending up x_delflow reply message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_delflow_parse: "
-+ "extr->ips cleaned up and freed.\n");
-+
-+ errlab:
-+ if (pfkey_reply) {
-+ pfkey_msg_free(&pfkey_reply);
-+ }
-+ pfkey_extensions_free(extensions_reply);
-+ return error;
-+}
-+
-+DEBUG_NO_STATIC int
-+pfkey_x_msg_debug_parse(struct sock *sk, struct sadb_ext **extensions, struct pfkey_extracted_data* extr)
-+{
-+ int error = 0;
-+
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_x_msg_debug_parse: .\n");
-+
-+/* errlab:*/
-+ return error;
-+}
-+
-+/* pfkey_expire expects the ipsec_sa table to be locked before being called. */
-+int
-+pfkey_expire(struct ipsec_sa *ipsp, int hard)
-+{
-+ struct sadb_ext *extensions[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_msg = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ int error = 0;
-+ uint8_t satype;
-+
-+ pfkey_extensions_init(extensions);
-+
-+ if(!(satype = proto2satype(ipsp->ips_said.proto))) {
-+ KLIPS_PRINT(debug_pfkey,
-+ "klips_debug:pfkey_expire: "
-+ "satype lookup for protocol %d lookup failed.\n",
-+ ipsp->ips_said.proto);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!pfkey_open_sockets) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: "
-+ "no sockets listening.\n");
-+ SENDERR(EPROTONOSUPPORT);
-+ }
-+
-+ if (!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions[0],
-+ K_SADB_EXPIRE,
-+ satype,
-+ 0,
-+ ++pfkey_msg_seq,
-+ 0),
-+ extensions)
-+ && pfkey_safe_build(error = pfkey_sa_build(&extensions[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA,
-+ ipsp->ips_said.spi,
-+ ipsp->ips_replaywin,
-+ ipsp->ips_state,
-+ ipsp->ips_authalg,
-+ ipsp->ips_encalg,
-+ ipsp->ips_flags),
-+ extensions)
-+ && pfkey_safe_build(error = pfkey_lifetime_build(&extensions[K_SADB_EXT_LIFETIME_CURRENT],
-+ K_SADB_EXT_LIFETIME_CURRENT,
-+ ipsp->ips_life.ipl_allocations.ipl_count,
-+ ipsp->ips_life.ipl_bytes.ipl_count,
-+ ipsp->ips_life.ipl_addtime.ipl_count,
-+ ipsp->ips_life.ipl_usetime.ipl_count,
-+ ipsp->ips_life.ipl_packets.ipl_count),
-+ extensions)
-+ && (hard ?
-+ pfkey_safe_build(error = pfkey_lifetime_build(&extensions[K_SADB_EXT_LIFETIME_HARD],
-+ K_SADB_EXT_LIFETIME_HARD,
-+ ipsp->ips_life.ipl_allocations.ipl_hard,
-+ ipsp->ips_life.ipl_bytes.ipl_hard,
-+ ipsp->ips_life.ipl_addtime.ipl_hard,
-+ ipsp->ips_life.ipl_usetime.ipl_hard,
-+ ipsp->ips_life.ipl_packets.ipl_hard),
-+ extensions)
-+ : pfkey_safe_build(error = pfkey_lifetime_build(&extensions[K_SADB_EXT_LIFETIME_SOFT],
-+ K_SADB_EXT_LIFETIME_SOFT,
-+ ipsp->ips_life.ipl_allocations.ipl_soft,
-+ ipsp->ips_life.ipl_bytes.ipl_soft,
-+ ipsp->ips_life.ipl_addtime.ipl_soft,
-+ ipsp->ips_life.ipl_usetime.ipl_soft,
-+ ipsp->ips_life.ipl_packets.ipl_soft),
-+ extensions))
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ 0, /* ipsp->ips_said.proto, */
-+ 0,
-+ ipsp->ips_addr_s),
-+ extensions)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ 0, /* ipsp->ips_said.proto, */
-+ 0,
-+ ipsp->ips_addr_d),
-+ extensions))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: "
-+ "failed to build the expire message extensions\n");
-+ spin_unlock_bh(&tdb_lock);
-+ goto errlab;
-+ }
-+
-+ if ((error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: "
-+ "failed to build the expire message\n");
-+ SENDERR(-error);
-+ }
-+
-+ for(pfkey_socketsp = pfkey_open_sockets;
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: "
-+ "sending up expire message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_expire: "
-+ "sending up expire message for satype=%d(%s) (proto=%d) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ ipsp->ips_said.proto,
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ errlab:
-+ if (pfkey_msg) {
-+ pfkey_msg_free(&pfkey_msg);
-+ }
-+ pfkey_extensions_free(extensions);
-+ return error;
-+}
-+
-+int
-+pfkey_acquire(struct ipsec_sa *ipsp)
-+{
-+ struct sadb_ext *extensions[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_msg = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ int error = 0;
-+ struct sadb_comb comb[] = {
-+ /* auth; encrypt; flags; */
-+ /* auth_minbits; auth_maxbits; encrypt_minbits; encrypt_maxbits; */
-+ /* reserved; soft_allocations; hard_allocations; soft_bytes; hard_bytes; */
-+ /* soft_addtime; hard_addtime; soft_usetime; hard_usetime; */
-+ /* soft_packets; hard_packets; */
-+ { K_SADB_AALG_MD5HMAC, K_SADB_EALG_3DESCBC, SADB_SAFLAGS_PFS,
-+ 128, 128, 168, 168,
-+ 0, 0, 0, 0, 0,
-+ 57600, 86400, 57600, 86400},
-+ { K_SADB_AALG_SHA1HMAC, K_SADB_EALG_3DESCBC, SADB_SAFLAGS_PFS,
-+ 160, 160, 168, 168,
-+ 0, 0, 0, 0, 0,
-+ 57600, 86400, 57600, 86400},
-+ };
-+
-+ /* XXX This should not be hard-coded. It should be taken from the spdb */
-+ uint8_t satype = K_SADB_SATYPE_ESP;
-+
-+ pfkey_extensions_init(extensions);
-+
-+ if((satype == 0) || (satype > K_SADB_SATYPE_MAX)) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "SAtype=%d unspecified or unknown.\n",
-+ satype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ if(!(pfkey_registered_sockets[satype])) {
-+ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "no sockets registered for SAtype=%d(%s).\n",
-+ satype,
-+ satype2name(satype));
-+ SENDERR(EPROTONOSUPPORT);
-+ }
-+
-+ if (!(pfkey_safe_build(error = pfkey_msg_hdr_build(&extensions[0],
-+ K_SADB_ACQUIRE,
-+ satype,
-+ 0,
-+ ++pfkey_msg_seq,
-+ 0),
-+ extensions)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC,
-+ ipsp->ips_transport_protocol,
-+ 0,
-+ ipsp->ips_addr_s),
-+ extensions)
-+ && pfkey_safe_build(error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST,
-+ ipsp->ips_transport_protocol,
-+ 0,
-+ ipsp->ips_addr_d),
-+ extensions)
-+#if 0
-+ && (ipsp->ips_addr_p
-+ ? pfkey_safe_build(error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_PROXY],
-+ K_SADB_EXT_ADDRESS_PROXY,
-+ ipsp->ips_transport_protocol,
-+ 0,
-+ ipsp->ips_addr_p),
-+ extensions) : 1)
-+#endif
-+ && (ipsp->ips_ident_s.type != SADB_IDENTTYPE_RESERVED
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions[SADB_EXT_IDENTITY_SRC],
-+ K_SADB_EXT_IDENTITY_SRC,
-+ ipsp->ips_ident_s.type,
-+ ipsp->ips_ident_s.id,
-+ ipsp->ips_ident_s.len,
-+ ipsp->ips_ident_s.data),
-+ extensions) : 1)
-+
-+ && (ipsp->ips_ident_d.type != SADB_IDENTTYPE_RESERVED
-+ ? pfkey_safe_build(error = pfkey_ident_build(&extensions[K_SADB_EXT_IDENTITY_DST],
-+ K_SADB_EXT_IDENTITY_DST,
-+ ipsp->ips_ident_d.type,
-+ ipsp->ips_ident_d.id,
-+ ipsp->ips_ident_d.len,
-+ ipsp->ips_ident_d.data),
-+ extensions) : 1)
-+#if 0
-+ /* FIXME: This won't work yet because I have not finished
-+ it. */
-+ && (ipsp->ips_sens_
-+ ? pfkey_safe_build(error = pfkey_sens_build(&extensions[K_SADB_EXT_SENSITIVITY],
-+ ipsp->ips_sens_dpd,
-+ ipsp->ips_sens_sens_level,
-+ ipsp->ips_sens_sens_len,
-+ ipsp->ips_sens_sens_bitmap,
-+ ipsp->ips_sens_integ_level,
-+ ipsp->ips_sens_integ_len,
-+ ipsp->ips_sens_integ_bitmap),
-+ extensions) : 1)
-+#endif
-+ && pfkey_safe_build(error = pfkey_prop_build(&extensions[K_SADB_EXT_PROPOSAL],
-+ 64, /* replay */
-+ sizeof(comb)/sizeof(struct sadb_comb),
-+ &(comb[0])),
-+ extensions)
-+ )) {
-+ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "failed to build the acquire message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ if ((error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "failed to build the acquire message\n");
-+ SENDERR(-error);
-+ }
-+
-+#ifdef KLIPS_PFKEY_ACQUIRE_LOSSAGE
-+# if KLIPS_PFKEY_ACQUIRE_LOSSAGE > 0
-+ if(sysctl_ipsec_regress_pfkey_lossage) {
-+ return(0);
-+ }
-+# endif
-+#endif
-+
-+ /* this should go to all registered sockets for that satype only */
-+ for(pfkey_socketsp = pfkey_registered_sockets[satype];
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) {
-+ KLIPS_PRINT(1|debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "sending up acquire message for satype=%d(%s) to socket=0p%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_acquire: "
-+ "sending up acquire message for satype=%d(%s) to socket=0p%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ errlab:
-+ if (pfkey_msg) {
-+ pfkey_msg_free(&pfkey_msg);
-+ }
-+ pfkey_extensions_free(extensions);
-+ return error;
-+}
-+
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+/*
-+ * Tell userland about a new NAT-T address/port mapping for an ESP SA by
-+ * sending a K_SADB_X_NAT_T_NEW_MAPPING message up every PF_KEY socket
-+ * registered for the SA's satype.
-+ *
-+ * ipsp:   SA the mapping belongs to; supplies the SPI, the protocol, the
-+ *         old address (ips_addr_s) and the old port (ips_natt_sport).
-+ * ipaddr: new address, carried in the ADDRESS_DST extension.
-+ * sport:  new port, carried in the NAT_T_DPORT extension.
-+ *
-+ * Returns the final value of error (0 if nothing failed).  The first socket
-+ * that refuses the message ends the loop, so later sockets are not notified.
-+ */
-+int
-+pfkey_nat_t_new_mapping(struct ipsec_sa *ipsp, struct sockaddr *ipaddr,
-+ __u16 sport)
-+{
-+ struct sadb_ext *extensions[K_SADB_EXT_MAX+1];
-+ struct sadb_msg *pfkey_msg = NULL;
-+ struct socket_list *pfkey_socketsp;
-+ int error = 0;
-+ /* only ESP maps to a satype here; any other protocol yields 0 and is rejected below */
-+ uint8_t satype = (ipsp->ips_said.proto==IPPROTO_ESP) ? K_SADB_SATYPE_ESP : 0;
-+
-+ /* Construct K_SADB_X_NAT_T_NEW_MAPPING message */
-+
-+ pfkey_extensions_init(extensions);
-+
-+ if((satype == 0) || (satype > K_SADB_SATYPE_MAX)) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "SAtype=%d unspecified or unknown.\n",
-+ satype);
-+ SENDERR(EINVAL);
-+ }
-+
-+ /* nobody is listening for this satype, so there is nothing to send */
-+ if(!(pfkey_registered_sockets[satype])) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "no sockets registered for SAtype=%d(%s).\n",
-+ satype,
-+ satype2name(satype));
-+ SENDERR(EPROTONOSUPPORT);
-+ }
-+
-+ /*
-+ * Each builder's result is latched in error before being handed to
-+ * pfkey_safe_build(); the && chain stops at the first builder that fails.
-+ */
-+ if (!(pfkey_safe_build
-+ (error = pfkey_msg_hdr_build(&extensions[0], K_SADB_X_NAT_T_NEW_MAPPING,
-+ satype, 0, ++pfkey_msg_seq, 0), extensions)
-+ /* SA */
-+ && pfkey_safe_build
-+ (error = pfkey_sa_build(&extensions[K_SADB_EXT_SA],
-+ K_SADB_EXT_SA, ipsp->ips_said.spi, 0, 0, 0, 0, 0), extensions)
-+ /* ADDRESS_SRC = old addr */
-+ && pfkey_safe_build
-+ (error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_SRC],
-+ K_SADB_EXT_ADDRESS_SRC, ipsp->ips_said.proto, 0, ipsp->ips_addr_s),
-+ extensions)
-+ /* NAT_T_SPORT = old port */
-+ && pfkey_safe_build
-+ (error = pfkey_x_nat_t_port_build(&extensions[K_SADB_X_EXT_NAT_T_SPORT],
-+ K_SADB_X_EXT_NAT_T_SPORT, ipsp->ips_natt_sport), extensions)
-+ /* ADDRESS_DST = new addr */
-+ && pfkey_safe_build
-+ (error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_DST],
-+ K_SADB_EXT_ADDRESS_DST, ipsp->ips_said.proto, 0, ipaddr), extensions)
-+ /* NAT_T_DPORT = new port */
-+ && pfkey_safe_build
-+ (error = pfkey_x_nat_t_port_build(&extensions[K_SADB_X_EXT_NAT_T_DPORT],
-+ K_SADB_X_EXT_NAT_T_DPORT, sport), extensions)
-+ )) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "failed to build the nat_t_new_mapping message extensions\n");
-+ SENDERR(-error);
-+ }
-+
-+ /* assemble the individual extensions into one contiguous message */
-+ if ((error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_OUT))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "failed to build the nat_t_new_mapping message\n");
-+ SENDERR(-error);
-+ }
-+
-+ /* this should go to all registered sockets for that satype only */
-+ for(pfkey_socketsp = pfkey_registered_sockets[satype];
-+ pfkey_socketsp;
-+ pfkey_socketsp = pfkey_socketsp->next) {
-+ if((error = pfkey_upmsg(pfkey_socketsp->socketp, pfkey_msg))) {
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "sending up nat_t_new_mapping message for satype=%d(%s) to socket=%p failed with error=%d.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp,
-+ error);
-+ SENDERR(-error);
-+ }
-+ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_nat_t_new_mapping: "
-+ "sending up nat_t_new_mapping message for satype=%d(%s) to socket=%p succeeded.\n",
-+ satype,
-+ satype2name(satype),
-+ pfkey_socketsp->socketp);
-+ }
-+
-+ /* common exit: release the built message (if any) and the extension array */
-+ errlab:
-+ if (pfkey_msg) {
-+ pfkey_msg_free(&pfkey_msg);
-+ }
-+ pfkey_extensions_free(extensions);
-+ return error;
-+}
-+
-+/*
-+ * Message-parser slot for K_SADB_X_NAT_T_NEW_MAPPING.  The kernel only
-+ * emits this message and never accepts it, so every request is refused
-+ * with -EINVAL; none of the arguments are examined.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_nat_t_new_mapping_parse(struct sock *sk,
-+				struct sadb_ext **extensions,
-+				struct pfkey_extracted_data* extr)
-+{
-+	const int refused = -EINVAL;	/* K_SADB_X_NAT_T_NEW_MAPPING not used in kernel */
-+
-+	return refused;
-+}
-+#endif
-+
-+/*******************************
-+ * EXTENSION PARSERS FOR KLIPS
-+ ********************************/
-+
-+/*
-+ * Extension processor for the plumb-interface extension: record the
-+ * interface number it carries in extr->outif.  Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_outif_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+	struct sadb_x_plumbif *plumb = (struct sadb_x_plumbif *)pfkey_ext;
-+
-+	extr->outif = plumb->sadb_x_outif_ifnum;
-+	return 0;
-+}
-+
-+/*
-+ * Extension processor for the SAref extension: copy both reference values
-+ * (ours and the peer's) from the extension into extr.  Always returns 0.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_saref_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
-+{
-+	struct sadb_x_saref *ref = (struct sadb_x_saref *)pfkey_ext;
-+
-+	extr->sarefme = ref->sadb_x_saref_me;
-+	extr->sarefhim = ref->sadb_x_saref_him;
-+	return 0;
-+}
-+
-+/*
-+ * Dispatch table of extension processors, indexed by PF_KEY extension type.
-+ * pfkey_msg_interp() walks indices 1..K_SADB_EXT_MAX and, for every
-+ * extension present in the parsed message, calls the entry at the same
-+ * index; a NULL entry means the extension is skipped.  Because the lookup
-+ * is purely positional, the order of the entries must follow the numeric
-+ * values of the extension type constants.
-+ */
-+DEBUG_NO_STATIC int (*ext_processors[K_SADB_EXT_MAX+1])(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) =
-+{
-+ /* slot 0 is the message header; the processing loop starts at index 1 */
-+ NULL, /* pfkey_msg_process, */
-+ pfkey_sa_process,
-+ pfkey_lifetime_process,
-+ pfkey_lifetime_process,
-+ pfkey_lifetime_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_key_process,
-+ pfkey_key_process,
-+ pfkey_ident_process,
-+ pfkey_ident_process,
-+ pfkey_sens_process,
-+ pfkey_prop_process,
-+ pfkey_supported_process,
-+ pfkey_supported_process,
-+ pfkey_spirange_process,
-+ pfkey_x_kmprivate_process,
-+ pfkey_x_satype_process,
-+ pfkey_sa_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_address_process,
-+ pfkey_x_debug_process,
-+ pfkey_x_protocol_process,
-+ /* both branches fill exactly four slots so the entries after them keep their index */
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ pfkey_x_nat_t_type_process,
-+ pfkey_x_nat_t_port_process,
-+ pfkey_x_nat_t_port_process,
-+ pfkey_address_process,
-+#else
-+ NULL, NULL, NULL, NULL,
-+#endif
-+ pfkey_x_outif_process,
-+ pfkey_x_saref_process,
-+};
-+
-+
-+/*******************************
-+ * MESSAGE PARSERS FOR KLIPS
-+ ********************************/
-+
-+/*
-+ * Answer a request by sending its own extensions back to the requester
-+ * with a status code stored in the message header.
-+ *
-+ * sk:         socket the reply is delivered to through pfkey_upmsgsk().
-+ * extensions: extensions of the request; slot K_SADB_EXT_RESERVED holds the
-+ *             message header, whose sadb_msg_errno is overwritten with err.
-+ * err:        status code to relay to the requester.
-+ *
-+ * Returns the result of pfkey_upmsgsk(), or the failure recorded when
-+ * pfkey_msg_build() could not assemble the reply.
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_simple_reply(struct sock *sk , struct sadb_ext *extensions[], int err)
-+{
-+	struct sadb_msg *pfkey_reply = NULL;
-+	int error = 0;
-+	struct sadb_msg *m = ((struct sadb_msg*)extensions[K_SADB_EXT_RESERVED]);
-+
-+	m->sadb_msg_errno = err;
-+
-+	if ((error = pfkey_msg_build(&pfkey_reply, extensions, EXT_BITS_OUT))) {
-+		/* the label used to say pfkey_expire, a copy/paste leftover */
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_x_simple_reply: "
-+			    "failed to build the simple reply message\n");
-+		SENDERR(-error);
-+	}
-+
-+	error = pfkey_upmsgsk(sk, pfkey_reply);
-+
-+	if(error) {
-+		/* log the send failure itself, not the status being relayed */
-+		KLIPS_ERROR(debug_pfkey, "pfkey_x_simple_reply: "
-+			    "sending up simple reply to pid=%d failed with error=%d.\n",
-+			    m->sadb_msg_pid, error);
-+	}
-+
-+errlab:
-+	if (pfkey_reply) {
-+		pfkey_msg_free(&pfkey_reply);
-+	}
-+
-+	return error;
-+}
-+
-+/*
-+ * Handle a request to create a virtual device.  Interface numbers above
-+ * IPSECDEV_OFFSET select the tunnel code (with the offset removed); all
-+ * others go to the mast code.  The creation status is relayed to the
-+ * requester with pfkey_x_simple_reply().
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_plumb_parse(struct sock *sk, struct sadb_ext *extensions[], struct pfkey_extracted_data* extr)
-+{
-+	const unsigned int ifnum = extr->outif;
-+	int status;
-+
-+	if(ifnum <= IPSECDEV_OFFSET) {
-+		status = ipsec_mast_createnum(ifnum);
-+	} else {
-+		status = ipsec_tunnel_createnum(ifnum-IPSECDEV_OFFSET);
-+	}
-+
-+	return pfkey_x_simple_reply(sk, extensions, status);
-+}
-+
-+/*
-+ * Handle a request to delete a virtual device.  Interface numbers above
-+ * IPSECDEV_OFFSET select the tunnel code (with the offset removed); all
-+ * others go to the mast code.  The deletion status is relayed to the
-+ * requester with pfkey_x_simple_reply().
-+ */
-+DEBUG_NO_STATIC int
-+pfkey_x_unplumb_parse(struct sock *sk, struct sadb_ext *extensions[], struct pfkey_extracted_data* extr)
-+{
-+	const unsigned int ifnum = extr->outif;
-+	int status;
-+
-+	if(ifnum <= IPSECDEV_OFFSET) {
-+		status = ipsec_mast_deletenum(ifnum);
-+	} else {
-+		status = ipsec_tunnel_deletenum(ifnum-IPSECDEV_OFFSET);
-+	}
-+
-+	return pfkey_x_simple_reply(sk, extensions, status);
-+}
-+
-+
-+/*
-+ * Dispatch table of message handlers, indexed by sadb_msg_type.
-+ * pfkey_msg_interp() calls msg_parsers[pfkey_msg->sadb_msg_type] once the
-+ * extensions of a message have been processed.  The lookup is positional,
-+ * so the entry order must follow the numeric values of the message type
-+ * constants.  Slot 0 is NULL, and so is the NAT-T new-mapping slot when
-+ * CONFIG_IPSEC_NAT_TRAVERSAL is not defined; NULL slots must never be called.
-+ */
-+DEBUG_NO_STATIC int (*msg_parsers[K_SADB_MAX +1])(struct sock *sk, struct sadb_ext *extensions[], struct pfkey_extracted_data* extr)
-+ =
-+{
-+ NULL, /* RESERVED */
-+ pfkey_getspi_parse,
-+ pfkey_update_parse,
-+ pfkey_add_parse,
-+ pfkey_delete_parse,
-+ pfkey_get_parse,
-+ pfkey_acquire_parse,
-+ pfkey_register_parse,
-+ pfkey_expire_parse,
-+ pfkey_flush_parse,
-+ pfkey_dump_parse,
-+ pfkey_x_promisc_parse,
-+ pfkey_x_pchange_parse,
-+ pfkey_x_grpsa_parse,
-+ pfkey_x_addflow_parse,
-+ pfkey_x_delflow_parse,
-+ pfkey_x_msg_debug_parse,
-+ /* one slot either way, so the plumb/unplumb entries keep their index */
-+#ifdef CONFIG_IPSEC_NAT_TRAVERSAL
-+ pfkey_x_nat_t_new_mapping_parse,
-+#else
-+ NULL,
-+#endif
-+ pfkey_x_plumb_parse,
-+ pfkey_x_unplumb_parse,
-+};
-+
-+/*
-+ * Build a reply to pfkey_msg from the SA held in extr->ips.
-+ *
-+ * The reply reuses the request's type, sequence number and pid.  Besides
-+ * the header, an extension (SA, SAREF, LIFETIME_CURRENT, ADDRESS_SRC,
-+ * ADDRESS_DST) is added only when pfkey_required_extension() reports that
-+ * the outgoing form of this message type requires it.
-+ *
-+ * pfkey_msg:   request being answered.
-+ * extr:        extracted request data; extr and extr->ips must be non-NULL.
-+ * pfkey_reply: receives the message assembled by pfkey_msg_build().
-+ *
-+ * Returns EINVAL (positive, unchanged for existing callers) when the SA is
-+ * missing or an extension cannot be built, otherwise the result of
-+ * pfkey_msg_build().
-+ */
-+int
-+pfkey_build_reply(struct sadb_msg *pfkey_msg,
-+		  struct pfkey_extracted_data *extr,
-+		  struct sadb_msg **pfkey_reply)
-+{
-+	struct sadb_ext *extensions[K_SADB_EXT_MAX+1];
-+	int error = 0;
-+	/*
-+	 * pfkey_safe_build() returns nonzero on success.  Keep that success
-+	 * flag apart from the errno-style codes in error: mixing the two
-+	 * meant a successful header build disabled every extension below.
-+	 */
-+	int built;
-+	int msg_type = pfkey_msg->sadb_msg_type;
-+	int seq = pfkey_msg->sadb_msg_seq;
-+
-+	KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: "
-+		    "building reply with type: %d\n",
-+		    msg_type);
-+	pfkey_extensions_init(extensions);
-+	if (!extr || !extr->ips) {
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: "
-+			    "bad ipsec_sa passed\n");
-+		return EINVAL; /* TODO: should this not be negative? */
-+	}
-+	built = pfkey_safe_build(pfkey_msg_hdr_build(&extensions[0],
-+						     msg_type,
-+						     proto2satype(extr->ips->ips_said.proto),
-+						     0,
-+						     seq,
-+						     pfkey_msg->sadb_msg_pid),
-+				 extensions);
-+
-+	if(built
-+	   && pfkey_required_extension(EXT_BITS_OUT, msg_type, K_SADB_EXT_SA)) {
-+		error = pfkey_sa_build(&extensions[K_SADB_EXT_SA],
-+				       K_SADB_EXT_SA,
-+				       extr->ips->ips_said.spi,
-+				       extr->ips->ips_replaywin,
-+				       extr->ips->ips_state,
-+				       extr->ips->ips_authalg,
-+				       extr->ips->ips_encalg,
-+				       extr->ips->ips_flags);
-+		built = pfkey_safe_build(error, extensions);
-+	}
-+
-+	if(built
-+	   && pfkey_required_extension(EXT_BITS_OUT, msg_type, K_SADB_X_EXT_SAREF)) {
-+		error = pfkey_saref_build(&extensions[K_SADB_X_EXT_SAREF],
-+					  extr->ips->ips_ref,
-+					  extr->ips->ips_refhim);
-+		built = pfkey_safe_build(error, extensions);
-+	}
-+
-+	if(built
-+	   && pfkey_required_extension(EXT_BITS_OUT,msg_type,K_SADB_EXT_LIFETIME_CURRENT)) {
-+		error = pfkey_lifetime_build(&extensions
-+					     [K_SADB_EXT_LIFETIME_CURRENT],
-+					     K_SADB_EXT_LIFETIME_CURRENT,
-+					     extr->ips->ips_life.ipl_allocations.ipl_count,
-+					     extr->ips->ips_life.ipl_bytes.ipl_count,
-+					     extr->ips->ips_life.ipl_addtime.ipl_count,
-+					     extr->ips->ips_life.ipl_usetime.ipl_count,
-+					     extr->ips->ips_life.ipl_packets.ipl_count);
-+		built = pfkey_safe_build(error, extensions);
-+	}
-+
-+	if(built
-+	   && pfkey_required_extension(EXT_BITS_OUT,msg_type,K_SADB_EXT_ADDRESS_SRC)) {
-+		error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_SRC],
-+					    K_SADB_EXT_ADDRESS_SRC,
-+					    extr->ips->ips_said.proto,
-+					    0,
-+					    extr->ips->ips_addr_s);
-+		built = pfkey_safe_build(error, extensions);
-+	}
-+
-+	if(built
-+	   && pfkey_required_extension(EXT_BITS_OUT,msg_type,K_SADB_EXT_ADDRESS_DST)) {
-+		error = pfkey_address_build(&extensions[K_SADB_EXT_ADDRESS_DST],
-+					    K_SADB_EXT_ADDRESS_DST,
-+					    extr->ips->ips_said.proto,
-+					    0,
-+					    extr->ips->ips_addr_d);
-+		built = pfkey_safe_build(error, extensions);
-+	}
-+
-+	if (!built) {
-+		/*
-+		 * NOTE(review): presumably pfkey_safe_build() has already
-+		 * released the extensions on failure -- confirm.
-+		 */
-+		KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_build_reply: "
-+			    "building extensions failed\n");
-+		return EINVAL;
-+	}
-+
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_build_reply: "
-+		    "built extensions, proceed to build the message\n");
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_build_reply: "
-+		    "extensions[1]=0p%p\n",
-+		    extensions[1]);
-+	error = pfkey_msg_build(pfkey_reply, extensions, EXT_BITS_OUT);
-+	pfkey_extensions_free(extensions);
-+
-+	return error;
-+}
-+
-+/*
-+ * interpret a pfkey message for klips usage.
-+ * it used to be that we provided a reply in a separate buffer,
-+ * but now we overwrite the request buffer and return it.
-+ *
-+ * sk:        socket the message arrived on; handed to the message parser.
-+ * pfkey_msg: the message to interpret.
-+ *
-+ * Steps: allocate a scratch SA (extr.ips), validate the satype, parse the
-+ * message into extensions, run the per-extension processor for each one
-+ * present, then dispatch to the handler for the message type.  Whatever
-+ * extr still owns on exit (ips, ips2, eroute) is released at errlab.
-+ *
-+ * Returns 0 on success, otherwise the error recorded by the failing step.
-+ */
-+int
-+pfkey_msg_interp(struct sock *sk, struct sadb_msg *pfkey_msg)
-+{
-+	int error = 0;
-+	int i;
-+	struct sadb_ext *extensions[K_SADB_EXT_MAX+1]; /* should be kalloc */
-+	struct pfkey_extracted_data extr;
-+
-+	memset(&extr, 0, sizeof(extr));
-+
-+	pfkey_extensions_init(extensions);
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_msg_interp: "
-+		    "parsing message ver=%d, type=%d, errno=%d, satype=%d(%s), len=%d, res=%d, seq=%d, pid=%d.\n",
-+		    pfkey_msg->sadb_msg_version,
-+		    pfkey_msg->sadb_msg_type,
-+		    pfkey_msg->sadb_msg_errno,
-+		    pfkey_msg->sadb_msg_satype,
-+		    satype2name(pfkey_msg->sadb_msg_satype),
-+		    pfkey_msg->sadb_msg_len,
-+		    pfkey_msg->sadb_msg_reserved,
-+		    pfkey_msg->sadb_msg_seq,
-+		    pfkey_msg->sadb_msg_pid);
-+
-+	extr.ips = ipsec_sa_alloc(&error); /* pass in error var by pointer */
-+	if(extr.ips == NULL) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_msg_interp: "
-+			    "memory allocation error.\n");
-+		SENDERR(-error);
-+	}
-+
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_msg_interp: "
-+		    "allocated extr->ips=0p%p.\n",
-+		    extr.ips);
-+
-+	if(pfkey_msg->sadb_msg_satype > K_SADB_SATYPE_MAX) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_msg_interp: "
-+			    "satype %d > max %d\n",
-+			    pfkey_msg->sadb_msg_satype,
-+			    K_SADB_SATYPE_MAX);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* these message types need the SA protocol derived from the satype */
-+	switch(pfkey_msg->sadb_msg_type) {
-+	case K_SADB_GETSPI:
-+	case K_SADB_UPDATE:
-+	case K_SADB_ADD:
-+	case K_SADB_DELETE:
-+	case K_SADB_X_GRPSA:
-+	case K_SADB_X_ADDFLOW:
-+		if(!(extr.ips->ips_said.proto = satype2proto(pfkey_msg->sadb_msg_satype))) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_msg_interp: "
-+				    "satype %d lookup failed.\n",
-+				    pfkey_msg->sadb_msg_satype);
-+			SENDERR(EINVAL);
-+		} else {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_msg_interp: "
-+				    "satype %d lookups to proto=%d.\n",
-+				    pfkey_msg->sadb_msg_satype,
-+				    extr.ips->ips_said.proto);
-+		}
-+		break;
-+	default:
-+		break;
-+	}
-+
-+	/* The NULL below causes the default extension parsers to be used */
-+	/* Parse the extensions */
-+	if((error = pfkey_msg_parse(pfkey_msg, NULL, extensions, EXT_BITS_IN)))
-+	{
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_msg_interp: "
-+			    "message parsing failed with error %d.\n",
-+			    error);
-+		SENDERR(-error);
-+	}
-+
-+	/* Process the extensions */
-+	for(i=1; i <= K_SADB_EXT_MAX;i++) {
-+		if(extensions[i] != NULL && ext_processors[i]!=NULL) {
-+			KLIPS_PRINT(debug_pfkey,
-+				    "klips_debug:pfkey_msg_interp: "
-+				    "processing ext %d 0p%p with processor 0p%p.\n",
-+				    i, extensions[i], ext_processors[i]);
-+			if((error = ext_processors[i](extensions[i], &extr))) {
-+				KLIPS_PRINT(debug_pfkey,
-+					    "klips_debug:pfkey_msg_interp: "
-+					    "extension processing for type %d failed with error %d.\n",
-+					    i,
-+					    error);
-+				SENDERR(-error);
-+			}
-+
-+		}
-+
-+	}
-+
-+	/*
-+	 * msg_parsers[] has K_SADB_MAX+1 slots and some of them are NULL
-+	 * (slot 0, and the NAT-T slot when NAT traversal is compiled out).
-+	 * Refuse such types instead of indexing past the table or calling
-+	 * through a NULL pointer.
-+	 */
-+	if(pfkey_msg->sadb_msg_type > K_SADB_MAX
-+	   || msg_parsers[pfkey_msg->sadb_msg_type] == NULL) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_msg_interp: "
-+			    "no parser for message type %d.\n",
-+			    pfkey_msg->sadb_msg_type);
-+		SENDERR(EINVAL);
-+	}
-+
-+	/* Parse the message types */
-+	KLIPS_PRINT(debug_pfkey,
-+		    "klips_debug:pfkey_msg_interp: "
-+		    "parsing message type %d(%s) with msg_parser 0p%p.\n",
-+		    pfkey_msg->sadb_msg_type,
-+		    pfkey_v2_sadb_type_string(pfkey_msg->sadb_msg_type),
-+		    msg_parsers[pfkey_msg->sadb_msg_type]);
-+	if((error = msg_parsers[pfkey_msg->sadb_msg_type](sk, extensions, &extr))) {
-+		KLIPS_PRINT(debug_pfkey,
-+			    "klips_debug:pfkey_msg_interp: "
-+			    "message parsing failed with error %d.\n",
-+			    error);
-+		SENDERR(-error);
-+	}
-+
-+ errlab:
-+	/* release whatever the processors and parsers left in extr */
-+	if(extr.ips != NULL) {
-+		ipsec_sa_put(extr.ips);
-+	}
-+	if(extr.ips2 != NULL) {
-+		ipsec_sa_put(extr.ips2);
-+	}
-+	if (extr.eroute != NULL) {
-+		kfree(extr.eroute);
-+	}
-+	return(error);
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/prng.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,202 @@
-+/*
-+ * crypto-class pseudorandom number generator
-+ * currently uses same algorithm as RC4(TM), from Schneier 2nd ed p397
-+ * Copyright (C) 2002 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: prng.c,v 1.8 2005/08/25 01:20:21 paul Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - prng_init - initialize PRNG from a key
-+ *
-+ * The key is repeated cyclically to fill a 256-byte schedule, which then
-+ * drives the initial permutation of prng->sbox; the schedule is wiped as
-+ * it is consumed.  A zero-length key is treated as an all-zero schedule
-+ * rather than read through, so an empty buffer is never dereferenced.
-+ */
-+void
-+prng_init(prng, key, keylen)
-+struct prng *prng;
-+const unsigned char *key;
-+size_t keylen;
-+{
-+	unsigned char k[256];
-+	int i, j;
-+	unsigned const char *p;
-+	unsigned const char *keyend = key + keylen;
-+	unsigned char t;
-+
-+	/* start from the identity permutation */
-+	for (i = 0; i <= 255; i++)
-+		prng->sbox[i] = i;
-+	p = key;
-+	for (i = 0; i <= 255; i++) {
-+		/* with no key bytes there is nothing valid to read */
-+		k[i] = (keylen > 0) ? *p++ : 0;
-+		if (p >= keyend)
-+			p = key;	/* wrap around to repeat the key */
-+	}
-+	j = 0;
-+	for (i = 0; i <= 255; i++) {
-+		j = (j + prng->sbox[i] + k[i]) & 0xff;
-+		t = prng->sbox[i];
-+		prng->sbox[i] = prng->sbox[j];
-+		prng->sbox[j] = t;
-+		k[i] = 0;	/* clear out key memory */
-+	}
-+	prng->i = 0;
-+	prng->j = 0;
-+	prng->count = 0;
-+}
-+
-+/*
-+ - prng_bytes - get some pseudorandom bytes from PRNG
-+ *
-+ * Writes dstlen generated bytes to dst, advancing the generator state, and
-+ * adds dstlen to prng->count.  The count saturates at MAXCOUNT instead of
-+ * wrapping, including when a single request exceeds MAXCOUNT.
-+ */
-+void
-+prng_bytes(prng, dst, dstlen)
-+struct prng *prng;
-+unsigned char *dst;
-+size_t dstlen;
-+{
-+	int i, j, t;
-+	unsigned char *p = dst;
-+	size_t remain = dstlen;
-+#	define	MAXCOUNT	4000000000ul
-+
-+	while (remain > 0) {
-+		i = (prng->i + 1) & 0xff;
-+		prng->i = i;
-+		j = (prng->j + prng->sbox[i]) & 0xff;
-+		prng->j = j;
-+		/* swap sbox[i] and sbox[j], then emit the byte they select */
-+		t = prng->sbox[i];
-+		prng->sbox[i] = prng->sbox[j];
-+		prng->sbox[j] = t;
-+		t = (t + prng->sbox[i]) & 0xff;
-+		*p++ = prng->sbox[t];
-+		remain--;
-+	}
-+	/*
-+	 * Check dstlen first: MAXCOUNT - dstlen is unsigned and would wrap
-+	 * to a huge value for dstlen > MAXCOUNT, defeating the saturation.
-+	 */
-+	if (dstlen < MAXCOUNT && prng->count < MAXCOUNT - dstlen)
-+		prng->count += dstlen;
-+	else
-+		prng->count = MAXCOUNT;
-+}
-+
-+/*
-+ - prng_count - how many bytes have been extracted from PRNG so far?
-+ *
-+ * Reports the running total kept in prng->count.
-+ */
-+unsigned long
-+prng_count(prng)
-+struct prng *prng;
-+{
-+	unsigned long extracted = prng->count;
-+
-+	return extracted;
-+}
-+
-+/*
-+ - prng_final - clear out PRNG to ensure nothing left in memory
-+ *
-+ * Zeroes every sbox entry, both indices and the byte count.
-+ */
-+void
-+prng_final(prng)
-+struct prng *prng;
-+{
-+	int slot = 256;
-+
-+	while (slot-- > 0)
-+		prng->sbox[slot] = 0;
-+	prng->count = 0;	/* just for good measure */
-+	prng->j = 0;
-+	prng->i = 0;
-+}
-+
-+
-+
-+#ifdef PRNG_MAIN
-+
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <string.h>
-+
-+void regress();
-+
-+/*
-+ * Test driver (built only with PRNG_MAIN).  With "-r" it runs regress(),
-+ * which is expected to exit on its own; otherwise the argument is used as
-+ * the key and the first 32 generated bytes are printed in hex, followed by
-+ * the byte count reported by the generator.
-+ */
-+int
-+main(argc, argv)
-+int argc;
-+char *argv[];
-+{
-+	struct prng state;
-+	unsigned char out[100];
-+	size_t idx;
-+
-+	if (argc < 2) {
-+		fprintf(stderr, "Usage: %s {key|-r}\n", argv[0]);
-+		exit(2);
-+	}
-+
-+	if (strcmp(argv[1], "-r") == 0) {
-+		regress();
-+		/* regress() exits itself; getting here is a failure */
-+		fprintf(stderr, "regress() returned?!?\n");
-+		exit(1);
-+	}
-+
-+	prng_init(&state, argv[1], strlen(argv[1]));
-+	prng_bytes(&state, out, 32);
-+
-+	printf("0x");
-+	for (idx = 0; idx < 32; idx++)
-+		printf("%02x", out[idx]);
-+	printf("\n%lu bytes\n", prng_count(&state));
-+
-+	prng_final(&state);
-+	exit(0);
-+}
-+
-+/*
-+ * Self-test (built only with PRNG_MAIN).  Keys the generator with a fixed
-+ * string, draws sizeof(buf) bytes and checks that
-+ *   - no more than three bytes are 0 and no more than three are 255,
-+ *   - the leading bytes match the known-good output for that key,
-+ *   - the generator's byte count equals the number of bytes drawn.
-+ * Never returns: exits 0 when every check passes, 1 otherwise (after
-+ * dumping the generated bytes if the output itself was wrong).
-+ */
-+void
-+regress()
-+{
-+	struct prng pr;
-+	unsigned char buf[100];
-+	unsigned char *p;
-+	size_t n;
-+	/* somewhat non-random sample key */
-+	unsigned char key[] = "here we go gathering nuts in May";
-+	/* first thirty bytes of output from that key */
-+	unsigned char good[] = "\x3f\x02\x8e\x4a\x2a\xea\x23\x18\x92\x7c"
-+			       "\x09\x52\x83\x61\xaa\x26\xce\xbb\x9d\x71"
-+			       "\x71\xe5\x10\x22\xaf\x60\x54\x8d\x5b\x28";
-+	/* counters must start at zero; they were read uninitialised before */
-+	int nzero = 0, none = 0;
-+	int show = 0;
-+
-+	/* sizeof - 1 drops the terminating NUL and avoids strlen() on unsigned char */
-+	prng_init(&pr, key, sizeof(key) - 1);
-+	prng_bytes(&pr, buf, sizeof(buf));
-+	for (p = buf, n = sizeof(buf); n > 0; p++, n--) {
-+		if (*p == 0)
-+			nzero++;
-+		if (*p == 255)
-+			none++;
-+	}
-+	if (nzero > 3 || none > 3) {
-+		fprintf(stderr, "suspiciously non-random output!\n");
-+		show = 1;
-+	}
-+	if (memcmp(buf, good, sizeof(good) - 1) != 0) {
-+		fprintf(stderr, "incorrect output!\n");
-+		show = 1;
-+	}
-+	if (show) {
-+		fprintf(stderr, "0x");
-+		for (p = buf, n = sizeof(buf); n > 0; p++, n--)
-+			fprintf(stderr, "%02x", *p);
-+		fprintf(stderr, "\n");
-+		exit(1);
-+	}
-+	if (prng_count(&pr) != sizeof(buf)) {
-+		/* sizeof yields size_t; cast it to match the %lu conversion */
-+		fprintf(stderr, "got %lu bytes, but count is %lu\n",
-+			(unsigned long)sizeof(buf), prng_count(&pr));
-+		exit(1);
-+	}
-+	prng_final(&pr);
-+	exit(0);
-+}
-+
-+#endif /* PRNG_MAIN */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/radij.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1013 @@
-+char radij_c_version[] = "RCSID $Id: radij.c,v 1.48 2005/04/29 05:10:22 mcr Exp $";
-+
-+/*
-+ * This file is derived from ${SRC}/sys/net/radix.c of BSD 4.4lite
-+ *
-+ * Variable and procedure names have been modified so that they don't
-+ * conflict with the original BSD code, as a small number of modifications
-+ * have been introduced and we may want to reuse this code in BSD.
-+ *
-+ * The `j' in `radij' is pronounced as a voiceless guttural (like a Greek
-+ * chi or a German ch sound (as `doch', not as in `milch'), or even a
-+ * spanish j as in Juan. It is not as far back in the throat like
-+ * the corresponding Hebrew sound, nor is it a soft breath like the English h.
-+ * It has nothing to do with the Dutch ij sound.
-+ *
-+ * Here is the appropriate copyright notice:
-+ */
-+
-+/*
-+ * Copyright (c) 1988, 1989, 1993
-+ * The Regents of the University of California. All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. All advertising materials mentioning features or use of this software
-+ * must display the following acknowledgement:
-+ * This product includes software developed by the University of
-+ * California, Berkeley and its contributors.
-+ * 4. Neither the name of the University nor the names of its contributors
-+ * may be used to endorse or promote products derived from this software
-+ * without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-+ * SUCH DAMAGE.
-+ *
-+ * @(#)radix.c 8.2 (Berkeley) 1/4/94
-+ */
-+
-+/*
-+ * Routines to build and maintain radix trees for routing lookups.
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/version.h>
-+#include <linux/kernel.h> /* printk() */
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef MALLOC_SLAB
-+# include <linux/slab.h> /* kmalloc() */
-+#else /* MALLOC_SLAB */
-+# include <linux/malloc.h> /* kmalloc() */
-+#endif /* MALLOC_SLAB */
-+#include <linux/errno.h> /* error codes */
-+#include <linux/types.h> /* size_t */
-+#include <linux/interrupt.h> /* mark_bh */
-+
-+#include <linux/netdevice.h> /* struct device, and other headers */
-+#include <linux/etherdevice.h> /* eth_type_trans */
-+#include <linux/ip.h> /* struct iphdr */
-+#include <linux/skbuff.h>
-+#ifdef NET_21
-+# include <linux/in6.h>
-+#endif /* NET_21 */
-+
-+#include <net/ip.h>
-+
-+#include <openswan.h>
-+
-+#include "openswan/radij.h"
-+#include "openswan/ipsec_encap.h"
-+#include "openswan/ipsec_radij.h"
-+
-+int maj_keylen;
-+struct radij_mask *rj_mkfreelist;
-+struct radij_node_head *mask_rjhead;
-+static int gotOddMasks;
-+static char *maskedKey;
-+static char *rj_zeroes, *rj_ones;
-+
-+#define rj_masktop (mask_rjhead->rnh_treetop)
-+#ifdef Bcmp
-+# undef Bcmp
-+#endif /* Bcmp */
-+#define Bcmp(a, b, l) (l == 0 ? 0 : memcmp((caddr_t)(b), (caddr_t)(a), (size_t)l))
-+/*
-+ * The data structure for the keys is a radix tree with one way
-+ * branching removed. The index rj_b at an internal node n represents a bit
-+ * position to be tested. The tree is arranged so that all descendants
-+ * of a node n have keys whose bits all agree up to position rj_b - 1.
-+ * (We say the index of n is rj_b.)
-+ *
-+ * There is at least one descendant which has a one bit at position rj_b,
-+ * and at least one with a zero there.
-+ *
-+ * A route is determined by a pair of key and mask. We require that the
-+ * bit-wise logical and of the key and mask to be the key.
-+ * We define the index of a route to associated with the mask to be
-+ * the first bit number in the mask where 0 occurs (with bit number 0
-+ * representing the highest order bit).
-+ *
-+ * We say a mask is normal if every bit is 0, past the index of the mask.
-+ * If a node n has a descendant (k, m) with index(m) == index(n) == rj_b,
-+ * and m is a normal mask, then the route applies to every descendant of n.
-+ * If the index(m) < rj_b, this implies the trailing last few bits of k
-+ * before bit b are all 0, (and hence consequently true of every descendant
-+ * of n), so the route applies to all descendants of the node as well.
-+ *
-+ * The present version of the code makes no use of normal routes,
-+ * but similar logic shows that a non-normal mask m such that
-+ * index(m) <= index(n) could potentially apply to many children of n.
-+ * Thus, for each non-host route, we attach its mask to a list at an internal
-+ * node as high in the tree as we can go.
-+ */
-+
-+/*
-+ * Walk down the tree from head: at each node with a non-negative rj_b,
-+ * test the key byte at rj_off against rj_bmask and go right when the bit
-+ * is set, left otherwise.  The first node with a negative rj_b is returned;
-+ * its key is not compared against v_arg.
-+ */
-+struct radij_node *
-+rj_search(v_arg, head)
-+	void *v_arg;
-+	struct radij_node *head;
-+{
-+	register struct radij_node *node = head;
-+	register caddr_t key = v_arg;
-+
-+	while (node->rj_b >= 0) {
-+		if ((node->rj_bmask & key[node->rj_off]) == 0)
-+			node = node->rj_l;
-+		else
-+			node = node->rj_r;
-+	}
-+	return node;
-+}
-+
-+/*
-+ * Masked variant of rj_search(): at each node with a non-negative rj_b,
-+ * go right only when the tested bit is set in both the mask and the key;
-+ * in every other case go left.  Returns the first node with a negative
-+ * rj_b.
-+ */
-+struct radij_node *
-+rj_search_m(v_arg, head, m_arg)
-+	struct radij_node *head;
-+	void *v_arg, *m_arg;
-+{
-+	register struct radij_node *node = head;
-+	register caddr_t key = v_arg, mask = m_arg;
-+
-+	while (node->rj_b >= 0) {
-+		if (!(node->rj_bmask & mask[node->rj_off]) ||
-+		    !(node->rj_bmask & key[node->rj_off]))
-+			node = node->rj_l;
-+		else
-+			node = node->rj_r;
-+	}
-+	return node;
-+}
-+
-+/*
-+ * Decide whether mask m_arg is strictly more specific than mask n_arg.
-+ * Both masks start with a length byte that counts itself.
-+ *
-+ * Returns 1 when every bit set in n is also set in m and m has at least
-+ * one additional bit set; returns 0 when n has a bit that m lacks or when
-+ * the two masks are equivalent.
-+ */
-+int
-+rj_refines(m_arg, n_arg)
-+ void *m_arg, *n_arg;
-+{
-+ register caddr_t m = m_arg, n = n_arg;
-+ /* lim2 marks the end of n; lim will mark the end of the part shared with m */
-+ register caddr_t lim, lim2 = lim = n + *(u_char *)n;
-+ /* length difference (n minus m); both pointers step past their length byte */
-+ int longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
-+ int masks_are_equal = 1;
-+
-+ if (longer > 0)
-+ lim -= longer;
-+ /* compare the bytes both masks have in common */
-+ while (n < lim) {
-+ if (*n & ~(*m))
-+ return 0;
-+ if (*n++ != *m++)
-+ masks_are_equal = 0;
-+
-+ }
-+ /* n is longer: any bit in its tail is one that m cannot have */
-+ while (n < lim2)
-+ if (*n++)
-+ return 0;
-+ /* m is longer and equal so far: a bit in m's tail makes it more specific */
-+ if (masks_are_equal && (longer < 0))
-+ for (lim2 = m - longer; m < lim2; )
-+ if (*m++)
-+ return 1;
-+ return (!masks_are_equal);
-+}
-+
-+
-+/*
-+ * Look up key v_arg (first byte = key length) in the tree under head.
-+ *
-+ * Three attempts are made, in order:
-+ *   1. descend to a leaf and compare its key byte for byte (host match);
-+ *   2. try that leaf and its rj_dupedkey chain as masked (network) routes;
-+ *   3. walk back up through rj_p, and for every mask on a node's rj_mklist
-+ *      mask the key into the shared maskedKey buffer, search again from
-+ *      that node and compare the result.
-+ *
-+ * Returns the matching node, or 0 when the walk reaches the top of the
-+ * tree without a match.
-+ */
-+struct radij_node *
-+rj_match(v_arg, head)
-+ void *v_arg;
-+ struct radij_node_head *head;
-+{
-+ caddr_t v = v_arg;
-+ register struct radij_node *t = head->rnh_treetop, *x;
-+ register caddr_t cp = v, cp2, cp3;
-+ caddr_t cplim, mstart;
-+ struct radij_node *saved_t, *top = t;
-+ int off = t->rj_off, vlen = *(u_char *)cp, matched_off;
-+
-+ /*
-+ * Open code rj_search(v, top) to avoid overhead of extra
-+ * subroutine call.
-+ */
-+ for (; t->rj_b >= 0; ) {
-+ if (t->rj_bmask & cp[t->rj_off])
-+ t = t->rj_r;
-+ else
-+ t = t->rj_l;
-+ }
-+ /*
-+ * See if we match exactly as a host destination
-+ */
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "* See if we match exactly as a host destination\n");
-+
-+ /* compare from the top node's byte offset up to the end of the key */
-+ cp += off; cp2 = t->rj_key + off; cplim = v + vlen;
-+ for (; cp < cplim; cp++, cp2++)
-+ if (*cp != *cp2)
-+ goto on1;
-+ /*
-+ * This extra grot is in case we are explicitly asked
-+ * to look up the default. Ugh!
-+ */
-+ if ((t->rj_flags & RJF_ROOT) && t->rj_dupedkey)
-+ t = t->rj_dupedkey;
-+ return t;
-+on1:
-+ /* offset of the first byte that differed from the leaf's key */
-+ matched_off = cp - v;
-+ saved_t = t;
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "** try to match a leaf, t=0p%p\n", t);
-+ do {
-+ if (t->rj_mask) {
-+ /*
-+ * Even if we don't match exactly as a host,
-+ * we may match if the leaf we wound up at is
-+ * a route to a net.
-+ */
-+ cp3 = matched_off + t->rj_mask;
-+ cp2 = matched_off + t->rj_key;
-+ for (; cp < cplim; cp++)
-+ if ((*cp2++ ^ *cp) & *cp3++)
-+ break;
-+ if (cp == cplim)
-+ return t;
-+ /* rewind to the first mismatch before trying the next duplicate */
-+ cp = matched_off + v;
-+ }
-+ } while ((t = t->rj_dupedkey));
-+ t = saved_t;
-+ /* start searching up the tree */
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "*** start searching up the tree, t=0p%p\n",
-+ t);
-+ do {
-+ register struct radij_mask *m;
-+
-+ t = t->rj_p;
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "**** t=0p%p\n",
-+ t);
-+ if ((m = t->rj_mklist)) {
-+ /*
-+ * After doing measurements here, it may
-+ * turn out to be faster to open code
-+ * rj_search_m here instead of always
-+ * copying and masking.
-+ */
-+ /* off = min(t->rj_off, matched_off); */
-+ off = t->rj_off;
-+ if (matched_off < off)
-+ off = matched_off;
-+ mstart = maskedKey + off;
-+ do {
-+ cp2 = mstart;
-+ cp3 = m->rm_mask + off;
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "***** cp2=0p%p cp3=0p%p\n",
-+ cp2, cp3);
-+ /* write key & mask into maskedKey from offset off onwards */
-+ for (cp = v + off; cp < cplim;)
-+ *cp2++ = *cp++ & *cp3++;
-+ x = rj_search(maskedKey, t);
-+ /* among duplicates, pick the one that carries this very mask */
-+ while (x && x->rj_mask != m->rm_mask)
-+ x = x->rj_dupedkey;
-+ if (x &&
-+ (Bcmp(mstart, x->rj_key + off,
-+ vlen - off) == 0))
-+ return x;
-+ } while ((m = m->rm_mklist));
-+ }
-+ } while (t != top);
-+ KLIPS_PRINT(debug_radij,
-+ "klips_debug:rj_match: "
-+ "***** not found.\n");
-+ return 0;
-+};
-+
-+#ifdef RJ_DEBUG
-+int rj_nodenum;
-+struct radij_node *rj_clist;
-+int rj_saveinfo;
-+DEBUG_NO_STATIC void traverse(struct radij_node *);
-+#ifdef RJ_DEBUG2
-+int rj_debug = 1;
-+#else
-+int rj_debug = 0;
-+#endif /* RJ_DEBUG2 */
-+#endif /* RJ_DEBUG */
-+
-+/*
-+ * Initialise the two caller-supplied nodes as a leaf/internal pair:
-+ * nodes[0] becomes a leaf (rj_b = -1) holding key v, nodes[1] becomes an
-+ * internal node that tests bit b and has the leaf as its left child.
-+ * Returns the internal node.
-+ */
-+struct radij_node *
-+rj_newpair(v, b, nodes)
-+	void *v;
-+	int b;
-+	struct radij_node nodes[2];
-+{
-+	register struct radij_node *leaf = &nodes[0];
-+	register struct radij_node *inner = &nodes[1];
-+
-+	/* internal node: byte offset and bit mask derived from bit index b */
-+	inner->rj_b = b;
-+	inner->rj_bmask = 0x80 >> (b & 7);
-+	inner->rj_l = leaf;
-+	inner->rj_off = b >> 3;
-+
-+	/* leaf: carries the key and points back at the internal node */
-+	leaf->rj_b = -1;
-+	leaf->rj_key = (caddr_t)v;
-+	leaf->rj_p = inner;
-+
-+	inner->rj_flags = RJF_ACTIVE;
-+	leaf->rj_flags = RJF_ACTIVE;
-+#ifdef RJ_DEBUG
-+	leaf->rj_info = rj_nodenum++;
-+	inner->rj_info = rj_nodenum++;
-+	leaf->rj_twin = inner;
-+	leaf->rj_ybro = rj_clist;
-+	rj_clist = leaf;
-+#endif /* RJ_DEBUG */
-+	return inner;
-+}
-+
-+/*
-+ * Insert key v_arg (first byte = key length) into the tree under head,
-+ * using the two caller-supplied nodes for the new leaf/internal pair.
-+ *
-+ * If a leaf with the same key bytes already exists, *dupentry is set to 1
-+ * and that existing leaf is returned; nodes is left unused.  Otherwise
-+ * *dupentry is set to 0, the pair is linked in at the first bit where the
-+ * key differs from the closest existing leaf, and the new leaf is returned.
-+ */
-+struct radij_node *
-+rj_insert(v_arg, head, dupentry, nodes)
-+ void *v_arg;
-+ struct radij_node_head *head;
-+ int *dupentry;
-+ struct radij_node nodes[2];
-+{
-+ caddr_t v = v_arg;
-+ struct radij_node *top = head->rnh_treetop;
-+ int head_off = top->rj_off, vlen = (int)*((u_char *)v);
-+ register struct radij_node *t = rj_search(v_arg, top);
-+ register caddr_t cp = v + head_off;
-+ register int b;
-+ struct radij_node *tt;
-+ /*
-+ *find first bit at which v and t->rj_key differ
-+ */
-+ {
-+ register caddr_t cp2 = t->rj_key + head_off;
-+ register int cmp_res;
-+ caddr_t cplim = v + vlen;
-+
-+ while (cp < cplim)
-+ if (*cp2++ != *cp++)
-+ goto on1;
-+ *dupentry = 1;
-+ return t;
-+on1:
-+ *dupentry = 0;
-+ /* both pointers are one past the mismatch; XOR exposes the differing bits */
-+ cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
-+ /* start at the bit after the mismatching byte and step back to its highest differing bit */
-+ for (b = (cp - v) << 3; cmp_res; b--)
-+ cmp_res >>= 1;
-+ }
-+ {
-+ register struct radij_node *p, *x = top;
-+ cp = v;
-+ /* descend again until reaching a leaf or a node that tests a bit >= b */
-+ do {
-+ p = x;
-+ if (cp[x->rj_off] & x->rj_bmask)
-+ x = x->rj_r;
-+ else x = x->rj_l;
-+ } while (b > (unsigned) x->rj_b); /* x->rj_b < b && x->rj_b >= 0 */
-+#ifdef RJ_DEBUG
-+ if (rj_debug)
-+ printk("klips_debug:rj_insert: Going In:\n"), traverse(p);
-+#endif /* RJ_DEBUG */
-+ t = rj_newpair(v_arg, b, nodes); tt = t->rj_l;
-+ /* hang the new internal node where x used to hang under p */
-+ if ((cp[p->rj_off] & p->rj_bmask) == 0)
-+ p->rj_l = t;
-+ else
-+ p->rj_r = t;
-+ x->rj_p = t; t->rj_p = p; /* frees x, p as temp vars below */
-+ /* put the new leaf on the side selected by bit b of the key, x on the other */
-+ if ((cp[t->rj_off] & t->rj_bmask) == 0) {
-+ t->rj_r = x;
-+ } else {
-+ t->rj_r = tt; t->rj_l = x;
-+ }
-+#ifdef RJ_DEBUG
-+ if (rj_debug)
-+ printk("klips_debug:rj_insert: Coming out:\n"), traverse(p);
-+#endif /* RJ_DEBUG */
-+ }
-+ return (tt);
-+}
-+/* rj_addmask - return the mask-tree leaf for netmask n_arg, inserting a private copy of the mask if needed; 0 if out of memory */
-+struct radij_node *
-+rj_addmask(n_arg, search, skip)
-+	int search, skip;	/* search: look the mask up first; skip: leading bytes not examined when indexing */
-+	void *n_arg;		/* netmask; its first byte is read as its length */
-+{
-+	caddr_t netmask = (caddr_t)n_arg;
-+	register struct radij_node *x;
-+	register caddr_t cp, cplim;
-+	register int b, mlen, j;
-+	int maskduplicated;
-+	struct radij_node *fresh;	/* our own allocation, remembered so it can be released */
-+
-+	mlen = *(u_char *)netmask;
-+	if (search) {
-+		x = rj_search(netmask, rj_masktop);
-+		if (Bcmp(netmask, x->rj_key, mlen) == 0)
-+			return (x);
-+	}
-+	R_Malloc(fresh, struct radij_node *, maj_keylen + 2 * sizeof (*fresh));	/* two nodes, then the mask copy */
-+	if (fresh == 0)
-+		return (0);
-+	Bzero(fresh, maj_keylen + 2 * sizeof (*fresh));
-+	cp = (caddr_t)(fresh + 2);
-+	Bcopy(netmask, cp, mlen);
-+	netmask = cp;
-+	x = rj_insert(netmask, mask_rjhead, &maskduplicated, fresh);
-+	if (maskduplicated) {
-+		kfree(fresh);	/* rj_insert returned the existing node and never linked ours: do not leak it */
-+		return (x);
-+	}
-+	/* Calculate index of mask: whole 0xff bytes first, then the leading 1 bits of the next byte. */
-+	cplim = netmask + mlen;
-+	for (cp = netmask + skip; cp < cplim; cp++)
-+		if (*(u_char *)cp != 0xff)
-+			break;
-+	b = (cp - netmask) << 3;
-+	if (cp != cplim && *cp != 0) {
-+		gotOddMasks = 1;
-+		for (j = 0x80; j; b++, j >>= 1)
-+			if ((j & *cp) == 0)
-+				break;
-+	}
-+	x->rj_b = -1 - b;
-+	return (x);
-+}
-+/* rj_addroute - insert the route (key v_arg, optional netmask n_arg) under head using treenodes[2]; returns 0, -EEXIST or -ENOMEM */
-+#if 0
-+struct radij_node *
-+#endif
-+int
-+rj_addroute(v_arg, n_arg, head, treenodes)
-+ void *v_arg, *n_arg;
-+ struct radij_node_head *head;
-+ struct radij_node treenodes[2];
-+{
-+ caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
-+ register struct radij_node *t, *x=NULL, *tt;
-+ struct radij_node *saved_tt, *top = head->rnh_treetop;
-+ short b = 0, b_leaf;
-+ int mlen, keyduplicated;
-+ caddr_t cplim;
-+ struct radij_mask *m, **mp;
-+
-+ /*
-+ * In dealing with non-contiguous masks, there may be
-+ * many different routes which have the same mask.
-+ * We will find it useful to have a unique pointer to
-+ * the mask to speed avoiding duplicate references at
-+ * nodes and possibly save time in calculating indices.
-+ */
-+ if (netmask) {
-+ x = rj_search(netmask, rj_masktop);
-+ mlen = *(u_char *)netmask;
-+ if (Bcmp(netmask, x->rj_key, mlen) != 0) {
-+ x = rj_addmask(netmask, 0, top->rj_off);
-+ if (x == 0)
-+ return -ENOMEM; /* (0) rgb */
-+ }
-+ netmask = x->rj_key; /* from here on netmask is the key pointer held by the mask tree */
-+ b = -1 - x->rj_b; /* mask index, undoing the -1 - b encoding stored on the mask node */
-+ }
-+ /*
-+ * Deal with duplicated keys: attach node to previous instance
-+ */
-+ saved_tt = tt = rj_insert(v, head, &keyduplicated, treenodes);
-+#ifdef RJ_DEBUG
-+ printk("addkey: duplicated: %d\n", keyduplicated);
-+#endif
-+ if (keyduplicated) {
-+ do { /* scan the chain of leaves that share this key */
-+ if (tt->rj_mask == netmask)
-+ return -EEXIST; /* -ENXIO; (0) rgb */
-+ t = tt;
-+ if (netmask == 0 ||
-+ (tt->rj_mask && rj_refines(netmask, tt->rj_mask)))
-+ break;
-+ } while ((tt = tt->rj_dupedkey));
-+ /*
-+ * If the mask is not duplicated, we wouldn't
-+ * find it among possible duplicate key entries
-+ * anyway, so the above test doesn't hurt.
-+ *
-+ * We sort the masks for a duplicated key the same way as
-+ * in a masklist -- most specific to least specific.
-+ * This may require the unfortunate nuisance of relocating
-+ * the head of the list.
-+ */
-+ if (tt && t == saved_tt) {
-+ struct radij_node *xx = x;
-+ /* link in at head of list */
-+ (tt = treenodes)->rj_dupedkey = t;
-+ tt->rj_flags = t->rj_flags;
-+ tt->rj_p = x = t->rj_p;
-+ if (x->rj_l == t) x->rj_l = tt; else x->rj_r = tt;
-+ saved_tt = tt; x = xx;
-+ } else {
-+ (tt = treenodes)->rj_dupedkey = t->rj_dupedkey; /* splice the new leaf in right after t */
-+ t->rj_dupedkey = tt;
-+ }
-+#ifdef RJ_DEBUG
-+ t=tt+1; tt->rj_info = rj_nodenum++; t->rj_info = rj_nodenum++;
-+ tt->rj_twin = t; tt->rj_ybro = rj_clist; rj_clist = tt;
-+#endif /* RJ_DEBUG */
-+ t = saved_tt;
-+ tt->rj_key = (caddr_t) v;
-+ tt->rj_b = -1;
-+ tt->rj_flags = t->rj_flags & ~RJF_ROOT;
-+ }
-+ /*
-+ * Put mask in tree.
-+ */
-+ if (netmask) {
-+ tt->rj_mask = netmask;
-+ tt->rj_b = x->rj_b; /* x is still the mask-tree node found or added above */
-+ }
-+ t = saved_tt->rj_p; /* parent of the leaf at the head of the duplicate chain */
-+ b_leaf = -1 - t->rj_b;
-+ if (t->rj_r == saved_tt) x = t->rj_l; else x = t->rj_r; /* x = the other child of t */
-+ /* Promote general routes from below */
-+ if (x->rj_b < 0) {
-+ if (x->rj_mask && (x->rj_b >= b_leaf) && x->rj_mklist == 0) {
-+ MKGet(m);
-+ if (m) {
-+ Bzero(m, sizeof *m);
-+ m->rm_b = x->rj_b;
-+ m->rm_mask = x->rj_mask;
-+ x->rj_mklist = t->rj_mklist = m;
-+ }
-+ }
-+ } else if (x->rj_mklist) {
-+ /*
-+ * Skip over masks whose index is > that of new node
-+ */
-+ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist)
-+ if (m->rm_b >= b_leaf)
-+ break;
-+ t->rj_mklist = m; *mp = 0; /* the tail of x's list from m onwards moves up to t */
-+ }
-+ /* Add new route to highest possible ancestor's list */
-+ if ((netmask == 0) || (b > t->rj_b )) {
-+#ifdef RJ_DEBUG
-+ printk("klips:radij.c: netmask = %p or b(%d)>t->rjb(%d)\n", netmask, b, t->rj_b);
-+#endif
-+ return 0; /* tt rgb */ /* can't lift at all */
-+ }
-+ b_leaf = tt->rj_b;
-+ do { /* climb while the parent's bit index is still >= b, stopping at top */
-+ x = t;
-+ t = t->rj_p;
-+ } while (b <= t->rj_b && x != top);
-+ /*
-+ * Search through routes associated with node to
-+ * insert new route according to index.
-+ * For nodes of equal index, place more specific
-+ * masks first.
-+ */
-+ cplim = netmask + mlen; /* NOTE(review): cplim is not read again in this function */
-+ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist) {
-+ if (m->rm_b < b_leaf)
-+ continue;
-+ if (m->rm_b > b_leaf)
-+ break;
-+ if (m->rm_mask == netmask) { /* same mask already annotated here: share it */
-+ m->rm_refs++;
-+ tt->rj_mklist = m;
-+#ifdef RJ_DEBUG
-+ printk("klips:radij.c: m->rm_mask %p == netmask\n", netmask);
-+#endif
-+ return 0; /* tt rgb */
-+ }
-+ if (rj_refines(netmask, m->rm_mask))
-+ break;
-+ }
-+ MKGet(m);
-+ if (m == 0) { /* no annotation could be obtained: the route stays inserted and 0 is still returned */
-+ printk("klips_debug:rj_addroute: "
-+ "Mask for route not entered\n");
-+ return 0; /* (tt) rgb */
-+ }
-+ Bzero(m, sizeof *m);
-+ m->rm_b = b_leaf;
-+ m->rm_mask = netmask;
-+ m->rm_mklist = *mp; /* insert m in front of the entry the search stopped at */
-+ *mp = m;
-+ tt->rj_mklist = m;
-+#ifdef RJ_DEBUG
-+ printk("klips:radij.c: addroute done\n");
-+#endif
-+ return 0; /* tt rgb */
-+}
-+/* rj_delete - unlink the route (key v_arg, mask netmask_arg) from head; on success stores the detached leaf in *node and returns 0 */
-+int
-+rj_delete(v_arg, netmask_arg, head, node)
-+ void *v_arg, *netmask_arg;
-+ struct radij_node_head *head;
-+ struct radij_node **node; /* out: the leaf that was unlinked; written only on the success path */
-+{
-+ register struct radij_node *t, *p, *x, *tt;
-+ struct radij_mask *m, *saved_m, **mp;
-+ struct radij_node *dupedkey, *saved_tt, *top;
-+ caddr_t v, netmask;
-+ int b, head_off, vlen;
-+
-+ v = v_arg;
-+ netmask = netmask_arg;
-+ x = head->rnh_treetop;
-+ tt = rj_search(v, x);
-+ head_off = x->rj_off;
-+ vlen = *(u_char *)v; /* the first key byte is used as the key length */
-+ saved_tt = tt;
-+ top = x;
-+ if (tt == 0 ||
-+ Bcmp(v + head_off, tt->rj_key + head_off, vlen - head_off))
-+ return -EFAULT; /* (0) rgb */
-+ /*
-+ * Delete our route from mask lists.
-+ */
-+ if ((dupedkey = tt->rj_dupedkey)) { /* several leaves share this key: find the one with our mask */
-+ if (netmask)
-+ netmask = rj_search(netmask, rj_masktop)->rj_key; /* leaves are matched by mask pointer below */
-+ while (tt->rj_mask != netmask)
-+ if ((tt = tt->rj_dupedkey) == 0)
-+ return -ENOENT; /* -ENXIO; (0) rgb */
-+ }
-+ if (tt->rj_mask == 0 || (saved_m = m = tt->rj_mklist) == 0) /* no mask or no annotation: nothing to undo */
-+ goto on1;
-+ if (m->rm_mask != tt->rj_mask) {
-+ printk("klips_debug:rj_delete: "
-+ "inconsistent annotation\n");
-+ goto on1;
-+ }
-+ if (--m->rm_refs >= 0) /* annotation is still referenced after dropping ours */
-+ goto on1;
-+ b = -1 - tt->rj_b;
-+ t = saved_tt->rj_p;
-+ if (b > t->rj_b)
-+ goto on1; /* Wasn't lifted at all */
-+ do { /* same upward walk rj_addroute uses to pick the annotated ancestor */
-+ x = t;
-+ t = t->rj_p;
-+ } while (b <= t->rj_b && x != top);
-+ for (mp = &x->rj_mklist; (m = *mp); mp = &m->rm_mklist)
-+ if (m == saved_m) {
-+ *mp = m->rm_mklist;
-+ MKFree(m);
-+ break;
-+ }
-+ if (m == 0)
-+ printk("klips_debug:rj_delete: "
-+ "couldn't find our annotation\n");
-+on1:
-+ /*
-+ * Eliminate us from tree
-+ */
-+ if (tt->rj_flags & RJF_ROOT) /* leaves flagged RJF_ROOT are refused */
-+ return -EFAULT; /* (0) rgb */
-+#ifdef RJ_DEBUG
-+ /* Get us out of the creation list */
-+ for (t = rj_clist; t && t->rj_ybro != tt; t = t->rj_ybro) {}
-+ if (t) t->rj_ybro = tt->rj_ybro;
-+#endif /* RJ_DEBUG */
-+ t = tt->rj_p;
-+ if (dupedkey) { /* the key stays in the tree: only take tt out of the duplicate chain */
-+ if (tt == saved_tt) {
-+ x = dupedkey; x->rj_p = t;
-+ if (t->rj_l == tt) t->rj_l = x; else t->rj_r = x;
-+ } else {
-+ for (x = p = saved_tt; p && p->rj_dupedkey != tt;)
-+ p = p->rj_dupedkey;
-+ if (p) p->rj_dupedkey = tt->rj_dupedkey;
-+ else printk("klips_debug:rj_delete: "
-+ "couldn't find node that we started with\n");
-+ }
-+ t = tt + 1; /* the node stored right after tt (rj_newpair pairs a leaf with its internal node) */
-+ if (t->rj_flags & RJF_ACTIVE) { /* still in use: copy it into the node following x and re-link its neighbours */
-+#ifndef RJ_DEBUG
-+ *++x = *t; p = t->rj_p;
-+#else
-+ b = t->rj_info; *++x = *t; t->rj_info = b; p = t->rj_p;
-+#endif /* RJ_DEBUG */
-+ if (p->rj_l == t) p->rj_l = x; else p->rj_r = x;
-+ x->rj_l->rj_p = x; x->rj_r->rj_p = x;
-+ }
-+ goto out;
-+ }
-+ if (t->rj_l == tt) x = t->rj_r; else x = t->rj_l; /* x = tt's sibling, which takes parent t's place */
-+ p = t->rj_p;
-+ if (p->rj_r == t) p->rj_r = x; else p->rj_l = x;
-+ x->rj_p = p;
-+ /*
-+ * Demote routes attached to us.
-+ */
-+ if (t->rj_mklist) {
-+ if (x->rj_b >= 0) { /* sibling is internal: append t's list to the end of x's list */
-+ for (mp = &x->rj_mklist; (m = *mp);)
-+ mp = &m->rm_mklist;
-+ *mp = t->rj_mklist;
-+ } else {
-+ for (m = t->rj_mklist; m;) {
-+ struct radij_mask *mm = m->rm_mklist;
-+ if (m == x->rj_mklist && (--(m->rm_refs) < 0)) {
-+ x->rj_mklist = 0;
-+ MKFree(m);
-+ } else
-+ printk("klips_debug:rj_delete: "
-+ "Orphaned Mask 0p%p at 0p%p\n", m, x);
-+ m = mm;
-+ }
-+ }
-+ }
-+ /*
-+ * We may be holding an active internal node in the tree.
-+ */
-+ x = tt + 1;
-+ if (t != x) { /* the node after tt is not the parent just removed: move it into t's storage */
-+#ifndef RJ_DEBUG
-+ *t = *x;
-+#else
-+ b = t->rj_info; *t = *x; t->rj_info = b;
-+#endif /* RJ_DEBUG */
-+ t->rj_l->rj_p = t; t->rj_r->rj_p = t;
-+ p = x->rj_p;
-+ if (p->rj_l == x) p->rj_l = t; else p->rj_r = t;
-+ }
-+out:
-+ tt->rj_flags &= ~RJF_ACTIVE; /* clear RJF_ACTIVE on both nodes of the pair */
-+ tt[1].rj_flags &= ~RJF_ACTIVE;
-+ *node = tt;
-+ return 0; /* (tt) rgb */
-+}
-+/* rj_walktree - call f(leaf, w) on every non-root leaf under h, left to right; stops at the first non-zero result from f */
-+int
-+rj_walktree(h, f, w)
-+ struct radij_node_head *h;
-+ register int (*f)(struct radij_node *,void *);
-+ void *w; /* passed through to f unchanged; may be NULL (see the disabled check below) */
-+{
-+ int error;
-+ struct radij_node *base, *next;
-+ register struct radij_node *rn;
-+
-+ if(!h || !f /* || !w */) {
-+ return -ENODATA;
-+ }
-+
-+ rn = h->rnh_treetop;
-+ /*
-+ * This gets complicated because we may delete the node
-+ * while applying the function f to it, so we need to calculate
-+ * the successor node in advance.
-+ */
-+ /* First time through node, go left */
-+ while (rn->rj_b >= 0)
-+ rn = rn->rj_l;
-+ for (;;) {
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_radij) {
-+ printk("klips_debug:rj_walktree: "
-+ "for: rn=0p%p rj_b=%d rj_flags=%x",
-+ rn,
-+ rn->rj_b,
-+ rn->rj_flags);
-+ rn->rj_b >= 0 ?
-+ printk(" node off=%x\n",
-+ rn->rj_off) :
-+ printk(" leaf key = %08x->%08x\n",
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr),
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr))
-+ ;
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ base = rn; /* leaf (plus its duplicate-key chain) handled in this round */
-+ /* If at right child go back up, otherwise, go right */
-+ while (rn->rj_p->rj_r == rn && (rn->rj_flags & RJF_ROOT) == 0)
-+ rn = rn->rj_p;
-+ /* Find the next *leaf* since next node might vanish, too */
-+ for (rn = rn->rj_p->rj_r; rn->rj_b >= 0;)
-+ rn = rn->rj_l;
-+ next = rn;
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_radij) {
-+ printk("klips_debug:rj_walktree: "
-+ "processing leaves, rn=0p%p rj_b=%d rj_flags=%x",
-+ rn,
-+ rn->rj_b,
-+ rn->rj_flags);
-+ rn->rj_b >= 0 ?
-+ printk(" node off=%x\n",
-+ rn->rj_off) :
-+ printk(" leaf key = %08x->%08x\n",
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr),
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr))
-+ ;
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ /* Process leaves */
-+ while ((rn = base)) {
-+ base = rn->rj_dupedkey; /* fetched before f runs, because f may delete rn */
-+#ifdef CONFIG_KLIPS_DEBUG
-+ if(debug_radij) {
-+ printk("klips_debug:rj_walktree: "
-+ "while: base=0p%p rn=0p%p rj_b=%d rj_flags=%x",
-+ base,
-+ rn,
-+ rn->rj_b,
-+ rn->rj_flags);
-+ rn->rj_b >= 0 ?
-+ printk(" node off=%x\n",
-+ rn->rj_off) :
-+ printk(" leaf key = %08x->%08x\n",
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_src.s_addr),
-+ (u_int)ntohl(((struct sockaddr_encap *)rn->rj_key)->sen_ip_dst.s_addr))
-+ ;
-+ }
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ if (!(rn->rj_flags & RJF_ROOT) && (error = (*f)(rn, w))) /* root leaves are skipped */
-+ return (-error); /* note: f's result is negated before it is returned */
-+ }
-+ rn = next;
-+ if (rn->rj_flags & RJF_ROOT) /* the pre-computed successor is a root leaf: walk complete */
-+ return (0);
-+ }
-+ /* NOTREACHED */
-+}
-+/* rj_inithead - if *head is unset, allocate a tree head with its three root nodes and method pointers; returns 1 on success, 0 if allocation fails */
-+int
-+rj_inithead(head, off)
-+	void **head;
-+	int off;
-+{
-+	register struct radij_node_head *newhead;
-+	register struct radij_node *root, *lo, *hi;
-+	if (*head != NULL)	/* already set up: report success without touching it */
-+		return (1);
-+	R_Malloc(newhead, struct radij_node_head *, sizeof (*newhead));
-+	if (!newhead)
-+		return (0);
-+	Bzero(newhead, sizeof (*newhead));
-+	*head = newhead;
-+	root = rj_newpair(rj_zeroes, off, newhead->rnh_nodes);	/* nodes 0/1: all-zero leaf and the top node */
-+	lo = root->rj_l;
-+	hi = &newhead->rnh_nodes[2];				/* node 2 becomes the all-ones leaf */
-+	root->rj_p = root;
-+	root->rj_r = hi;
-+	lo->rj_flags = root->rj_flags = RJF_ROOT | RJF_ACTIVE;
-+	lo->rj_b = -1 - off;
-+	*hi = *lo;		/* clone the finished left leaf, then give the clone its own key */
-+	hi->rj_key = rj_ones;
-+	newhead->rnh_treetop = root;
-+	newhead->rnh_walktree = rj_walktree;
-+	newhead->rnh_matchaddr = rj_match;
-+	newhead->rnh_deladdr = rj_delete;
-+	newhead->rnh_addaddr = rj_addroute;
-+	return (1);
-+}
-+/* rj_init - split one 3*maj_keylen buffer into rj_zeroes, rj_ones (every byte set) and maskedKey, then create mask_rjhead */
-+void
-+rj_init()
-+{
-+	char *fill, *fill_end;
-+
-+	if (!maj_keylen) {
-+		printk("klips_debug:rj_init: "
-+		       "radij functions require maj_keylen be set\n");
-+		return;
-+	}
-+	R_Malloc(rj_zeroes, char *, 3 * maj_keylen);
-+	if (!rj_zeroes)
-+		panic("rj_init");
-+	Bzero(rj_zeroes, 3 * maj_keylen);
-+	rj_ones = fill = rj_zeroes + maj_keylen;	/* second third of the buffer */
-+	maskedKey = fill_end = rj_ones + maj_keylen;	/* last third of the buffer */
-+	for (; fill < fill_end; fill++)
-+		*fill = -1;
-+	if (!rj_inithead((void **)&mask_rjhead, 0))
-+		panic("rj_init 2");
-+}
-+/* rj_preorder - recursively printk the subtree at rn; l is the depth, drawn as a run of '*' (internal node) or '@' (leaf) */
-+void
-+rj_preorder(struct radij_node *rn, int l)
-+{
-+	int depth;
-+
-+	if (!rn) {
-+		printk("klips_debug:rj_preorder: "
-+		       "NULL pointer\n");
-+		return;
-+	}
-+
-+	if (rn->rj_b >= 0) {	/* internal node: both children are printed before the node itself */
-+		rj_preorder(rn->rj_l, l+1);
-+		rj_preorder(rn->rj_r, l+1);
-+		printk("klips_debug:");
-+		for (depth = l; depth > 0; depth--)
-+			printk("*");
-+		printk(" off = %d\n", rn->rj_off);
-+		return;
-+	}
-+
-+	/* leaf: flags always; key, mask and duplicate link only while RJF_ACTIVE is set */
-+	printk("klips_debug:");
-+	for (depth = l; depth > 0; depth--)
-+		printk("@");
-+	printk(" flags = %x", (u_int)rn->rj_flags);
-+	if (rn->rj_flags & RJF_ACTIVE) {
-+		struct sockaddr_encap *key = (struct sockaddr_encap *)rn->rj_key;
-+		struct sockaddr_encap *mask = (struct sockaddr_encap *)rn->rj_mask;
-+
-+		printk(" @key=0p%p", rn->rj_key);
-+		printk(" key = %08x->%08x",
-+		       (u_int)ntohl(key->sen_ip_src.s_addr),
-+		       (u_int)ntohl(key->sen_ip_dst.s_addr));
-+		printk(" @mask=0p%p", rn->rj_mask);
-+		if (mask)
-+			printk(" mask = %08x->%08x",
-+			       (u_int)ntohl(mask->sen_ip_src.s_addr),
-+			       (u_int)ntohl(mask->sen_ip_dst.s_addr));
-+		if (rn->rj_dupedkey)
-+			printk(" dupedkey = 0p%p", rn->rj_dupedkey);
-+	}
-+	printk("\n");
-+}
-+/* traverse - built only under RJ_DEBUG: dump the subtree rooted at p through rj_preorder, starting at depth 0 */
-+#ifdef RJ_DEBUG
-+DEBUG_NO_STATIC void traverse(struct radij_node *p)
-+{
-+ rj_preorder(p, 0);
-+}
-+#endif /* RJ_DEBUG */
-+/* rj_dumptrees - printk the whole tree hanging off the global rnh; does nothing while rnh is not set */
-+void
-+rj_dumptrees(void)
-+{
-+	if (rnh) rj_preorder(rnh->rnh_treetop, 0);	/* rnh can be NULL (radijcleanup tests it too), so guard the dereference */
-+}
-+/* rj_free_mkfreelist - kfree every radij_mask on the rj_mkfreelist chain and leave the list head empty */
-+void
-+rj_free_mkfreelist(void)
-+{
-+	struct radij_mask *cur, *next;
-+
-+	cur = rj_mkfreelist;
-+	rj_mkfreelist = NULL;	/* detach first: the head must not keep pointing at freed entries */
-+	while (cur) {
-+		next = cur->rm_mklist;	/* read the link before the entry is released */
-+		kfree(cur);
-+		cur = next;
-+	}
-+}
-+/* radijcleartree - run ipsec_rj_walker_delete over every leaf of the global rnh tree; returns rj_walktree's result */
-+int
-+radijcleartree(void)
-+{
-+ return rj_walktree(rnh, ipsec_rj_walker_delete, NULL);
-+}
-+/* radijcleanup - delete every route, then release the mask free-list and the three module buffers; returns radijcleartree's result */
-+int
-+radijcleanup(void)
-+{
-+	int error = 0;
-+
-+	error = radijcleartree();
-+
-+	rj_free_mkfreelist();
-+
-+/*	rj_walktree(mask_rjhead, ipsec_rj_walker_delete, NULL); */
-+	/*
-+	 * kfree() accepts NULL. Each pointer is cleared after it is freed:
-+	 * rj_inithead() treats a non-NULL head as already initialised, so a
-+	 * stale value here would make a later rj_init() reuse freed memory.
-+	 */
-+	kfree(mask_rjhead);
-+	mask_rjhead = NULL;
-+	kfree(rj_zeroes);
-+	rj_zeroes = NULL;	/* rj_ones and maskedKey pointed into this buffer */
-+	kfree(rnh);
-+	rnh = NULL;
-+
-+	return error;
-+}
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/rangetoa.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,60 @@
-+/*
-+ * convert binary form of address range to ASCII
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: rangetoa.c,v 1.9 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - rangetoa - convert address range to ASCII: addrs[0], up to three '.', addrs[1]
-+ * Only format 0 is accepted; any other value makes the function return 0.
-+ */
-+size_t				/* space needed for full conversion */
-+rangetoa(addrs, format, dst, dstlen)
-+struct in_addr addrs[2];
-+int format;			/* character */
-+char *dst;			/* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+	size_t used;		/* running total that is returned */
-+	size_t room = 0;	/* space handed to the second conversion */
-+	int dots;
-+	char *tail = NULL;	/* where the second address is written */
-+
-+	if (format != 0)
-+		return 0;
-+
-+	used = addrtoa(addrs[0], 0, dst, dstlen);
-+	if (used < dstlen) {
-+		/* start on the last byte counted by addrtoa (presumably its */
-+		/* terminator) and lay down dots while space remains */
-+		tail = dst + used - 1;
-+		dots = 3;
-+		while (used < dstlen && dots > 0) {
-+			*tail++ = '.';
-+			used++;
-+			dots--;
-+		}
-+	}
-+
-+	/* remaining space goes to the second address; with none left, just terminate dst */
-+	if (used < dstlen)
-+		room = dstlen - used;
-+	else if (dstlen > 0)
-+		dst[dstlen - 1] = '\0';
-+
-+	used += addrtoa(addrs[1], 0, tail, room);
-+
-+	return used;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/satot.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,134 @@
-+/*
-+ * convert from binary form of SA ID to text
-+ * Copyright (C) 2000, 2001 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: satot.c,v 1.13 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+/* typenames - protocol-to-prefix table that satot() searches linearly */
-+static struct typename {
-+ char type; /* value compared against sa->proto */
-+ char *name; /* text prefix used for that protocol */
-+} typenames[] = {
-+ { SA_AH, "ah" },
-+ { SA_ESP, "esp" },
-+ { SA_IPIP, "tun" },
-+ { SA_COMP, "comp" },
-+ { SA_INT, "int" },
-+ { 0, NULL } /* end marker: the search stops at the first NULL name */
-+};
-+
-+/*
-+ - satot - convert SA to text "ah507@1.2.3.4"; format is 0, 'f', 'x' or 'd', anything else returns 0
-+ */
-+size_t /* space needed for full conversion */
-+satot(sa, format, dst, dstlen)
-+const ip_said *sa;
-+int format; /* character */
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ size_t len = 0; /* 0 means "not recognized yet" */
-+ int base;
-+ int showversion; /* use delimiter to show IP version? */
-+ struct typename *tn;
-+ char *p;
-+ char *pre;
-+ char buf[10+1+ULTOT_BUF+ADDRTOT_BUF]; /* text is always built here first, then copied out */
-+ char unk[10];
-+
-+ switch (format) {
-+ case 0:
-+ base = 16;
-+ showversion = 1;
-+ break;
-+ case 'f':
-+ base = 17;
-+ showversion = 1;
-+ break;
-+ case 'x':
-+ base = 'x';
-+ showversion = 0;
-+ break;
-+ case 'd':
-+ base = 10;
-+ showversion = 0;
-+ break;
-+ default:
-+ return 0;
-+ }
-+
-+ memset(buf, 0, sizeof(buf));
-+
-+ pre = NULL;
-+ for (tn = typenames; tn->name != NULL; tn++)
-+ if (sa->proto == tn->type) {
-+ pre = tn->name;
-+ break; /* NOTE BREAK OUT */
-+ }
-+ if (pre == NULL) { /* unknown protocol */
-+ strcpy(unk, "unk");
-+ (void) ultot((unsigned char)sa->proto, 10, unk+strlen(unk),
-+ sizeof(unk)-strlen(unk));
-+ pre = unk;
-+ }
-+
-+ if (strcmp(pre, PASSTHROUGHTYPE) == 0 &&
-+ sa->spi == PASSTHROUGHSPI &&
-+ isunspecaddr(&sa->dst)) {
-+ strcpy(buf, (addrtypeof(&sa->dst) == AF_INET) ?
-+ PASSTHROUGH4NAME :
-+ PASSTHROUGH6NAME);
-+ len = strlen(buf);
-+ }
-+
-+ if (sa->proto == SA_INT) {
-+ char intunk[10];
-+ switch (ntohl(sa->spi)) {
-+ case SPI_PASS: p = "%pass"; break;
-+ case SPI_DROP: p = "%drop"; break;
-+ case SPI_REJECT: p = "%reject"; break;
-+ case SPI_HOLD: p = "%hold"; break;
-+ case SPI_TRAP: p = "%trap"; break;
-+ case SPI_TRAPSUBNET: p = "%trapsubnet"; break;
-+ default: snprintf(intunk, 10, "%%unk-%d", ntohl(sa->spi)); p = intunk; break;
-+ }
-+ if (p != NULL) {
-+ strcpy(buf, p);
-+ len = strlen(buf); /* NOTE(review): here and in the passthrough case len excludes the NUL */
-+ }
-+ }
-+
-+ if (len == 0) { /* general case needed */
-+ strcpy(buf, pre);
-+ len = strlen(buf);
-+ if (showversion) {
-+ *(buf+len) = (addrtypeof(&sa->dst) == AF_INET) ? '.' :
-+ ':';
-+ len++;
-+ *(buf+len) = '\0';
-+ }
-+ len += ultot(ntohl(sa->spi), base, buf+len, sizeof(buf)-len);
-+ *(buf+len-1) = '@'; /* the last byte counted by ultot is replaced by the separator */
-+ len += addrtot(&sa->dst, 0, buf+len, sizeof(buf)-len);
-+ *(buf+len) = '\0';
-+ }
-+
-+ /* dst need not be valid when dstlen is 0, so it is left alone in that case */
-+ if (dst != NULL && dstlen > 0) {
-+ if (strlen(buf) >= dstlen) /* clip on the real text length so text plus NUL always fits */
-+ *(buf+dstlen-1) = '\0';
-+ strcpy(dst, buf);
-+ }
-+ return len;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/subnetof.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,59 @@
-+/*
-+ * minor network-address manipulation utilities
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: subnetof.c,v 1.8 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - subnetof - keep only the address bits selected by the mask (subnet part)
-+ */
-+struct in_addr
-+subnetof(addr, mask)
-+struct in_addr addr;
-+struct in_addr mask;
-+{
-+	/* addr is a by-value copy, so it can be masked in place and handed back */
-+	addr.s_addr &= mask.s_addr;
-+
-+	return addr;
-+}
-+
-+/*
-+ - hostof - keep only the address bits NOT selected by the mask (host part)
-+ */
-+struct in_addr
-+hostof(addr, mask)
-+struct in_addr addr;
-+struct in_addr mask;
-+{
-+	/* addr is a by-value copy, so the masked-in bits can be cleared in place */
-+	addr.s_addr &= ~mask.s_addr;
-+
-+	return addr;
-+}
-+
-+/*
-+ - broadcastof - given (network) address and mask, set every bit outside the mask
-+ */
-+struct in_addr
-+broadcastof(addr, mask)
-+struct in_addr addr;
-+struct in_addr mask;
-+{
-+	/* addr is a by-value copy, so the bits outside the mask can be set in place */
-+	addr.s_addr |= ~mask.s_addr;
-+
-+	return addr;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/subnettoa.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,61 @@
-+/*
-+ * convert binary form of subnet description to ASCII
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: subnettoa.c,v 1.11 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - subnettoa - convert address and mask to ASCII "addr/mask"
-+ * The mask is written as a bit count when masktobits() yields one
-+ * (a value >= 0), otherwise as a second address.
-+ * Only format 0 is accepted; any other value returns 0.
-+ */
-+size_t				/* space needed for full conversion */
-+subnettoa(addr, mask, format, dst, dstlen)
-+struct in_addr addr;
-+struct in_addr mask;
-+int format;			/* character */
-+char *dst;			/* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+	size_t total;		/* running total that is returned */
-+	size_t room = 0;	/* space handed to the mask conversion */
-+	int bits;
-+	char *tail = NULL;	/* where the mask text is written */
-+
-+	if (format != 0)
-+		return 0;
-+
-+	/* address part first */
-+	total = addrtoa(addr, 0, dst, dstlen);
-+	if (total < dstlen) {
-+		/* it fit: the last byte counted by addrtoa becomes the */
-+		/* separator and the mask text continues right after it */
-+		dst[total - 1] = '/';
-+		tail = dst + total;
-+		room = dstlen - total;
-+	}
-+
-+	/* mask part: address form on a negative bit count, number otherwise */
-+	bits = masktobits(mask);
-+	if (bits < 0)
-+		total += addrtoa(mask, 0, tail, room);
-+	else
-+		total += ultoa((unsigned long)bits, 10, tail, room);
-+
-+	/* both conversions were called even when nothing more could be stored */
-+
-+	return total;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/sysctl_net_ipsec.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,387 @@
-+/*
-+ * sysctl interface to net IPSEC subsystem.
-+ * Copyright (C) 1998, 1999, 2000, 2001 Richard Guy Briggs.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * for more details.
-+ *
-+ * RCSID $Id: sysctl_net_ipsec.c,v 1.17.10.2 2007/10/30 21:42:25 paul Exp $
-+ */
-+
-+/* -*- linux-c -*-
-+ *
-+ * Initiated April 3, 1998, Richard Guy Briggs <rgb@conscoop.ottawa.on.ca>
-+ */
-+
-+#include <linux/version.h>
-+#include <linux/mm.h>
-+#include <linux/sysctl.h>
-+
-+#include "openswan/ipsec_param.h"
-+
-+#ifdef CONFIG_SYSCTL
-+/* Variables exported through sysctl, followed by the ctl_name identifiers used for them in ipsec_table. */
-+#define NET_IPSEC 2112 /* Random number */
-+#ifdef CONFIG_KLIPS_DEBUG
-+extern int debug_ah;
-+extern int debug_esp;
-+extern int debug_mast;
-+extern int debug_tunnel;
-+extern int debug_xmit;
-+extern int debug_eroute;
-+extern int debug_spi;
-+extern int debug_radij;
-+extern int debug_netlink;
-+extern int debug_xform;
-+extern int debug_rcv;
-+extern int debug_pfkey;
-+extern int sysctl_ipsec_debug_verbose;
-+#ifdef CONFIG_KLIPS_IPCOMP
-+extern int sysctl_ipsec_debug_ipcomp; /* only declared when both DEBUG and IPCOMP are configured */
-+#endif /* CONFIG_KLIPS_IPCOMP */
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+extern int sysctl_ipsec_icmp;
-+extern int sysctl_ipsec_inbound_policy_check;
-+extern int sysctl_ipsec_tos;
-+int sysctl_ipsec_regress_pfkey_lossage; /* defined here; the variables above are only declared */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24)
-+#ifdef CONFIG_KLIPS_DEBUG
-+ #define NET_IPSEC_DEBUG_AH CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_ESP CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_TUNNEL CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_EROUTE CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_SPI CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_RADIJ CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_NETLINK CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_XFORM CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_RCV CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_PFKEY CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_VERBOSE CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_IPCOMP CTL_UNNUMBERED
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ #define NET_IPSEC_ICMP CTL_UNNUMBERED
-+ #define NET_IPSEC_INBOUND_POLICY_CHECK CTL_UNNUMBERED
-+ #define NET_IPSEC_TOS CTL_UNNUMBERED
-+ #define NET_IPSEC_REGRESS_PFKEY_LOSSAGE CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_MAST CTL_UNNUMBERED
-+ #define NET_IPSEC_DEBUG_XMIT CTL_UNNUMBERED
-+#else
-+enum { /* kernels before 2.6.24: each entry gets its own fixed number */
-+#ifdef CONFIG_KLIPS_DEBUG
-+ NET_IPSEC_DEBUG_AH=1,
-+ NET_IPSEC_DEBUG_ESP=2,
-+ NET_IPSEC_DEBUG_TUNNEL=3,
-+ NET_IPSEC_DEBUG_EROUTE=4,
-+ NET_IPSEC_DEBUG_SPI=5,
-+ NET_IPSEC_DEBUG_RADIJ=6,
-+ NET_IPSEC_DEBUG_NETLINK=7,
-+ NET_IPSEC_DEBUG_XFORM=8,
-+ NET_IPSEC_DEBUG_RCV=9,
-+ NET_IPSEC_DEBUG_PFKEY=10,
-+ NET_IPSEC_DEBUG_VERBOSE=11,
-+ NET_IPSEC_DEBUG_IPCOMP=12,
-+#endif /* CONFIG_KLIPS_DEBUG */
-+ NET_IPSEC_ICMP=13,
-+ NET_IPSEC_INBOUND_POLICY_CHECK=14,
-+ NET_IPSEC_TOS=15,
-+ NET_IPSEC_REGRESS_PFKEY_LOSSAGE=16,
-+ NET_IPSEC_DEBUG_MAST=17, /* defined outside the DEBUG guard although the other debug ids are inside it */
-+ NET_IPSEC_DEBUG_XMIT=18,
-+};
-+#endif
-+/* ipsec_table - entries of the "ipsec" sysctl directory; each exposes one int (mode 0644) through proc_dointvec */
-+static ctl_table ipsec_table[] = {
-+
-+#ifdef CONFIG_KLIPS_DEBUG
-+#ifdef CTL_TABLE_PARENT
-+ { .ctl_name = NET_IPSEC_DEBUG_AH,
-+ .procname = "debug_ah",
-+ .data = &debug_ah,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_ESP,
-+ .procname = "debug_esp",
-+ .data = &debug_esp,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_MAST,
-+ .procname = "debug_mast",
-+ .data = &debug_mast,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_TUNNEL,
-+ .procname = "debug_tunnel",
-+ .data = &debug_tunnel,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_XMIT,
-+ .procname = "debug_xmit",
-+ .data = &debug_xmit,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_EROUTE,
-+ .procname = "debug_eroute",
-+ .data = &debug_eroute,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_SPI,
-+ .procname = "debug_spi",
-+ .data = &debug_spi,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_RADIJ,
-+ .procname = "debug_radij",
-+ .data = &debug_radij,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_NETLINK,
-+ .procname = "debug_netlink",
-+ .data = &debug_netlink,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_XFORM,
-+ .procname = "debug_xform",
-+ .data = &debug_xform,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_RCV,
-+ .procname = "debug_rcv",
-+ .data = &debug_rcv,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_PFKEY,
-+ .procname = "debug_pfkey",
-+ .data = &debug_pfkey,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_DEBUG_VERBOSE,
-+ .procname = "debug_verbose",
-+ .data = &sysctl_ipsec_debug_verbose,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+#else
-+ { NET_IPSEC_DEBUG_AH, "debug_ah", &debug_ah,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_ESP, "debug_esp", &debug_esp,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_MAST, "debug_mast", &debug_mast,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_TUNNEL, "debug_tunnel", &debug_tunnel,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_XMIT, "debug_xmit", &debug_xmit, /* own id, matching the designated-initializer entry above */
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_EROUTE, "debug_eroute", &debug_eroute,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_SPI, "debug_spi", &debug_spi,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_RADIJ, "debug_radij", &debug_radij,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_NETLINK, "debug_netlink", &debug_netlink,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_XFORM, "debug_xform", &debug_xform,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_RCV, "debug_rcv", &debug_rcv,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_PFKEY, "debug_pfkey", &debug_pfkey,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_DEBUG_VERBOSE, "debug_verbose",&sysctl_ipsec_debug_verbose,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+#endif /* CTL_TABLE_PARENT */
-+#endif /* CONFIG_KLIPS_DEBUG */
-+
-+#if defined(CONFIG_KLIPS_IPCOMP) && defined(CONFIG_KLIPS_DEBUG) /* the id and the variable only exist with DEBUG */
-+#ifdef CTL_TABLE_PARENT
-+ { .ctl_name = NET_IPSEC_DEBUG_IPCOMP,
-+ .procname = "debug_ipcomp",
-+ .data = &sysctl_ipsec_debug_ipcomp,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+#else
-+ { NET_IPSEC_DEBUG_IPCOMP, "debug_ipcomp", &sysctl_ipsec_debug_ipcomp,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+#endif
-+#endif /* CONFIG_KLIPS_IPCOMP && CONFIG_KLIPS_DEBUG */
-+
-+#ifdef CONFIG_KLIPS_REGRESS
-+#ifdef CTL_TABLE_PARENT
-+ {
-+ .ctl_name = NET_IPSEC_REGRESS_PFKEY_LOSSAGE,
-+ .procname = "pfkey_lossage",
-+ .data = &sysctl_ipsec_regress_pfkey_lossage,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+#else
-+ { NET_IPSEC_REGRESS_PFKEY_LOSSAGE, "pfkey_lossage",
-+ &sysctl_ipsec_regress_pfkey_lossage,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+#endif /* CTL_TABLE_PARENT */
-+#endif /* CONFIG_KLIPS_REGRESS */
-+
-+#ifdef CTL_TABLE_PARENT
-+ { .ctl_name = NET_IPSEC_ICMP,
-+ .procname = "icmp",
-+ .data = &sysctl_ipsec_icmp,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_INBOUND_POLICY_CHECK,
-+ .procname = "inbound_policy_check",
-+ .data = &sysctl_ipsec_inbound_policy_check,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+
-+ { .ctl_name = NET_IPSEC_TOS,
-+ .procname = "tos",
-+ .data = &sysctl_ipsec_tos,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .child = NULL,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {0} /* all-zero entry ends the table */
-+#else
-+ { NET_IPSEC_ICMP, "icmp", &sysctl_ipsec_icmp,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_INBOUND_POLICY_CHECK, "inbound_policy_check", &sysctl_ipsec_inbound_policy_check,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ { NET_IPSEC_TOS, "tos", &sysctl_ipsec_tos,
-+ sizeof(int), 0644, NULL, .proc_handler = &proc_dointvec},
-+ {0} /* all-zero entry ends the table */
-+#endif
-+};
-+
-+static ctl_table ipsec_net_table[] = {
-+#ifdef CTL_TABLE_PARENT
-+ { .ctl_name = NET_IPSEC,
-+ .procname = "ipsec",
-+ .data = NULL,
-+ .maxlen = 0,
-+ .mode = 0555,
-+ .child = ipsec_table,
-+ .proc_handler = NULL,
-+ },
-+ { 0 }
-+#else
-+ { NET_IPSEC, "ipsec", NULL, 0, 0555, ipsec_table },
-+ { 0 }
-+#endif
-+};
-+
-+static ctl_table ipsec_root_table[] = {
-+#ifdef CTL_TABLE_PARENT
-+ { .ctl_name = CTL_NET,
-+ .procname = "net",
-+ .data = NULL,
-+ .maxlen = 0,
-+ .mode = 0555,
-+ .child = ipsec_net_table,
-+ .proc_handler = NULL,
-+ },
-+ { 0 }
-+#else
-+ { CTL_NET, "net", NULL, 0, 0555, ipsec_net_table },
-+ { 0 }
-+#endif
-+};
-+
-+static struct ctl_table_header *ipsec_table_header;
-+
-+int ipsec_sysctl_register(void)
-+{
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
-+ ipsec_table_header = register_sysctl_table(ipsec_root_table);
-+#else
-+ ipsec_table_header = register_sysctl_table(ipsec_root_table, 0);
-+#endif
-+ if (!ipsec_table_header) {
-+ return -ENOMEM;
-+ }
-+ return 0;
-+}
-+
-+void ipsec_sysctl_unregister(void)
-+{
-+ unregister_sysctl_table(ipsec_table_header);
-+}
-+
-+#endif /* CONFIG_SYSCTL */
-+
-+/*
-+ *
-+ * Local Variables:
-+ * c-file-style: "linux"
-+ * End:
-+ *
-+ */
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/trees.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,1214 @@
-+/* trees.c -- output deflated data using Huffman coding
-+ * Copyright (C) 1995-2002 Jean-loup Gailly
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/*
-+ * ALGORITHM
-+ *
-+ * The "deflation" process uses several Huffman trees. The more
-+ * common source values are represented by shorter bit sequences.
-+ *
-+ * Each code tree is stored in a compressed form which is itself
-+ * a Huffman encoding of the lengths of all the code strings (in
-+ * ascending order by source values). The actual code strings are
-+ * reconstructed from the lengths in the inflate process, as described
-+ * in the deflate specification.
-+ *
-+ * REFERENCES
-+ *
-+ * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
-+ * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
-+ *
-+ * Storer, James A.
-+ * Data Compression: Methods and Theory, pp. 49-50.
-+ * Computer Science Press, 1988. ISBN 0-7167-8156-5.
-+ *
-+ * Sedgewick, R.
-+ * Algorithms, p290.
-+ * Addison-Wesley, 1983. ISBN 0-201-06672-6.
-+ */
-+
-+/* @(#) $Id: trees.c,v 1.4 2004/07/10 07:48:39 mcr Exp $ */
-+
-+/* #define GEN_TREES_H */
-+
-+#include "deflate.h"
-+
-+#ifdef DEBUG
-+# include <ctype.h>
-+#endif
-+
-+/* ===========================================================================
-+ * Constants
-+ */
-+
-+#define MAX_BL_BITS 7
-+/* Bit length codes must not exceed MAX_BL_BITS bits */
-+
-+#define END_BLOCK 256
-+/* end of block literal code */
-+
-+#define REP_3_6 16
-+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
-+
-+#define REPZ_3_10 17
-+/* repeat a zero length 3-10 times (3 bits of repeat count) */
-+
-+#define REPZ_11_138 18
-+/* repeat a zero length 11-138 times (7 bits of repeat count) */
-+
-+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
-+ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
-+
-+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
-+ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-+
-+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
-+ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
-+
-+local const uch bl_order[BL_CODES]
-+ = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
-+/* The lengths of the bit length codes are sent in order of decreasing
-+ * probability, to avoid transmitting the lengths for unused bit length codes.
-+ */
-+
-+#define Buf_size (8 * 2*sizeof(char))
-+/* Number of bits used within bi_buf. (bi_buf might be implemented on
-+ * more than 16 bits on some systems.)
-+ */
-+
-+/* ===========================================================================
-+ * Local data. These are initialized only once.
-+ */
-+
-+#define DIST_CODE_LEN 512 /* see definition of array dist_code below */
-+
-+#if defined(GEN_TREES_H) || !defined(STDC)
-+/* non ANSI compilers may not accept trees.h */
-+
-+local ct_data static_ltree[L_CODES+2];
-+/* The static literal tree. Since the bit lengths are imposed, there is no
-+ * need for the L_CODES extra codes used during heap construction. However
-+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
-+ * below).
-+ */
-+
-+local ct_data static_dtree[D_CODES];
-+/* The static distance tree. (Actually a trivial tree since all codes use
-+ * 5 bits.)
-+ */
-+
-+uch _dist_code[DIST_CODE_LEN];
-+/* Distance codes. The first 256 values correspond to the distances
-+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
-+ * the 15 bit distances.
-+ */
-+
-+uch _length_code[MAX_MATCH-MIN_MATCH+1];
-+/* length code for each normalized match length (0 == MIN_MATCH) */
-+
-+local int base_length[LENGTH_CODES];
-+/* First normalized length for each code (0 = MIN_MATCH) */
-+
-+local int base_dist[D_CODES];
-+/* First normalized distance for each code (0 = distance of 1) */
-+
-+#else
-+# include "trees.h"
-+#endif /* GEN_TREES_H */
-+
-+struct static_tree_desc_s {
-+ const ct_data *static_tree; /* static tree or NULL */
-+ const intf *extra_bits; /* extra bits for each code or NULL */
-+ int extra_base; /* base index for extra_bits */
-+ int elems; /* max number of elements in the tree */
-+ int max_length; /* max bit length for the codes */
-+};
-+
-+local static_tree_desc static_l_desc =
-+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
-+
-+local static_tree_desc static_d_desc =
-+{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
-+
-+local static_tree_desc static_bl_desc =
-+{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
-+
-+/* ===========================================================================
-+ * Local (static) routines in this file.
-+ */
-+
-+local void tr_static_init OF((void));
-+local void init_block OF((deflate_state *s));
-+local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
-+local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
-+local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
-+local void build_tree OF((deflate_state *s, tree_desc *desc));
-+local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
-+local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
-+local int build_bl_tree OF((deflate_state *s));
-+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
-+ int blcodes));
-+local void compress_block OF((deflate_state *s, const ct_data *ltree,
-+ const ct_data *dtree));
-+local void set_data_type OF((deflate_state *s));
-+local unsigned bi_reverse OF((unsigned value, int length));
-+local void bi_windup OF((deflate_state *s));
-+local void bi_flush OF((deflate_state *s));
-+local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
-+ int header));
-+
-+#ifdef GEN_TREES_H
-+local void gen_trees_header OF((void));
-+#endif
-+
-+#ifndef DEBUG
-+# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
-+ /* Send a code of the given tree. c and tree must not have side effects */
-+
-+#else /* DEBUG */
-+# define send_code(s, c, tree) \
-+ { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
-+ send_bits(s, tree[c].Code, tree[c].Len); }
-+#endif
-+
-+/* ===========================================================================
-+ * Output a short LSB first on the stream.
-+ * IN assertion: there is enough room in pendingBuf.
-+ */
-+#define put_short(s, w) { \
-+ put_byte(s, (uch)((w) & 0xff)); \
-+ put_byte(s, (uch)((ush)(w) >> 8)); \
-+}
-+
-+/* ===========================================================================
-+ * Send a value on a given number of bits.
-+ * IN assertion: length <= 16 and value fits in length bits.
-+ */
-+#ifdef DEBUG
-+local void send_bits OF((deflate_state *s, int value, int length));
-+
-+local void send_bits(s, value, length)
-+ deflate_state *s;
-+ int value; /* value to send */
-+ int length; /* number of bits */
-+{
-+ Tracevv((stderr," l %2d v %4x ", length, value));
-+ Assert(length > 0 && length <= 15, "invalid length");
-+ s->bits_sent += (ulg)length;
-+
-+ /* If not enough room in bi_buf, use (valid) bits from bi_buf and
-+ * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
-+ * unused bits in value.
-+ */
-+ if (s->bi_valid > (int)Buf_size - length) {
-+ s->bi_buf |= (value << s->bi_valid);
-+ put_short(s, s->bi_buf);
-+ s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
-+ s->bi_valid += length - Buf_size;
-+ } else {
-+ s->bi_buf |= value << s->bi_valid;
-+ s->bi_valid += length;
-+ }
-+}
-+#else /* !DEBUG */
-+
-+#define send_bits(s, value, length) \
-+{ int len = length;\
-+ if (s->bi_valid > (int)Buf_size - len) {\
-+ int val = value;\
-+ s->bi_buf |= (val << s->bi_valid);\
-+ put_short(s, s->bi_buf);\
-+ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
-+ s->bi_valid += len - Buf_size;\
-+ } else {\
-+ s->bi_buf |= (value) << s->bi_valid;\
-+ s->bi_valid += len;\
-+ }\
-+}
-+#endif /* DEBUG */
-+
-+
-+#define MAX(a,b) (a >= b ? a : b)
-+/* the arguments must not have side effects */
-+
-+/* ===========================================================================
-+ * Initialize the various 'constant' tables.
-+ */
-+local void tr_static_init()
-+{
-+#if defined(GEN_TREES_H) || !defined(STDC)
-+ static int static_init_done = 0;
-+ int n; /* iterates over tree elements */
-+ int bits; /* bit counter */
-+ int length; /* length value */
-+ int code; /* code value */
-+ int dist; /* distance index */
-+ ush bl_count[MAX_BITS+1];
-+ /* number of codes at each bit length for an optimal tree */
-+
-+ if (static_init_done) return;
-+
-+ /* For some embedded targets, global variables are not initialized: */
-+ static_l_desc.static_tree = static_ltree;
-+ static_l_desc.extra_bits = extra_lbits;
-+ static_d_desc.static_tree = static_dtree;
-+ static_d_desc.extra_bits = extra_dbits;
-+ static_bl_desc.extra_bits = extra_blbits;
-+
-+ /* Initialize the mapping length (0..255) -> length code (0..28) */
-+ length = 0;
-+ for (code = 0; code < LENGTH_CODES-1; code++) {
-+ base_length[code] = length;
-+ for (n = 0; n < (1<<extra_lbits[code]); n++) {
-+ _length_code[length++] = (uch)code;
-+ }
-+ }
-+ Assert (length == 256, "tr_static_init: length != 256");
-+ /* Note that the length 255 (match length 258) can be represented
-+ * in two different ways: code 284 + 5 bits or code 285, so we
-+ * overwrite length_code[255] to use the best encoding:
-+ */
-+ _length_code[length-1] = (uch)code;
-+
-+ /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
-+ dist = 0;
-+ for (code = 0 ; code < 16; code++) {
-+ base_dist[code] = dist;
-+ for (n = 0; n < (1<<extra_dbits[code]); n++) {
-+ _dist_code[dist++] = (uch)code;
-+ }
-+ }
-+ Assert (dist == 256, "tr_static_init: dist != 256");
-+ dist >>= 7; /* from now on, all distances are divided by 128 */
-+ for ( ; code < D_CODES; code++) {
-+ base_dist[code] = dist << 7;
-+ for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
-+ _dist_code[256 + dist++] = (uch)code;
-+ }
-+ }
-+ Assert (dist == 256, "tr_static_init: 256+dist != 512");
-+
-+ /* Construct the codes of the static literal tree */
-+ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
-+ n = 0;
-+ while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
-+ while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
-+ while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
-+ while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
-+ /* Codes 286 and 287 do not exist, but we must include them in the
-+ * tree construction to get a canonical Huffman tree (longest code
-+ * all ones)
-+ */
-+ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
-+
-+ /* The static distance tree is trivial: */
-+ for (n = 0; n < D_CODES; n++) {
-+ static_dtree[n].Len = 5;
-+ static_dtree[n].Code = bi_reverse((unsigned)n, 5);
-+ }
-+ static_init_done = 1;
-+
-+# ifdef GEN_TREES_H
-+ gen_trees_header();
-+# endif
-+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
-+}
-+
-+/* ===========================================================================
-+ * Genererate the file trees.h describing the static trees.
-+ */
-+#ifdef GEN_TREES_H
-+# ifndef DEBUG
-+# include <stdio.h>
-+# endif
-+
-+# define SEPARATOR(i, last, width) \
-+ ((i) == (last)? "\n};\n\n" : \
-+ ((i) % (width) == (width)-1 ? ",\n" : ", "))
-+
-+void gen_trees_header()
-+{
-+ FILE *header = fopen("trees.h", "w");
-+ int i;
-+
-+ Assert (header != NULL, "Can't open trees.h");
-+ fprintf(header,
-+ "/* header created automatically with -DGEN_TREES_H */\n\n");
-+
-+ fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
-+ for (i = 0; i < L_CODES+2; i++) {
-+ fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
-+ static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
-+ }
-+
-+ fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
-+ for (i = 0; i < D_CODES; i++) {
-+ fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
-+ static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
-+ }
-+
-+ fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
-+ for (i = 0; i < DIST_CODE_LEN; i++) {
-+ fprintf(header, "%2u%s", _dist_code[i],
-+ SEPARATOR(i, DIST_CODE_LEN-1, 20));
-+ }
-+
-+ fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
-+ for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
-+ fprintf(header, "%2u%s", _length_code[i],
-+ SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
-+ }
-+
-+ fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
-+ for (i = 0; i < LENGTH_CODES; i++) {
-+ fprintf(header, "%1u%s", base_length[i],
-+ SEPARATOR(i, LENGTH_CODES-1, 20));
-+ }
-+
-+ fprintf(header, "local const int base_dist[D_CODES] = {\n");
-+ for (i = 0; i < D_CODES; i++) {
-+ fprintf(header, "%5u%s", base_dist[i],
-+ SEPARATOR(i, D_CODES-1, 10));
-+ }
-+
-+ fclose(header);
-+}
-+#endif /* GEN_TREES_H */
-+
-+/* ===========================================================================
-+ * Initialize the tree data structures for a new zlib stream.
-+ */
-+void _tr_init(s)
-+ deflate_state *s;
-+{
-+ tr_static_init();
-+
-+ s->l_desc.dyn_tree = s->dyn_ltree;
-+ s->l_desc.stat_desc = &static_l_desc;
-+
-+ s->d_desc.dyn_tree = s->dyn_dtree;
-+ s->d_desc.stat_desc = &static_d_desc;
-+
-+ s->bl_desc.dyn_tree = s->bl_tree;
-+ s->bl_desc.stat_desc = &static_bl_desc;
-+
-+ s->bi_buf = 0;
-+ s->bi_valid = 0;
-+ s->last_eob_len = 8; /* enough lookahead for inflate */
-+#ifdef DEBUG
-+ s->compressed_len = 0L;
-+ s->bits_sent = 0L;
-+#endif
-+
-+ /* Initialize the first block of the first file: */
-+ init_block(s);
-+}
-+
-+/* ===========================================================================
-+ * Initialize a new block.
-+ */
-+local void init_block(s)
-+ deflate_state *s;
-+{
-+ int n; /* iterates over tree elements */
-+
-+ /* Initialize the trees. */
-+ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
-+ for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
-+ for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-+
-+ s->dyn_ltree[END_BLOCK].Freq = 1;
-+ s->opt_len = s->static_len = 0L;
-+ s->last_lit = s->matches = 0;
-+}
-+
-+#define SMALLEST 1
-+/* Index within the heap array of least frequent node in the Huffman tree */
-+
-+
-+/* ===========================================================================
-+ * Remove the smallest element from the heap and recreate the heap with
-+ * one less element. Updates heap and heap_len.
-+ */
-+#define pqremove(s, tree, top) \
-+{\
-+ top = s->heap[SMALLEST]; \
-+ s->heap[SMALLEST] = s->heap[s->heap_len--]; \
-+ pqdownheap(s, tree, SMALLEST); \
-+}
-+
-+/* ===========================================================================
-+ * Compares to subtrees, using the tree depth as tie breaker when
-+ * the subtrees have equal frequency. This minimizes the worst case length.
-+ */
-+#define smaller(tree, n, m, depth) \
-+ (tree[n].Freq < tree[m].Freq || \
-+ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
-+
-+/* ===========================================================================
-+ * Restore the heap property by moving down the tree starting at node k,
-+ * exchanging a node with the smallest of its two sons if necessary, stopping
-+ * when the heap property is re-established (each father smaller than its
-+ * two sons).
-+ */
-+local void pqdownheap(s, tree, k)
-+ deflate_state *s;
-+ ct_data *tree; /* the tree to restore */
-+ int k; /* node to move down */
-+{
-+ int v = s->heap[k];
-+ int j = k << 1; /* left son of k */
-+ while (j <= s->heap_len) {
-+ /* Set j to the smallest of the two sons: */
-+ if (j < s->heap_len &&
-+ smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
-+ j++;
-+ }
-+ /* Exit if v is smaller than both sons */
-+ if (smaller(tree, v, s->heap[j], s->depth)) break;
-+
-+ /* Exchange v with the smallest son */
-+ s->heap[k] = s->heap[j]; k = j;
-+
-+ /* And continue down the tree, setting j to the left son of k */
-+ j <<= 1;
-+ }
-+ s->heap[k] = v;
-+}
-+
-+/* ===========================================================================
-+ * Compute the optimal bit lengths for a tree and update the total bit length
-+ * for the current block.
-+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
-+ * above are the tree nodes sorted by increasing frequency.
-+ * OUT assertions: the field len is set to the optimal bit length, the
-+ * array bl_count contains the frequencies for each bit length.
-+ * The length opt_len is updated; static_len is also updated if stree is
-+ * not null.
-+ */
-+local void gen_bitlen(s, desc)
-+ deflate_state *s;
-+ tree_desc *desc; /* the tree descriptor */
-+{
-+ ct_data *tree = desc->dyn_tree;
-+ int max_code = desc->max_code;
-+ const ct_data *stree = desc->stat_desc->static_tree;
-+ const intf *extra = desc->stat_desc->extra_bits;
-+ int base = desc->stat_desc->extra_base;
-+ int max_length = desc->stat_desc->max_length;
-+ int h; /* heap index */
-+ int n, m; /* iterate over the tree elements */
-+ int bits; /* bit length */
-+ int xbits; /* extra bits */
-+ ush f; /* frequency */
-+ int overflow = 0; /* number of elements with bit length too large */
-+
-+ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
-+
-+ /* In a first pass, compute the optimal bit lengths (which may
-+ * overflow in the case of the bit length tree).
-+ */
-+ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
-+
-+ for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
-+ n = s->heap[h];
-+ bits = tree[tree[n].Dad].Len + 1;
-+ if (bits > max_length) bits = max_length, overflow++;
-+ tree[n].Len = (ush)bits;
-+ /* We overwrite tree[n].Dad which is no longer needed */
-+
-+ if (n > max_code) continue; /* not a leaf node */
-+
-+ s->bl_count[bits]++;
-+ xbits = 0;
-+ if (n >= base) xbits = extra[n-base];
-+ f = tree[n].Freq;
-+ s->opt_len += (ulg)f * (bits + xbits);
-+ if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
-+ }
-+ if (overflow == 0) return;
-+
-+ Trace((stderr,"\nbit length overflow\n"));
-+ /* This happens for example on obj2 and pic of the Calgary corpus */
-+
-+ /* Find the first bit length which could increase: */
-+ do {
-+ bits = max_length-1;
-+ while (s->bl_count[bits] == 0) bits--;
-+ s->bl_count[bits]--; /* move one leaf down the tree */
-+ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
-+ s->bl_count[max_length]--;
-+ /* The brother of the overflow item also moves one step up,
-+ * but this does not affect bl_count[max_length]
-+ */
-+ overflow -= 2;
-+ } while (overflow > 0);
-+
-+ /* Now recompute all bit lengths, scanning in increasing frequency.
-+ * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
-+ * lengths instead of fixing only the wrong ones. This idea is taken
-+ * from 'ar' written by Haruhiko Okumura.)
-+ */
-+ for (bits = max_length; bits != 0; bits--) {
-+ n = s->bl_count[bits];
-+ while (n != 0) {
-+ m = s->heap[--h];
-+ if (m > max_code) continue;
-+ if (tree[m].Len != (unsigned) bits) {
-+ Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
-+ s->opt_len += ((long)bits - (long)tree[m].Len)
-+ *(long)tree[m].Freq;
-+ tree[m].Len = (ush)bits;
-+ }
-+ n--;
-+ }
-+ }
-+}
-+
-+/* ===========================================================================
-+ * Generate the codes for a given tree and bit counts (which need not be
-+ * optimal).
-+ * IN assertion: the array bl_count contains the bit length statistics for
-+ * the given tree and the field len is set for all tree elements.
-+ * OUT assertion: the field code is set for all tree elements of non
-+ * zero code length.
-+ */
-+local void gen_codes (tree, max_code, bl_count)
-+ ct_data *tree; /* the tree to decorate */
-+ int max_code; /* largest code with non zero frequency */
-+ ushf *bl_count; /* number of codes at each bit length */
-+{
-+ ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-+ ush code = 0; /* running code value */
-+ int bits; /* bit index */
-+ int n; /* code index */
-+
-+ /* The distribution counts are first used to generate the code values
-+ * without bit reversal.
-+ */
-+ for (bits = 1; bits <= MAX_BITS; bits++) {
-+ next_code[bits] = code = (code + bl_count[bits-1]) << 1;
-+ }
-+ /* Check that the bit counts in bl_count are consistent. The last code
-+ * must be all ones.
-+ */
-+ Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
-+ "inconsistent bit counts");
-+ Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-+
-+ for (n = 0; n <= max_code; n++) {
-+ int len = tree[n].Len;
-+ if (len == 0) continue;
-+ /* Now reverse the bits */
-+ tree[n].Code = bi_reverse(next_code[len]++, len);
-+
-+ Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
-+ n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
-+ }
-+}
-+
-+/* ===========================================================================
-+ * Construct one Huffman tree and assigns the code bit strings and lengths.
-+ * Update the total bit length for the current block.
-+ * IN assertion: the field freq is set for all tree elements.
-+ * OUT assertions: the fields len and code are set to the optimal bit length
-+ * and corresponding code. The length opt_len is updated; static_len is
-+ * also updated if stree is not null. The field max_code is set.
-+ */
-+local void build_tree(s, desc)
-+ deflate_state *s;
-+ tree_desc *desc; /* the tree descriptor */
-+{
-+ ct_data *tree = desc->dyn_tree;
-+ const ct_data *stree = desc->stat_desc->static_tree;
-+ int elems = desc->stat_desc->elems;
-+ int n, m; /* iterate over heap elements */
-+ int max_code = -1; /* largest code with non zero frequency */
-+ int node; /* new node being created */
-+
-+ /* Construct the initial heap, with least frequent element in
-+ * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
-+ * heap[0] is not used.
-+ */
-+ s->heap_len = 0, s->heap_max = HEAP_SIZE;
-+
-+ for (n = 0; n < elems; n++) {
-+ if (tree[n].Freq != 0) {
-+ s->heap[++(s->heap_len)] = max_code = n;
-+ s->depth[n] = 0;
-+ } else {
-+ tree[n].Len = 0;
-+ }
-+ }
-+
-+ /* The pkzip format requires that at least one distance code exists,
-+ * and that at least one bit should be sent even if there is only one
-+ * possible code. So to avoid special checks later on we force at least
-+ * two codes of non zero frequency.
-+ */
-+ while (s->heap_len < 2) {
-+ node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
-+ tree[node].Freq = 1;
-+ s->depth[node] = 0;
-+ s->opt_len--; if (stree) s->static_len -= stree[node].Len;
-+ /* node is 0 or 1 so it does not have extra bits */
-+ }
-+ desc->max_code = max_code;
-+
-+ /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
-+ * establish sub-heaps of increasing lengths:
-+ */
-+ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
-+
-+ /* Construct the Huffman tree by repeatedly combining the least two
-+ * frequent nodes.
-+ */
-+ node = elems; /* next internal node of the tree */
-+ do {
-+ pqremove(s, tree, n); /* n = node of least frequency */
-+ m = s->heap[SMALLEST]; /* m = node of next least frequency */
-+
-+ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
-+ s->heap[--(s->heap_max)] = m;
-+
-+ /* Create a new node father of n and m */
-+ tree[node].Freq = tree[n].Freq + tree[m].Freq;
-+ s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
-+ tree[n].Dad = tree[m].Dad = (ush)node;
-+#ifdef DUMP_BL_TREE
-+ if (tree == s->bl_tree) {
-+ fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
-+ node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
-+ }
-+#endif
-+ /* and insert the new node in the heap */
-+ s->heap[SMALLEST] = node++;
-+ pqdownheap(s, tree, SMALLEST);
-+
-+ } while (s->heap_len >= 2);
-+
-+ s->heap[--(s->heap_max)] = s->heap[SMALLEST];
-+
-+ /* At this point, the fields freq and dad are set. We can now
-+ * generate the bit lengths.
-+ */
-+ gen_bitlen(s, (tree_desc *)desc);
-+
-+ /* The field len is now set, we can generate the bit codes */
-+ gen_codes ((ct_data *)tree, max_code, s->bl_count);
-+}
-+
-+/* ===========================================================================
-+ * Scan a literal or distance tree to determine the frequencies of the codes
-+ * in the bit length tree.
-+ */
-+local void scan_tree (s, tree, max_code)
-+ deflate_state *s;
-+ ct_data *tree; /* the tree to be scanned */
-+ int max_code; /* and its largest code of non zero frequency */
-+{
-+ int n; /* iterates over all tree elements */
-+ int prevlen = -1; /* last emitted length */
-+ int curlen; /* length of current code */
-+ int nextlen = tree[0].Len; /* length of next code */
-+ int count = 0; /* repeat count of the current code */
-+ int max_count = 7; /* max repeat count */
-+ int min_count = 4; /* min repeat count */
-+
-+ if (nextlen == 0) max_count = 138, min_count = 3;
-+ tree[max_code+1].Len = (ush)0xffff; /* guard */
-+
-+ for (n = 0; n <= max_code; n++) {
-+ curlen = nextlen; nextlen = tree[n+1].Len;
-+ if (++count < max_count && curlen == nextlen) {
-+ continue;
-+ } else if (count < min_count) {
-+ s->bl_tree[curlen].Freq += count;
-+ } else if (curlen != 0) {
-+ if (curlen != prevlen) s->bl_tree[curlen].Freq++;
-+ s->bl_tree[REP_3_6].Freq++;
-+ } else if (count <= 10) {
-+ s->bl_tree[REPZ_3_10].Freq++;
-+ } else {
-+ s->bl_tree[REPZ_11_138].Freq++;
-+ }
-+ count = 0; prevlen = curlen;
-+ if (nextlen == 0) {
-+ max_count = 138, min_count = 3;
-+ } else if (curlen == nextlen) {
-+ max_count = 6, min_count = 3;
-+ } else {
-+ max_count = 7, min_count = 4;
-+ }
-+ }
-+}
-+
-+/* ===========================================================================
-+ * Send a literal or distance tree in compressed form, using the codes in
-+ * bl_tree.
-+ */
-+local void send_tree (s, tree, max_code)
-+ deflate_state *s;
-+ ct_data *tree; /* the tree to be scanned */
-+ int max_code; /* and its largest code of non zero frequency */
-+{
-+ int n; /* iterates over all tree elements */
-+ int prevlen = -1; /* last emitted length */
-+ int curlen; /* length of current code */
-+ int nextlen = tree[0].Len; /* length of next code */
-+ int count = 0; /* repeat count of the current code */
-+ int max_count = 7; /* max repeat count */
-+ int min_count = 4; /* min repeat count */
-+
-+ /* tree[max_code+1].Len = -1; */ /* guard already set */
-+ if (nextlen == 0) max_count = 138, min_count = 3;
-+
-+ for (n = 0; n <= max_code; n++) {
-+ curlen = nextlen; nextlen = tree[n+1].Len;
-+ if (++count < max_count && curlen == nextlen) {
-+ continue;
-+ } else if (count < min_count) {
-+ do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
-+
-+ } else if (curlen != 0) {
-+ if (curlen != prevlen) {
-+ send_code(s, curlen, s->bl_tree); count--;
-+ }
-+ Assert(count >= 3 && count <= 6, " 3_6?");
-+ send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
-+
-+ } else if (count <= 10) {
-+ send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
-+
-+ } else {
-+ send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
-+ }
-+ count = 0; prevlen = curlen;
-+ if (nextlen == 0) {
-+ max_count = 138, min_count = 3;
-+ } else if (curlen == nextlen) {
-+ max_count = 6, min_count = 3;
-+ } else {
-+ max_count = 7, min_count = 4;
-+ }
-+ }
-+}
-+
-+/* ===========================================================================
-+ * Construct the Huffman tree for the bit lengths and return the index in
-+ * bl_order of the last bit length code to send.
-+ */
-+local int build_bl_tree(s)
-+ deflate_state *s;
-+{
-+ int max_blindex; /* index of last bit length code of non zero freq */
-+
-+ /* Determine the bit length frequencies for literal and distance trees */
-+ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
-+ scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
-+
-+ /* Build the bit length tree: */
-+ build_tree(s, (tree_desc *)(&(s->bl_desc)));
-+ /* opt_len now includes the length of the tree representations, except
-+ * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
-+ */
-+
-+ /* Determine the number of bit length codes to send. The pkzip format
-+ * requires that at least 4 bit length codes be sent. (appnote.txt says
-+ * 3 but the actual value used is 4.)
-+ */
-+ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
-+ if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
-+ }
-+ /* Update opt_len to include the bit length tree and counts */
-+ s->opt_len += 3*(max_blindex+1) + 5+5+4;
-+ Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
-+ s->opt_len, s->static_len));
-+
-+ return max_blindex;
-+}
-+
-+/* ===========================================================================
-+ * Send the header for a block using dynamic Huffman trees: the counts, the
-+ * lengths of the bit length codes, the literal tree and the distance tree.
-+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
-+ */
-+local void send_all_trees(s, lcodes, dcodes, blcodes)
-+ deflate_state *s;
-+ int lcodes, dcodes, blcodes; /* number of codes for each tree */
-+{
-+ int rank; /* index in bl_order */
-+
-+ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
-+ Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
-+ "too many codes");
-+ Tracev((stderr, "\nbl counts: "));
-+ send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
-+ send_bits(s, dcodes-1, 5);
-+ send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
-+ for (rank = 0; rank < blcodes; rank++) {
-+ Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
-+ send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
-+ }
-+ Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
-+
-+ send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
-+ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
-+
-+ send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
-+ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
-+}
-+
-+/* ===========================================================================
-+ * Send a stored block
-+ */
-+void _tr_stored_block(s, buf, stored_len, eof)
-+ deflate_state *s;
-+ charf *buf; /* input block */
-+ ulg stored_len; /* length of input block */
-+ int eof; /* true if this is the last block for a file */
-+{
-+ send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
-+#ifdef DEBUG
-+ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
-+ s->compressed_len += (stored_len + 4) << 3;
-+#endif
-+ copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
-+}
-+
-+/* ===========================================================================
-+ * Send one empty static block to give enough lookahead for inflate.
-+ * This takes 10 bits, of which 7 may remain in the bit buffer.
-+ * The current inflate code requires 9 bits of lookahead. If the
-+ * last two codes for the previous block (real code plus EOB) were coded
-+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
-+ * the last real code. In this case we send two empty static blocks instead
-+ * of one. (There are no problems if the previous block is stored or fixed.)
-+ * To simplify the code, we assume the worst case of last real code encoded
-+ * on one bit only.
-+ */
-+void _tr_align(s)
-+ deflate_state *s;
-+{
-+ send_bits(s, STATIC_TREES<<1, 3);
-+ send_code(s, END_BLOCK, static_ltree);
-+#ifdef DEBUG
-+ s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
-+#endif
-+ bi_flush(s);
-+ /* Of the 10 bits for the empty block, we have already sent
-+ * (10 - bi_valid) bits. The lookahead for the last real code (before
-+ * the EOB of the previous block) was thus at least one plus the length
-+ * of the EOB plus what we have just sent of the empty static block.
-+ */
-+ if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
-+ send_bits(s, STATIC_TREES<<1, 3);
-+ send_code(s, END_BLOCK, static_ltree);
-+#ifdef DEBUG
-+ s->compressed_len += 10L;
-+#endif
-+ bi_flush(s);
-+ }
-+ s->last_eob_len = 7;
-+}
-+
-+/* ===========================================================================
-+ * Determine the best encoding for the current block: dynamic trees, static
-+ * trees or store, and output the encoded block to the zip file.
-+ */
-+void _tr_flush_block(s, buf, stored_len, eof)
-+ deflate_state *s;
-+ charf *buf; /* input block, or NULL if too old */
-+ ulg stored_len; /* length of input block */
-+ int eof; /* true if this is the last block for a file */
-+{
-+ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
-+ int max_blindex = 0; /* index of last bit length code of non zero freq */
-+
-+ /* Build the Huffman trees unless a stored block is forced */
-+ if (s->level > 0) {
-+
-+ /* Check if the file is ascii or binary */
-+ if (s->data_type == Z_UNKNOWN) set_data_type(s);
-+
-+ /* Construct the literal and distance trees */
-+ build_tree(s, (tree_desc *)(&(s->l_desc)));
-+ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
-+ s->static_len));
-+
-+ build_tree(s, (tree_desc *)(&(s->d_desc)));
-+ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
-+ s->static_len));
-+ /* At this point, opt_len and static_len are the total bit lengths of
-+ * the compressed block data, excluding the tree representations.
-+ */
-+
-+ /* Build the bit length tree for the above two trees, and get the index
-+ * in bl_order of the last bit length code to send.
-+ */
-+ max_blindex = build_bl_tree(s);
-+
-+ /* Determine the best encoding. Compute first the block length in bytes*/
-+ opt_lenb = (s->opt_len+3+7)>>3;
-+ static_lenb = (s->static_len+3+7)>>3;
-+
-+ Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
-+ opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
-+ s->last_lit));
-+
-+ if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
-+
-+ } else {
-+ Assert(buf != (char*)0, "lost buf");
-+ opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
-+ }
-+
-+#ifdef FORCE_STORED
-+ if (buf != (char*)0) { /* force stored block */
-+#else
-+ if (stored_len+4 <= opt_lenb && buf != (char*)0) {
-+ /* 4: two words for the lengths */
-+#endif
-+ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
-+ * Otherwise we can't have processed more than WSIZE input bytes since
-+ * the last block flush, because compression would have been
-+ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
-+ * transform a block into a stored block.
-+ */
-+ _tr_stored_block(s, buf, stored_len, eof);
-+
-+#ifdef FORCE_STATIC
-+ } else if (static_lenb >= 0) { /* force static trees */
-+#else
-+ } else if (static_lenb == opt_lenb) {
-+#endif
-+ send_bits(s, (STATIC_TREES<<1)+eof, 3);
-+ compress_block(s, static_ltree, static_dtree);
-+#ifdef DEBUG
-+ s->compressed_len += 3 + s->static_len;
-+#endif
-+ } else {
-+ send_bits(s, (DYN_TREES<<1)+eof, 3);
-+ send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
-+ max_blindex+1);
-+ compress_block(s, s->dyn_ltree, s->dyn_dtree);
-+#ifdef DEBUG
-+ s->compressed_len += 3 + s->opt_len;
-+#endif
-+ }
-+ Assert (s->compressed_len == s->bits_sent, "bad compressed size");
-+ /* The above check is made mod 2^32, for files larger than 512 MB
-+ * and uLong implemented on 32 bits.
-+ */
-+ init_block(s);
-+
-+ if (eof) {
-+ bi_windup(s);
-+#ifdef DEBUG
-+ s->compressed_len += 7; /* align on byte boundary */
-+#endif
-+ }
-+ Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
-+ s->compressed_len-7*eof));
-+}
-+
-+/* ===========================================================================
-+ * Save the match info and tally the frequency counts. Return true if
-+ * the current block must be flushed.
-+ */
-+int _tr_tally (s, dist, lc)
-+ deflate_state *s;
-+ unsigned dist; /* distance of matched string */
-+ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
-+{
-+ s->d_buf[s->last_lit] = (ush)dist;
-+ s->l_buf[s->last_lit++] = (uch)lc;
-+ if (dist == 0) {
-+ /* lc is the unmatched char */
-+ s->dyn_ltree[lc].Freq++;
-+ } else {
-+ s->matches++;
-+ /* Here, lc is the match length - MIN_MATCH */
-+ dist--; /* dist = match distance - 1 */
-+ Assert((ush)dist < (ush)MAX_DIST(s) &&
-+ (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
-+ (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
-+
-+ s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
-+ s->dyn_dtree[d_code(dist)].Freq++;
-+ }
-+
-+#ifdef TRUNCATE_BLOCK
-+ /* Try to guess if it is profitable to stop the current block here */
-+ if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
-+ /* Compute an upper bound for the compressed length */
-+ ulg out_length = (ulg)s->last_lit*8L;
-+ ulg in_length = (ulg)((long)s->strstart - s->block_start);
-+ int dcode;
-+ for (dcode = 0; dcode < D_CODES; dcode++) {
-+ out_length += (ulg)s->dyn_dtree[dcode].Freq *
-+ (5L+extra_dbits[dcode]);
-+ }
-+ out_length >>= 3;
-+ Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
-+ s->last_lit, in_length, out_length,
-+ 100L - out_length*100L/in_length));
-+ if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
-+ }
-+#endif
-+ return (s->last_lit == s->lit_bufsize-1);
-+ /* We avoid equality with lit_bufsize because of wraparound at 64K
-+ * on 16 bit machines and because stored blocks are restricted to
-+ * 64K-1 bytes.
-+ */
-+}
-+
-+/* ===========================================================================
-+ * Send the block data compressed using the given Huffman trees
-+ */
-+local void compress_block(s, ltree, dtree)
-+ deflate_state *s;
-+ const ct_data *ltree; /* literal tree */
-+ const ct_data *dtree; /* distance tree */
-+{
-+ unsigned dist; /* distance of matched string */
-+ int lc; /* match length or unmatched char (if dist == 0) */
-+ unsigned lx = 0; /* running index in l_buf */
-+ unsigned code; /* the code to send */
-+ int extra; /* number of extra bits to send */
-+
-+ if (s->last_lit != 0) do {
-+ dist = s->d_buf[lx];
-+ lc = s->l_buf[lx++];
-+ if (dist == 0) {
-+ send_code(s, lc, ltree); /* send a literal byte */
-+ Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-+ } else {
-+ /* Here, lc is the match length - MIN_MATCH */
-+ code = _length_code[lc];
-+ send_code(s, code+LITERALS+1, ltree); /* send the length code */
-+ extra = extra_lbits[code];
-+ if (extra != 0) {
-+ lc -= base_length[code];
-+ send_bits(s, lc, extra); /* send the extra length bits */
-+ }
-+ dist--; /* dist is now the match distance - 1 */
-+ code = d_code(dist);
-+ Assert (code < D_CODES, "bad d_code");
-+
-+ send_code(s, code, dtree); /* send the distance code */
-+ extra = extra_dbits[code];
-+ if (extra != 0) {
-+ dist -= base_dist[code];
-+ send_bits(s, dist, extra); /* send the extra distance bits */
-+ }
-+ } /* literal or match pair ? */
-+
-+ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
-+ Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
-+
-+ } while (lx < s->last_lit);
-+
-+ send_code(s, END_BLOCK, ltree);
-+ s->last_eob_len = ltree[END_BLOCK].Len;
-+}
-+
-+/* ===========================================================================
-+ * Set the data type to ASCII or BINARY, using a crude approximation:
-+ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
-+ * IN assertion: the fields freq of dyn_ltree are set and the total of all
-+ * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
-+ */
-+local void set_data_type(s)
-+ deflate_state *s;
-+{
-+ int n = 0;
-+ unsigned ascii_freq = 0;
-+ unsigned bin_freq = 0;
-+ while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
-+ while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
-+ while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
-+ s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
-+}
-+
-+/* ===========================================================================
-+ * Reverse the first len bits of a code, using straightforward code (a faster
-+ * method would use a table)
-+ * IN assertion: 1 <= len <= 15
-+ */
-+local unsigned bi_reverse(code, len)
-+ unsigned code; /* the value to invert */
-+ int len; /* its bit length */
-+{
-+ register unsigned res = 0;
-+ do {
-+ res |= code & 1;
-+ code >>= 1, res <<= 1;
-+ } while (--len > 0);
-+ return res >> 1;
-+}
-+
-+/* ===========================================================================
-+ * Flush the bit buffer, keeping at most 7 bits in it.
-+ */
-+local void bi_flush(s)
-+ deflate_state *s;
-+{
-+ if (s->bi_valid == 16) {
-+ put_short(s, s->bi_buf);
-+ s->bi_buf = 0;
-+ s->bi_valid = 0;
-+ } else if (s->bi_valid >= 8) {
-+ put_byte(s, (Byte)s->bi_buf);
-+ s->bi_buf >>= 8;
-+ s->bi_valid -= 8;
-+ }
-+}
-+
-+/* ===========================================================================
-+ * Flush the bit buffer and align the output on a byte boundary
-+ */
-+local void bi_windup(s)
-+ deflate_state *s;
-+{
-+ if (s->bi_valid > 8) {
-+ put_short(s, s->bi_buf);
-+ } else if (s->bi_valid > 0) {
-+ put_byte(s, (Byte)s->bi_buf);
-+ }
-+ s->bi_buf = 0;
-+ s->bi_valid = 0;
-+#ifdef DEBUG
-+ s->bits_sent = (s->bits_sent+7) & ~7;
-+#endif
-+}
-+
-+/* ===========================================================================
-+ * Copy a stored block, storing first the length and its
-+ * one's complement if requested.
-+ */
-+local void copy_block(s, buf, len, header)
-+ deflate_state *s;
-+ charf *buf; /* the input data */
-+ unsigned len; /* its length */
-+ int header; /* true if block header must be written */
-+{
-+ bi_windup(s); /* align on byte boundary */
-+ s->last_eob_len = 8; /* enough lookahead for inflate */
-+
-+ if (header) {
-+ put_short(s, (ush)len);
-+ put_short(s, (ush)~len);
-+#ifdef DEBUG
-+ s->bits_sent += 2*16;
-+#endif
-+ }
-+#ifdef DEBUG
-+ s->bits_sent += (ulg)len<<3;
-+#endif
-+ while (len--) {
-+ put_byte(s, *buf++);
-+ }
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/trees.h Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,128 @@
-+/* header created automatically with -DGEN_TREES_H */
-+
-+local const ct_data static_ltree[L_CODES+2] = {
-+{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}},
-+{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}},
-+{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}},
-+{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}},
-+{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}},
-+{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}},
-+{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}},
-+{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}},
-+{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}},
-+{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}},
-+{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}},
-+{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}},
-+{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}},
-+{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}},
-+{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}},
-+{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}},
-+{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}},
-+{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}},
-+{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}},
-+{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}},
-+{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}},
-+{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}},
-+{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}},
-+{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}},
-+{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}},
-+{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}},
-+{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}},
-+{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}},
-+{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}},
-+{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}},
-+{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}},
-+{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}},
-+{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}},
-+{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}},
-+{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}},
-+{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}},
-+{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}},
-+{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}},
-+{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}},
-+{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}},
-+{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}},
-+{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}},
-+{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}},
-+{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}},
-+{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}},
-+{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}},
-+{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}},
-+{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}},
-+{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}},
-+{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}},
-+{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}},
-+{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}},
-+{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}},
-+{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}},
-+{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}},
-+{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}},
-+{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}},
-+{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}}
-+};
-+
-+local const ct_data static_dtree[D_CODES] = {
-+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
-+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
-+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
-+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
-+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
-+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
-+};
-+
-+const uch _dist_code[DIST_CODE_LEN] = {
-+ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
-+ 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
-+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
-+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
-+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17,
-+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
-+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
-+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
-+};
-+
-+const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {
-+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
-+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
-+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
-+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
-+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
-+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
-+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
-+};
-+
-+local const int base_length[LENGTH_CODES] = {
-+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
-+64, 80, 96, 112, 128, 160, 192, 224, 0
-+};
-+
-+local const int base_dist[D_CODES] = {
-+ 0, 1, 2, 3, 4, 6, 8, 12, 16, 24,
-+ 32, 48, 64, 96, 128, 192, 256, 384, 512, 768,
-+ 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576
-+};
-+
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ultoa.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,66 @@
-+/*
-+ * convert unsigned long to ASCII
-+ * Copyright (C) 1998, 1999 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: ultoa.c,v 1.10 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - ultoa - convert unsigned long to decimal ASCII
-+ */
-+size_t /* length required for full conversion */
-+ultoa(n, base, dst, dstlen)
-+unsigned long n;
-+int base;
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ char buf[3*sizeof(unsigned long) + 1];
-+ char *bufend = buf + sizeof(buf);
-+ size_t len;
-+ char *p;
-+ static char hex[] = "0123456789abcdef";
-+
-+ p = bufend;
-+ *--p = '\0';
-+ if (base == 10) {
-+ do {
-+ *--p = n%10 + '0';
-+ n /= 10;
-+ } while (n != 0);
-+ } else if (base == 16) {
-+ do {
-+ *--p = hex[n&0xf];
-+ n >>= 4;
-+ } while (n != 0);
-+ *--p = 'x';
-+ *--p = '0';
-+ } else if (base == 8) {
-+ do {
-+ *--p = (n&07) + '0';
-+ n >>= 3;
-+ } while (n != 0);
-+ *--p = '0';
-+ } else
-+ *--p = '?';
-+
-+ len = bufend - p;
-+
-+ if (dstlen > 0) {
-+ if (len > dstlen)
-+ *(p + dstlen - 1) = '\0';
-+ strcpy(dst, p);
-+ }
-+ return len;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/ultot.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,82 @@
-+/*
-+ * convert unsigned long to text
-+ * Copyright (C) 2000 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: ultot.c,v 1.5 2004/07/10 07:48:37 mcr Exp $
-+ */
-+#include "openswan.h"
-+
-+/*
-+ - ultot - convert unsigned long to text
-+ */
-+size_t /* length required for full conversion */
-+ultot(n, base, dst, dstlen)
-+unsigned long n;
-+int base;
-+char *dst; /* need not be valid if dstlen is 0 */
-+size_t dstlen;
-+{
-+ char buf[3*sizeof(unsigned long) + 1];
-+ char *bufend = buf + sizeof(buf);
-+ size_t len;
-+ char *p;
-+ static char hex[] = "0123456789abcdef";
-+# define HEX32 (32/4)
-+
-+ p = bufend;
-+ *--p = '\0';
-+ switch (base) {
-+ case 10:
-+ case 'd':
-+ do {
-+ *--p = n%10 + '0';
-+ n /= 10;
-+ } while (n != 0);
-+ break;
-+ case 16:
-+ case 17:
-+ case 'x':
-+ do {
-+ *--p = hex[n&0xf];
-+ n >>= 4;
-+ } while (n != 0);
-+ if (base == 17)
-+ while (bufend - p < HEX32 + 1)
-+ *--p = '0';
-+ if (base == 'x') {
-+ *--p = 'x';
-+ *--p = '0';
-+ }
-+ break;
-+ case 8:
-+ case 'o':
-+ do {
-+ *--p = (n&07) + '0';
-+ n >>= 3;
-+ } while (n != 0);
-+ if (base == 'o')
-+ *--p = '0';
-+ break;
-+ default:
-+ return 0;
-+ break;
-+ }
-+
-+ len = bufend - p;
-+ if (dstlen > 0) {
-+ if (len > dstlen)
-+ *(p + dstlen - 1) = '\0';
-+ strcpy(dst, p);
-+ }
-+ return len;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/version.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,44 @@
-+/*
-+ * return IPsec version information
-+ * Copyright (C) 2001 Henry Spencer.
-+ *
-+ * This library is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU Library General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at your
-+ * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
-+ *
-+ * This library is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-+ * License for more details.
-+ *
-+ * RCSID $Id: version.in.c,v 1.2 2004/04/14 05:09:46 ken Exp $
-+ */
-+
-+#ifdef __KERNEL__
-+#include <linux/netdevice.h>
-+#endif
-+
-+#include "openswan.h"
-+
-+#define V "2.6.16dr2" /* substituted in by Makefile */
-+static const char openswan_number[] = V;
-+static const char openswan_string[] = "Openswan " V;
-+
-+/*
-+ - ipsec_version_code - return IPsec version number/code, as string
-+ */
-+const char *
-+ipsec_version_code()
-+{
-+ return openswan_number;
-+}
-+
-+/*
-+ - ipsec_version_string - return full version string
-+ */
-+const char *
-+ipsec_version_string()
-+{
-+ return openswan_string;
-+}
---- /dev/null Tue Mar 11 13:02:56 2003
-+++ linux/net/ipsec/zutil.c Mon Feb 9 13:51:03 2004
-@@ -0,0 +1,227 @@
-+/* zutil.c -- target dependent utility functions for the compression library
-+ * Copyright (C) 1995-2002 Jean-loup Gailly.
-+ * For conditions of distribution and use, see copyright notice in zlib.h
-+ */
-+
-+/* @(#) $Id: zutil.c,v 1.5 2004/07/10 07:48:40 mcr Exp $ */
-+
-+#include <zlib/zutil.h>
-+
-+#define MY_ZCALLOC
-+
-+struct internal_state {int dummy;}; /* for buggy compilers */
-+
-+#ifndef STDC
-+extern void exit OF((int));
-+#endif
-+
-+const char *z_errmsg[10] = {
-+"need dictionary", /* Z_NEED_DICT 2 */
-+"stream end", /* Z_STREAM_END 1 */
-+"", /* Z_OK 0 */
-+"file error", /* Z_ERRNO (-1) */
-+"stream error", /* Z_STREAM_ERROR (-2) */
-+"data error", /* Z_DATA_ERROR (-3) */
-+"insufficient memory", /* Z_MEM_ERROR (-4) */
-+"buffer error", /* Z_BUF_ERROR (-5) */
-+"incompatible version",/* Z_VERSION_ERROR (-6) */
-+""};
-+
-+
-+const char * ZEXPORT zlibVersion()
-+{
-+ return ZLIB_VERSION;
-+}
-+
-+#ifdef DEBUG
-+
-+# ifndef verbose
-+# define verbose 0
-+# endif
-+int z_verbose = verbose;
-+
-+void z_error (m)
-+ char *m;
-+{
-+ fprintf(stderr, "%s\n", m);
-+ exit(1);
-+}
-+#endif
-+
-+/* exported to allow conversion of error code to string for compress() and
-+ * uncompress()
-+ */
-+const char * ZEXPORT zError(err)
-+ int err;
-+{
-+ return ERR_MSG(err);
-+}
-+
-+
-+#ifndef HAVE_MEMCPY
-+
-+void zmemcpy(dest, source, len)
-+ Bytef* dest;
-+ const Bytef* source;
-+ uInt len;
-+{
-+ if (len == 0) return;
-+ do {
-+ *dest++ = *source++; /* ??? to be unrolled */
-+ } while (--len != 0);
-+}
-+
-+int zmemcmp(s1, s2, len)
-+ const Bytef* s1;
-+ const Bytef* s2;
-+ uInt len;
-+{
-+ uInt j;
-+
-+ for (j = 0; j < len; j++) {
-+ if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
-+ }
-+ return 0;
-+}
-+
-+void zmemzero(dest, len)
-+ Bytef* dest;
-+ uInt len;
-+{
-+ if (len == 0) return;
-+ do {
-+ *dest++ = 0; /* ??? to be unrolled */
-+ } while (--len != 0);
-+}
-+#endif
-+
-+#ifdef __TURBOC__
-+#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__)
-+/* Small and medium model in Turbo C are for now limited to near allocation
-+ * with reduced MAX_WBITS and MAX_MEM_LEVEL
-+ */
-+# define MY_ZCALLOC
-+
-+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
-+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
-+ * must fix the pointer. Warning: the pointer must be put back to its
-+ * original form in order to free it, use zcfree().
-+ */
-+
-+#define MAX_PTR 10
-+/* 10*64K = 640K */
-+
-+local int next_ptr = 0;
-+
-+typedef struct ptr_table_s {
-+ voidpf org_ptr;
-+ voidpf new_ptr;
-+} ptr_table;
-+
-+local ptr_table table[MAX_PTR];
-+/* This table is used to remember the original form of pointers
-+ * to large buffers (64K). Such pointers are normalized with a zero offset.
-+ * Since MSDOS is not a preemptive multitasking OS, this table is not
-+ * protected from concurrent access. This hack doesn't work anyway on
-+ * a protected system like OS/2. Use Microsoft C instead.
-+ */
-+
-+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-+{
-+ voidpf buf = opaque; /* just to make some compilers happy */
-+ ulg bsize = (ulg)items*size;
-+
-+ /* If we allocate less than 65520 bytes, we assume that farmalloc
-+ * will return a usable pointer which doesn't have to be normalized.
-+ */
-+ if (bsize < 65520L) {
-+ buf = farmalloc(bsize);
-+ if (*(ush*)&buf != 0) return buf;
-+ } else {
-+ buf = farmalloc(bsize + 16L);
-+ }
-+ if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
-+ table[next_ptr].org_ptr = buf;
-+
-+ /* Normalize the pointer to seg:0 */
-+ *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
-+ *(ush*)&buf = 0;
-+ table[next_ptr++].new_ptr = buf;
-+ return buf;
-+}
-+
-+void zcfree (voidpf opaque, voidpf ptr)
-+{
-+ int n;
-+ if (*(ush*)&ptr != 0) { /* object < 64K */
-+ farfree(ptr);
-+ return;
-+ }
-+ /* Find the original pointer */
-+ for (n = 0; n < next_ptr; n++) {
-+ if (ptr != table[n].new_ptr) continue;
-+
-+ farfree(table[n].org_ptr);
-+ while (++n < next_ptr) {
-+ table[n-1] = table[n];
-+ }
-+ next_ptr--;
-+ return;
-+ }
-+ ptr = opaque; /* just to make some compilers happy */
-+ Assert(0, "zcfree: ptr not found");
-+}
-+#endif
-+#endif /* __TURBOC__ */
-+
-+
-+#if defined(M_I86) && !defined(__32BIT__)
-+/* Microsoft C in 16-bit mode */
-+
-+# define MY_ZCALLOC
-+
-+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
-+# define _halloc halloc
-+# define _hfree hfree
-+#endif
-+
-+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-+{
-+ if (opaque) opaque = 0; /* to make compiler happy */
-+ return _halloc((long)items, size);
-+}
-+
-+void zcfree (voidpf opaque, voidpf ptr)
-+{
-+ if (opaque) opaque = 0; /* to make compiler happy */
-+ _hfree(ptr);
-+}
-+
-+#endif /* MSC */
-+
-+
-+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
-+
-+#ifndef STDC
-+extern voidp calloc OF((uInt items, uInt size));
-+extern void free OF((voidpf ptr));
-+#endif
-+
-+voidpf zcalloc (opaque, items, size)
-+ voidpf opaque;
-+ unsigned items;
-+ unsigned size;
-+{
-+ if (opaque) items += size - size; /* make compiler happy */
-+ return (voidpf)calloc(items, size);
-+}
-+
-+void zcfree (opaque, ptr)
-+ voidpf opaque;
-+ voidpf ptr;
-+{
-+ free(ptr);
-+ if (opaque) return; /* make compiler happy */
-+}
-+
-+#endif /* MY_ZCALLOC */
---- swan26/net/ipv4/af_inet.c.orig Wed Jun 16 01:18:58 2004
-+++ swan26/net/ipv4/af_inet.c Fri Aug 13 23:09:27 2004
-@@ -1169,6 +1169,18 @@
- #if defined(CONFIG_IP_MROUTE)
- ip_mr_init();
- #endif
-+
-+#if defined(CONFIG_KLIPS)
-+	{
-+		extern int ipsec_klips_init(void);
-+		/*
-+		 * Initialise AF_INET ESP and AH protocol support including
-+		 * e-routing and SA tables
-+		 */
-+		ipsec_klips_init();
-+	}
-+#endif /* CONFIG_KLIPS */
-+
- /*
- * Initialise per-cpu ipv4 mibs
- */
---- /dev/null Fri May 10 13:59:54 2002
-+++ linux/net/ipsec/Makefile.ver Sun Jul 28 22:10:40 2002
-@@ -0,0 +1 @@
-+IPSECVERSION='2.6.16dr2'
+++ /dev/null
-Index: linux-2.6.x/net/ipv4/Kconfig
-===================================================================
-RCS file: /cvs/sw/linux-2.6.x/net/ipv4/Kconfig,v
-retrieving revision 1.1.1.28
-retrieving revision 1.10
-diff -u -r1.1.1.28 -r1.10
---- linux-2.6.x/net/ipv4/Kconfig 10 Oct 2007 00:54:30 -0000 1.1.1.28
-+++ linux-2.6.x/net/ipv4/Kconfig 10 Oct 2007 04:53:57 -0000 1.10
-@@ -367,6 +367,12 @@
- tristate
- default n
-
-+config IPSEC_NAT_TRAVERSAL
-+ bool "IPSEC NAT-Traversal (KLIPS compatible)"
-+ depends on INET
-+ ---help---
-+ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP.
-+
- config INET_XFRM_MODE_TRANSPORT
- tristate "IP: IPsec transport mode"
- default y
-Index: linux-2.6.x/net/ipv4/udp.c
-===================================================================
-RCS file: /cvs/sw/linux-2.6.x/net/ipv4/udp.c,v
-retrieving revision 1.1.1.46
-diff -u -r1.1.1.46 udp.c
---- linux-2.6.x/net/ipv4/udp.c 10 Oct 2007 00:54:30 -0000 1.1.1.46
-+++ linux-2.6.x/net/ipv4/udp.c 9 Nov 2007 00:11:33 -0000
-@@ -102,6 +102,7 @@
- #include <net/route.h>
- #include <net/checksum.h>
- #include <net/xfrm.h>
-+#include <net/xfrmudp.h>
- #include "udp_impl.h"
-
- /*
-@@ -920,6 +921,128 @@
- return 0;
- }
-
-+#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+
-+static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL;
-+
-+/*
-+ * Strip the UDP header (and Non-IKE marker) from an ESP-in-UDP skb and pass
-+ * it to the registered xfrm4_rcv_encap_func.  Returns the hook's result, 0
-+ * if the skb was freed here, or 1 if it was not consumed (IKE, or no hook).
-+ * Most of this code stolen from net/ipv4/xfrm4_input.c which is attributed
-+ * to YOSHIFUJI Hideaki @USAGI, and Derek Atkins <derek@ihtfp.com>
-+ */
-+static int xfrm4_udp_encap_rcv_wrapper(struct sock *sk, struct sk_buff *skb)
-+{
-+	struct udp_sock *up = udp_sk(sk);
-+	struct udphdr *uh;
-+	struct iphdr *iph;
-+	int iphlen, len;
-+	xfrm4_rcv_encap_t rcv = xfrm4_rcv_encap_func;	/* read the hook once */
-+
-+	__u8 *udpdata;
-+	__be32 *udpdata32;
-+	__u16 encap_type = up->encap_type;
-+
-+	/* No hook registered (it may have been unregistered after this socket
-+	 * was set up): return now instead of calling through NULL below. */
-+	if (!rcv)
-+		return 1;
-+
-+	/* If this is a paged skb, make sure we pull up
-+	 * whatever data we need to look at. */
-+	len = skb->len - sizeof(struct udphdr);
-+	if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
-+		return 1;
-+
-+	/* Now we can get the pointers */
-+	uh = udp_hdr(skb);
-+	udpdata = (__u8 *)uh + sizeof(struct udphdr);
-+	udpdata32 = (__be32 *)udpdata;
-+
-+	switch (encap_type) {
-+	default:
-+	case UDP_ENCAP_ESPINUDP:
-+		/* Check if this is a keepalive packet.  If so, eat it. */
-+		if (len == 1 && udpdata[0] == 0xff) {
-+			goto drop;
-+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
-+			/* ESP Packet without Non-ESP header */
-+			len = sizeof(struct udphdr);
-+		} else
-+			/* Must be an IKE packet.. pass it through */
-+			return 1;
-+		break;
-+	case UDP_ENCAP_ESPINUDP_NON_IKE:
-+		/* Check if this is a keepalive packet.  If so, eat it. */
-+		if (len == 1 && udpdata[0] == 0xff) {
-+			goto drop;
-+		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
-+			   udpdata32[0] == 0 && udpdata32[1] == 0) {
-+
-+			/* ESP Packet with Non-IKE marker */
-+			len = sizeof(struct udphdr) + 2 * sizeof(u32);
-+		} else
-+			/* Must be an IKE packet.. pass it through */
-+			return 1;
-+		break;
-+	}
-+
-+	/* At this point we are sure that this is an ESPinUDP packet,
-+	 * so we need to remove 'len' bytes from the packet (the UDP
-+	 * header and optional ESP marker bytes) and then modify the
-+	 * protocol to ESP, and then call into the transform receiver.
-+	 */
-+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-+		goto drop;
-+
-+	/* Now we can update and verify the packet length... */
-+	iph = ip_hdr(skb);
-+	iphlen = iph->ihl << 2;
-+	iph->tot_len = htons(ntohs(iph->tot_len) - len);
-+	if (skb->len < iphlen + len) {
-+		/* packet is too small!?! */
-+		goto drop;
-+	}
-+
-+	/* pull the data buffer up to the ESP header and set the
-+	 * transport header to point to ESP.  Keep UDP on the stack
-+	 * for later.
-+	 */
-+	__skb_pull(skb, len);
-+	skb_reset_transport_header(skb);
-+
-+	/* modify the protocol (it's ESP!) */
-+	iph->protocol = IPPROTO_ESP;
-+
-+	/* process ESP */
-+	return (*rcv)(skb, encap_type);
-+
-+drop:
-+	kfree_skb(skb);
-+	return 0;
-+}
-+
-+int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func,
-+			       xfrm4_rcv_encap_t *oldfunc)
-+{
-+	if (oldfunc)	/* caller asked for the hook that is being replaced */
-+		*oldfunc = xfrm4_rcv_encap_func;
-+	xfrm4_rcv_encap_func = func;	/* install the new hook */
-+	return 0;
-+}
-+
-+int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func)
-+{
-+	/* only the hook that is currently installed may be removed */
-+	if (xfrm4_rcv_encap_func == func) {
-+		xfrm4_rcv_encap_func = NULL;
-+		return 0;
-+	}
-+	return -1;
-+}
-+
-+#endif /* CONFIG_XFRM || CONFIG_IPSEC_NAT_TRAVERSAL */
-+
- /* returns:
- * -1: error
- * 0: success
-@@ -1252,6 +1375,11 @@
- case 0:
- case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
-+#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+ if (xfrm4_rcv_encap_func)
-+ up->encap_rcv = xfrm4_udp_encap_rcv_wrapper;
-+ else
-+#endif
- up->encap_rcv = xfrm4_udp_encap_rcv;
- /* FALLTHROUGH */
- case UDP_ENCAP_L2TPINUDP:
-@@ -1648,3 +1776,9 @@
- EXPORT_SYMBOL(udp_proc_register);
- EXPORT_SYMBOL(udp_proc_unregister);
- #endif
-+
-+#if defined(CONFIG_IPSEC_NAT_TRAVERSAL)
-+EXPORT_SYMBOL(udp4_register_esp_rcvencap);
-+EXPORT_SYMBOL(udp4_unregister_esp_rcvencap);
-+#endif
-+
-Index: linux-2.6.x/include/net/xfrmudp.h
-===================================================================
-RCS file: linux-2.6.x/include/net/xfrmudp.h
-diff -N linux-2.6.x/include/net/xfrmudp.h
---- /dev/null 1 Jan 1970 00:00:00 -0000
-+++ linux-2.6.x/include/net/xfrmudp.h 3 Nov 2005 01:55:55 -0000 1.1
-@@ -0,0 +1,10 @@
-+/* Function-pointer type xfrm4_input wants; decouples XFRM from udp.c. */
-+#ifndef _NET_XFRMUDP_H	/* guard: a second inclusion would redefine the typedef */
-+#define _NET_XFRMUDP_H
-+#define HAVE_XFRM4_UDP_REGISTER
-+
-+typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type);
-+extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
-+				      , xfrm4_rcv_encap_t *oldfunc);
-+extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func);
-+#endif /* _NET_XFRMUDP_H */
+++ /dev/null
-diff -urN linux-2.6.20.orig/arch/i386/lib/usercopy.c linux-2.6.20/arch/i386/lib/usercopy.c
---- linux-2.6.20.orig/arch/i386/lib/usercopy.c 2006-11-30 00:57:37.000000000 +0300
-+++ linux-2.6.20/arch/i386/lib/usercopy.c 2007-05-06 14:50:43.658963226 +0400
-@@ -812,6 +812,7 @@
- #endif
- return n;
- }
-+EXPORT_SYMBOL(__copy_from_user_ll_nocache);
-
- unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
- unsigned long n)
-@@ -827,6 +828,7 @@
- #endif
- return n;
- }
-+EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
- /**
- * copy_to_user: - Copy a block of data into user space.
-diff -urN linux-2.6.20.orig/Documentation/Changes linux-2.6.20/Documentation/Changes
---- linux-2.6.20.orig/Documentation/Changes 2007-05-06 15:04:34.226399593 +0400
-+++ linux-2.6.20/Documentation/Changes 2007-05-06 14:50:43.658963226 +0400
-@@ -36,6 +36,7 @@
- o e2fsprogs 1.29 # tune2fs
- o jfsutils 1.1.3 # fsck.jfs -V
- o reiserfsprogs 3.6.3 # reiserfsck -V 2>&1|grep reiserfsprogs
-+o reiser4progs 1.0.0 # fsck.reiser4 -V
- o xfsprogs 2.6.0 # xfs_db -V
- o pcmciautils 004 # pccardctl -V
- o quota-tools 3.09 # quota -V
-@@ -144,6 +145,13 @@
- versions of mkreiserfs, resize_reiserfs, debugreiserfs and
- reiserfsck. These utils work on both i386 and alpha platforms.
-
-+Reiser4progs
-+------------
-+
-+The reiser4progs package contains utilities for the reiser4 file system.
-+Detailed instructions are provided in the README file located at:
-+<ftp://ftp.namesys.com/pub/reiser4progs/README>.
-+
- Xfsprogs
- --------
-
-@@ -322,6 +330,10 @@
- -------------
- o <http://www.namesys.com/pub/reiserfsprogs/reiserfsprogs-3.6.3.tar.gz>
-
-+Reiser4progs
-+------------
-+o <ftp://ftp.namesys.com/pub/reiser4progs/>
-+
- Xfsprogs
- --------
- o <ftp://oss.sgi.com/projects/xfs/download/>
-diff -urN linux-2.6.20.orig/Documentation/filesystems/reiser4.txt linux-2.6.20/Documentation/filesystems/reiser4.txt
---- linux-2.6.20.orig/Documentation/filesystems/reiser4.txt 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/Documentation/filesystems/reiser4.txt 2007-05-06 14:50:43.658963226 +0400
-@@ -0,0 +1,75 @@
-+Reiser4 filesystem
-+==================
-+Reiser4 is a file system based on dancing tree algorithms, and is
-+described at http://www.namesys.com
-+
-+
-+References
-+==========
-+web page http://namesys.com/v4/v4.html
-+source code ftp://ftp.namesys.com/pub/reiser4-for-2.6/
-+userland tools ftp://ftp.namesys.com/pub/reiser4progs/
-+install page http://www.namesys.com/install_v4.html
-+
-+Compile options
-+===============
-+Enable reiser4 debug mode
-+ This checks everything imaginable while reiser4
-+ runs
-+
-+Mount options
-+=============
-+tmgr.atom_max_size=N
-+ Atoms containing more than N blocks will be forced to commit.
-+ N is decimal.
-+ Default is nr_free_pagecache_pages() / 2 at mount time.
-+
-+tmgr.atom_max_age=N
-+ Atoms older than N seconds will be forced to commit. N is decimal.
-+ Default is 600.
-+
-+tmgr.atom_max_flushers=N
-+ Limit of concurrent flushers for one atom. 0 means no limit.
-+ Default is 0.
-+
-+tree.cbk_cache.nr_slots=N
-+ Number of slots in the cbk cache.
-+
-+flush.relocate_threshold=N
-+ If flush finds more than N adjacent dirty leaf-level blocks it
-+ will force them to be relocated.
-+ Default is 64.
-+
-+flush.relocate_distance=N
-+	If flush can find a block allocation closer than at most
-+ N from the preceder it will relocate to that position.
-+ Default is 64.
-+
-+flush.scan_maxnodes=N
-+ The maximum number of nodes to scan left on a level during
-+ flush.
-+ Default is 10000.
-+
-+optimal_io_size=N
-+ Preferred IO size. This value is used to set st_blksize of
-+ struct stat.
-+ Default is 65536.
-+
-+bsdgroups
-+ Turn on BSD-style gid assignment.
-+
-+32bittimes
-+	By default files in reiser4 have 64 bit timestamps. Files
-+ created when filesystem is mounted with 32bittimes mount
-+ option will get 32 bit timestamps.
-+
-+mtflush
-+ Turn off concurrent flushing.
-+
-+nopseudo
-+ Disable pseudo files support. See
-+ http://namesys.com/v4/pseudo.html for more about pseudo files.
-+
-+dont_load_bitmap
-+ Don't load all bitmap blocks at mount time, it is useful for
-+ machines with tiny RAM and large disks.
-diff -urN linux-2.6.20.orig/fs/fs-writeback.c linux-2.6.20/fs/fs-writeback.c
---- linux-2.6.20.orig/fs/fs-writeback.c 2007-05-06 15:04:39.848155607 +0400
-+++ linux-2.6.20/fs/fs-writeback.c 2007-05-06 14:50:43.662964476 +0400
-@@ -296,8 +296,6 @@
- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
- * that it can be located for waiting on in __writeback_single_inode().
- *
-- * Called under inode_lock.
-- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched. For other superblocks,
-@@ -313,11 +311,13 @@
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on __wait_on_inode.
- */
--static void
--sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
-+void
-+generic_sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
- {
- const unsigned long start = jiffies; /* livelock avoidance */
-
-+ spin_lock(&inode_lock);
-+
- if (!wbc->for_kupdate || list_empty(&sb->s_io))
- list_splice_init(&sb->s_dirty, &sb->s_io);
-
-@@ -397,8 +397,19 @@
- if (wbc->nr_to_write <= 0)
- break;
- }
-+ spin_unlock(&inode_lock);
- return; /* Leave any unwritten inodes on s_io */
- }
-+EXPORT_SYMBOL(generic_sync_sb_inodes);
-+
-+static void
-+sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
-+{
-+	if (!sb->s_op->sync_inodes)	/* no filesystem-specific method */
-+		generic_sync_sb_inodes(sb, wbc);
-+	else
-+		sb->s_op->sync_inodes(sb, wbc);
-+}
-
- /*
- * Start writeback of dirty pagecache data against all unlocked inodes.
-@@ -439,11 +450,8 @@
- * be unmounted by the time it is released.
- */
- if (down_read_trylock(&sb->s_umount)) {
-- if (sb->s_root) {
-- spin_lock(&inode_lock);
-+ if (sb->s_root)
- sync_sb_inodes(sb, wbc);
-- spin_unlock(&inode_lock);
-- }
- up_read(&sb->s_umount);
- }
- spin_lock(&sb_lock);
-@@ -481,9 +489,7 @@
- (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
- nr_dirty + nr_unstable;
- wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
-- spin_lock(&inode_lock);
- sync_sb_inodes(sb, &wbc);
-- spin_unlock(&inode_lock);
- }
-
- /*
-diff -urN linux-2.6.20.orig/fs/Kconfig linux-2.6.20/fs/Kconfig
---- linux-2.6.20.orig/fs/Kconfig 2007-05-06 15:04:39.668099364 +0400
-+++ linux-2.6.20/fs/Kconfig 2007-05-06 14:50:43.662964476 +0400
-@@ -272,6 +272,8 @@
- default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
- default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
-
-+source "fs/reiser4/Kconfig"
-+
- config REISERFS_FS
- tristate "Reiserfs support"
- help
-diff -urN linux-2.6.20.orig/fs/Makefile linux-2.6.20/fs/Makefile
---- linux-2.6.20.orig/fs/Makefile 2007-05-06 15:04:39.668099364 +0400
-+++ linux-2.6.20/fs/Makefile 2007-05-06 14:50:43.666965726 +0400
-@@ -62,6 +62,7 @@
-
- # Do not add any filesystems before this line
- obj-$(CONFIG_REISERFS_FS) += reiserfs/
-+obj-$(CONFIG_REISER4_FS) += reiser4/
- obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
- obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev
- obj-$(CONFIG_JBD) += jbd/
-diff -urN linux-2.6.20.orig/fs/reiser4/as_ops.c linux-2.6.20/fs/reiser4/as_ops.c
---- linux-2.6.20.orig/fs/reiser4/as_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/as_ops.c 2007-05-06 14:50:43.666965726 +0400
-@@ -0,0 +1,337 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Interface to VFS. Reiser4 address_space_operations are defined here. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/file/file.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/disk_format/disk_format.h"
-+#include "plugin/plugin.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "entd.h"
-+
-+#include <linux/profile.h>
-+#include <linux/types.h>
-+#include <linux/mount.h>
-+#include <linux/vfs.h>
-+#include <linux/mm.h>
-+#include <linux/buffer_head.h>
-+#include <linux/dcache.h>
-+#include <linux/list.h>
-+#include <linux/pagemap.h>
-+#include <linux/slab.h>
-+#include <linux/seq_file.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/writeback.h>
-+#include <linux/backing-dev.h>
-+#include <linux/quotaops.h>
-+#include <linux/security.h>
-+
-+/* address space operations */
-+
-+/**
-+ * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting
-+ * @page: page to be dirtied
-+ *
-+ * Operation of struct address_space_operations. This implementation is used by
-+ * unix and cryptcompress file plugins.
-+ *
-+ * This is called when reiser4 page gets dirtied outside of reiser4, for
-+ * example, when dirty bit is moved from pte to physical page.
-+ *
-+ * Tags page in the mapping's page tree with special tag so that it is possible
-+ * to do all the reiser4 specific work wrt dirty pages (jnode creation,
-+ * capturing by an atom) later because it can not be done in the contexts where
-+ * set_page_dirty is called.
-+ */
-+int reiser4_set_page_dirty(struct page *page)
-+{
-+	struct address_space *mapping;
-+
-+	/* this page can be unformatted only */
-+	assert("vs-1734", (page->mapping &&
-+			   page->mapping->host &&
-+			   reiser4_get_super_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host
-+			   && reiser4_get_cc_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host
-+			   && reiser4_get_bitmap_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host));
-+
-+	if (TestSetPageDirty(page))
-+		return 0;	/* dirty bit was set already */
-+
-+	mapping = page->mapping;
-+	if (!mapping)
-+		return 0;
-+
-+	write_lock_irq(&mapping->tree_lock);
-+	/* check for race with truncate */
-+	if (page->mapping) {
-+		assert("vs-1652", page->mapping == mapping);
-+		if (mapping_cap_account_dirty(mapping))
-+			inc_zone_page_state(page, NR_FILE_DIRTY);
-+		radix_tree_tag_set(&mapping->page_tree, page->index,
-+				   PAGECACHE_TAG_REISER4_MOVED);
-+	}
-+	write_unlock_irq(&mapping->tree_lock);
-+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-+	return 0;
-+}
-+
-+/* ->invalidatepage method for reiser4 */
-+
-+/*
-+ * this is called for each truncated page from
-+ * truncate_inode_pages()->truncate_{complete,partial}_page().
-+ *
-+ * At the moment of call, page is under lock, and outstanding io (if any) has
-+ * completed.
-+ */
-+
-+/**
-+ * reiser4_invalidatepage - ->invalidatepage method for reiser4
-+ * @page: page to invalidate
-+ * @offset: starting offset for partial invalidation
-+ *
-+ */
-+void reiser4_invalidatepage(struct page *page, unsigned long offset)
-+{
-+	int ret = 0;
-+	reiser4_context *ctx;
-+	struct inode *inode;
-+	jnode *node;
-+
-+	/*
-+	 * This is called to truncate file's page.
-+	 *
-+	 * Originally, reiser4 implemented truncate in a standard way
-+	 * (vmtruncate() calls ->invalidatepage() on all truncated pages
-+	 * first, then file system ->truncate() call-back is invoked).
-+	 *
-+	 * This led to the problem when ->invalidatepage() was called on a
-+	 * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT
-+	 * process. That is, truncate was bypassing transactions. To avoid
-+	 * this, try_capture_page_to_invalidate() call was added here.
-+	 *
-+	 * After many troubles with vmtruncate() based truncate (including
-+	 * races with flush, tail conversion, etc.) it was re-written in the
-+	 * top-to-bottom style: items are killed in reiser4_cut_tree_object()
-+	 * and pages belonging to extent are invalidated in kill_hook_extent().
-+	 * So probably now additional call to capture is not needed here.
-+	 */
-+
-+	assert("nikita-3137", PageLocked(page));
-+	assert("nikita-3138", !PageWriteback(page));
-+	inode = page->mapping->host;
-+
-+	/*
-+	 * ->invalidatepage() should only be called for the unformatted
-+	 * jnodes. Destruction of all other types of jnodes is performed
-+	 * separately. But, during some corner cases (like handling errors
-+	 * during mount) it is simpler to let ->invalidatepage to be called on
-+	 * them. Check for this, and do nothing.
-+	 */
-+	if (reiser4_get_super_fake(inode->i_sb) == inode)
-+		return;
-+	if (reiser4_get_cc_fake(inode->i_sb) == inode)
-+		return;
-+	if (reiser4_get_bitmap_fake(inode->i_sb) == inode)
-+		return;
-+	assert("vs-1426", PagePrivate(page));
-+	assert("vs-1427",
-+	       page->mapping == jnode_get_mapping(jnode_by_page(page)));
-+	assert("", jprivate(page) != NULL);
-+	assert("", ergo(inode_file_plugin(inode) !=
-+			file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID),
-+			offset == 0));
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return;	/* no context: nothing is detached and no error is reported */
-+
-+	node = jprivate(page);
-+	spin_lock_jnode(node);
-+	if (!(node->state & ((1 << JNODE_DIRTY) | (1<< JNODE_FLUSH_QUEUED) |
-+			     (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) {
-+		/* there is no need to capture */
-+		jref(node);
-+		JF_SET(node, JNODE_HEARD_BANSHEE);
-+		page_clear_jnode(page, node);
-+		reiser4_uncapture_jnode(node);	/* NOTE(review): no explicit unlock on this path; presumably this drops the jnode lock - confirm */
-+		unhash_unformatted_jnode(node);
-+		jput(node);
-+		reiser4_exit_context(ctx);
-+		return;
-+	}
-+	spin_unlock_jnode(node);
-+
-+	/* capture page being truncated. */
-+	ret = try_capture_page_to_invalidate(page);
-+	if (ret != 0)
-+		warning("nikita-3141", "Cannot capture: %i", ret);
-+
-+	if (offset == 0) {
-+		/* remove jnode from transaction and detach it from page. */
-+		jref(node);
-+		JF_SET(node, JNODE_HEARD_BANSHEE);
-+		/* page cannot be detached from jnode concurrently, because it
-+		 * is locked */
-+		reiser4_uncapture_page(page);
-+
-+		/* this detaches page from jnode, so that jdelete will not try
-+		 * to lock page which is already locked */
-+		spin_lock_jnode(node);
-+		page_clear_jnode(page, node);
-+		spin_unlock_jnode(node);
-+		unhash_unformatted_jnode(node);
-+
-+		jput(node);
-+	}
-+
-+	reiser4_exit_context(ctx);
-+}
-+
-+/* help function called from reiser4_releasepage(). It returns true if jnode
-+ * can be detached from its page and page released. */
-+int jnode_is_releasable(jnode * node /* node to check */ )
-+{
-+	/* both jnode spinlocks (->guard and ->load) must be held by the
-+	 * caller; this is asserted below */
-+	assert("nikita-2781", node != NULL);
-+	assert_spin_locked(&(node->guard));
-+	assert_spin_locked(&(node->load));
-+
-+	/* if some thread is currently using the jnode page, the latter cannot
-+	 * be detached */
-+	if (atomic_read(&node->d_count) != 0)
-+		return 0;
-+
-+	assert("vs-1214", !jnode_is_loaded(node));
-+
-+	/*
-+	 * can only release page if real block number is assigned to it. Simple
-+	 * check for ->atom wouldn't do, because it is possible for node to be
-+	 * clean, not in atom yet, and still having fake block number. For
-+	 * example, node just created in jinit_new().
-+	 */
-+	if (reiser4_blocknr_is_fake(jnode_get_block(node)))
-+		return 0;
-+
-+	/*
-+	 * The jnode must stay attached while any of these state bits is set:
-+	 *
-+	 * JNODE_WRITE_PREPARED: pages prepared for write can not be released
-+	 * anyway, so avoid detaching jnode from the page.
-+	 *
-+	 * JNODE_DIRTY: dirty jnode cannot be released. It can however be
-+	 * submitted to disk as part of early flushing, but only after getting
-+	 * flush-prepped.
-+	 *
-+	 * JNODE_OVRWR: overwrite set is only written by log writer.
-+	 *
-+	 * JNODE_WRITEBACK: jnode is already under writeback.
-+	 */
-+	if (JF_ISSET(node, JNODE_WRITE_PREPARED) ||
-+	    JF_ISSET(node, JNODE_DIRTY) ||
-+	    JF_ISSET(node, JNODE_OVRWR) ||
-+	    JF_ISSET(node, JNODE_WRITEBACK))
-+		return 0;
-+
-+	/* don't flush bitmaps or journal records */
-+	if (!(jnode_is_znode(node) || jnode_is_unformatted(node)))
-+		return 0;
-+
-+	/* none of the checks above objected */
-+	return 1;
-+}
-+
-+/*
-+ * ->releasepage method for reiser4
-+ *
-+ * This is called by VM scanner when it comes across clean page. What we have
-+ * to do here is to check whether page can really be released (freed that is)
-+ * and if so, detach jnode from it and remove page from the page cache.
-+ *
-+ * Check for releasability is done by jnode_is_releasable() function.
-+ *
-+ * Returns 1 if the jnode was detached from @page, 0 if the page must be kept.
-+ */
-+int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG)
-+{
-+	jnode *node;
-+
-+	assert("nikita-2257", PagePrivate(page));
-+	assert("nikita-2259", PageLocked(page));
-+	assert("nikita-2892", !PageWriteback(page));
-+	assert("nikita-3019", reiser4_schedulable());
-+
-+	/* NOTE-NIKITA: this can be called in the context of reiser4 call. It
-+	   is not clear what to do in this case. A lot of deadlocks seems be
-+	   possible. */
-+
-+	node = jnode_by_page(page);
-+	assert("nikita-2258", node != NULL);
-+	assert("reiser4-4", page->mapping != NULL);
-+	assert("reiser4-5", page->mapping->host != NULL);
-+
-+	if (PageDirty(page))
-+		return 0;
-+
-+	/* extra page reference is used by reiser4 to protect
-+	 * jnode<->page link from this ->releasepage(). */
-+	if (page_count(page) > 3)
-+		return 0;
-+
-+	/* jnode_is_releasable() needs jnode lock, because it looks at the jnode
-+	 * fields and we need jload_lock here to avoid races with jload(). */
-+	spin_lock_jnode(node);
-+	spin_lock(&(node->load));
-+	if (jnode_is_releasable(node)) {
-+		/* this jref() is paired with the jput() below */
-+		jref(node);
-+		/* there is no need to synchronize against
-+		 * jnode_extent_write() here, because pages seen by
-+		 * jnode_extent_write() are !releasable(). */
-+		page_clear_jnode(page, node);
-+		spin_unlock(&(node->load));
-+		spin_unlock_jnode(node);
-+
-+		/* we are under memory pressure so release jnode also. */
-+		jput(node);
-+
-+		return 1;
-+	} else {
-+		spin_unlock(&(node->load));
-+		spin_unlock_jnode(node);
-+		assert("nikita-3020", reiser4_schedulable());
-+		return 0;
-+	}
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/block_alloc.c linux-2.6.20/fs/reiser4/block_alloc.c
---- linux-2.6.20.orig/fs/reiser4/block_alloc.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/block_alloc.c 2007-05-06 14:50:43.682970725 +0400
-@@ -0,0 +1,1137 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+
-+/* THE REISER4 DISK SPACE RESERVATION SCHEME. */
-+
-+/* We need to be able to reserve enough disk space to ensure that an atomic
-+ operation will have enough disk space to flush (see flush.c and
-+ http://namesys.com/v4/v4.html) and commit it once it is started.
-+
-+ In our design a call for reserving disk space may fail but not an actual
-+ block allocation.
-+
-+ All free blocks, already allocated blocks, and all kinds of reserved blocks
-+ are counted in different per-fs block counters.
-+
-+ A reiser4 super block's set of block counters currently is:
-+
-+ free -- free blocks,
-+ used -- already allocated blocks,
-+
-+ grabbed -- initially reserved for performing an fs operation, those blocks
-+ are taken from free blocks, then grabbed disk space leaks from grabbed
-+ blocks counter to other counters like "fake allocated", "flush
-+ reserved", "used", the rest of not used grabbed space is returned to
-+ free space at the end of fs operation;
-+
-+ fake allocated -- counts all nodes without real disk block numbers assigned,
-+ we have separate accounting for formatted and unformatted
-+ nodes (for easier debugging);
-+
-+ flush reserved -- disk space needed for flushing and committing an atom.
-+ Each dirty already allocated block could be written as a
-+ part of atom's overwrite set or as a part of atom's
-+                     relocate set. In both cases one additional block is needed,
-+ it is used as a wandered block if we do overwrite or as a
-+ new location for a relocated block.
-+
-+ In addition, blocks in some states are counted on per-thread and per-atom
-+ basis. A reiser4 context has a counter of blocks grabbed by this transaction
-+ and the sb's grabbed blocks counter is a sum of grabbed blocks counter values
-+ of each reiser4 context. Each reiser4 atom has a counter of "flush reserved"
-+ blocks, which are reserved for flush processing and atom commit. */
-+
-+/* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate
-+ number of blocks to grab for most expensive case of balancing when the leaf
-+ node we insert new item to gets split and new leaf node is allocated.
-+
-+ So, we need to grab blocks for
-+
-+ 1) one block for possible dirtying the node we insert an item to. That block
-+ would be used for node relocation at flush time or for allocating of a
-+ wandered one, it depends what will be a result (what set, relocate or
-+ overwrite the node gets assigned to) of the node processing by the flush
-+ algorithm.
-+
-+ 2) one block for either allocating a new node, or dirtying of right or left
-+ clean neighbor, only one case may happen.
-+
-+ VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying of left neighbor, right neighbor, current
-+ node, and creation of new node. have I forgotten something? email me.
-+
-+ These grabbed blocks are counted in both reiser4 context "grabbed blocks"
-+ counter and in the fs-wide one (both ctx->grabbed_blocks and
-+ sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is
-+ decremented by 2.
-+
-+ Suppose both two blocks were spent for dirtying of an already allocated clean
-+ node (one block went from "grabbed" to "flush reserved") and for new block
-+ allocating (one block went from "grabbed" to "fake allocated formatted").
-+
-+ Inserting of a child pointer to the parent node caused parent node to be
-+ split, the balancing code takes care about this grabbing necessary space
-+ immediately by calling reiser4_grab with BA_RESERVED flag set which means
-+ "can use the 5% reserved disk space".
-+
-+ At this moment insertion completes and grabbed blocks (if they were not used)
-+ should be returned to the free space counter.
-+
-+ However the atom life-cycle is not completed. The atom had one "flush
-+ reserved" block added by our insertion and the new fake allocated node is
-+ counted as a "fake allocated formatted" one. The atom has to be fully
-+ processed by flush before commit. Suppose that the flush moved the first,
-+ already allocated node to the atom's overwrite list, the new fake allocated
-+ node, obviously, went into the atom relocate set. The reiser4 flush
-+ allocates the new node using one unit from "fake allocated formatted"
-+ counter, the log writer uses one from "flush reserved" for wandered block
-+ allocation.
-+
-+ And, it is not the end. When the wandered block is deallocated after the
-+ atom gets fully played (see wander.c for term description), the disk space
-+ occupied for it is returned to free blocks. */
-+
-+/* BLOCK NUMBERS */
-+
-+/* Any reiser4 node has a block number assigned to it. We use these numbers for
-+ indexing in hash tables, so if a block has not yet been assigned a location
-+ on disk we need to give it a temporary fake block number.
-+
-+ Current implementation of reiser4 uses 64-bit integers for block numbers. We
-+ use highest bit in 64-bit block number to distinguish fake and real block
-+   numbers. So, only 63 bits may be used for addressing real device
-+ blocks. That "fake" block numbers space is divided into subspaces of fake
-+ block numbers for data blocks and for shadow (working) bitmap blocks.
-+
-+ Fake block numbers for data blocks are generated by a cyclic counter, which
-+ gets incremented after each real block allocation. We assume that it is
-+ impossible to overload this counter during one transaction life. */
-+
-+/* Initialize a blocknr hint. */
-+void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint)
-+{
-+	memset(hint, 0, sizeof(*hint));	/* start from an all-zero hint */
-+}
-+
-+/* Release any resources of a blocknr hint. */
-+void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG)
-+{
-+	/* Intentionally empty: the current blocknr_hint has no resources to free. */
-+}
-+
-+/* see above for explanation of fake block number. */
-+/* Audited by: green(2002.06.11) */
-+int reiser4_blocknr_is_fake(const reiser4_block_nr * da)
-+{
-+	/* Compare against zero rather than returning the masked value: the
-+	   result type is a (possibly 32bit) int while reiser4_block_nr is at
-+	   least 64 bits long, so the high bit (the only bit that can survive
-+	   the masking) would be stripped off by the conversion */
-+	return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) != 0;
-+}
-+
-+/* Static functions for <reiser4 super block>/<reiser4 context> block counters
-+ arithmetic. Mostly, they are isolated to not to code same assertions in
-+ several places. */
-+static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count)
-+{
-+	BUG_ON(ctx->grabbed_blocks < count);	/* subtracting would underflow */
-+	assert("zam-527", ctx->grabbed_blocks >= count);	/* same condition as the BUG_ON above */
-+	ctx->grabbed_blocks -= count;	/* per-context count of grabbed blocks */
-+}
-+
-+static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count)
-+{
-+	ctx->grabbed_blocks += count;	/* per-context count of grabbed blocks; no overflow check */
-+}
-+
-+static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+	assert("zam-525", sbinfo->blocks_grabbed >= count);	/* must not underflow */
-+	sbinfo->blocks_grabbed -= count;	/* super block's grabbed-blocks counter */
-+}
-+
-+/* Decrease the counter of block reserved for flush in super block. */
-+static void
-+sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("vpf-291", sbinfo->blocks_flush_reserved >= count);
-+ sbinfo->blocks_flush_reserved -= count;
-+}
-+
-+static void
-+sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
-+ reiser4_ba_flags_t flags)
-+{
-+ if (flags & BA_FORMATTED) {
-+ assert("zam-806", sbinfo->blocks_fake_allocated >= count);
-+ sbinfo->blocks_fake_allocated -= count;
-+ } else {
-+ assert("zam-528",
-+ sbinfo->blocks_fake_allocated_unformatted >= count);
-+ sbinfo->blocks_fake_allocated_unformatted -= count;
-+ }
-+}
-+
-+static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("zam-530",
-+ sbinfo->blocks_used >= count + sbinfo->min_blocks_used);
-+ sbinfo->blocks_used -= count;
-+}
-+
-+static void
-+sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("edward-501", sbinfo->blocks_clustered >= count);
-+ sbinfo->blocks_clustered -= count;
-+}
-+
-+/* Increase the counter of block reserved for flush in atom. */
-+static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
-+{
-+ assert("zam-772", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ atom->flush_reserved += count;
-+}
-+
-+/* Decrease the counter of block reserved for flush in atom. */
-+static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
-+{
-+ assert("zam-774", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-2790", atom->flush_reserved >= count);
-+ atom->flush_reserved -= count;
-+}
-+
-+/* super block has 6 counters: free, used, grabbed, fake allocated
-+ (formatted and unformatted) and flush reserved. Their sum must be
-+ number of blocks on a device. This function checks this */
-+int reiser4_check_block_counters(const struct super_block *super)
-+{
-+ __u64 sum;
-+
-+ sum = reiser4_grabbed_blocks(super) + reiser4_free_blocks(super) +
-+ reiser4_data_blocks(super) + reiser4_fake_allocated(super) +
-+ reiser4_fake_allocated_unformatted(super) + reiser4_flush_reserved(super) +
-+ reiser4_clustered_blocks(super);
-+ if (reiser4_block_count(super) != sum) {
-+ printk("super block counters: "
-+ "used %llu, free %llu, "
-+ "grabbed %llu, fake allocated (formatetd %llu, unformatted %llu), "
-+ "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n",
-+ (unsigned long long)reiser4_data_blocks(super),
-+ (unsigned long long)reiser4_free_blocks(super),
-+ (unsigned long long)reiser4_grabbed_blocks(super),
-+ (unsigned long long)reiser4_fake_allocated(super),
-+ (unsigned long long)
-+ reiser4_fake_allocated_unformatted(super),
-+ (unsigned long long)reiser4_flush_reserved(super),
-+ (unsigned long long)reiser4_clustered_blocks(super),
-+ (unsigned long long)sum,
-+ (unsigned long long)reiser4_block_count(super));
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* Adjust "working" free blocks counter for number of blocks we are going to
-+ allocate. Record number of grabbed blocks in fs-wide and per-thread
-+ counters. This function should be called before bitmap scanning or
-+ allocating fake block numbers
-+
-+ @super -- pointer to reiser4 super block;
-+ @count -- number of blocks we reserve;
-+
-+ @return -- 0 if success, -ENOSPC, if all
-+ free blocks are preserved or already allocated.
-+*/
-+
-+static int
-+reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags)
-+{
-+ __u64 free_blocks;
-+ int ret = 0, use_reserved = flags & BA_RESERVED;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("vs-1276", ctx == get_current_context());
-+
-+ /* Do not grab anything on ro-mounted fs. */
-+ if (rofs_super(ctx->super)) {
-+ ctx->grab_enabled = 0;
-+ return 0;
-+ }
-+
-+ sbinfo = get_super_private(ctx->super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ free_blocks = sbinfo->blocks_free;
-+
-+ if ((use_reserved && free_blocks < count) ||
-+ (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) {
-+ ret = RETERR(-ENOSPC);
-+ goto unlock_and_ret;
-+ }
-+
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ sbinfo->blocks_grabbed += count;
-+ sbinfo->blocks_free -= count;
-+
-+#if REISER4_DEBUG
-+ if (ctx->grabbed_initially == 0)
-+ ctx->grabbed_initially = count;
-+#endif
-+
-+ assert("nikita-2986", reiser4_check_block_counters(ctx->super));
-+
-+ /* disable grab space in current context */
-+ ctx->grab_enabled = 0;
-+
-+ unlock_and_ret:
-+ spin_unlock_reiser4_super(sbinfo);
-+
-+ return ret;
-+}
-+
-+int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags)
-+{
-+ int ret;
-+ reiser4_context *ctx;
-+
-+ assert("nikita-2964", ergo(flags & BA_CAN_COMMIT,
-+ lock_stack_isclean(get_current_lock_stack
-+ ())));
-+ ctx = get_current_context();
-+ if (!(flags & BA_FORCE) && !is_grab_enabled(ctx)) {
-+ return 0;
-+ }
-+
-+ ret = reiser4_grab(ctx, count, flags);
-+ if (ret == -ENOSPC) {
-+
-+ /* Trying to commit the all transactions if BA_CAN_COMMIT flag present */
-+ if (flags & BA_CAN_COMMIT) {
-+ txnmgr_force_commit_all(ctx->super, 0);
-+ ctx->grab_enabled = 1;
-+ ret = reiser4_grab(ctx, count, flags);
-+ }
-+ }
-+ /*
-+ * allocation from reserved pool cannot fail. This is severe error.
-+ */
-+ assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0));
-+ return ret;
-+}
-+
-+/*
-+ * SPACE RESERVED FOR UNLINK/TRUNCATE
-+ *
-+ * Unlink and truncate require space in transaction (to update stat data, at
-+ * least). But we don't want rm(1) to fail with "No space on device" error.
-+ *
-+ * Solution is to reserve 5% of disk space for truncates and
-+ * unlinks. Specifically, normal space grabbing requests don't grab space from
-+ * reserved area. Only requests with BA_RESERVED bit in flags are allowed to
-+ * drain it. Per super block delete mutex is used to allow only one
-+ * thread at a time to grab from reserved area.
-+ *
-+ * Grabbing from reserved area should always be performed with BA_CAN_COMMIT
-+ * flag.
-+ *
-+ */
-+
-+int reiser4_grab_reserved(struct super_block *super,
-+ __u64 count, reiser4_ba_flags_t flags)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+ assert("nikita-3175", flags & BA_CAN_COMMIT);
-+
-+ /* Check the delete mutex already taken by us, we assume that
-+ * reading of machine word is atomic. */
-+ if (sbinfo->delete_mutex_owner == current) {
-+ if (reiser4_grab_space
-+ (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) {
-+ warning("zam-1003",
-+ "nested call of grab_reserved fails count=(%llu)",
-+ (unsigned long long)count);
-+ reiser4_release_reserved(super);
-+ return RETERR(-ENOSPC);
-+ }
-+ return 0;
-+ }
-+
-+ if (reiser4_grab_space(count, flags)) {
-+ mutex_lock(&sbinfo->delete_mutex);
-+ assert("nikita-2929", sbinfo->delete_mutex_owner == NULL);
-+ sbinfo->delete_mutex_owner = current;
-+
-+ if (reiser4_grab_space(count, flags | BA_RESERVED)) {
-+ warning("zam-833",
-+ "reserved space is not enough (%llu)",
-+ (unsigned long long)count);
-+ reiser4_release_reserved(super);
-+ return RETERR(-ENOSPC);
-+ }
-+ }
-+ return 0;
-+}
-+
-+void reiser4_release_reserved(struct super_block *super)
-+{
-+ reiser4_super_info_data *info;
-+
-+ info = get_super_private(super);
-+ if (info->delete_mutex_owner == current) {
-+ info->delete_mutex_owner = NULL;
-+ mutex_unlock(&info->delete_mutex);
-+ }
-+}
-+
-+static reiser4_super_info_data *grabbed2fake_allocated_head(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ /* return sbinfo locked */
-+ return sbinfo;
-+}
-+
-+/* is called after @count fake block numbers are allocated and pointer to
-+ those blocks are inserted into tree. */
-+static void grabbed2fake_allocated_formatted(void)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = grabbed2fake_allocated_head(1);
-+ sbinfo->blocks_fake_allocated++;
-+
-+ assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/**
-+ * grabbed2fake_allocated_unformatted
-+ * @count:
-+ *
-+ */
-+static void grabbed2fake_allocated_unformatted(int count)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = grabbed2fake_allocated_head(count);
-+ sbinfo->blocks_fake_allocated_unformatted += count;
-+
-+ assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void grabbed2cluster_reserved(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_clustered += count;
-+
-+ assert("edward-504", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void cluster_reserved2grabbed(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_cluster_reserved(sbinfo, count);
-+ sbinfo->blocks_grabbed += count;
-+
-+ assert("edward-505", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+ add_to_ctx_grabbed(ctx, count);
-+}
-+
-+void cluster_reserved2free(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ cluster_reserved2grabbed(count);
-+ grabbed2free(ctx, sbinfo, count);
-+}
-+
-+static DEFINE_SPINLOCK(fake_lock);
-+static reiser4_block_nr fake_gen = 0;
-+
-+/**
-+ * assign_fake_blocknr
-+ * @blocknr:
-+ * @count:
-+ *
-+ * Obtain a fake block number for new node which will be used to refer to
-+ * this newly allocated node until real allocation is done.
-+ */
-+static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count)
-+{
-+ spin_lock(&fake_lock);
-+ *blocknr = fake_gen;
-+ fake_gen += count;
-+ spin_unlock(&fake_lock);
-+
-+ BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK);
-+ /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/
-+ *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE;
-+ assert("zam-394", zlook(current_tree, blocknr) == NULL);
-+}
-+
-+int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr)
-+{
-+ assign_fake_blocknr(blocknr, 1);
-+ grabbed2fake_allocated_formatted();
-+ return 0;
-+}
-+
-+/**
-+ * fake_blocknrs_unformatted
-+ * @count: number of fake numbers to get
-+ *
-+ * Allocates @count fake block numbers which will be assigned to jnodes
-+ */
-+reiser4_block_nr fake_blocknr_unformatted(int count)
-+{
-+ reiser4_block_nr blocknr;
-+
-+ assign_fake_blocknr(&blocknr, count);
-+ grabbed2fake_allocated_unformatted(count);
-+
-+ return blocknr;
-+}
-+
-+/* adjust sb block counters, if real (on-disk) block allocation immediately
-+ follows grabbing of free disk space. */
-+static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
-+ __u64 count)
-+{
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_used += count;
-+
-+ assert("nikita-2679", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* adjust sb block counters when @count unallocated blocks get mapped to disk */
-+static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count,
-+ reiser4_ba_flags_t flags)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_fake_allocated(sbinfo, count, flags);
-+ sbinfo->blocks_used += count;
-+
-+ assert("nikita-2680",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+static void flush_reserved2used(txn_atom * atom, __u64 count)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("zam-787", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ sbinfo = get_current_super_private();
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_flush_reserved(sbinfo, count);
-+ sbinfo->blocks_used += count;
-+
-+ assert("zam-789",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* update the per fs blocknr hint default value. */
-+void
-+update_blocknr_hint_default(const struct super_block *s,
-+ const reiser4_block_nr * block)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("nikita-3342", !reiser4_blocknr_is_fake(block));
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ if (*block < sbinfo->block_count) {
-+ sbinfo->blocknr_hint_default = *block;
-+ } else {
-+ warning("zam-676",
-+ "block number %llu is too large to be used in a blocknr hint\n",
-+ (unsigned long long)*block);
-+ dump_stack();
-+ DEBUGON(1);
-+ }
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* get current value of the default blocknr hint. */
-+void get_blocknr_hint_default(reiser4_block_nr * result)
-+{
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ *result = sbinfo->blocknr_hint_default;
-+ assert("zam-677", *result < sbinfo->block_count);
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* Allocate "real" disk blocks by calling a proper space allocation plugin
-+ * method. Blocks are allocated in one contiguous disk region. The plugin
-+ * independent part accounts blocks by subtracting allocated amount from grabbed
-+ * or fake block counter and add the same amount to the counter of allocated
-+ * blocks.
-+ *
-+ * @hint -- a reiser4 blocknr hint object which contains further block
-+ * allocation hints and parameters (search start, a stage of block
-+ * which will be mapped to disk, etc.),
-+ * @blk -- an out parameter for the beginning of the allocated region,
-+ * @len -- in/out parameter, it should contain the maximum number of allocated
-+ * blocks, after block allocation completes, it contains the length of
-+ * allocated disk region.
-+ * @flags -- see reiser4_ba_flags_t description.
-+ *
-+ * @return -- 0 if success, error code otherwise.
-+ */
-+int
-+reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk,
-+ reiser4_block_nr * len, reiser4_ba_flags_t flags)
-+{
-+ __u64 needed = *len;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+ int ret;
-+
-+ assert("zam-986", hint != NULL);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ /* For write-optimized data we use default search start value, which is
-+ * close to last write location. */
-+ if (flags & BA_USE_DEFAULT_SEARCH_START) {
-+ get_blocknr_hint_default(&hint->blk);
-+ }
-+
-+ /* VITALY: allocator should grab this for internal/tx-lists/similar only. */
-+/* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)? */
-+ if (hint->block_stage == BLOCK_NOT_COUNTED) {
-+ ret = reiser4_grab_space_force(*len, flags);
-+ if (ret != 0)
-+ return ret;
-+ }
-+
-+ ret =
-+ sa_alloc_blocks(reiser4_get_space_allocator(ctx->super),
-+ hint, (int)needed, blk, len);
-+
-+ if (!ret) {
-+ assert("zam-680", *blk < reiser4_block_count(ctx->super));
-+ assert("zam-681",
-+ *blk + *len <= reiser4_block_count(ctx->super));
-+
-+ if (flags & BA_PERMANENT) {
-+ /* we assume that current atom exists at this moment */
-+ txn_atom *atom = get_current_atom_locked();
-+ atom->nr_blocks_allocated += *len;
-+ spin_unlock_atom(atom);
-+ }
-+
-+ switch (hint->block_stage) {
-+ case BLOCK_NOT_COUNTED:
-+ case BLOCK_GRABBED:
-+ grabbed2used(ctx, sbinfo, *len);
-+ break;
-+ case BLOCK_UNALLOCATED:
-+ fake_allocated2used(sbinfo, *len, flags);
-+ break;
-+ case BLOCK_FLUSH_RESERVED:
-+ {
-+ txn_atom *atom = get_current_atom_locked();
-+ flush_reserved2used(atom, *len);
-+ spin_unlock_atom(atom);
-+ }
-+ break;
-+ default:
-+ impossible("zam-531", "wrong block stage");
-+ }
-+ } else {
-+ assert("zam-821",
-+ ergo(hint->max_dist == 0
-+ && !hint->backward, ret != -ENOSPC));
-+ if (hint->block_stage == BLOCK_NOT_COUNTED)
-+ grabbed2free(ctx, sbinfo, needed);
-+ }
-+
-+ return ret;
-+}
-+
-+/* used -> fake_allocated -> grabbed -> free */
-+
-+/* adjust sb block counters when @count unallocated blocks get unmapped from
-+ disk */
-+static void
-+used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
-+ int formatted)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ if (formatted)
-+ sbinfo->blocks_fake_allocated += count;
-+ else
-+ sbinfo->blocks_fake_allocated_unformatted += count;
-+
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2681",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+static void
-+used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom,
-+ __u64 count, reiser4_ba_flags_t flags UNUSED_ARG)
-+{
-+ assert("nikita-2791", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ add_to_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_flush_reserved += count;
-+ /*add_to_sb_flush_reserved(sbinfo, count); */
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2681",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* disk space, virtually used by fake block numbers is counted as "grabbed" again. */
-+static void
-+fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
-+ __u64 count, reiser4_ba_flags_t flags)
-+{
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ assert("nikita-2682", reiser4_check_block_counters(ctx->super));
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED);
-+
-+ assert("nikita-2683", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ fake_allocated2grabbed(ctx, sbinfo, count, flags);
-+ grabbed2free(ctx, sbinfo, count);
-+}
-+
-+void grabbed2free_mark(__u64 mark)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ assert("nikita-3007", (__s64) mark >= 0);
-+ assert("nikita-3006", ctx->grabbed_blocks >= mark);
-+ grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark);
-+}
-+
-+/**
-+ * grabbed2free - adjust grabbed and free block counters
-+ * @ctx: context to update grabbed block counter of
-+ * @sbinfo: super block to update grabbed and free block counters of
-+ * @count: number of blocks to adjust counters by
-+ *
-+ * Decreases context's and per filesystem's counters of grabbed
-+ * blocks. Increases per filesystem's counter of free blocks.
-+ */
-+void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
-+ __u64 count)
-+{
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_free += count;
-+ assert("nikita-2684", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("vs-1095", atom);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ add_to_atom_flush_reserved_nolock(atom, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_flush_reserved += count;
-+ sub_from_sb_grabbed(sbinfo, count);
-+
-+ assert("vpf-292", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void grabbed2flush_reserved(__u64 count)
-+{
-+ txn_atom *atom = get_current_atom_locked();
-+
-+ grabbed2flush_reserved_nolock(atom, count);
-+
-+ spin_unlock_atom(atom);
-+}
-+
-+void flush_reserved2grabbed(txn_atom * atom, __u64 count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("nikita-2788", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_flush_reserved(sbinfo, count);
-+
-+ assert("vpf-292", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/**
-+ * all_grabbed2free - releases all blocks grabbed in context
-+ *
-+ * Decreases context's and super block's grabbed block counters by number of
-+ * blocks grabbed by current context and increases super block's free block
-+ * counter correspondingly.
-+ */
-+void all_grabbed2free(void)
-+{
-+ reiser4_context *ctx = get_current_context();
-+
-+ grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks);
-+}
-+
-+/* adjust sb block counters if real (on-disk) blocks do not become unallocated
-+ after freeing, @count blocks become "grabbed". */
-+static void
-+used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
-+ __u64 count)
-+{
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2685", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* this used to be done through used2grabbed and grabbed2free*/
-+static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_free += count;
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2685",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* check "allocated" state of given block range */
-+static void
-+reiser4_check_blocks(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len, int desired)
-+{
-+ sa_check_blocks(start, len, desired);
-+}
-+
-+/* check "allocated" state of given block */
-+void reiser4_check_block(const reiser4_block_nr * block, int desired)
-+{
-+ const reiser4_block_nr one = 1;
-+
-+ reiser4_check_blocks(block, &one, desired);
-+}
-+
-+#endif
-+
-+/* Blocks deallocation function may do an actual deallocation through space
-+ plugin allocation or store deleted block numbers in atom's delete_set data
-+ structure depend on @defer parameter. */
-+
-+/* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks which
-+ will be deleted from WORKING bitmap. They might be just unmapped from disk, or
-+ freed but disk space is still grabbed by current thread, or these blocks must
-+ not be counted in any reiser4 sb block counters, see block_stage_t comment */
-+
-+/* BA_FORMATTED bit is only used when BA_DEFER in not present: it is used to
-+ distinguish blocks allocated for unformatted and formatted nodes */
-+
-+int
-+reiser4_dealloc_blocks(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len,
-+ block_stage_t target_stage, reiser4_ba_flags_t flags)
-+{
-+ txn_atom *atom = NULL;
-+ int ret;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ if (REISER4_DEBUG) {
-+ assert("zam-431", *len != 0);
-+ assert("zam-432", *start != 0);
-+ assert("zam-558", !reiser4_blocknr_is_fake(start));
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ assert("zam-562", *start < sbinfo->block_count);
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+
-+ if (flags & BA_DEFER) {
-+ blocknr_set_entry *bsep = NULL;
-+
-+ /* storing deleted block numbers in a blocknr set
-+ datastructure for further actual deletion */
-+ do {
-+ atom = get_current_atom_locked();
-+ assert("zam-430", atom != NULL);
-+
-+ ret =
-+ blocknr_set_add_extent(atom, &atom->delete_set,
-+ &bsep, start, len);
-+
-+ if (ret == -ENOMEM)
-+ return ret;
-+
-+ /* This loop might spin at most two times */
-+ } while (ret == -E_REPEAT);
-+
-+ assert("zam-477", ret == 0);
-+ assert("zam-433", atom != NULL);
-+
-+ spin_unlock_atom(atom);
-+
-+ } else {
-+ assert("zam-425", get_current_super_private() != NULL);
-+ sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super),
-+ *start, *len);
-+
-+ if (flags & BA_PERMANENT) {
-+ /* These blocks were counted as allocated, we have to revert it
-+ * back if allocation is discarded. */
-+ txn_atom *atom = get_current_atom_locked();
-+ atom->nr_blocks_allocated -= *len;
-+ spin_unlock_atom(atom);
-+ }
-+
-+ switch (target_stage) {
-+ case BLOCK_NOT_COUNTED:
-+ assert("vs-960", flags & BA_FORMATTED);
-+ /* VITALY: This is what was grabbed for internal/tx-lists/similar only */
-+ used2free(sbinfo, *len);
-+ break;
-+
-+ case BLOCK_GRABBED:
-+ used2grabbed(ctx, sbinfo, *len);
-+ break;
-+
-+ case BLOCK_UNALLOCATED:
-+ used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED);
-+ break;
-+
-+ case BLOCK_FLUSH_RESERVED:{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ used2flush_reserved(sbinfo, atom, *len,
-+ flags & BA_FORMATTED);
-+ spin_unlock_atom(atom);
-+ break;
-+ }
-+ default:
-+ impossible("zam-532", "wrong block stage");
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* wrappers for block allocator plugin methods */
-+int reiser4_pre_commit_hook(void)
-+{
-+ assert("zam-502", get_current_super_private() != NULL);
-+ sa_pre_commit_hook();
-+ return 0;
-+}
-+
-+/* an actor which applies delete set to block allocator data */
-+static int
-+apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b, void *data UNUSED_ARG)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ __u64 len = 1;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT);
-+ assert("zam-552", sbinfo != NULL);
-+
-+ if (b != NULL)
-+ len = *b;
-+
-+ if (REISER4_DEBUG) {
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ assert("zam-554", *a < reiser4_block_count(ctx->super));
-+ assert("zam-555", *a + len <= reiser4_block_count(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+
-+ sa_dealloc_blocks(&sbinfo->space_allocator, *a, len);
-+ /* adjust sb block counters */
-+ used2free(sbinfo, len);
-+ return 0;
-+}
-+
-+void reiser4_post_commit_hook(void)
-+{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
-+ spin_unlock_atom(atom);
-+
-+ /* do the block deallocation which was deferred
-+ until commit is done */
-+ blocknr_set_iterator(atom, &atom->delete_set, apply_dset, NULL, 1);
-+
-+ assert("zam-504", get_current_super_private() != NULL);
-+ sa_post_commit_hook();
-+}
-+
-+void reiser4_post_write_back_hook(void)
-+{
-+ assert("zam-504", get_current_super_private() != NULL);
-+
-+ sa_post_commit_hook();
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/block_alloc.h linux-2.6.20/fs/reiser4/block_alloc.h
---- linux-2.6.20.orig/fs/reiser4/block_alloc.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/block_alloc.h 2007-05-06 14:50:43.682970725 +0400
-@@ -0,0 +1,175 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__FS_REISER4_BLOCK_ALLOC_H__)
-+#define __FS_REISER4_BLOCK_ALLOC_H__
-+
-+#include "dformat.h"
-+#include "forward.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h>
-+
-+/* Mask when is applied to given block number shows is that block number is a fake one */
-+#define REISER4_FAKE_BLOCKNR_BIT_MASK 0x8000000000000000ULL
-+/* Mask which isolates a type of object this fake block number was assigned to */
-+#define REISER4_BLOCKNR_STATUS_BIT_MASK 0xC000000000000000ULL
-+
-+/*result after applying the REISER4_BLOCKNR_STATUS_BIT_MASK should be compared
-+ against these two values to understand is the object unallocated or bitmap
-+ shadow object (WORKING BITMAP block, look at the plugin/space/bitmap.c) */
-+#define REISER4_UNALLOCATED_STATUS_VALUE 0xC000000000000000ULL
-+#define REISER4_BITMAP_BLOCKS_STATUS_VALUE 0x8000000000000000ULL
-+
-+/* specification how block allocation was counted in sb block counters */
-+typedef enum {
-+ BLOCK_NOT_COUNTED = 0, /* reiser4 has no info about this block yet */
-+ BLOCK_GRABBED = 1, /* free space grabbed for further allocation
-+ of this block */
-+ BLOCK_FLUSH_RESERVED = 2, /* block is reserved for flush needs. */
-+ BLOCK_UNALLOCATED = 3, /* block is used for existing in-memory object
-+ ( unallocated formatted or unformatted
-+ node) */
-+ BLOCK_ALLOCATED = 4 /* block is mapped to disk, real on-disk block
-+ number assigned */
-+} block_stage_t;
-+
-+/* a hint for block allocator */
-+struct reiser4_blocknr_hint {
-+ /* FIXME: I think we want to add a longterm lock on the bitmap block here. This
-+ is to prevent jnode_flush() calls from interleaving allocations on the same
-+ bitmap, once a hint is established. */
-+
-+ /* search start hint */
-+ reiser4_block_nr blk;
-+ /* if not zero, it is a region size we search for free blocks in */
-+ reiser4_block_nr max_dist;
-+ /* level for allocation, may be useful have branch-level and higher
-+ write-optimized. */
-+ tree_level level;
-+ /* block allocator assumes that blocks, which will be mapped to disk,
-+ are in this specified block_stage */
-+ block_stage_t block_stage;
-+ /* If direction = 1 allocate blocks in backward direction from the end
-+ * of disk to the beginning of disk. */
-+ unsigned int backward:1;
-+
-+};
-+
-+/* These flags control block allocation/deallocation behavior */
-+enum reiser4_ba_flags {
-+ /* do allocatations from reserved (5%) area */
-+ BA_RESERVED = (1 << 0),
-+
-+ /* block allocator can do commit trying to recover free space */
-+ BA_CAN_COMMIT = (1 << 1),
-+
-+ /* if operation will be applied to formatted block */
-+ BA_FORMATTED = (1 << 2),
-+
-+ /* defer actual block freeing until transaction commit */
-+ BA_DEFER = (1 << 3),
-+
-+ /* allocate blocks for permanent fs objects (formatted or unformatted), not
-+ wandered of log blocks */
-+ BA_PERMANENT = (1 << 4),
-+
-+ /* grab space even it was disabled */
-+ BA_FORCE = (1 << 5),
-+
-+ /* use default start value for free blocks search. */
-+ BA_USE_DEFAULT_SEARCH_START = (1 << 6)
-+};
-+
-+typedef enum reiser4_ba_flags reiser4_ba_flags_t;
-+
-+extern void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint);
-+extern void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint);
-+extern void update_blocknr_hint_default(const struct super_block *,
-+ const reiser4_block_nr *);
-+extern void get_blocknr_hint_default(reiser4_block_nr *);
-+
-+extern reiser4_block_nr reiser4_fs_reserved_space(struct super_block *super);
-+
-+int assign_fake_blocknr_formatted(reiser4_block_nr *);
-+reiser4_block_nr fake_blocknr_unformatted(int);
-+
-+/* free -> grabbed -> fake_allocated -> used */
-+
-+int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags);
-+void all_grabbed2free(void);
-+void grabbed2free(reiser4_context *, reiser4_super_info_data *, __u64 count);
-+void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags);
-+void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count);
-+void grabbed2flush_reserved(__u64 count);
-+int reiser4_alloc_blocks(reiser4_blocknr_hint * hint,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len, reiser4_ba_flags_t flags);
-+int reiser4_dealloc_blocks(const reiser4_block_nr *,
-+ const reiser4_block_nr *,
-+ block_stage_t, reiser4_ba_flags_t flags);
-+
-+/* Allocate a single block: reiser4_alloc_blocks() with a length of one. */
-+static inline int reiser4_alloc_block(reiser4_blocknr_hint * hint,
-+			reiser4_block_nr * start, reiser4_ba_flags_t flags)
-+{
-+	reiser4_block_nr nr_wanted = 1;
-+	return reiser4_alloc_blocks(hint, start, &nr_wanted, flags);
-+}
-+
-+/* Deallocate a single block: reiser4_dealloc_blocks() with a length of one. */
-+static inline int reiser4_dealloc_block(const reiser4_block_nr * block,
-+			block_stage_t stage, reiser4_ba_flags_t flags)
-+{
-+	const reiser4_block_nr nr_freed = 1;
-+	return reiser4_dealloc_blocks(block, &nr_freed, stage, flags);
-+}
-+
-+#define reiser4_grab_space_force(count, flags) /* reiser4_grab_space() with BA_FORCE always set; args parenthesized so any expression is safe */ \
-+	reiser4_grab_space((count), (flags) | BA_FORCE)
-+
-+extern void grabbed2free_mark(__u64 mark);
-+extern int reiser4_grab_reserved(struct super_block *,
-+ __u64, reiser4_ba_flags_t);
-+extern void reiser4_release_reserved(struct super_block *super);
-+
-+/* grabbed -> fake_allocated */
-+
-+/* fake_allocated -> used */
-+
-+/* used -> fake_allocated -> grabbed -> free */
-+
-+extern void flush_reserved2grabbed(txn_atom * atom, __u64 count);
-+
-+extern int reiser4_blocknr_is_fake(const reiser4_block_nr * da);
-+
-+extern void grabbed2cluster_reserved(int count);
-+extern void cluster_reserved2grabbed(int count);
-+extern void cluster_reserved2free(int count);
-+
-+extern int reiser4_check_block_counters(const struct super_block *);
-+
-+#if REISER4_DEBUG
-+
-+extern void reiser4_check_block(const reiser4_block_nr *, int);
-+
-+#else
-+
-+# define reiser4_check_block(beg, val) noop
-+
-+#endif
-+
-+extern int reiser4_pre_commit_hook(void);
-+extern void reiser4_post_commit_hook(void);
-+extern void reiser4_post_write_back_hook(void);
-+
-+#endif /* __FS_REISER4_BLOCK_ALLOC_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/blocknrset.c linux-2.6.20/fs/reiser4/blocknrset.c
---- linux-2.6.20.orig/fs/reiser4/blocknrset.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/blocknrset.c 2007-05-06 14:50:43.686971975 +0400
-@@ -0,0 +1,368 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This file contains code for various block number sets used by the atom to
-+ track the deleted set and wandered block mappings. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "txnmgr.h"
-+#include "context.h"
-+
-+#include <linux/slab.h>
-+
-+/* The data structure for storing unordered block number sets is a list of
-+ elements, each of which contains an array of block numbers and/or an array
-+ of block number pairs. Such an element, called blocknr_set_entry, stores
-+ single block numbers from the beginning and pairs (extents) from the end of
-+ its ->entries[] array. The ->nr_singles and ->nr_pairs fields count the
-+ stored single blocks and pairs.
-+
-+ +----------------- blocknr_set_entry->entries -----------------+
-+ |block1|block2| ... <free space> ... |pair3|pair2|pair1|
-+ +------------------------------------------------------------+
-+
-+ When current blocknr_set_entry is full, allocate a new one. */
-+
-+/* Usage examples: blocknr sets are used in reiser4 for storing atom's delete
-+ * set (single blocks and block extents), in that case blocknr pair represent an
-+ * extent; atom's wandered map is also stored as a blocknr set, blocknr pairs
-+ * there represent a (real block) -> (wandered block) mapping. */
-+
-+/* Protection: blocknr sets belong to reiser4 atom, and
-+ * their modifications are performed with the atom lock held */
-+
-+typedef struct blocknr_pair blocknr_pair;
-+
-+/* The total size of a blocknr_set_entry in bytes; bse_avail() checks it with cassert(). */
-+#define BLOCKNR_SET_ENTRY_SIZE 128
-+
-+/* Number of reiser4_block_nr slots that fit once the two counters and the list link are subtracted from the total size. */
-+#define BLOCKNR_SET_ENTRIES_NUMBER \
-+ ((BLOCKNR_SET_ENTRY_SIZE - \
-+ 2 * sizeof (unsigned) - \
-+ sizeof(struct list_head)) / \
-+ sizeof(reiser4_block_nr))
-+
-+/* An entry of the blocknr_set: a fixed-size array shared by singles and pairs */
-+struct blocknr_set_entry {
-+ unsigned nr_singles; /* single block numbers, stored from the start of entries[] */
-+ unsigned nr_pairs; /* pairs, stored from the end of entries[], two slots each */
-+ struct list_head link; /* linkage of this entry in the blocknr set list */
-+ reiser4_block_nr entries[BLOCKNR_SET_ENTRIES_NUMBER];
-+};
-+
-+/* A pair of blocks as recorded in the blocknr_set_entry entries[] array. */
-+struct blocknr_pair {
-+ reiser4_block_nr a; /* first value passed to bse_put_pair(); extent start for extents */
-+ reiser4_block_nr b; /* second value passed to bse_put_pair(); extent length for extents */
-+};
-+
-+/* Return the number of free reiser4_block_nr slots in @bse (total slots minus used ones). */
-+/* Audited by: green(2002.06.11) */
-+static unsigned bse_avail(blocknr_set_entry * bse)
-+{
-+ unsigned used = bse->nr_singles + 2 * bse->nr_pairs; /* a pair takes two slots */
-+
-+ assert("jmacd-5088", BLOCKNR_SET_ENTRIES_NUMBER >= used);
-+ cassert(sizeof(blocknr_set_entry) == BLOCKNR_SET_ENTRY_SIZE);
-+
-+ return BLOCKNR_SET_ENTRIES_NUMBER - used;
-+}
-+
-+/* Reset @bse to an empty entry whose list link points to itself. */
-+static void bse_init(blocknr_set_entry *bse)
-+{
-+	INIT_LIST_HEAD(&bse->link);
-+	/* no singles and no pairs recorded yet */
-+	bse->nr_pairs = bse->nr_singles = 0;
-+}
-+
-+/* Allocate a blocknr_set_entry with kmalloc(), using the mask returned by
-+   reiser4_ctx_gfp_mask_get(), and initialize it. Returns NULL on failure. */
-+/* Audited by: green(2002.06.11) */
-+static blocknr_set_entry *bse_alloc(void)
-+{
-+	blocknr_set_entry *entry;
-+
-+	entry = kmalloc(sizeof(*entry), reiser4_ctx_gfp_mask_get());
-+	/* only a successfully allocated entry gets initialized */
-+	if (entry != NULL)
-+		bse_init(entry);
-+
-+	return entry;
-+}
-+
-+/* Release @bse with kfree(); counterpart of bse_alloc(). Does not unlink it from any list. */
-+/* Audited by: green(2002.06.11) */
-+static void bse_free(blocknr_set_entry * bse)
-+{
-+ kfree(bse);
-+}
-+
-+/* Store *@block in the next free single slot of @bse; needs one free slot. */
-+/* Audited by: green(2002.06.11) */
-+static void
-+bse_put_single(blocknr_set_entry * bse, const reiser4_block_nr * block)
-+{
-+	assert("jmacd-5099", bse_avail(bse) >= 1);
-+	bse->entries[bse->nr_singles] = *block;
-+	bse->nr_singles++;
-+}
-+
-+/* Return pair number @pno of @bse; pair 0 occupies the last two slots of entries[], later pairs lie before it. */
-+/* Audited by: green(2002.06.11) */
-+static inline blocknr_pair *bse_get_pair(blocknr_set_entry * bse, unsigned pno)
-+{
-+ assert("green-1", BLOCKNR_SET_ENTRIES_NUMBER >= 2 * (pno + 1));
-+
-+ return (blocknr_pair *) (bse->entries + BLOCKNR_SET_ENTRIES_NUMBER -
-+ 2 * (pno + 1));
-+}
-+
-+/* Append the pair (*@a, *@b) to @bse; needs two free slots and non-NULL @a and @b. */
-+/* Audited by: green(2002.06.11) */
-+static void
-+bse_put_pair(blocknr_set_entry * bse, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b)
-+{
-+ blocknr_pair *pair;
-+
-+ assert("jmacd-5100", bse_avail(bse) >= 2 && a != NULL && b != NULL);
-+
-+ pair = bse_get_pair(bse, bse->nr_pairs++); /* slot of the new pair; count is bumped here */
-+
-+ pair->a = *a;
-+ pair->b = *b;
-+}
-+
-+/* Add either a block or pair of blocks to the block number set. The first
-+ blocknr (@a) must be non-NULL. If @b is NULL a single blocknr is added, if
-+ @b is non-NULL a pair is added. The block number set belongs to atom, and
-+ the call is made with the atom lock held. There may not be enough space in
-+ the current blocknr_set_entry. If new_bsep points to a non-NULL
-+ blocknr_set_entry then it will be added to the blocknr_set and new_bsep
-+ will be set to NULL. If new_bsep contains NULL then the atom lock will be
-+ released and a new bse will be allocated in new_bsep. E_REPEAT will be
-+ returned with the atom unlocked for the operation to be tried again;
-+ -ENOMEM (atom also unlocked) if that allocation fails. 0 is returned on
-+ success; a non-NULL new_bsep not used by the call is freed automatically. */
-+static int blocknr_set_add(txn_atom *atom, struct list_head *bset,
-+ blocknr_set_entry **new_bsep, const reiser4_block_nr *a,
-+ const reiser4_block_nr *b)
-+{
-+ blocknr_set_entry *bse;
-+ unsigned entries_needed;
-+
-+ assert("jmacd-5101", a != NULL);
-+
-+ entries_needed = (b == NULL) ? 1 : 2; /* a single takes one slot, a pair two */
-+ if (list_empty(bset) ||
-+ bse_avail(list_entry(bset->next, blocknr_set_entry, link)) < entries_needed) {
-+ /* See if a bse was previously allocated. */
-+ if (*new_bsep == NULL) {
-+ spin_unlock_atom(atom); /* lock is dropped before bse_alloc() is called */
-+ *new_bsep = bse_alloc();
-+ return (*new_bsep != NULL) ? -E_REPEAT :
-+ RETERR(-ENOMEM);
-+ }
-+
-+ /* Put it on the head of the list. */
-+ list_add(&((*new_bsep)->link), bset);
-+
-+ *new_bsep = NULL; /* the set owns the entry now */
-+ }
-+
-+ /* Add the single or pair. */
-+ bse = list_entry(bset->next, blocknr_set_entry, link); /* only the head entry is ever filled */
-+ if (b == NULL) {
-+ bse_put_single(bse, a);
-+ } else {
-+ bse_put_pair(bse, a, b);
-+ }
-+
-+ /* If new_bsep is non-NULL then there was an allocation race, free this copy. */
-+ if (*new_bsep != NULL) {
-+ bse_free(*new_bsep);
-+ *new_bsep = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Add the extent (*@start, *@len) to the block set; *@len must be > 0. If the
-+ length is 1, it is stored as a single block. Returns as blocknr_set_add(). */
-+/* Audited by: green(2002.06.11) */
-+/* Auditor note: Entire call chain cannot hold any spinlocks, because
-+ kmalloc might schedule. The only exception is atom spinlock, which is
-+ properly freed. */
-+int
-+blocknr_set_add_extent(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * start,
-+ const reiser4_block_nr * len)
-+{
-+ assert("jmacd-5102", start != NULL && len != NULL && *len > 0);
-+ return blocknr_set_add(atom, bset, new_bsep, start,
-+ *len == 1 ? NULL : len); /* NULL second block selects the single-block form */
-+}
-+
-+/* Add the block pair (*@a, *@b) to the block set. It always adds a pair: an
-+ * assertion checks that both arguments are non-NULL. Returns as blocknr_set_add(). */
-+/* Audited by: green(2002.06.11) */
-+/* Auditor note: Entire call chain cannot hold any spinlocks, because
-+ kmalloc might schedule. The only exception is atom spinlock, which is
-+ properly freed. */
-+int
-+blocknr_set_add_pair(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b)
-+{
-+ assert("jmacd-5103", a != NULL && b != NULL);
-+ return blocknr_set_add(atom, bset, new_bsep, a, b);
-+}
-+
-+/* Initialize a blocknr_set: @bset becomes an empty list head. */
-+void blocknr_set_init(struct list_head *bset)
-+{
-+ INIT_LIST_HEAD(bset);
-+}
-+
-+/* Unlink and free every entry of @bset; the list is empty afterwards. */
-+void blocknr_set_destroy(struct list_head *bset)
-+{
-+	while (!list_empty(bset)) {
-+		blocknr_set_entry *head =
-+			list_entry(bset->next, blocknr_set_entry, link);
-+
-+		list_del_init(&head->link);
-+		bse_free(head);
-+	}
-+}
-+
-+/* Merge blocknr_set entries out of @from into @into; @from is left empty. */
-+/* Audited by: green(2002.06.11) */
-+/* Auditor comments: This merge does not know if merged sets contain
-+ blocks pairs (As for wandered sets) or extents, so it cannot really merge
-+ overlapping ranges if there is some. So I believe it may lead to
-+ some blocks being presented several times in one blocknr_set. To help
-+ debugging such problems it might help to check for duplicate entries on
-+ actual processing of this set. Testing this kind of stuff right here is
-+ also complicated by the fact that these sets are not sorted and going
-+ through whole set on each element addition is going to be CPU-heavy task */
-+void blocknr_set_merge(struct list_head * from, struct list_head * into)
-+{
-+ blocknr_set_entry *bse_into = NULL; /* partial entry to put back at the head of @into */
-+
-+ /* If @from is empty, no work to perform. */
-+ if (list_empty(from))
-+ return;
-+ /* If @into is not empty, try merging partial-entries. */
-+ if (!list_empty(into)) {
-+
-+ /* Neither set is empty, pop the front two members and try to combine them. */
-+ blocknr_set_entry *bse_from;
-+ unsigned into_avail;
-+
-+ bse_into = list_entry(into->next, blocknr_set_entry, link);
-+ list_del_init(&bse_into->link);
-+ bse_from = list_entry(from->next, blocknr_set_entry, link);
-+ list_del_init(&bse_from->link);
-+
-+ /* Combine singles. */
-+ for (into_avail = bse_avail(bse_into);
-+ into_avail != 0 && bse_from->nr_singles != 0;
-+ into_avail -= 1) {
-+ bse_put_single(bse_into,
-+ &bse_from->entries[--bse_from->
-+ nr_singles]);
-+ }
-+
-+ /* Combine pairs. */
-+ for (; into_avail > 1 && bse_from->nr_pairs != 0;
-+ into_avail -= 2) {
-+ blocknr_pair *pair =
-+ bse_get_pair(bse_from, --bse_from->nr_pairs);
-+ bse_put_pair(bse_into, &pair->a, &pair->b);
-+ }
-+
-+ /* If bse_from is empty, delete it now. */
-+ if (bse_avail(bse_from) == BLOCKNR_SET_ENTRIES_NUMBER) {
-+ bse_free(bse_from);
-+ } else {
-+ /* Otherwise, bse_into is full or nearly full (e.g.,
-+ it could have one slot avail and bse_from has one
-+ pair left). Push it back onto the list. bse_from
-+ becomes bse_into, which will be the new partial. */
-+ list_add(&bse_into->link, into);
-+ bse_into = bse_from;
-+ }
-+ }
-+
-+ /* Splice lists together. */
-+ list_splice_init(from, into->prev); /* remaining @from entries go after the current tail of @into */
-+
-+ /* Add the partial entry back to the head of the list. */
-+ if (bse_into != NULL)
-+ list_add(&bse_into->link, into);
-+}
-+
-+/* Iterate over all blocknr set elements, calling @actor on each; if @delete is set, entries are freed as they are visited. */
-+int blocknr_set_iterator(txn_atom *atom, struct list_head *bset,
-+ blocknr_set_actor_f actor, void *data, int delete)
-+{
-+
-+ blocknr_set_entry *entry;
-+
-+ assert("zam-429", atom != NULL);
-+ assert("zam-430", atom_is_protected(atom));
-+ assert("zam-431", bset != 0);
-+ assert("zam-432", actor != NULL);
-+
-+ entry = list_entry(bset->next, blocknr_set_entry, link);
-+ while (bset != &entry->link) {
-+ blocknr_set_entry *tmp = list_entry(entry->link.next, blocknr_set_entry, link); /* successor is read first: @entry may be freed below */
-+ unsigned int i;
-+ int ret;
-+
-+ for (i = 0; i < entry->nr_singles; i++) {
-+ ret = actor(atom, &entry->entries[i], NULL, data); /* singles: second block is NULL */
-+
-+ /* We can't break a loop if delete flag is set. */
-+ if (ret != 0 && !delete) /* with @delete set, a non-zero actor result is dropped */
-+ return ret;
-+ }
-+
-+ for (i = 0; i < entry->nr_pairs; i++) {
-+ struct blocknr_pair *ab;
-+
-+ ab = bse_get_pair(entry, i);
-+
-+ ret = actor(atom, &ab->a, &ab->b, data);
-+
-+ if (ret != 0 && !delete)
-+ return ret;
-+ }
-+
-+ if (delete) {
-+ list_del(&entry->link);
-+ bse_free(entry);
-+ }
-+
-+ entry = tmp;
-+ }
-+
-+ return 0; /* also reached when @delete is set and an actor call failed */
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/carry.c linux-2.6.20/fs/reiser4/carry.c
---- linux-2.6.20.orig/fs/reiser4/carry.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/carry.c 2007-05-06 14:50:43.686971975 +0400
-@@ -0,0 +1,1391 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Functions to "carry" tree modification(s) upward. */
-+/* Tree is modified one level at a time. As we modify a level we accumulate a
-+ set of changes that need to be propagated to the next level. We manage
-+ node locking such that any searches that collide with carrying are
-+ restarted, from the root if necessary.
-+
-+ Insertion of a new item may result in items being moved among nodes and
-+ this requires the delimiting key to be updated at the least common parent
-+ of the nodes modified to preserve search tree invariants. Also, insertion
-+ may require allocation of a new node. A pointer to the new node has to be
-+ inserted into some node on the parent level, etc.
-+
-+ Tree carrying is meant to be analogous to arithmetic carrying.
-+
-+ A carry operation is always associated with some node (&carry_node).
-+
-+ Carry process starts with some initial set of operations to be performed
-+ and an initial set of already locked nodes. Operations are performed one
-+ by one. Performing each single operation has following possible effects:
-+
-+ - content of carry node associated with operation is modified
-+ - new carry nodes are locked and involved into carry process on this level
-+ - new carry operations are posted to the next level
-+
-+ After all carry operations on this level are done, process is repeated for
-+ the accumulated sequence on carry operations for the next level. This
-+ starts by trying to lock (in left to right order) all carry nodes
-+ associated with carry operations on the parent level. After this, we decide
-+ whether more nodes are required on the left of already locked set. If so,
-+ all locks taken on the parent level are released, new carry nodes are
-+ added, and locking process repeats.
-+
-+ It may happen that balancing process fails owing to unrecoverable error on
-+ some of upper levels of a tree (possible causes are io error, failure to
-+ allocate new node, etc.). In this case we should unmount the filesystem,
-+ rebooting if it is the root, and possibly advise the use of fsck.
-+
-+ USAGE:
-+
-+ int some_tree_operation( znode *node, ... )
-+ {
-+ // Allocate on a stack pool of carry objects: operations and nodes.
-+ // Most carry processes will only take objects from here, without
-+ // dynamic allocation.
-+
-+I feel uneasy about this pool. It adds to code complexity, I understand why it exists, but.... -Hans
-+
-+ carry_pool pool;
-+ carry_level lowest_level;
-+ carry_op *op;
-+
-+ init_carry_pool( &pool );
-+ init_carry_level( &lowest_level, &pool );
-+
-+ // operation may be one of:
-+ // COP_INSERT --- insert new item into node
-+ // COP_CUT --- remove part of or whole node
-+ // COP_PASTE --- increase size of item
-+ // COP_DELETE --- delete pointer from parent node
-+ // COP_UPDATE --- update delimiting key in least
-+ // common ancestor of two
-+
-+ op = reiser4_post_carry( &lowest_level, operation, node, 0 );
-+ if( IS_ERR( op ) || ( op == NULL ) ) {
-+ handle error
-+ } else {
-+ // fill in remaining fields in @op, according to carry.h:carry_op
-+ result = carry( &lowest_level, NULL );
-+ }
-+ done_carry_pool( &pool );
-+ }
-+
-+ When you are implementing node plugin method that participates in carry
-+ (shifting, insertion, deletion, etc.), do the following:
-+
-+ int foo_node_method( znode *node, ..., carry_level *todo )
-+ {
-+ carry_op *op;
-+
-+ ....
-+
-+ // note, that last argument to reiser4_post_carry() is non-null
-+ // here, because @op is to be applied to the parent of @node, rather
-+ // than to the @node itself as in the previous case.
-+
-+ op = node_post_carry( todo, operation, node, 1 );
-+ // fill in remaining fields in @op, according to carry.h:carry_op
-+
-+ ....
-+
-+ }
-+
-+ BATCHING:
-+
-+ One of the main advantages of level-by-level balancing implemented here is
-+ ability to batch updates on a parent level and to perform them more
-+ efficiently as a result.
-+
-+ Description To Be Done (TBD).
-+
-+ DIFFICULTIES AND SUBTLE POINTS:
-+
-+ 1. complex plumbing is required, because:
-+
-+ a. effective allocation through pools is needed
-+
-+ b. target of operation is not exactly known when operation is
-+ posted. This is worked around through bitfields in &carry_node and
-+ logic in lock_carry_node()
-+
-+ c. of interaction with locking code: node should be added into sibling
-+ list when pointer to it is inserted into its parent, which is some time
-+ after node was created. Between these moments, node is somewhat in
-+ suspended state and is only registered in the carry lists
-+
-+ 2. whole balancing logic is implemented here, in particular, insertion
-+ logic is coded in make_space().
-+
-+ 3. special cases like insertion (reiser4_add_tree_root()) or deletion
-+ (reiser4_kill_tree_root()) of tree root and morphing of paste into insert
-+ (insert_paste()) have to be handled.
-+
-+ 4. there is non-trivial interdependency between allocation of new nodes
-+ and almost everything else. This is mainly due to the (1.c) above. I shall
-+ write about this later.
-+
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/item/extent.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_mod.h"
-+#include "tree_walk.h"
-+#include "block_alloc.h"
-+#include "pool.h"
-+#include "tree.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h>
-+
-+/* level locking/unlocking */
-+static int lock_carry_level(carry_level * level);
-+static void unlock_carry_level(carry_level * level, int failure);
-+static void done_carry_level(carry_level * level);
-+static void unlock_carry_node(carry_level * level, carry_node * node, int fail);
-+
-+int lock_carry_node(carry_level * level, carry_node * node);
-+int lock_carry_node_tail(carry_node * node);
-+
-+/* carry processing proper */
-+static int carry_on_level(carry_level * doing, carry_level * todo);
-+
-+static carry_op *add_op(carry_level * level, pool_ordering order,
-+ carry_op * reference);
-+
-+/* handlers for carry operations. */
-+
-+static void fatal_carry_error(carry_level * doing, int ecode);
-+static int add_new_root(carry_level * level, carry_node * node, znode * fake);
-+
-+static void print_level(const char *prefix, carry_level * level);
-+
-+#if REISER4_DEBUG
-+typedef enum {
-+ CARRY_TODO,
-+ CARRY_DOING
-+} carry_queue_state;
-+static int carry_level_invariant(carry_level * level, carry_queue_state state);
-+#endif
-+
-+/* main entry point for tree balancing.
-+
-+ Tree carry performs operations from @doing and while doing so accumulates
-+ information about operations to be performed on the next level ("carried"
-+ to the parent level). Carried operations are performed, causing possibly
-+ more operations to be carried upward etc. reiser4_carry() takes care of
-+ locking and pinning znodes while operating on them.
-+
-+ For usage, see comment at the top of fs/reiser4/carry.c
-+
-+*/
-+int reiser4_carry(carry_level * doing /* set of carry operations to be
-+ * performed */ ,
-+ carry_level * done /* must be NULL on entry (BUG_ON
-+ * below); reused internally as the
-+ * queue of the previous level */)
-+{
-+ int result = 0;
-+ /* queue of new requests */
-+ carry_level *todo;
-+ ON_DEBUG(STORE_COUNTERS);
-+
-+ assert("nikita-888", doing != NULL);
-+ BUG_ON(done != NULL);
-+
-+ todo = doing + 1; /* uses the carry_level slot right after @doing */
-+ init_carry_level(todo, doing->pool);
-+
-+ /* queue of requests performed on the previous level */
-+ done = todo + 1; /* and the slot after that one */
-+ init_carry_level(done, doing->pool);
-+
-+ /* iterate until there is nothing more to do */
-+ while (result == 0 && doing->ops_num > 0) {
-+ carry_level *tmp;
-+
-+ /* at this point @done is locked. */
-+ /* repeat lock/do/unlock while
-+
-+ (1) lock_carry_level() fails due to deadlock avoidance, or
-+
-+ (2) carry_on_level() decides that more nodes have to
-+ be involved.
-+
-+ (3) some unexpected error occurred while balancing on the
-+ upper levels. In this case all changes are rolled back.
-+
-+ */
-+ while (1) {
-+ result = lock_carry_level(doing);
-+ if (result == 0) {
-+ /* perform operations from @doing and
-+ accumulate new requests in @todo */
-+ result = carry_on_level(doing, todo);
-+ if (result == 0)
-+ break;
-+ else if (result != -E_REPEAT ||
-+ !doing->restartable) {
-+ warning("nikita-1043",
-+ "Fatal error during carry: %i",
-+ result);
-+ print_level("done", done);
-+ print_level("doing", doing);
-+ print_level("todo", todo);
-+ /* do some rough stuff like aborting
-+ all pending transcrashes and thus
-+ pushing tree back to the consistent
-+ state. Alternatively, just panic.
-+ */
-+ fatal_carry_error(doing, result);
-+ return result;
-+ }
-+ } else if (result != -E_REPEAT) {
-+ fatal_carry_error(doing, result);
-+ return result;
-+ }
-+ unlock_carry_level(doing, 1); /* -E_REPEAT: unlock with the failure flag set and retry */
-+ }
-+ /* at this point @done can be safely unlocked */
-+ done_carry_level(done);
-+
-+ /* cyclically shift queues */
-+ tmp = done;
-+ done = doing;
-+ doing = todo;
-+ todo = tmp;
-+ init_carry_level(todo, doing->pool);
-+
-+ /* give other threads chance to run */
-+ reiser4_preempt_point();
-+ }
-+ done_carry_level(done);
-+
-+ /* all counters, but x_refs should remain the same. x_refs can change
-+ owing to transaction manager */
-+ ON_DEBUG(CHECK_COUNTERS);
-+ return result;
-+}
-+
-+/* perform carry operations on given level.
-+
-+ Optimizations proposed by pooh:
-+
-+ (1) don't lock all nodes from queue at the same time. Lock nodes lazily as
-+ required;
-+
-+ (2) unlock node if there are no more operations to be performed upon it and
-+ node didn't add any operation to @todo. This can be implemented by
-+ attaching to each node two counters: counter of operations working on this
-+ node and counter of operations carried upward from this node.
-+ Returns 0, or the first non-zero result of a handler or ->prepare_removal().
-+*/
-+static int carry_on_level(carry_level * doing /* queue of carry operations to
-+					       * do on this level */ ,
-+			  carry_level * todo /* queue where new carry
-+					      * operations to be performed on
-+					      * the parent level are
-+					      * accumulated during @doing
-+					      * processing. */ )
-+{
-+	int result;
-+	int (*f) (carry_op *, carry_level *, carry_level *);
-+	carry_op *op;
-+	carry_op *tmp_op;
-+
-+	assert("nikita-1034", doing != NULL);
-+	assert("nikita-1035", todo != NULL);
-+
-+	/* @doing->nodes are locked. */
-+
-+	/* This function can be split into two phases: analysis and modification.
-+
-+	   Analysis calculates precisely what items should be moved between
-+	   nodes. This information is gathered in some structures attached to
-+	   each carry_node in a @doing queue. Analysis also determines whether
-+	   new nodes are to be allocated etc.
-+
-+	   After analysis is completed, actual modification is performed. Here
-+	   we can take advantage of "batch modification": if there are several
-+	   operations acting on the same node, modifications can be performed
-+	   more efficiently when batched together.
-+
-+	   Above is an optimization left for the future.
-+	 */
-+	/* Important, but delayed optimization: it's possible to batch
-+	   operations together and perform them more efficiently as a
-+	   result. For example, deletion of several neighboring items from a
-+	   node can be converted to a single ->cut() operation.
-+
-+	   Before processing queue, it should be scanned and "mergeable"
-+	   operations merged.
-+	 */
-+	result = 0;
-+	for_all_ops(doing, op, tmp_op) {
-+		carry_opcode opcode;
-+
-+		assert("nikita-1041", op != NULL);
-+		opcode = op->op;	/* read once; checked and dispatched on below */
-+		assert("nikita-1042", opcode < COP_LAST_OP);
-+		f = op_dispatch_table[opcode].handler;
-+		result = f(op, doing, todo);
-+		/* locking can fail with -E_REPEAT. Any different error is fatal
-+		   and will be handled by fatal_carry_error() sledgehammer.
-+		 */
-+		if (result != 0)
-+			break;
-+	}
-+	if (result == 0) {
-+		carry_plugin_info info;
-+		carry_node *scan;
-+		carry_node *tmp_scan;
-+
-+		info.doing = doing;
-+		info.todo = todo;
-+
-+		assert("nikita-3002",
-+		       carry_level_invariant(doing, CARRY_DOING));
-+		/* every node left empty by the operations gets ->prepare_removal() */
-+		for_all_nodes(doing, scan, tmp_scan) {
-+			znode *node;
-+
-+			node = reiser4_carry_real(scan);
-+			assert("nikita-2547", node != NULL);
-+			if (node_is_empty(node)) {
-+				result =
-+				    node_plugin_by_node(node)->
-+				    prepare_removal(node, &info);
-+				if (result != 0)
-+					break;
-+			}
-+		}
-+	}
-+	return result;
-+}
-+
-+/* post carry operation
-+
-+ This is main function used by external carry clients: node layout plugins
-+ and tree operations to create new carry operation to be performed on some
-+ level.
-+
-+ New operation will be included in the @level queue. To actually perform it,
-+ call reiser4_carry( level, ... ). @node must be write locked by the caller
-+ (asserted below). Carry manages all its locks by itself, don't worry about this.
-+
-+ This function adds operation and node at the end of the queue. It is up to
-+ caller to guarantee proper ordering of node queue.
-+ Returns the new operation, or an error pointer if either allocation failed.
-+*/
-+carry_op * reiser4_post_carry(carry_level * level /* queue where new operation
-+ * is to be posted at */ ,
-+ carry_opcode op /* opcode of operation */ ,
-+ znode * node /* node on which this operation
-+ * will operate */ ,
-+ int apply_to_parent_p /* whether operation will
-+ * operate directly on @node
-+ * or on its parent. */)
-+{
-+ carry_op *result;
-+ carry_node *child;
-+
-+ assert("nikita-1046", level != NULL);
-+ assert("nikita-1788", znode_is_write_locked(node));
-+
-+ result = add_op(level, POOLO_LAST, NULL);
-+ if (IS_ERR(result))
-+ return result;
-+ child = reiser4_add_carry(level, POOLO_LAST, NULL);
-+ if (IS_ERR(child)) {
-+ reiser4_pool_free(&level->pool->op_pool, &result->header); /* give the op back; NOTE(review): level->ops_num is not decremented here - confirm */
-+ return (carry_op *) child; /* the error pointer is passed on as a carry_op pointer */
-+ }
-+ result->node = child;
-+ result->op = op;
-+ child->parent = apply_to_parent_p;
-+ if (ZF_ISSET(node, JNODE_ORPHAN))
-+ child->left_before = 1;
-+ child->node = node;
-+ return result;
-+}
-+
-+/* Zero @level, make its node and op queues empty lists and bind it to @pool. */
-+void init_carry_level(carry_level * level /* level to initialize */ ,
-+		      carry_pool * pool /* pool @level will allocate objects
-+					 * from */ )
-+{
-+	assert("nikita-1045", level != NULL);
-+	assert("nikita-967", pool != NULL);
-+
-+	/* clear everything first, then set up the non-zero members */
-+	memset(level, 0, sizeof(*level));
-+	INIT_LIST_HEAD(&level->ops);
-+	INIT_LIST_HEAD(&level->nodes);
-+	level->pool = pool;
-+}
-+
-+/* Allocate @size bytes for a carry pool and initialize its op and node pools. Returns the pool, or ERR_PTR(RETERR(-ENOMEM)) if kmalloc() fails. */
-+carry_pool *init_carry_pool(int size)
-+{
-+ carry_pool *pool;
-+
-+ assert("", size >= sizeof(carry_pool) + 3 * sizeof(carry_level)); /* NOTE(review): empty assertion label; int @size is compared against a size_t sum */
-+ pool = kmalloc(size, reiser4_ctx_gfp_mask_get());
-+ if (pool == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ reiser4_init_pool(&pool->op_pool, sizeof(carry_op), CARRIES_POOL_SIZE,
-+ (char *)pool->op);
-+ reiser4_init_pool(&pool->node_pool, sizeof(carry_node),
-+ NODES_LOCKED_POOL_SIZE, (char *)pool->node);
-+ return pool;
-+}
-+
-+/* finish with queue pools: shut down the op and node pools, then kfree() @pool itself */
-+void done_carry_pool(carry_pool * pool /* pool to destroy */ )
-+{
-+ reiser4_done_pool(&pool->op_pool);
-+ reiser4_done_pool(&pool->node_pool);
-+ kfree(pool);
-+}
-+
-+/* add new carry node to the @level.
-+
-+ Returns pointer to the new carry node allocated from pool. It's up to
-+ callers to maintain proper order in the @level. Assumption is that if carry
-+ nodes on one level are already sorted and modifications are performed from
-+ left to right, carry nodes added on the parent level will be ordered
-+ automatically. To control ordering use @order and @reference parameters.
-+
-+*/
-+carry_node *reiser4_add_carry_skip(carry_level * level /* &carry_level to add
-+ * node to */ ,
-+ pool_ordering order /* where to insert:
-+ * at the beginning of
-+ * @level,
-+ * before @reference,
-+ * after @reference,
-+ * at the end of @level
-+ */ ,
-+ carry_node * reference/* reference node for
-+ * insertion */)
-+{
-+ ON_DEBUG(carry_node * orig_ref = reference);
-+
-+ if (order == POOLO_BEFORE) {
-+ reference = find_left_carry(reference, level);
-+ if (reference == NULL)
-+ reference = list_entry(level->nodes.next, carry_node,
-+ header.level_linkage); /* nothing found on the left: use the first node of @level */
-+ else
-+ reference = list_entry(reference->header.level_linkage.next,
-+ carry_node, header.level_linkage); /* step back right of the node that was found */
-+ } else if (order == POOLO_AFTER) {
-+ reference = find_right_carry(reference, level);
-+ if (reference == NULL)
-+ reference = list_entry(level->nodes.prev, carry_node,
-+ header.level_linkage); /* nothing found on the right: use the last node of @level */
-+ else
-+ reference = list_entry(reference->header.level_linkage.prev,
-+ carry_node, header.level_linkage); /* step back left of the node that was found */
-+ }
-+ assert("nikita-2209",
-+ ergo(orig_ref != NULL,
-+ reiser4_carry_real(reference) ==
-+ reiser4_carry_real(orig_ref)));
-+ return reiser4_add_carry(level, order, reference);
-+}
-+
-+carry_node *reiser4_add_carry(carry_level * level /* &carry_level to add node
-+ * to */ ,
-+ pool_ordering order /* where to insert: at the
-+ * beginning of @level, before
-+ * @reference, after @reference,
-+ * at the end of @level */ ,
-+ carry_node * reference /* reference node for
-+ * insertion */ )
-+{
-+ carry_node *result; /* whatever reiser4_add_obj() hands back, returned unchanged */
-+
-+ result =
-+ (carry_node *) reiser4_add_obj(&level->pool->node_pool,
-+ &level->nodes,
-+ order, &reference->header); /* NOTE(review): callers in this file pass @reference == NULL; presumably header is the first member - confirm */
-+ if (!IS_ERR(result) && (result != NULL))
-+ ++level->nodes_num; /* count only nodes that were actually added */
-+ return result;
-+}
-+
-+/* add new carry operation to the @level.
-+
-+ Returns pointer to the new carry operation allocated from pool. It's up to
-+ callers to maintain proper order in the @level. To control ordering use
-+ @order and @reference parameters.
-+
-+*/
-+static carry_op *add_op(carry_level * level /* &carry_level to add operation to */ ,
-+ pool_ordering order /* where to insert: at the beginning of
-+ * @level, before @reference, after
-+ * @reference, at the end of @level */ ,
-+ carry_op *
-+ reference /* reference operation for insertion */ )
-+{
-+ carry_op *result;
-+
-+ result =
-+ (carry_op *) reiser4_add_obj(&level->pool->op_pool, &level->ops,
-+ order, &reference->header); /* NOTE(review): reiser4_post_carry() passes @reference == NULL; presumably header is the first member - confirm */
-+ if (!IS_ERR(result) && (result != NULL))
-+ ++level->ops_num; /* count only operations that were actually added */
-+ return result;
-+}
-+
-+/* Return the carry node to the right of which @node was created.
-+
-+ Each node is created on the right of some existing node (or it is a new
-+ root, which is a special case not handled here).
-+
-+ @node is a new node created on some level, but not yet inserted into its
-+ parent; it has the corresponding bit (JNODE_ORPHAN) set in zstate.
-+
-+*/
-+static carry_node *find_begetting_brother(carry_node * node /* node to start search
-+ * from */ ,
-+ carry_level * kin UNUSED_ARG /* level to
-+ * scan */ )
-+{
-+ carry_node *scan;
-+
-+ assert("nikita-1614", node != NULL);
-+ assert("nikita-1615", kin != NULL);
-+ assert("nikita-1616", LOCK_CNT_GTZ(rw_locked_tree));
-+ assert("nikita-1619", ergo(reiser4_carry_real(node) != NULL,
-+ ZF_ISSET(reiser4_carry_real(node),
-+ JNODE_ORPHAN)));
-+ for (scan = node;; /* walk towards the head of the level list, starting at @node */
-+ scan = list_entry(scan->header.level_linkage.prev, carry_node,
-+ header.level_linkage)) {
-+ assert("nikita-1617", &kin->nodes != &scan->header.level_linkage); /* the list head must not be reached */
-+ if ((scan->node != node->node) &&
-+ !ZF_ISSET(scan->node, JNODE_ORPHAN)) { /* first carry node for a different znode that is not an orphan */
-+ assert("nikita-1618", reiser4_carry_real(scan) != NULL);
-+ break;
-+ }
-+ }
-+ return scan;
-+}
-+
-+/* Order @n1 and @n2 by their position in @level: LESS_THAN when @n2 is reached
-+   by walking right from @n1, GREATER_THAN when the end of @level comes first. */
-+static cmp_t
-+carry_node_cmp(carry_level * level, carry_node * n1, carry_node * n2)
-+{
-+	carry_node *walk;
-+
-+	assert("nikita-2199", n1 != NULL);
-+	assert("nikita-2200", n2 != NULL);
-+	if (n1 == n2)
-+		return EQUAL_TO;
-+	for (walk = carry_node_next(n1); !carry_node_end(level, walk);
-+	     walk = carry_node_next(walk)) {
-+		if (walk == n2)
-+			return LESS_THAN;
-+	}
-+	return GREATER_THAN;
-+}
-+
-+/* Return the carry node of @level whose locked znode is @node, or NULL if none. */
-+carry_node *find_carry_node(carry_level * level, const znode * node)
-+{
-+	carry_node *cur;
-+	carry_node *next;	/* second cursor required by for_all_nodes() */
-+
-+	assert("nikita-2202", level != NULL);
-+	assert("nikita-2203", node != NULL);
-+
-+	for_all_nodes(level, cur, next)
-+		if (node == reiser4_carry_real(cur))
-+			return cur;
-+	return NULL;
-+}
-+
-+znode *reiser4_carry_real(const carry_node * node)
-+{
-+	assert("nikita-3061", node != NULL);
-+	/* the "real" znode is the one @node's lock handle points at */
-+	return (node->lock_handle).node;
-+}
-+
-+carry_node *insert_carry_node(carry_level * doing, carry_level * todo,
-+ const znode * node)
-+{
-+ carry_node *base;
-+ carry_node *scan;
-+ carry_node *tmp_scan;
-+ carry_node *proj;
-+ /* @base: carry node of @node in @doing; @todo entries are ordered against it */
-+ base = find_carry_node(doing, node);
-+ assert("nikita-2204", base != NULL);
-+ /* stop at the first @todo node whose @doing counterpart is not left of @base */
-+ for_all_nodes(todo, scan, tmp_scan) {
-+ proj = find_carry_node(doing, scan->node);
-+ assert("nikita-2205", proj != NULL);
-+ if (carry_node_cmp(doing, proj, base) != LESS_THAN)
-+ break;
-+ }
-+ return scan; /* NOTE(review): without a break this is whatever for_all_nodes() leaves in scan, presumably the list-head pseudo-entry; confirm */
-+}
-+
-+/* Add to @todo a new carry node, just before the place insert_carry_node() picks for @node. */
-+static carry_node *add_carry_atplace(carry_level * doing, carry_level * todo,
-+				     znode * node)
-+{
-+	carry_node *anchor;
-+
-+	assert("nikita-2994", doing != NULL);
-+	assert("nikita-2995", todo != NULL);
-+	assert("nikita-2996", node != NULL);
-+
-+	anchor = insert_carry_node(doing, todo, node);
-+	assert("nikita-2997", anchor != NULL);
-+	return reiser4_add_carry(todo, POOLO_BEFORE, anchor);
-+}
-+
-+/* like reiser4_post_carry(), but designed to be called from node plugin methods.
-+ Differs from reiser4_post_carry() in that it finds proper place to insert node in
-+ the queue. Returns the new op, or an error pointer (IS_ERR()) on allocation failure. */
-+carry_op *node_post_carry(carry_plugin_info * info /* carry parameters
-+ * passed down to node
-+ * plugin */ ,
-+ carry_opcode op /* opcode of operation */ ,
-+ znode * node /* node on which this
-+ * operation will operate */ ,
-+ int apply_to_parent_p /* whether operation will
-+ * operate directly on @node
-+ * or on its parent. */ )
-+{
-+ carry_op *result;
-+ carry_node *child;
-+
-+ assert("nikita-2207", info != NULL);
-+ assert("nikita-2208", info->todo != NULL);
-+ /* without a @doing level there is nothing to order against: post as is */
-+ if (info->doing == NULL)
-+ return reiser4_post_carry(info->todo, op, node,
-+ apply_to_parent_p);
-+ /* the op goes to the tail of @todo; its carry node is placed in queue order */
-+ result = add_op(info->todo, POOLO_LAST, NULL);
-+ if (IS_ERR(result))
-+ return result;
-+ child = add_carry_atplace(info->doing, info->todo, node);
-+ if (IS_ERR(child)) {
-+ reiser4_pool_free(&info->todo->pool->op_pool, &result->header);
-+ return (carry_op *) child; /* NOTE(review): ->ops_num bumped by add_op() is not undone here; confirm it does not matter */
-+ }
-+ result->node = child;
-+ result->op = op;
-+ child->parent = apply_to_parent_p;
-+ if (ZF_ISSET(node, JNODE_ORPHAN))
-+ child->left_before = 1; /* lock_carry_node() then starts from the begetting brother */
-+ child->node = node;
-+ return result;
-+}
-+
-+/* Lock every carry node of @level, in queue order, stopping at the first
-+   failure. Returns 0 or the non-zero result of lock_carry_node(). */
-+static int lock_carry_level(carry_level * level /* level to lock */ )
-+{
-+	carry_node *cur;
-+	carry_node *next;
-+	int err;
-+
-+	assert("nikita-881", level != NULL);
-+	assert("nikita-2229", carry_level_invariant(level, CARRY_TODO));
-+
-+	/* queue order is left to right */
-+	for_all_nodes(level, cur, next) {
-+		err = lock_carry_node(level, cur);
-+		if (err != 0)
-+			return err;
-+	}
-+	return 0;
-+}
-+
-+/* Synchronize delimiting keys between @spot and its left neighbor(s).
-+
-+ To reduce contention on dk key and simplify carry code, we synchronize
-+ delimiting keys only when carry ultimately leaves tree level (carrying
-+ changes upward) and unlocks nodes at this level.
-+
-+ This function first sets left delimiting key of @spot to the least key in
-+ @spot, then updates left neighbor's right delimiting key to coincide with it.
-+
-+*/
-+
-+ON_DEBUG(extern atomic_t delim_key_version;
-+ )
-+
-+static void sync_dkeys(znode * spot /* node to update */ )
-+{
-+ reiser4_key pivot;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1610", spot != NULL);
-+ assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk));
-+
-+ tree = znode_get_tree(spot);
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+
-+ assert("nikita-2192", znode_is_loaded(spot));
-+
-+ /* pivot: key of leftmost item of @spot, or its own right dk if it is empty */
-+ if (node_is_empty(spot))
-+ pivot = *znode_get_rd_key(spot);
-+ else
-+ leftmost_key_in_node(spot, &pivot);
-+
-+ znode_set_ld_key(spot, &pivot);
-+
-+ /* there can be sequence of empty nodes pending removal on the left of
-+ @spot. Scan them and update their left and right delimiting keys to
-+ match left delimiting key of @spot. Also, update right delimiting
-+ key of first non-empty left neighbor.
-+ */
-+ while (1) {
-+ if (!ZF_ISSET(spot, JNODE_LEFT_CONNECTED))
-+ break;
-+
-+ spot = spot->left;
-+ if (spot == NULL)
-+ break;
-+
-+ znode_set_rd_key(spot, &pivot);
-+ /* don't sink into the domain of another balancing */
-+ if (!znode_is_write_locked(spot))
-+ break;
-+ if (ZF_ISSET(spot, JNODE_HEARD_BANSHEE))
-+ znode_set_ld_key(spot, &pivot);
-+ else
-+ break;
-+ }
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/* Unlock all carry nodes in @level. Unless unlocking because of a failure,
-+   delimiting keys of the locked znodes are synchronized first. */
-+static void unlock_carry_level(carry_level * level /* level to unlock */ ,
-+			       int failure /* true if unlocking owing to
-+					    * failure */ )
-+{
-+	carry_node *cur;
-+	carry_node *next;
-+	znode *synced = NULL;
-+
-+	assert("nikita-889", level != NULL);
-+
-+	if (!failure) {
-+		/* update delimiting keys, skipping a znode that was just done
-+		   for the preceding carry node */
-+		for_all_nodes(level, cur, next) {
-+			znode *real = reiser4_carry_real(cur);
-+
-+			if (real == synced)
-+				continue;
-+			synced = real;
-+			sync_dkeys(real);
-+		}
-+	}
-+
-+	/* Unlock order is arbitrary as far as correctness goes; reverse order
-+	   of locking is preferred for preemptible environments. */
-+	for_all_nodes_back(level, cur, next) {
-+		/* on success every node allocated by carry must already be
-+		   linked to its parent, i.e. no longer be an orphan */
-+		assert("nikita-1631",
-+		       ergo(!failure, !ZF_ISSET(reiser4_carry_real(cur),
-+						JNODE_ORPHAN)));
-+		ON_DEBUG(check_dkeys(reiser4_carry_real(cur)));
-+		unlock_carry_node(level, cur, failure);
-+	}
-+	level->new_root = NULL;
-+}
-+
-+/* Finish with @level: unlock its nodes (as a non-failure unlock), then release
-+   every carry node and every carry op of the level with reiser4_pool_free()
-+   into the level's node and op pools. */
-+static void done_carry_level(carry_level * level /* level to finish */ )
-+{
-+	carry_node *cn, *cn_next;
-+	carry_op *cop, *cop_next;
-+
-+	assert("nikita-1076", level != NULL);
-+
-+	unlock_carry_level(level, 0);
-+	/* a node must not sit on any lock list when it is freed */
-+	for_all_nodes(level, cn, cn_next) {
-+		assert("nikita-2113", list_empty_careful(&cn->lock_handle.locks_link));
-+		assert("nikita-2114", list_empty_careful(&cn->lock_handle.owners_link));
-+		reiser4_pool_free(&level->pool->node_pool, &cn->header);
-+	}
-+	for_all_ops(level, cop, cop_next) {
-+		reiser4_pool_free(&level->pool->op_pool, &cop->header);
-+	}
-+}
-+
-+/* helper function to complete locking of carry node
-+
-+   A carry node can get locked in several ways: normally through
-+   lock_carry_node(), but also from find_{left|right}_neighbor(). This is the
-+   common final step of all of them. It expects @node->lock_handle to already
-+   hold the lock just taken, marks @node as to-be-unlocked by carry and loads
-+   the locked znode. Returns the result of zload().
-+
-+*/
-+int lock_carry_node_tail(carry_node * node /* node to complete locking of */ )
-+{
-+	znode *locked;
-+
-+	assert("nikita-1052", node != NULL);
-+	locked = reiser4_carry_real(node);
-+	assert("nikita-1187", locked != NULL);
-+	assert("nikita-1188", !node->unlock);
-+
-+	node->unlock = 1;
-+	/* Load node content into memory and install node plugin by looking at
-+	   the node header. Most of the time this is cheap because the node is
-+	   already in memory.
-+
-+	   The matching zrelse() is in unlock_carry_node().
-+	 */
-+	return zload(locked);
-+}
-+
-+/* lock carry node
-+
-+ "Resolve" node to real znode, lock it and mark as locked.
-+ This requires recursive locking of znodes.
-+
-+ When operation is posted to the parent level, node it will be applied to is
-+ not yet known. For example, when shifting data between two nodes,
-+ delimiting key has to be updated in parent or parents of nodes involved. But
-+ their parents are not yet locked and, moreover, said nodes can be reparented
-+ by concurrent balancing.
-+
-+ To work around this, carry operation is applied to special "carry node"
-+ rather than to the znode itself. Carry node consists of some "base" or
-+ "reference" znode and flags indicating how to get to the target of carry
-+ operation (the znode that reiser4_carry_real() returns) from base.
-+ Returns 0 on success, otherwise the non-zero result of the step that failed.
-+*/
-+int lock_carry_node(carry_level * level /* level @node is in */ ,
-+ carry_node * node /* node to lock */ )
-+{
-+ int result;
-+ znode *reference_point;
-+ lock_handle lh;
-+ lock_handle tmp_lh;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-887", level != NULL);
-+ assert("nikita-882", node != NULL);
-+
-+ result = 0;
-+ reference_point = node->node;
-+ init_lh(&lh);
-+ init_lh(&tmp_lh);
-+ if (node->left_before) {
-+ /* handling of new nodes, allocated on the previous level:
-+
-+ some carry ops were probably posted from the new node, but
-+ this node neither has parent pointer set, nor is
-+ connected. This will be done in ->create_hook() for
-+ internal item.
-+
-+ Nonetheless, parent of new node has to be locked. To do
-+ this, first go to the "left" in the carry order. This
-+ depends on the decision to always allocate new node on the
-+ right of existing one.
-+
-+ Loop handles case when multiple nodes, all orphans, were
-+ inserted.
-+
-+ Strictly speaking, taking tree lock is not necessary here,
-+ because all nodes scanned by loop in
-+ find_begetting_brother() are write-locked by this thread,
-+ and thus, their sibling linkage cannot change.
-+
-+ */
-+ tree = znode_get_tree(reference_point);
-+ read_lock_tree(tree);
-+ reference_point = find_begetting_brother(node, level)->node;
-+ read_unlock_tree(tree);
-+ assert("nikita-1186", reference_point != NULL);
-+ }
-+ if (node->parent && (result == 0)) {
-+ result =
-+ reiser4_get_parent(&tmp_lh, reference_point,
-+ ZNODE_WRITE_LOCK);
-+ if (result != 0) {
-+ ; /* nothing */
-+ } else if (znode_get_level(tmp_lh.node) == 0) {
-+ assert("nikita-1347", znode_above_root(tmp_lh.node));
-+ result = add_new_root(level, node, tmp_lh.node);
-+ if (result == 0) {
-+ reference_point = level->new_root;
-+ move_lh(&lh, &node->lock_handle);
-+ }
-+ } else if ((level->new_root != NULL)
-+ && (level->new_root !=
-+ znode_parent_nolock(reference_point))) {
-+ /* parent of node exists, but this level already
-+ created different new root, so */
-+ warning("nikita-1109",
-+ /* it should be "radicis", but tradition is
-+ tradition. do banshees read latin? */
-+ "hodie natus est radici frater");
-+ result = -EIO;
-+ } else {
-+ move_lh(&lh, &tmp_lh);
-+ reference_point = lh.node;
-+ }
-+ }
-+ if (node->left && (result == 0)) {
-+ assert("nikita-1183", node->parent);
-+ assert("nikita-883", reference_point != NULL);
-+ result =
-+ reiser4_get_left_neighbor(&tmp_lh, reference_point,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == 0) {
-+ done_lh(&lh);
-+ move_lh(&lh, &tmp_lh);
-+ reference_point = lh.node;
-+ }
-+ }
-+ if (!node->parent && !node->left && !node->left_before) { /* plain case: lock the base node itself */
-+ result =
-+ longterm_lock_znode(&lh, reference_point, ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_HIPRI);
-+ }
-+ if (result == 0) {
-+ move_lh(&node->lock_handle, &lh);
-+ result = lock_carry_node_tail(node);
-+ }
-+ done_lh(&tmp_lh);
-+ done_lh(&lh);
-+ return result;
-+}
-+
-+/* release a lock on &carry_node.
-+
-+   Release if necessary lock on @node. This operation is the pair of
-+   lock_carry_node() and is idempotent: you can call it more than once on the
-+   same node. When @failure is set, additionally marks a znode allocated by
-+   carry for removal and frees a carry node allocated by carry.
-+*/
-+static void
-+unlock_carry_node(carry_level * level,
-+		  carry_node * node /* node to be released */ ,
-+		  int failure /* non-zero if node is unlocked due
-+			       * to some error */ )
-+{
-+	znode *real_node;
-+
-+	assert("nikita-884", node != NULL);
-+
-+	real_node = reiser4_carry_real(node);
-+	if (real_node != NULL)	/* no locked znode means nothing was zload()-ed */
-+		zrelse(real_node);	/* pair to zload() in lock_carry_node_tail() */
-+	if (node->unlock && (real_node != NULL)) {
-+		assert("nikita-899", real_node == node->lock_handle.node);
-+		longterm_unlock_znode(&node->lock_handle);
-+	}
-+	if (failure) {
-+		if (node->deallocate && (real_node != NULL)) {
-+			/* free node in bitmap
-+
-+			   Prepare node for removal. Last zput() will finish
-+			   with it.
-+			 */
-+			ZF_SET(real_node, JNODE_HEARD_BANSHEE);
-+		}
-+		if (node->free) {
-+			assert("nikita-2177",
-+			       list_empty_careful(&node->lock_handle.locks_link));
-+			assert("nikita-2112",
-+			       list_empty_careful(&node->lock_handle.owners_link));
-+			reiser4_pool_free(&level->pool->node_pool,
-+					  &node->header);
-+		}
-+	}
-+}
-+
-+/* fatal_carry_error() - all-catching error handling function
-+
-+ It is possible that carry faces unrecoverable error, like inability to
-+ insert pointer at the internal level. Our simple solution is just panic in
-+ this situation. More sophisticated things like attempt to remount
-+ file-system as read-only can be implemented without much difficulty.
-+
-+ It is believed, that:
-+
-+ 1. instead of panicking, all current transactions can be aborted rolling
-+ system back to the consistent state.
-+
-+Umm, if you simply panic without doing anything more at all, then all current
-+transactions are aborted and the system is rolled back to a consistent state,
-+by virtue of the design of the transactional mechanism. Well, wait, let's be
-+precise. If an internal node is corrupted on disk due to hardware failure,
-+then there may be no consistent state that can be rolled back to, so instead
-+we should say that it will rollback the transactions, which barring other
-+factors means rolling back to a consistent state.
-+
-+# Nikita: there is a subtle difference between panic and aborting
-+# transactions: machine doesn't reboot. Processes aren't killed. Processes
-+# not using reiser4 (not that we care about such processes), or using other
-+# reiser4 mounts (about them we do care) will simply continue to run. With
-+# some luck, even application using aborted file system can survive: it will
-+# get some error, like EBADF, from each file descriptor on failed file system,
-+# but applications that do care about tolerance will cope with this (squid
-+# will).
-+
-+It would be a nice feature though to support rollback without rebooting
-+followed by remount, but this can wait for later versions.
-+
-+ 2. once isolated transactions will be implemented it will be possible to
-+ roll back offending transaction.
-+
-+2. is additional code complexity of inconsistent value (it implies that a broken tree should be kept in operation), so we must think about
-+it more before deciding if it should be done. -Hans
-+
-+*/
-+static void fatal_carry_error(carry_level * doing UNUSED_ARG /* carry level
-+ * where
-+ * unrecoverable
-+ * error
-+ * occurred */ ,
-+ int ecode /* error code */ )
-+{
-+ assert("nikita-1230", doing != NULL);
-+ assert("nikita-1231", ecode < 0);
-+ /* current policy: report @ecode through reiser4_panic() */
-+ reiser4_panic("nikita-1232", "Carry failed: %i", ecode);
-+}
-+
-+/* add new root to the tree
-+
-+ This function itself only manages changes in carry structures and delegates
-+ all hard work (allocation of znode for new root, changes of parent and
-+ sibling pointers) to reiser4_add_tree_root().
-+
-+ Locking: old tree root is locked by carry at this point. Fake znode is also
-+ locked.
-+ Returns 0 with @node->lock_handle locking the new root, or an error code.
-+*/
-+static int add_new_root(carry_level * level /* carry level in context of which
-+ * operation is performed */ ,
-+ carry_node * node /* carry node for existing root */ ,
-+ znode * fake /* "fake" znode already locked by
-+ * us */ )
-+{
-+ int result;
-+
-+ assert("nikita-1104", level != NULL);
-+ assert("nikita-1105", node != NULL);
-+
-+ assert("nikita-1403", znode_is_write_locked(node->node));
-+ assert("nikita-1404", znode_is_write_locked(fake));
-+
-+ /* trying to create new root. */
-+ /* @node is root and it's already locked by us. This
-+ means that nobody else can be trying to add/remove
-+ tree root right now.
-+ */
-+ if (level->new_root == NULL)
-+ level->new_root = reiser4_add_tree_root(node->node, fake);
-+ if (!IS_ERR(level->new_root)) {
-+ assert("nikita-1210", znode_is_root(level->new_root));
-+ node->deallocate = 1; /* on failure unlock_carry_node() marks the new root for removal */
-+ result =
-+ longterm_lock_znode(&node->lock_handle, level->new_root,
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
-+ if (result == 0)
-+ zput(level->new_root); /* NOTE(review): presumably drops the reference from reiser4_add_tree_root(); also runs when ->new_root pre-existed; confirm */
-+ } else {
-+ result = PTR_ERR(level->new_root);
-+ level->new_root = NULL;
-+ }
-+ return result;
-+}
-+
-+/* allocate new znode and add the operation that inserts the
-+ pointer to it into the parent node into the todo level
-+
-+ Allocate new znode, add it into carry queue and post into @todo queue
-+ request to add pointer to new node into its parent.
-+
-+ This is carry related routine that calls reiser4_new_node() to allocate new
-+ node. Returns the carry node of the new znode, or an error pointer (IS_ERR()).
-+*/
-+carry_node *add_new_znode(znode * brother /* existing left neighbor of new
-+ * node */ ,
-+ carry_node * ref /* carry node after which new
-+ * carry node is to be inserted
-+ * into queue. This affects
-+ * locking. */ ,
-+ carry_level * doing /* carry queue where new node is
-+ * to be added */ ,
-+ carry_level * todo /* carry queue where COP_INSERT
-+ * operation to add pointer to
-+ * new node will be added */ )
-+{
-+ carry_node *fresh;
-+ znode *new_znode;
-+ carry_op *add_pointer;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1048", brother != NULL);
-+ assert("nikita-1049", todo != NULL);
-+
-+ /* There is a lot of possible variations here: to what parent
-+ new node will be attached and where. For simplicity, always
-+ do the following:
-+
-+ (1) new node and @brother will have the same parent.
-+
-+ (2) new node is added on the right of @brother
-+
-+ */
-+
-+ fresh = reiser4_add_carry_skip(doing,
-+ ref ? POOLO_AFTER : POOLO_LAST, ref);
-+ if (IS_ERR(fresh))
-+ return fresh;
-+
-+ fresh->deallocate = 1;
-+ fresh->free = 1;
-+
-+ new_znode = reiser4_new_node(brother, znode_get_level(brother));
-+ if (IS_ERR(new_znode))
-+ /* @fresh will be deallocated automatically by error
-+ handling code in the caller. */
-+ return (carry_node *) new_znode;
-+
-+ /* new_znode returned znode with x_count 1. Caller has to decrease
-+ it. make_space() does. */
-+
-+ ZF_SET(new_znode, JNODE_ORPHAN);
-+ fresh->node = new_znode;
-+ /* NOTE(review): @ref is accepted as NULL above (POOLO_LAST) but is dereferenced below; confirm callers never pass NULL */
-+ while (ZF_ISSET(reiser4_carry_real(ref), JNODE_ORPHAN)) {
-+ ref = carry_node_prev(ref);
-+ assert("nikita-1606", !carry_node_end(doing, ref));
-+ }
-+
-+ info.todo = todo;
-+ info.doing = doing;
-+ add_pointer = node_post_carry(&info, COP_INSERT,
-+ reiser4_carry_real(ref), 1);
-+ if (IS_ERR(add_pointer)) {
-+ /* no need to deallocate @new_znode here: it will be
-+ deallocated during carry error handling. */
-+ return (carry_node *) add_pointer;
-+ }
-+
-+ add_pointer->u.insert.type = COPT_CHILD;
-+ add_pointer->u.insert.child = fresh;
-+ add_pointer->u.insert.brother = brother;
-+ /* initially new node spans empty key range: both keys are @brother's right key */
-+ write_lock_dk(znode_get_tree(brother));
-+ znode_set_ld_key(new_znode,
-+ znode_set_rd_key(new_znode,
-+ znode_get_rd_key(brother)));
-+ write_unlock_dk(znode_get_tree(brother));
-+ return fresh;
-+}
-+
-+/* DEBUGGING FUNCTIONS.
-+
-+ Probably we also should leave them on even when
-+ debugging is turned off to print dumps at errors.
-+*/
-+#if REISER4_DEBUG
-+/* Debugging check of @level: returns 1 when it looks sane, 0 otherwise. A level
-+   is sane when ->track_type is 0 or one of the CARRY_TRACK_* values and the
-+   leftmost keys of neighboring non-empty nodes do not decrease. */
-+static int carry_level_invariant(carry_level * level, carry_queue_state state)
-+{
-+	carry_node *cur;
-+	carry_node *next;
-+
-+	if (level == NULL)
-+		return 0;
-+	if (!(level->track_type == 0 ||
-+	      level->track_type == CARRY_TRACK_NODE ||
-+	      level->track_type == CARRY_TRACK_CHANGE))
-+		return 0;
-+
-+	/* check that nodes are in ascending order */
-+	for_all_nodes(level, cur, next) {
-+		carry_node *prev;
-+		znode *left;
-+		znode *right;
-+		reiser4_key lkey;
-+		reiser4_key rkey;
-+
-+		/* the first node has no left neighbor to compare with */
-+		if (cur == carry_node_front(level))
-+			continue;
-+		prev = carry_node_prev(cur);
-+		/* a CARRY_TODO level is compared by base ->node, others by locked znode */
-+		right = (state == CARRY_TODO) ? cur->node : reiser4_carry_real(cur);
-+		left = (state == CARRY_TODO) ? prev->node : reiser4_carry_real(prev);
-+		if (right == NULL || left == NULL)
-+			continue;
-+		if (node_is_empty(right) || node_is_empty(left))
-+			continue;
-+		if (!keyle(leftmost_key_in_node(left, &lkey),
-+			   leftmost_key_in_node(right, &rkey))) {
-+			warning("", "wrong key order");
-+			return 0;
-+		}
-+	}
-+	return 1;
-+}
-+#endif
-+
-+/* map a truth value to its one-letter name: "t" when non-zero, else "f" */
-+static const char *tf(int boolean /* truth value */ )
-+{
-+	return (boolean != 0) ? "t" : "f";
-+}
-+
-+/* symbolic name for carry operation; unknown opcodes are formatted in hex */
-+static const char *carry_op_name(carry_opcode op /* carry opcode */ )
-+{
-+	switch (op) {
-+	case COP_INSERT:
-+		return "COP_INSERT";
-+	case COP_DELETE:
-+		return "COP_DELETE";
-+	case COP_CUT:
-+		return "COP_CUT";
-+	case COP_PASTE:
-+		return "COP_PASTE";
-+	case COP_UPDATE:
-+		return "COP_UPDATE";
-+	case COP_EXTENT:
-+		return "COP_EXTENT";
-+	case COP_INSERT_FLOW:
-+		return "COP_INSERT_FLOW";
-+	default:{
-+			/* not mt safe, but who cares? "unknown op: " plus 8 hex
-+			   digits plus NUL is 21 bytes, so 20 was too small. */
-+			static char buf[24];
-+			snprintf(buf, sizeof(buf), "unknown op: %x", (unsigned)op);
-+			return buf;
-+		}
-+	}
-+}
-+
-+/* Print the flag bits of carry node @node, or "null" if there is no node.
-+   Output is prefixed with @prefix. */
-+static void print_carry(const char *prefix /* prefix to print */ ,
-+			carry_node * node /* node to print */ )
-+{
-+	if (node != NULL)
-+		printk
-+		    ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n",
-+		     prefix, node, tf(node->parent), tf(node->left),
-+		     tf(node->unlock), tf(node->free), tf(node->deallocate));
-+	else
-+		printk("%s: null\n", prefix);
-+}
-+
-+/* dump information about carry operation: its opcode, its target carry node
-+   and the opcode-specific payload. Prints "null" for a NULL @op. */
-+static void print_op(const char *prefix /* prefix to print */ ,
-+		     carry_op * op /* operation to print */ )
-+{
-+	if (op == NULL) {
-+		printk("%s: null\n", prefix);
-+		return;
-+	}
-+	printk("%s: %p carry_opcode: %s\n", prefix, op, carry_op_name(op->op));
-+	print_carry("\tnode", op->node);
-+	switch (op->op) {
-+	case COP_INSERT:
-+	case COP_PASTE:
-+		print_coord("\tcoord",
-+			    op->u.insert.d ? op->u.insert.d->coord : NULL, 0);
-+		reiser4_print_key("\tkey",
-+				  op->u.insert.d ? op->u.insert.d->key : NULL);
-+		print_carry("\tchild", op->u.insert.child);
-+		break;
-+	case COP_DELETE:
-+		print_carry("\tchild", op->u.delete.child);
-+		break;
-+	case COP_CUT:
-+		/* read the union member that ->is_cut says is active */
-+		if (op->u.cut_or_kill.is_cut) {
-+			print_coord("\tfrom",
-+				    op->u.cut_or_kill.u.cut->params.from, 0);
-+			print_coord("\tto",
-+				    op->u.cut_or_kill.u.cut->params.to, 0);
-+		} else {
-+			print_coord("\tfrom",
-+				    op->u.cut_or_kill.u.kill->params.from, 0);
-+			print_coord("\tto",
-+				    op->u.cut_or_kill.u.kill->params.to, 0);
-+		}
-+		break;
-+	case COP_UPDATE:
-+		print_carry("\tleft", op->u.update.left);
-+		break;
-+	default:		/* nothing opcode-specific to print */
-+		break;
-+	}
-+}
-+
-+/* Dump @level: a header line with its address and ->restartable flag, then
-+   every carry node, then every carry op. Prints "null" for a NULL @level. */
-+static void print_level(const char *prefix /* prefix to print */ ,
-+			carry_level * level /* level to print */ )
-+{
-+	carry_node *cn, *cn_next;
-+	carry_op *cop, *cop_next;
-+
-+	if (level == NULL) {
-+		printk("%s: null\n", prefix);
-+		return;
-+	}
-+	printk("%s: %p, restartable: %s\n", prefix, level, tf(level->restartable));
-+
-+	for_all_nodes(level, cn, cn_next) {
-+		print_carry("\tcarry node", cn);
-+	}
-+	for_all_ops(level, cop, cop_next) {
-+		print_op("\tcarry op", cop);
-+	}
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/carry.h linux-2.6.20/fs/reiser4/carry.h
---- linux-2.6.20.orig/fs/reiser4/carry.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/carry.h 2007-05-06 14:50:43.690973225 +0400
-@@ -0,0 +1,442 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Functions and data types to "carry" tree modification(s) upward.
-+ See fs/reiser4/carry.c for details. */
-+
-+#if !defined( __FS_REISER4_CARRY_H__ )
-+#define __FS_REISER4_CARRY_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "pool.h"
-+#include "znode.h"
-+
-+#include <linux/types.h>
-+
-+/* &carry_node - "location" of carry node.
-+
-+ "location" of node that is involved or going to be involved into
-+ carry process. Node where operation will be carried to on the
-+ parent level cannot be recorded explicitly. Operation will be carried
-+ usually to the parent of some node (where changes are performed at
-+ the current level) or, to the left neighbor of its parent. But while
-+ modifications are performed at the current level, parent may
-+ change. So, we have to allow some indirection (or, positively,
-+ flexibility) in locating carry nodes.
-+
-+*/
-+typedef struct carry_node {
-+ /* pool linkage */
-+ reiser4_pool_header header;
-+
-+ /* base node from which real_node is calculated. See
-+ fs/reiser4/carry.c:lock_carry_node(). */
-+ znode *node;
-+
-+ /* how to get ->real_node (the znode that reiser4_carry_real() returns) */
-+ /* to get ->real_node obtain parent of ->node */
-+ __u32 parent:1;
-+ /* to get ->real_node obtain left neighbor of parent of
-+ ->node */
-+ __u32 left:1;
-+ __u32 left_before:1; /* ->node was an orphan when posted: lock_carry_node() starts from its begetting brother */
-+
-+ /* locking */
-+
-+ /* this node was locked by carry process and should be
-+ unlocked when carry leaves a level */
-+ __u32 unlock:1;
-+
-+ /* disk block for this node was allocated by carry process and
-+ should be deallocated when carry leaves a level */
-+ __u32 deallocate:1;
-+ /* this carry node was allocated by carry process and should be
-+ freed when carry leaves a level */
-+ __u32 free:1;
-+
-+ /* lock handle of the real node: reiser4_carry_real() returns lock_handle.node */
-+ lock_handle lock_handle;
-+} carry_node;
-+
-+/* &carry_opcode - elementary operations that can be carried upward
-+
-+ Operations that carry() can handle. This list is supposed to be
-+ expanded.
-+
-+ Each carry operation (cop) is handled by appropriate function defined
-+ in fs/reiser4/carry.c. For example COP_INSERT is handled by
-+ fs/reiser4/carry.c:carry_insert() etc. These functions in turn
-+ call plugins of nodes affected by operation to modify nodes' content
-+ and to gather operations to be performed on the next level.
-+
-+*/
-+typedef enum {
-+ /* insert new item into node. */
-+ COP_INSERT,
-+ /* delete pointer from parent node */
-+ COP_DELETE,
-+ /* remove part of a node, or the whole node. */
-+ COP_CUT,
-+ /* increase size of item. */
-+ COP_PASTE,
-+ /* insert extent (that is sequence of unformatted nodes). */
-+ COP_EXTENT,
-+ /* update delimiting key in least common ancestor of two
-+ nodes. This is performed when items are moved between two
-+ nodes.
-+ */
-+ COP_UPDATE,
-+ /* insert flow */
-+ COP_INSERT_FLOW,
-+ COP_LAST_OP, /* NOTE(review): looks like a count/sentinel rather than a real opcode; confirm */
-+} carry_opcode;
-+
-+#define CARRY_FLOW_NEW_NODES_LIMIT 20
-+
-+/* mode (or subtype) of COP_{INSERT|PASTE} operation. Specifies how target
-+ item is determined. Stored in the ->type field of carry_op's insert data. */
-+typedef enum {
-+ /* target item is one containing pointer to the ->child node */
-+ COPT_CHILD,
-+ /* target item is given explicitly by @coord */
-+ COPT_ITEM_DATA,
-+ /* target item is given by key */
-+ COPT_KEY,
-+ /* see insert_paste_common() for more comments on this. */
-+ COPT_PASTE_RESTARTED,
-+} cop_insert_pos_type;
-+
-+/* flags to cut and delete (bit mask; carried in carry_kill_data.flags) */
-+typedef enum {
-+ /* don't kill node even if it became completely empty as a result of
-+ * cut. This is needed for eottl handling. See carry_extent() for
-+ * details. */
-+ DELETE_RETAIN_EMPTY = (1 << 0)
-+} cop_delete_flag;
-+
-+/*
-+ * carry() implements "lock handle tracking" feature.
-+ *
-+ * Callers supply carry with node where to perform initial operation and lock
-+ * handle on this node. Trying to optimize node utilization carry may actually
-+ * move insertion point to different node. Callers expect that lock handle
-+ * will be transferred to the new node also.
-+ *
-+ */
-+typedef enum {
-+ /* transfer lock handle along with insertion point */
-+ CARRY_TRACK_CHANGE = 1,
-+ /* acquire new lock handle to the node where insertion point is. This
-+ * is used when carry() client doesn't initially possess lock handle
-+ * on the insertion point node, for example, by extent insertion
-+ * code. See carry_extent(). */
-+ CARRY_TRACK_NODE = 2
-+} carry_track_type;
-+
-+/* data supplied to COP_{INSERT|PASTE} by callers (carry_op points to it via ->d) */
-+typedef struct carry_insert_data {
-+ /* position where new item is to be inserted */
-+ coord_t *coord;
-+ /* new item description */
-+ reiser4_item_data *data;
-+ /* key of new item */
-+ const reiser4_key *key;
-+} carry_insert_data;
-+
-+/* cut and kill are similar, so carry_cut_data and carry_kill_data share the below structure of parameters */
-+struct cut_kill_params {
-+ /* coord where cut starts (inclusive) */
-+ coord_t *from;
-+ /* coord where cut stops (inclusive, this item/unit will also be
-+ * cut) */
-+ coord_t *to;
-+ /* starting key. This is necessary when item and unit pos don't
-+ * uniquely identify what portion of tree to remove. For example, this
-+ * indicates what portion of extent unit will be affected. */
-+ const reiser4_key *from_key;
-+ /* exclusive stop key */
-+ const reiser4_key *to_key;
-+ /* if this is not NULL, smallest actually removed key is stored
-+ * here. */
-+ reiser4_key *smallest_removed;
-+ /* kill_node_content() is called for file truncate */
-+ int truncate;
-+};
-+
-+struct carry_cut_data {
-+ struct cut_kill_params params; /* must stay first: carry_kill_data starts with the same member */
-+};
-+
-+struct carry_kill_data {
-+ struct cut_kill_params params;
-+ /* parameter to be passed to the ->kill_hook() method of item
-+ * plugin */
-+ /*void *iplug_params; *//* FIXME: unused currently */
-+ /* if not NULL---inode whose items are being removed. This is needed
-+ * for ->kill_hook() of extent item to update VM structures when
-+ * removing pages. */
-+ struct inode *inode;
-+ /* sibling list maintenance is complicated by existence of eottl. When
-+ * eottl whose left and right neighbors are formatted leaves is
-+ * removed, one has to connect said leaves in the sibling list. This
-+ * cannot be done when extent removal is just started as locking rules
-+ * require sibling list update to happen atomically with removal of
-+ * extent item. Therefore: 1. pointers to left and right neighbors
-+ * have to be passed down to the ->kill_hook() of extent item, and
-+ * 2. said neighbors have to be locked. */
-+ lock_handle *left;
-+ lock_handle *right;
-+ /* flags modifying behavior of kill. Currently, it may have DELETE_RETAIN_EMPTY set. */
-+ unsigned flags;
-+ char *buf; /* NOTE(review): purpose not visible here; confirm against users of carry_kill_data */
-+};
-+
-+/* &carry_op - operation to "carry" upward.
-+
-+ Description of an operation we want to "carry" to the upper level of
-+ a tree: e.g, when we insert something and there is not enough space
-+ we allocate a new node and "carry" the operation of inserting a
-+ pointer to the new node to the upper level, on removal of empty node,
-+ we carry up operation of removing appropriate entry from parent.
-+
-+ There are two types of carry ops: when adding or deleting a node, the
-+ node at the parent level where appropriate modification has to be
-+ performed is known in advance. When shifting items between nodes
-+ (split, merge), delimiting key should be changed in the least common
-+ parent of the nodes involved that is not known in advance.
-+
-+ For the operations of the first type we store in &carry_op pointer to
-+ the &carry_node at the parent level. For the operation of the second
-+ type we store &carry_node or parents of the left and right nodes
-+ modified and keep track of them upward until they coincide.
-+
-+*/
-+typedef struct carry_op {
-+ /* pool linkage */
-+ reiser4_pool_header header;
-+ carry_opcode op;
-+ /* node on which operation is to be performed:
-+
-+ for insert, paste: node where new item is to be inserted
-+
-+ for delete: node where pointer is to be deleted
-+
-+ for cut: node to cut from
-+
-+ for update: node where delimiting key is to be modified
-+
-+ for modify: parent of modified node
-+
-+ */
-+ carry_node *node;
-+ union {
-+ struct {
-+ /* (sub-)type of insertion/paste. Taken from
-+ cop_insert_pos_type. */
-+ __u8 type;
-+ /* various operation flags. Taken from
-+ cop_insert_flag. */
-+ __u8 flags;
-+ carry_insert_data *d;
-+ carry_node *child;
-+ znode *brother;
-+ } insert, paste, extent;
-+
-+ struct {
-+ int is_cut; /* presumably selects which member of @u below is valid -- confirm */
-+ union {
-+ carry_kill_data *kill;
-+ carry_cut_data *cut;
-+ } u;
-+ } cut_or_kill;
-+
-+ struct {
-+ carry_node *left;
-+ } update;
-+ struct {
-+ /* changed child */
-+ carry_node *child;
-+ /* bitmask of changes. See &cop_modify_flag */
-+ __u32 flag;
-+ } modify;
-+ struct {
-+ /* flags to deletion operation. Are taken from
-+ cop_delete_flag */
-+ __u32 flags;
-+ /* child to delete from parent. If this is
-+ NULL, delete op->node. */
-+ carry_node *child;
-+ } delete;
-+ struct {
-+ /* various operation flags. Taken from
-+ cop_insert_flag. */
-+ __u32 flags;
-+ flow_t *flow;
-+ coord_t *insert_point;
-+ reiser4_item_data *data;
-+ /* flow insertion is limited by number of new blocks
-+ added in that operation which do not get any data
-+ but part of flow. This limit is set by macro
-+ CARRY_FLOW_NEW_NODES_LIMIT. This field stores number
-+ of nodes added already during one carry_flow */
-+ int new_nodes;
-+ } insert_flow;
-+ } u;
-+} carry_op;
-+
-+/* &carry_pool - preallocated pool of carry operations, and nodes */
-+typedef struct carry_pool {
-+ carry_op op[CARRIES_POOL_SIZE];
-+ reiser4_pool op_pool;
-+ carry_node node[NODES_LOCKED_POOL_SIZE];
-+ reiser4_pool node_pool;
-+} carry_pool;
-+
-+/* &carry_level - carry process on given level
-+
-+ Description of balancing process on the given level.
-+
-+ No need for locking here, as carry_level is essentially per
-+ thread thing (for now).
-+
-+*/
-+struct carry_level {
-+ /* this level may be restarted */
-+ __u32 restartable:1;
-+ /* list of carry nodes on this level, ordered by key order */
-+ struct list_head nodes;
-+ struct list_head ops; /* list of carry operations on this level, walked by for_all_ops() */
-+ /* pool where new objects are allocated from */
-+ carry_pool *pool;
-+ int ops_num;
-+ int nodes_num;
-+ /* new root created on this level, if any */
-+ znode *new_root;
-+ /* This is set by caller (insert_by_key(), reiser4_resize_item(), etc.)
-+ when they want ->tracked to automagically wander to the node where
-+ insertion point moved after insert or paste.
-+ */
-+ carry_track_type track_type;
-+ /* lock handle supplied by user that we are tracking. See
-+ above. */
-+ lock_handle *tracked;
-+};
-+
-+/* information carry passes to plugin methods that may add new operations to
-+ the @todo queue */
-+struct carry_plugin_info {
-+ carry_level *doing; /* carry queue currently being processed */
-+ carry_level *todo; /* carry queue plugin methods may post new operations to */
-+};
-+
-+int reiser4_carry(carry_level * doing, carry_level * done);
-+
-+carry_node *reiser4_add_carry(carry_level * level, pool_ordering order,
-+ carry_node * reference);
-+carry_node *reiser4_add_carry_skip(carry_level * level, pool_ordering order,
-+ carry_node * reference);
-+
-+extern carry_node *insert_carry_node(carry_level * doing,
-+ carry_level * todo, const znode * node);
-+
-+extern carry_pool *init_carry_pool(int);
-+extern void done_carry_pool(carry_pool * pool);
-+
-+extern void init_carry_level(carry_level * level, carry_pool * pool);
-+
-+extern carry_op *reiser4_post_carry(carry_level * level, carry_opcode op,
-+ znode * node, int apply_to_parent);
-+extern carry_op *node_post_carry(carry_plugin_info * info, carry_opcode op,
-+ znode * node, int apply_to_parent_p);
-+
-+carry_node *add_new_znode(znode * brother, carry_node * reference,
-+ carry_level * doing, carry_level * todo);
-+
-+carry_node *find_carry_node(carry_level * level, const znode * node);
-+
-+extern znode *reiser4_carry_real(const carry_node * node);
-+
-+/* helper macros to walk the ->nodes list of a carry level; carry nodes are linked through header.level_linkage */
-+
-+#define carry_node_next( node ) \
-+ list_entry((node)->header.level_linkage.next, carry_node, \
-+ header.level_linkage)
-+
-+#define carry_node_prev( node ) \
-+ list_entry((node)->header.level_linkage.prev, carry_node, \
-+ header.level_linkage)
-+
-+#define carry_node_front( level ) \
-+ list_entry((level)->nodes.next, carry_node, header.level_linkage)
-+
-+#define carry_node_back( level ) \
-+ list_entry((level)->nodes.prev, carry_node, header.level_linkage)
-+
-+#define carry_node_end( level, node ) \
-+ (&(level)->nodes == &(node)->header.level_linkage)
-+
-+/* iterate over all operations in a @level; @tmp always holds the successor, so @op may be removed inside the body. Arguments are parenthesized so any pointer expression can be passed. */
-+#define for_all_ops( level /* carry level (of type carry_level *) */, \
-+ op /* pointer to carry operation, modified by loop (of \
-+ * type carry_op *) */, \
-+ tmp /* pointer to carry operation (of type carry_op *), \
-+ * used to make iterator stable in the face of \
-+ * deletions from the level */ ) \
-+for ((op) = list_entry((level)->ops.next, carry_op, header.level_linkage), \
-+ (tmp) = list_entry((op)->header.level_linkage.next, carry_op, header.level_linkage); \
-+ &(op)->header.level_linkage != &(level)->ops; \
-+ (op) = (tmp), \
-+ (tmp) = list_entry((op)->header.level_linkage.next, carry_op, header.level_linkage))
-+
-+#if 0
-+for( op = ( carry_op * ) pool_level_list_front( &level -> ops ), \
-+ tmp = ( carry_op * ) pool_level_list_next( &op -> header ) ; \
-+ ! pool_level_list_end( &level -> ops, &op -> header ) ; \
-+ op = tmp, tmp = ( carry_op * ) pool_level_list_next( &op -> header ) )
-+#endif
-+
-+/* iterate over all nodes in a @level; @tmp always holds the successor, so @node may be removed inside the body. Arguments are parenthesized so any pointer expression can be passed. */
-+#define for_all_nodes( level /* carry level (of type carry_level *) */, \
-+ node /* pointer to carry node, modified by loop (of \
-+ * type carry_node *) */, \
-+ tmp /* pointer to carry node (of type carry_node *), \
-+ * used to make iterator stable in the face of \
-+ * deletions from the level */ ) \
-+for ((node) = list_entry((level)->nodes.next, carry_node, header.level_linkage), \
-+ (tmp) = list_entry((node)->header.level_linkage.next, carry_node, header.level_linkage); \
-+ &(node)->header.level_linkage != &(level)->nodes; \
-+ (node) = (tmp), \
-+ (tmp) = list_entry((node)->header.level_linkage.next, carry_node, header.level_linkage))
-+
-+#if 0
-+for( node = carry_node_front( level ), \
-+ tmp = carry_node_next( node ) ; ! carry_node_end( level, node ) ; \
-+ node = tmp, tmp = carry_node_next( node ) )
-+#endif
-+
-+/* macro to iterate over all nodes in a @level in reverse order
-+
-+ This is used because nodes are unlocked in the reverse order of locking */
-+#define for_all_nodes_back( level /* carry level (of type carry_level *) */, \
-+ node /* pointer to carry node, modified by loop \
-+ * (of type carry_node *) */, \
-+ tmp /* pointer to carry node (of type carry_node \
-+ * *), used to make iterator stable in the \
-+ * face of deletions from the level */ ) \
-+for( node = carry_node_back( level ), \
-+ tmp = carry_node_prev( node ) ; ! carry_node_end( level, node ) ; \
-+ node = tmp, tmp = carry_node_prev( node ) )
-+
-+/* __FS_REISER4_CARRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/carry_ops.c linux-2.6.20/fs/reiser4/carry_ops.c
---- linux-2.6.20.orig/fs/reiser4/carry_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/carry_ops.c 2007-05-06 14:50:43.694974475 +0400
-@@ -0,0 +1,2131 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* implementation of carry operations */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "pool.h"
-+#include "tree_mod.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "tree.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h>
-+#include <linux/err.h>
-+
-+static int carry_shift_data(sideof side, coord_t * insert_coord, znode * node,
-+ carry_level * doing, carry_level * todo,
-+ unsigned int including_insert_coord_p);
-+
-+extern int lock_carry_node(carry_level * level, carry_node * node);
-+extern int lock_carry_node_tail(carry_node * node);
-+
-+/* find left neighbor of a carry node
-+
-+ Look for left neighbor of @op->node and add it to the @doing queue. Returns
-+ the neighbor's carry node, NULL if there is no usable neighbor, or ERR_PTR()
-+ (-E_REPEAT when a restartable level has to be restarted). See body comments.
-+*/
-+static carry_node *find_left_neighbor(carry_op * op /* op->node is the node to
-+ * find left neighbor of */ ,
-+ carry_level * doing /* level to scan */ )
-+{
-+ int result;
-+ carry_node *node;
-+ carry_node *left;
-+ int flags;
-+ reiser4_tree *tree;
-+
-+ node = op->node;
-+
-+ tree = current_tree;
-+ read_lock_tree(tree);
-+ /* first, check whether left neighbor is already in a @doing queue */
-+ if (reiser4_carry_real(node)->left != NULL) {
-+ /* NOTE: there is locking subtlety here. Look into
-+ * find_right_neighbor() for more info */
-+ if (find_carry_node(doing,
-+ reiser4_carry_real(node)->left) != NULL) {
-+ read_unlock_tree(tree);
-+ left = node;
-+ do {
-+ left = list_entry(left->header.level_linkage.prev,
-+ carry_node, header.level_linkage);
-+ assert("nikita-3408", !carry_node_end(doing,
-+ left));
-+ } while (reiser4_carry_real(left) ==
-+ reiser4_carry_real(node));
-+ return left;
-+ }
-+ }
-+ read_unlock_tree(tree);
-+
-+ left = reiser4_add_carry_skip(doing, POOLO_BEFORE, node);
-+ if (IS_ERR(left))
-+ return left;
-+
-+ left->node = node->node;
-+ left->free = 1;
-+
-+ flags = GN_TRY_LOCK;
-+ if (!(op->u.insert.flags & COPI_LOAD_LEFT)) /* '!' binds tighter than '&': test the flag bit, not !flags */
-+ flags |= GN_NO_ALLOC;
-+
-+ /* then, feeling lucky, peek left neighbor in the cache. */
-+ result = reiser4_get_left_neighbor(&left->lock_handle,
-+ reiser4_carry_real(node),
-+ ZNODE_WRITE_LOCK, flags);
-+ if (result == 0) {
-+ /* ok, node found and locked. */
-+ result = lock_carry_node_tail(left);
-+ if (result != 0)
-+ left = ERR_PTR(result);
-+ } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) {
-+ /* node is leftmost node in a tree, or neighbor wasn't in
-+ cache, or there is an extent on the left. */
-+ reiser4_pool_free(&doing->pool->node_pool, &left->header);
-+ left = NULL;
-+ } else if (doing->restartable) {
-+ /* if left neighbor is locked, and level is restartable, add
-+ new node to @doing and restart. */
-+ assert("nikita-913", node->parent != 0);
-+ assert("nikita-914", node->node != NULL);
-+ left->left = 1;
-+ left->free = 0;
-+ left = ERR_PTR(-E_REPEAT);
-+ } else {
-+ /* left neighbor is locked, level cannot be restarted. Just
-+ ignore left neighbor. */
-+ reiser4_pool_free(&doing->pool->node_pool, &left->header);
-+ left = NULL;
-+ }
-+ return left;
-+}
-+
-+/* find right neighbor of a carry node
-+
-+ Look for right neighbor of @op->node and add it to the @doing queue. Returns
-+ the neighbor's carry node, NULL if there is no usable neighbor, or ERR_PTR()
-+ on failure. See comments in the body.
-+*/
-+static carry_node *find_right_neighbor(carry_op * op /* op->node is the node to
-+ * find right neighbor of */ ,
-+ carry_level * doing /* level to scan */ )
-+{
-+ int result;
-+ carry_node *node;
-+ carry_node *right;
-+ lock_handle lh;
-+ int flags;
-+ reiser4_tree *tree;
-+
-+ init_lh(&lh);
-+
-+ node = op->node;
-+
-+ tree = current_tree;
-+ read_lock_tree(tree);
-+ /* first, check whether right neighbor is already in a @doing queue */
-+ if (reiser4_carry_real(node)->right != NULL) {
-+ /*
-+ * Tree lock is taken here anyway, because, even if _outcome_
-+ * of (find_carry_node() != NULL) doesn't depends on
-+ * concurrent updates to ->right, find_carry_node() cannot
-+ * work with second argument NULL. Hence, following comment is
-+ * of historic importance only.
-+ *
-+ * Subtle:
-+ *
-+ * Q: why don't we need tree lock here, looking for the right
-+ * neighbor?
-+ *
-+ * A: even if value of node->real_node->right were changed
-+ * during find_carry_node() execution, outcome of execution
-+ * wouldn't change, because (in short) other thread cannot add
-+ * elements to the @doing, and if node->real_node->right
-+ * already was in @doing, value of node->real_node->right
-+ * couldn't change, because node cannot be inserted between
-+ * locked neighbors.
-+ */
-+ if (find_carry_node(doing,
-+ reiser4_carry_real(node)->right) != NULL) {
-+ read_unlock_tree(tree);
-+ /*
-+ * What we are doing here (this is also applicable to
-+ * the find_left_neighbor()).
-+ *
-+ * tree_walk.c code requires that insertion of a
-+ * pointer to a child, modification of parent pointer
-+ * in the child, and insertion of the child into
-+ * sibling list are atomic (see
-+ * plugin/item/internal.c:create_hook_internal()).
-+ *
-+ * carry allocates new node long before pointer to it
-+ * is inserted into parent and, actually, long before
-+ * parent is even known. Such allocated-but-orphaned
-+ * nodes are only trackable through carry level lists.
-+ *
-+ * Situation that is handled here is following: @node
-+ * has valid ->right pointer, but there is
-+ * allocated-but-orphaned node in the carry queue that
-+ * is logically between @node and @node->right. Here
-+ * we are searching for it. Critical point is that
-+ * this is only possible if @node->right is also in
-+ * the carry queue (this is checked above), because
-+ * this is the only way new orphaned node could be
-+ * inserted between them (before inserting new node,
-+ * make_space() first tries to shift to the right, so,
-+ * right neighbor will be locked and queued).
-+ *
-+ */
-+ right = node;
-+ do {
-+ right = list_entry(right->header.level_linkage.next,
-+ carry_node, header.level_linkage);
-+ assert("nikita-3408", !carry_node_end(doing,
-+ right));
-+ } while (reiser4_carry_real(right) ==
-+ reiser4_carry_real(node));
-+ return right;
-+ }
-+ }
-+ read_unlock_tree(tree);
-+
-+ flags = GN_CAN_USE_UPPER_LEVELS;
-+ if (!(op->u.insert.flags & COPI_LOAD_RIGHT)) /* '!' binds tighter than '&': test the flag bit, not !flags */
-+ flags = GN_NO_ALLOC; /* NOTE(review): replaces GN_CAN_USE_UPPER_LEVELS rather than OR-ing into it -- confirm intended */
-+
-+ /* then, try to lock right neighbor */
-+ init_lh(&lh);
-+ result = reiser4_get_right_neighbor(&lh,
-+ reiser4_carry_real(node),
-+ ZNODE_WRITE_LOCK, flags);
-+ if (result == 0) {
-+ /* ok, node found and locked. */
-+ right = reiser4_add_carry_skip(doing, POOLO_AFTER, node);
-+ if (!IS_ERR(right)) {
-+ right->node = lh.node;
-+ move_lh(&right->lock_handle, &lh);
-+ right->free = 1;
-+ result = lock_carry_node_tail(right);
-+ if (result != 0)
-+ right = ERR_PTR(result);
-+ }
-+ } else if ((result == -E_NO_NEIGHBOR) || (result == -ENOENT)) {
-+ /* node is rightmost node in a tree, or neighbor wasn't in
-+ cache, or there is an extent on the right. */
-+ right = NULL;
-+ } else
-+ right = ERR_PTR(result);
-+ done_lh(&lh);
-+ return right;
-+}
-+
-+/* how much free space in a @node is needed for @op
-+
-+ How much space in @node is required for completion of @op, where @op is
-+ insert or paste operation. Delegates to space_needed().
-+*/
-+static unsigned int space_needed_for_op(znode * node /* znode data are
-+ * inserted or
-+ * pasted in */ ,
-+ carry_op * op /* carry
-+ operation */ )
-+{
-+ assert("nikita-919", op != NULL);
-+
-+ switch (op->op) {
-+ default:
-+ impossible("nikita-1701", "Wrong opcode"); /* no break: continues into COP_INSERT if impossible() returns */
-+ case COP_INSERT:
-+ return space_needed(node, NULL, op->u.insert.d->data, 1);
-+ case COP_PASTE:
-+ return space_needed(node, op->u.insert.d->coord,
-+ op->u.insert.d->data, 0);
-+ }
-+}
-+
-+/* how much space in @node is required to insert or paste @data at @coord:
-+ the item plugin's estimate (or @data->length), plus node item overhead when inserting. */
-+unsigned int space_needed(const znode * node /* node data are inserted or
-+ * pasted in */ ,
-+ const coord_t * coord /* coord where data are
-+ * inserted or pasted
-+ * at */ ,
-+ const reiser4_item_data * data /* data to insert or
-+ * paste */ ,
-+ int insertion /* non-0 is inserting, 0---paste */ )
-+{
-+ int result;
-+ item_plugin *iplug;
-+
-+ assert("nikita-917", node != NULL);
-+ assert("nikita-918", node_plugin_by_node(node) != NULL);
-+ assert("vs-230", !insertion || (coord == NULL));
-+
-+ result = 0;
-+ iplug = data->iplug;
-+ if (iplug->b.estimate != NULL) {
-+ /* ask item plugin how much space is needed to insert this
-+ item */
-+ result += iplug->b.estimate(insertion ? NULL : coord, data);
-+ } else {
-+ /* reasonable default */
-+ result += data->length;
-+ }
-+ if (insertion) {
-+ node_plugin *nplug;
-+
-+ nplug = node->nplug;
-+ /* and add node overhead */
-+ if (nplug->item_overhead != NULL) {
-+ result += nplug->item_overhead(node, NULL);
-+ }
-+ }
-+ return result;
-+}
-+
-+/* find &coord in parent where pointer to new child is to be stored; also calls build_child_ptr_data() on op->u.insert.d->data. Returns the find_new_child_ptr() result. */
-+static int find_new_child_coord(carry_op * op /* COP_INSERT carry operation to
-+ * insert pointer to new
-+ * child */ )
-+{
-+ int result;
-+ znode *node;
-+ znode *child;
-+
-+ assert("nikita-941", op != NULL);
-+ assert("nikita-942", op->op == COP_INSERT);
-+
-+ node = reiser4_carry_real(op->node);
-+ assert("nikita-943", node != NULL);
-+ assert("nikita-944", node_plugin_by_node(node) != NULL);
-+
-+ child = reiser4_carry_real(op->u.insert.child);
-+ result =
-+ find_new_child_ptr(node, child, op->u.insert.brother,
-+ op->u.insert.d->coord);
-+
-+ build_child_ptr_data(child, op->u.insert.d->data);
-+ return result;
-+}
-+
-+/* additional amount of free space in @node required to complete @op; callers treat a value <= 0 as "enough space" */
-+static int free_space_shortage(znode * node /* node to check */ ,
-+ carry_op * op /* operation being performed */ )
-+{
-+ assert("nikita-1061", node != NULL);
-+ assert("nikita-1062", op != NULL);
-+
-+ switch (op->op) {
-+ default:
-+ impossible("nikita-1702", "Wrong opcode"); /* no break: continues into COP_INSERT if impossible() returns */
-+ case COP_INSERT:
-+ case COP_PASTE:
-+ return space_needed_for_op(node, op) - znode_free_space(node);
-+ case COP_EXTENT:
-+ /* when inserting extent shift data around until insertion
-+ point is utmost in the node. */
-+ if (coord_wrt(op->u.insert.d->coord) == COORD_INSIDE)
-+ return +1; /* report a shortage so that shifting continues */
-+ else
-+ return -1; /* report no shortage */
-+ }
-+}
-+
-+/* helper function: update node pointer in operation after insertion
-+ point was probably shifted into @target. Returns the node the insertion coord is in. */
-+static znode *sync_op(carry_op * op, carry_node * target)
-+{
-+ znode *insertion_node;
-+
-+ /* reget node from coord: shift might move insertion coord to
-+ the neighbor */
-+ insertion_node = op->u.insert.d->coord->node;
-+ /* if insertion point was actually moved into new node,
-+ update carry node pointer in operation. */
-+ if (insertion_node != reiser4_carry_real(op->node)) {
-+ op->node = target;
-+ assert("nikita-2540",
-+ reiser4_carry_real(target) == insertion_node);
-+ }
-+ assert("nikita-2541",
-+ reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
-+ return insertion_node;
-+}
-+
-+/*
-+ * complete make_space() call: update tracked lock handle if necessary. See
-+ * comments for fs/reiser4/carry.h:carry_track_type. Returns 0, or the result
-+ * of re-locking the tracked handle on the insertion node. */
-+static int
-+make_space_tail(carry_op * op, carry_level * doing, znode * orig_node)
-+{
-+ int result;
-+ carry_track_type tracking;
-+ znode *node;
-+
-+ tracking = doing->track_type;
-+ node = op->u.insert.d->coord->node;
-+
-+ if (tracking == CARRY_TRACK_NODE ||
-+ (tracking == CARRY_TRACK_CHANGE && node != orig_node)) {
-+ /* CARRY_TRACK_NODE, or CARRY_TRACK_CHANGE and the insertion node differs
-+ from the original: re-point lock handle supplied by caller at @node. */
-+ assert("nikita-1417", doing->tracked != NULL);
-+ done_lh(doing->tracked);
-+ init_lh(doing->tracked);
-+ result = longterm_lock_znode(doing->tracked, node,
-+ ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_HIPRI);
-+ } else
-+ result = 0;
-+ return result;
-+}
-+
-+/* This is insertion policy function. It shifts data to the left and right
-+ neighbors of insertion coord and allocates new nodes until there is enough
-+ free space to complete @op.
-+
-+ Returns 0, -E_REPEAT (restart), -E_NODE_FULL, or another error code.
-+
-+ Assumes that the node format favors insertions at the right end of the node
-+ as node40 does.
-+
-+ See carry_flow() on detail about flow insertion
-+*/
-+static int make_space(carry_op * op /* carry operation, insert or paste */ ,
-+ carry_level * doing /* current carry queue */ ,
-+ carry_level * todo /* carry queue on the parent level */ )
-+{
-+ znode *node;
-+ int result;
-+ int not_enough_space;
-+ int blk_alloc;
-+ znode *orig_node;
-+ __u32 flags;
-+
-+ coord_t *coord;
-+
-+ assert("nikita-890", op != NULL);
-+ assert("nikita-891", todo != NULL);
-+ assert("nikita-892",
-+ op->op == COP_INSERT ||
-+ op->op == COP_PASTE || op->op == COP_EXTENT);
-+ assert("nikita-1607",
-+ reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
-+
-+ flags = op->u.insert.flags;
-+
-+ /* NOTE check that new node can only be allocated after checking left
-+ * and right neighbors. This is necessary for proper work of
-+ * find_{left,right}_neighbor(). */
-+ assert("nikita-3410", ergo(flags & COPI_DONT_ALLOCATE,
-+ flags & COPI_DONT_SHIFT_LEFT));
-+ assert("nikita-3411", ergo(flags & COPI_DONT_ALLOCATE,
-+ flags & COPI_DONT_SHIFT_RIGHT));
-+
-+ coord = op->u.insert.d->coord;
-+ orig_node = node = coord->node;
-+
-+ assert("nikita-908", node != NULL);
-+ assert("nikita-909", node_plugin_by_node(node) != NULL);
-+
-+ result = 0;
-+ /* If there is not enough space in a node, try to shift something to
-+ the left neighbor. This is a bit tricky, as locking to the left is
-+ low priority. This is handled by restart logic in carry().
-+ */
-+ not_enough_space = free_space_shortage(node, op);
-+ if (not_enough_space <= 0)
-+ /* it is possible that carry was called when there actually
-+ was enough space in the node. For example, when inserting
-+ leftmost item so that delimiting keys have to be updated.
-+ */
-+ return make_space_tail(op, doing, orig_node);
-+ if (!(flags & COPI_DONT_SHIFT_LEFT)) {
-+ carry_node *left;
-+ /* locate the left neighbor; it may be absent (NULL), or
-+ the lookup may ask for a restart of this level */
-+ left = find_left_neighbor(op, doing);
-+ if (unlikely(IS_ERR(left))) {
-+ if (PTR_ERR(left) == -E_REPEAT)
-+ return -E_REPEAT;
-+ else {
-+ /* some error other than restart request
-+ occurred. This shouldn't happen. Issue a
-+ warning and continue as if left neighbor
-+ weren't existing.
-+ */
-+ warning("nikita-924",
-+ "Error accessing left neighbor: %li",
-+ PTR_ERR(left));
-+ }
-+ } else if (left != NULL) {
-+
-+ /* shift everything possible on the left of and
-+ including insertion coord into the left neighbor */
-+ result = carry_shift_data(LEFT_SIDE, coord,
-+ reiser4_carry_real(left),
-+ doing, todo,
-+ flags & COPI_GO_LEFT);
-+
-+ /* reget node from coord: shift_left() might move
-+ insertion coord to the left neighbor */
-+ node = sync_op(op, left);
-+
-+ not_enough_space = free_space_shortage(node, op);
-+ /* There is not enough free space in @node, but
-+ may be, there is enough free space in
-+ @left. Various balancing decisions are valid here.
-+ The same for the shifting to the right.
-+ */
-+ }
-+ }
-+ /* If there still is not enough space, shift to the right */
-+ if (not_enough_space > 0 && !(flags & COPI_DONT_SHIFT_RIGHT)) {
-+ carry_node *right;
-+
-+ right = find_right_neighbor(op, doing);
-+ if (IS_ERR(right)) {
-+ warning("nikita-1065",
-+ "Error accessing right neighbor: %li",
-+ PTR_ERR(right));
-+ } else if (right != NULL) {
-+ /* node containing insertion point, and its right
-+ neighbor node are write locked by now.
-+
-+ shift everything possible on the right of but
-+ excluding insertion coord into the right neighbor
-+ */
-+ result = carry_shift_data(RIGHT_SIDE, coord,
-+ reiser4_carry_real(right),
-+ doing, todo,
-+ flags & COPI_GO_RIGHT);
-+ /* reget node from coord: shift_right() might move
-+ insertion coord to the right neighbor */
-+ node = sync_op(op, right);
-+ not_enough_space = free_space_shortage(node, op);
-+ }
-+ }
-+ /* If there is still not enough space, allocate new node(s).
-+
-+ We try to allocate new blocks if COPI_DONT_ALLOCATE is not set in
-+ the carry operation flags (currently this is needed during flush
-+ only).
-+ */
-+ for (blk_alloc = 0;
-+ not_enough_space > 0 && result == 0 && blk_alloc < 2 &&
-+ !(flags & COPI_DONT_ALLOCATE); ++blk_alloc) {
-+ carry_node *fresh; /* new node we are allocating */
-+ coord_t coord_shadow; /* remembered insertion point before
-+ * shifting data into new node */
-+ carry_node *node_shadow; /* remembered insertion node before
-+ * shifting */
-+ unsigned int gointo; /* whether insertion point should move
-+ * into newly allocated node */
-+
-+ /* allocate new node on the right of @node. Znode and disk
-+ fake block number for new node are allocated.
-+
-+ add_new_znode() posts carry operation COP_INSERT with
-+ COPT_CHILD option to the parent level to add
-+ pointer to newly created node to its parent.
-+
-+ Subtle point: if several new nodes are required to complete
-+ insertion operation at this level, they will be inserted
-+ into their parents in the order of creation, which means
-+ that @node will be valid "cookie" at the time of insertion.
-+
-+ */
-+ fresh = add_new_znode(node, op->node, doing, todo);
-+ if (IS_ERR(fresh))
-+ return PTR_ERR(fresh);
-+
-+ /* Try to shift into new node. */
-+ result = lock_carry_node(doing, fresh);
-+ zput(reiser4_carry_real(fresh));
-+ if (result != 0) {
-+ warning("nikita-947",
-+ "Cannot lock new node: %i", result);
-+ return result;
-+ }
-+
-+ /* both nodes are write locked by now.
-+
-+ shift everything possible on the right of and
-+ including insertion coord into the right neighbor.
-+ */
-+ coord_dup(&coord_shadow, op->u.insert.d->coord);
-+ node_shadow = op->node;
-+ /* move insertion point into newly created node if:
-+
-+ . insertion point is rightmost in the source node, or
-+ . this is not the first node we are allocating in a row.
-+ */
-+ gointo =
-+ (blk_alloc > 0) ||
-+ coord_is_after_rightmost(op->u.insert.d->coord);
-+
-+ if (gointo &&
-+ op->op == COP_PASTE &&
-+ coord_is_existing_item(op->u.insert.d->coord) &&
-+ is_solid_item((item_plugin_by_coord(op->u.insert.d->coord)))) {
-+ /* paste into solid (atomic) item, which can contain
-+ only one unit, so we need to shift it right, where
-+ insertion point supposed to be */
-+
-+ assert("edward-1444", op->u.insert.d->data->iplug ==
-+ item_plugin_by_id(STATIC_STAT_DATA_ID));
-+ assert("edward-1445",
-+ op->u.insert.d->data->length >
-+ node_plugin_by_node(coord->node)->free_space
-+ (coord->node));
-+
-+ op->u.insert.d->coord->between = BEFORE_UNIT;
-+ }
-+
-+ result = carry_shift_data(RIGHT_SIDE, coord,
-+ reiser4_carry_real(fresh),
-+ doing, todo, gointo);
-+ /* if insertion point was actually moved into new node,
-+ update carry node pointer in operation. */
-+ node = sync_op(op, fresh);
-+ not_enough_space = free_space_shortage(node, op);
-+ if ((not_enough_space > 0) && (node != coord_shadow.node)) {
-+ /* there is not enough free space in new node. Shift
-+ insertion point back to the @node_shadow so that
-+ next new node would be inserted between
-+ @node_shadow and @fresh.
-+ */
-+ coord_normalize(&coord_shadow);
-+ coord_dup(coord, &coord_shadow);
-+ node = coord->node;
-+ op->node = node_shadow;
-+ if (1 || (flags & COPI_STEP_BACK)) {
-+ /* "1 ||" makes this unconditional. Maybe there
-+ is enough space in the source node (i.e., node
-+ data are moved from) now.
-+ */
-+ not_enough_space =
-+ free_space_shortage(node, op);
-+ }
-+ }
-+ }
-+ if (not_enough_space > 0) {
-+ if (!(flags & COPI_DONT_ALLOCATE))
-+ warning("nikita-948", "Cannot insert new item");
-+ result = -E_NODE_FULL;
-+ }
-+ assert("nikita-1622", ergo(result == 0,
-+ reiser4_carry_real(op->node) == coord->node));
-+ assert("nikita-2616", coord == op->u.insert.d->coord);
-+ if (result == 0)
-+ result = make_space_tail(op, doing, orig_node);
-+ return result;
-+}
-+
-+/* insert_paste_common() - common part of insert and paste operations
-+
-+ This function performs common part of COP_INSERT and COP_PASTE.
-+
-+ There are three ways in which insertion/paste can be requested:
-+
-+ . by directly supplying reiser4_item_data. In this case, op ->
-+ u.insert.type is set to COPT_ITEM_DATA.
-+
-+ . by supplying child, pointer to which is to be inserted into parent. In this
-+ case op -> u.insert.type == COPT_CHILD.
-+
-+ . by supplying key of new item/unit. This is currently only used during
-+ extent insertion
-+
-+ This is required, because when new node is allocated we don't know at what
-+ position pointer to it is to be stored in the parent. Actually, we don't
-+ even know what its parent will be, because parent can be re-balanced
-+ concurrently and new node re-parented, and because parent can be full and
-+ pointer to the new node will go into some other node.
-+
-+ insert_paste_common() resolves pointer to child node into position in the
-+ parent by calling find_new_child_coord(), that fills
-+ reiser4_item_data. After this, insertion/paste proceeds uniformly.
-+
-+ Another complication is with finding free space during pasting. It may
-+ happen that while shifting items to the neighbors and newly allocated
-+ nodes, insertion coord can no longer be in the item we wanted to paste
-+ into. At this point, paste becomes (morphs) into insert. Moreover free
-+ space analysis has to be repeated, because amount of space required for
-+ insertion is different from that of paste (item header overhead, etc).
-+
-+ This function "unifies" different insertion modes (by resolving child
-+ pointer or key into insertion coord), and then calls make_space() to free
-+ enough space in the node by shifting data to the left and right and by
-+ allocating new nodes if necessary. Carry operation knows amount of space
-+ required for its completion. After enough free space is obtained, caller of
-+ this function (carry_{insert,paste,etc.}) performs actual insertion/paste
-+ by calling item plugin method.
-+
-+*/
-+static int insert_paste_common(carry_op * op /* carry operation being
-+ * performed */ ,
-+ carry_level * doing /* current carry level */ ,
-+ carry_level * todo /* next carry level */ ,
-+ carry_insert_data * cdata /* pointer to
-+ * cdata */ ,
-+ coord_t * coord /* insertion/paste coord */ ,
-+ reiser4_item_data * data /* data to be
-+ * inserted/pasted */ )
-+{
-+ assert("nikita-981", op != NULL);
-+ assert("nikita-980", todo != NULL);
-+ assert("nikita-979", (op->op == COP_INSERT) || (op->op == COP_PASTE)
-+ || (op->op == COP_EXTENT));
-+
-+ if (op->u.insert.type == COPT_PASTE_RESTARTED) {
-+ /* nothing to do. Fall through to make_space(). */
-+ ;
-+ } else if (op->u.insert.type == COPT_KEY) {
-+ node_search_result intra_node;
-+ znode *node;
-+ /* Problem with doing batching at the lowest level, is that
-+ operations here are given by coords where modification is
-+ to be performed, and one modification can invalidate coords
-+ of all following operations.
-+
-+ So, we are implementing yet another type for operation that
-+ will use (the only) "locator" stable across shifting of
-+ data between nodes, etc.: key (COPT_KEY).
-+
-+ This clause resolves key to the coord in the node.
-+
-+ But node can change also. Probably some pieces have to be
-+ added to the lock_carry_node(), to lock node by its key.
-+
-+ */
-+ /* NOTE-NIKITA Lookup bias is fixed to FIND_EXACT. Complain
-+ if you need something else. */
-+ op->u.insert.d->coord = coord;
-+ node = reiser4_carry_real(op->node);
-+ intra_node = node_plugin_by_node(node)->lookup
-+ (node, op->u.insert.d->key, FIND_EXACT,
-+ op->u.insert.d->coord);
-+ if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) {
-+ warning("nikita-1715", "Intra node lookup failure: %i",
-+ intra_node);
-+ return intra_node;
-+ }
-+ } else if (op->u.insert.type == COPT_CHILD) {
-+ /* if we are asked to insert pointer to the child into
-+ internal node, first convert pointer to the child into
-+ coord within parent node.
-+ */
-+ znode *child;
-+ int result;
-+
-+ op->u.insert.d = cdata;
-+ op->u.insert.d->coord = coord;
-+ op->u.insert.d->data = data;
-+ op->u.insert.d->coord->node = reiser4_carry_real(op->node);
-+ result = find_new_child_coord(op);
-+ child = reiser4_carry_real(op->u.insert.child);
-+ if (result != NS_NOT_FOUND) {
-+ warning("nikita-993",
-+ "Cannot find a place for child pointer: %i",
-+ result);
-+ return result;
-+ }
-+ /* This only happens when we did multiple insertions at
-+ the previous level, trying to insert single item and
-+ it so happened, that insertion of pointers to all new
-+ nodes before this one already caused parent node to
-+ split (may be several times).
-+
-+ I am going to come up with better solution.
-+
-+ You are not expected to understand this.
-+ -- v6root/usr/sys/ken/slp.c
-+
-+ Basically, what happens here is the following: carry came
-+ to the parent level and is about to insert internal item
-+ pointing to the child node that it just inserted in the
-+ level below. Position where internal item is to be inserted
-+ was found by find_new_child_coord() above, but node of the
-+ current carry operation (that is, parent node of child
-+ inserted on the previous level), was determined earlier in
-+ the lock_carry_level/lock_carry_node. It could so happen
-+ that other carry operations already performed on the parent
-+ level already split parent node, so that insertion point
-+ moved into another node. Handle this by creating new carry
-+ node for insertion point if necessary.
-+ */
-+ if (reiser4_carry_real(op->node) !=
-+ op->u.insert.d->coord->node) {
-+ pool_ordering direction;
-+ znode *z1;
-+ znode *z2;
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ /*
-+ * determine in what direction insertion point
-+ * moved. Do this by comparing delimiting keys.
-+ */
-+ z1 = op->u.insert.d->coord->node;
-+ z2 = reiser4_carry_real(op->node);
-+ if (keyle(leftmost_key_in_node(z1, &k1),
-+ leftmost_key_in_node(z2, &k2)))
-+ /* insertion point moved to the left */
-+ direction = POOLO_BEFORE;
-+ else
-+ /* insertion point moved to the right */
-+ direction = POOLO_AFTER;
-+
-+ op->node = reiser4_add_carry_skip(doing,
-+ direction, op->node);
-+ if (IS_ERR(op->node))
-+ return PTR_ERR(op->node);
-+ op->node->node = op->u.insert.d->coord->node;
-+ op->node->free = 1;
-+ result = lock_carry_node(doing, op->node);
-+ if (result != 0)
-+ return result;
-+ }
-+
-+ /*
-+ * set up key of an item being inserted: we are inserting
-+ * internal item and its key is (by the very definition of
-+ * search tree) is leftmost key in the child node.
-+ */
-+ write_lock_dk(znode_get_tree(child));
-+ op->u.insert.d->key = leftmost_key_in_node(child,
-+ znode_get_ld_key(child));
-+ write_unlock_dk(znode_get_tree(child));
-+ op->u.insert.d->data->arg = op->u.insert.brother;
-+ } else {
-+ assert("vs-243", op->u.insert.d->coord != NULL);
-+ op->u.insert.d->coord->node = reiser4_carry_real(op->node);
-+ }
-+
-+ /* find free space. */
-+ return make_space(op, doing, todo);
-+}
-+
-+/* handle carry COP_INSERT operation.
-+
-+ Insert new item into node. New item can be given in one of two ways:
-+
-+ - by passing &tree_coord and &reiser4_item_data as part of @op. This is
-+ only applicable at the leaf/twig level.
-+
-+ - by passing a child node pointer to which is to be inserted by this
-+ operation.
-+
-+*/
-+static int carry_insert(carry_op * op /* operation to perform */ ,
-+ carry_level * doing /* queue of operations @op
-+ * is part of */ ,
-+ carry_level * todo /* queue where new operations
-+ * are accumulated */ )
-+{
-+ znode *node;
-+ carry_insert_data cdata;
-+ coord_t coord;
-+ reiser4_item_data data;
-+ carry_plugin_info info;
-+ int result;
-+
-+ assert("nikita-1036", op != NULL);
-+ assert("nikita-1037", todo != NULL);
-+ assert("nikita-1038", op->op == COP_INSERT);
-+
-+ coord_init_zero(&coord);
-+
-+ /* perform common functionality of insert and paste. */
-+ result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
-+ if (result != 0)
-+ return result;
-+
-+ node = op->u.insert.d->coord->node;
-+ assert("nikita-1039", node != NULL);
-+ assert("nikita-1040", node_plugin_by_node(node) != NULL);
-+
-+ assert("nikita-949",
-+ space_needed_for_op(node, op) <= znode_free_space(node));
-+
-+ /* ask node layout to create new item. */
-+ info.doing = doing;
-+ info.todo = todo;
-+ result = node_plugin_by_node(node)->create_item
-+ (op->u.insert.d->coord, op->u.insert.d->key, op->u.insert.d->data,
-+ &info);
-+ doing->restartable = 0;
-+ znode_make_dirty(node);
-+
-+ return result;
-+}
-+
-+/*
-+ * Flow insertion code. COP_INSERT_FLOW is special tree operation that is
-+ * supplied with a "flow" (that is, a stream of data) and inserts it into tree
-+ * by slicing into multiple items.
-+ */
-+
-+#define flow_insert_point(op) ( ( op ) -> u.insert_flow.insert_point )
-+#define flow_insert_flow(op) ( ( op ) -> u.insert_flow.flow )
-+#define flow_insert_data(op) ( ( op ) -> u.insert_flow.data )
-+
-+static size_t item_data_overhead(carry_op * op)
-+{
-+ if (flow_insert_data(op)->iplug->b.estimate == NULL)
-+ return 0;
-+ return (flow_insert_data(op)->iplug->b.
-+ estimate(NULL /* estimate insertion */ , flow_insert_data(op)) -
-+ flow_insert_data(op)->length);
-+}
-+
-+/* FIXME-VS: this is called several times during one make_flow_for_insertion
-+ and it will always return the same result. Some optimization could be made
-+ by calculating this value once at the beginning and passing it around. That
-+ would reduce some flexibility in future changes
-+*/
-+static int can_paste(coord_t *, const reiser4_key *, const reiser4_item_data *);
-+static size_t flow_insertion_overhead(carry_op * op)
-+{
-+ znode *node;
-+ size_t insertion_overhead;
-+
-+ node = flow_insert_point(op)->node;
-+ insertion_overhead = 0;
-+ if (node->nplug->item_overhead &&
-+ !can_paste(flow_insert_point(op), &flow_insert_flow(op)->key,
-+ flow_insert_data(op)))
-+ insertion_overhead =
-+ node->nplug->item_overhead(node, NULL) +
-+ item_data_overhead(op);
-+ return insertion_overhead;
-+}
-+
-+/* how many bytes of flow does fit to the node */
-+static int what_can_fit_into_node(carry_op * op)
-+{
-+ size_t free, overhead;
-+
-+ overhead = flow_insertion_overhead(op);
-+ free = znode_free_space(flow_insert_point(op)->node);
-+ if (free <= overhead)
-+ return 0;
-+ free -= overhead;
-+ /* FIXME: flow->length is loff_t only to not get overflowed in case of expandign truncate */
-+ if (free < op->u.insert_flow.flow->length)
-+ return free;
-+ return (int)op->u.insert_flow.flow->length;
-+}
-+
-+/* in make_space_for_flow_insertion we need to check either whether whole flow
-+ fits into a node or whether minimal fraction of flow fits into a node */
-+static int enough_space_for_whole_flow(carry_op * op)
-+{
-+ return (unsigned)what_can_fit_into_node(op) ==
-+ op->u.insert_flow.flow->length;
-+}
-+
-+#define MIN_FLOW_FRACTION 1
-+static int enough_space_for_min_flow_fraction(carry_op * op)
-+{
-+ assert("vs-902", coord_is_after_rightmost(flow_insert_point(op)));
-+
-+ return what_can_fit_into_node(op) >= MIN_FLOW_FRACTION;
-+}
-+
-+/* this returns 0 if left neighbor was obtained successfully and everything
-+ upto insertion point including it were shifted and left neighbor still has
-+ some free space to put minimal fraction of flow into it */
-+static int
-+make_space_by_shift_left(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+ carry_node *left;
-+ znode *orig;
-+
-+ left = find_left_neighbor(op, doing);
-+ if (unlikely(IS_ERR(left))) {
-+ warning("vs-899",
-+ "make_space_by_shift_left: "
-+ "error accessing left neighbor: %li", PTR_ERR(left));
-+ return 1;
-+ }
-+ if (left == NULL)
-+ /* left neighbor either does not exist or is unformatted
-+ node */
-+ return 1;
-+
-+ orig = flow_insert_point(op)->node;
-+ /* try to shift content of node @orig from its head upto insert point
-+ including insertion point into the left neighbor */
-+ carry_shift_data(LEFT_SIDE, flow_insert_point(op),
-+ reiser4_carry_real(left), doing, todo,
-+ 1 /* including insert point */);
-+ if (reiser4_carry_real(left) != flow_insert_point(op)->node) {
-+ /* insertion point did not move */
-+ return 1;
-+ }
-+
-+ /* insertion point is set after last item in the node */
-+ assert("vs-900", coord_is_after_rightmost(flow_insert_point(op)));
-+
-+ if (!enough_space_for_min_flow_fraction(op)) {
-+ /* insertion point node does not have enough free space to put
-+ even minimal portion of flow into it, therefore, move
-+ insertion point back to orig node (before first item) */
-+ coord_init_before_first_item(flow_insert_point(op), orig);
-+ return 1;
-+ }
-+
-+ /* part of flow is to be written to the end of node */
-+ op->node = left;
-+ return 0;
-+}
-+
-+/* this returns 0 if right neighbor was obtained successfully and everything to
-+ the right of insertion point was shifted to it and node got enough free
-+ space to put minimal fraction of flow into it */
-+static int
-+make_space_by_shift_right(carry_op * op, carry_level * doing,
-+ carry_level * todo)
-+{
-+ carry_node *right;
-+
-+ right = find_right_neighbor(op, doing);
-+ if (unlikely(IS_ERR(right))) {
-+ warning("nikita-1065", "shift_right_excluding_insert_point: "
-+ "error accessing right neighbor: %li", PTR_ERR(right));
-+ return 1;
-+ }
-+ if (right) {
-+ /* shift everything possible on the right of but excluding
-+ insertion coord into the right neighbor */
-+ carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
-+ reiser4_carry_real(right), doing, todo,
-+ 0 /* not including insert point */);
-+ } else {
-+ /* right neighbor either does not exist or is unformatted
-+ node */
-+ ;
-+ }
-+ if (coord_is_after_rightmost(flow_insert_point(op))) {
-+ if (enough_space_for_min_flow_fraction(op)) {
-+ /* part of flow is to be written to the end of node */
-+ return 0;
-+ }
-+ }
-+
-+ /* new node is to be added if insert point node did not get enough
-+ space for whole flow */
-+ return 1;
-+}
-+
-+/* this returns 0 when insert coord is set at the node end and fraction of flow
-+ fits into that node */
-+static int
-+make_space_by_new_nodes(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+ int result;
-+ znode *node;
-+ carry_node *new;
-+
-+ node = flow_insert_point(op)->node;
-+
-+ if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
-+ return RETERR(-E_NODE_FULL);
-+ /* add new node after insert point node */
-+ new = add_new_znode(node, op->node, doing, todo);
-+ if (unlikely(IS_ERR(new))) {
-+ return PTR_ERR(new);
-+ }
-+ result = lock_carry_node(doing, new);
-+ zput(reiser4_carry_real(new));
-+ if (unlikely(result)) {
-+ return result;
-+ }
-+ op->u.insert_flow.new_nodes++;
-+ if (!coord_is_after_rightmost(flow_insert_point(op))) {
-+ carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
-+ reiser4_carry_real(new), doing, todo,
-+ 0 /* not including insert point */);
-+ assert("vs-901",
-+ coord_is_after_rightmost(flow_insert_point(op)));
-+
-+ if (enough_space_for_min_flow_fraction(op)) {
-+ return 0;
-+ }
-+ if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
-+ return RETERR(-E_NODE_FULL);
-+
-+ /* add one more new node */
-+ new = add_new_znode(node, op->node, doing, todo);
-+ if (unlikely(IS_ERR(new))) {
-+ return PTR_ERR(new);
-+ }
-+ result = lock_carry_node(doing, new);
-+ zput(reiser4_carry_real(new));
-+ if (unlikely(result)) {
-+ return result;
-+ }
-+ op->u.insert_flow.new_nodes++;
-+ }
-+
-+ /* move insertion point to new node */
-+ coord_init_before_first_item(flow_insert_point(op),
-+ reiser4_carry_real(new));
-+ op->node = new;
-+ return 0;
-+}
-+
-+static int
-+make_space_for_flow_insertion(carry_op * op, carry_level * doing,
-+ carry_level * todo)
-+{
-+ __u32 flags = op->u.insert_flow.flags;
-+
-+ if (enough_space_for_whole_flow(op)) {
-+ /* whole flow fits into insert point node */
-+ return 0;
-+ }
-+
-+ if (!(flags & COPI_DONT_SHIFT_LEFT)
-+ && (make_space_by_shift_left(op, doing, todo) == 0)) {
-+ /* insert point is shifted to left neighbor of original insert
-+ point node and is set after last unit in that node. It has
-+ enough space to fit at least minimal fraction of flow. */
-+ return 0;
-+ }
-+
-+ if (enough_space_for_whole_flow(op)) {
-+ /* whole flow fits into insert point node */
-+ return 0;
-+ }
-+
-+ if (!(flags & COPI_DONT_SHIFT_RIGHT)
-+ && (make_space_by_shift_right(op, doing, todo) == 0)) {
-+ /* insert point is still set to the same node, but there is
-+ nothing to the right of insert point. */
-+ return 0;
-+ }
-+
-+ if (enough_space_for_whole_flow(op)) {
-+ /* whole flow fits into insert point node */
-+ return 0;
-+ }
-+
-+ return make_space_by_new_nodes(op, doing, todo);
-+}
-+
-+/* implements COP_INSERT_FLOW operation */
-+static int
-+carry_insert_flow(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+ int result;
-+ flow_t *f;
-+ coord_t *insert_point;
-+ node_plugin *nplug;
-+ carry_plugin_info info;
-+ znode *orig_node;
-+ lock_handle *orig_lh;
-+
-+ f = op->u.insert_flow.flow;
-+ result = 0;
-+
-+ /* carry system needs this to work */
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ orig_node = flow_insert_point(op)->node;
-+ orig_lh = doing->tracked;
-+
-+ while (f->length) {
-+ result = make_space_for_flow_insertion(op, doing, todo);
-+ if (result)
-+ break;
-+
-+ insert_point = flow_insert_point(op);
-+ nplug = node_plugin_by_node(insert_point->node);
-+
-+ /* compose item data for insertion/pasting */
-+ flow_insert_data(op)->data = f->data;
-+ flow_insert_data(op)->length = what_can_fit_into_node(op);
-+
-+ if (can_paste(insert_point, &f->key, flow_insert_data(op))) {
-+ /* insert point is set to item of file we are writing to and we have to append to it */
-+ assert("vs-903", insert_point->between == AFTER_UNIT);
-+ nplug->change_item_size(insert_point,
-+ flow_insert_data(op)->length);
-+ flow_insert_data(op)->iplug->b.paste(insert_point,
-+ flow_insert_data
-+ (op), &info);
-+ } else {
-+ /* new item must be inserted */
-+ pos_in_node_t new_pos;
-+ flow_insert_data(op)->length += item_data_overhead(op);
-+
-+ /* FIXME-VS: this is because node40_create_item changes
-+ insert_point for obscure reasons */
-+ switch (insert_point->between) {
-+ case AFTER_ITEM:
-+ new_pos = insert_point->item_pos + 1;
-+ break;
-+ case EMPTY_NODE:
-+ new_pos = 0;
-+ break;
-+ case BEFORE_ITEM:
-+ assert("vs-905", insert_point->item_pos == 0);
-+ new_pos = 0;
-+ break;
-+ default:
-+ impossible("vs-906",
-+ "carry_insert_flow: invalid coord");
-+ new_pos = 0;
-+ break;
-+ }
-+
-+ nplug->create_item(insert_point, &f->key,
-+ flow_insert_data(op), &info);
-+ coord_set_item_pos(insert_point, new_pos);
-+ }
-+ coord_init_after_item_end(insert_point);
-+ doing->restartable = 0;
-+ znode_make_dirty(insert_point->node);
-+
-+ move_flow_forward(f, (unsigned)flow_insert_data(op)->length);
-+ }
-+
-+ if (orig_node != flow_insert_point(op)->node) {
-+ /* move lock to new insert point */
-+ done_lh(orig_lh);
-+ init_lh(orig_lh);
-+ result =
-+ longterm_lock_znode(orig_lh, flow_insert_point(op)->node,
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
-+ }
-+
-+ return result;
-+}
-+
-+/* implements COP_DELETE operation
-+
-+ Remove pointer to @op -> u.delete.child from it's parent.
-+
-+ This function also handles killing of a tree root is last pointer from it
-+ was removed. This is complicated by our handling of "twig" level: root on
-+ twig level is never killed.
-+
-+*/
-+static int carry_delete(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing UNUSED_ARG /* current carry
-+ * level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ int result;
-+ coord_t coord;
-+ coord_t coord2;
-+ znode *parent;
-+ znode *child;
-+ carry_plugin_info info;
-+ reiser4_tree *tree;
-+
-+ /*
-+ * This operation is called to delete internal item pointing to the
-+ * child node that was removed by carry from the tree on the previous
-+ * tree level.
-+ */
-+
-+ assert("nikita-893", op != NULL);
-+ assert("nikita-894", todo != NULL);
-+ assert("nikita-895", op->op == COP_DELETE);
-+
-+ coord_init_zero(&coord);
-+ coord_init_zero(&coord2);
-+
-+ parent = reiser4_carry_real(op->node);
-+ child = op->u.delete.child ?
-+ reiser4_carry_real(op->u.delete.child) : op->node->node;
-+ tree = znode_get_tree(child);
-+ read_lock_tree(tree);
-+
-+ /*
-+ * @parent was determined when carry entered parent level
-+ * (lock_carry_level/lock_carry_node). Since then, actual parent of
-+ * @child node could change due to other carry operations performed on
-+ * the parent level. Check for this.
-+ */
-+
-+ if (znode_parent(child) != parent) {
-+ /* NOTE-NIKITA add stat counter for this. */
-+ parent = znode_parent(child);
-+ assert("nikita-2581", find_carry_node(doing, parent));
-+ }
-+ read_unlock_tree(tree);
-+
-+ assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL);
-+
-+ /* Twig level horrors: tree should be of height at least 2. So, last
-+ pointer from the root at twig level is preserved even if child is
-+ empty. This is ugly, but so it was architectured.
-+ */
-+
-+ if (znode_is_root(parent) &&
-+ znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT &&
-+ node_num_items(parent) == 1) {
-+ /* Delimiting key manipulations. */
-+ write_lock_dk(tree);
-+ znode_set_ld_key(child, znode_set_ld_key(parent, reiser4_min_key()));
-+ znode_set_rd_key(child, znode_set_rd_key(parent, reiser4_max_key()));
-+ ZF_SET(child, JNODE_DKSET);
-+ write_unlock_dk(tree);
-+
-+ /* @child escaped imminent death! */
-+ ZF_CLR(child, JNODE_HEARD_BANSHEE);
-+ return 0;
-+ }
-+
-+ /* convert child pointer to the coord_t */
-+ result = find_child_ptr(parent, child, &coord);
-+ if (result != NS_FOUND) {
-+ warning("nikita-994", "Cannot find child pointer: %i", result);
-+ print_coord_content("coord", &coord);
-+ return result;
-+ }
-+
-+ coord_dup(&coord2, &coord);
-+ info.doing = doing;
-+ info.todo = todo;
-+ {
-+ /*
-+ * Actually kill internal item: prepare structure with
-+ * arguments for ->cut_and_kill() method...
-+ */
-+
-+ struct carry_kill_data kdata;
-+ kdata.params.from = &coord;
-+ kdata.params.to = &coord2;
-+ kdata.params.from_key = NULL;
-+ kdata.params.to_key = NULL;
-+ kdata.params.smallest_removed = NULL;
-+ kdata.params.truncate = 1;
-+ kdata.flags = op->u.delete.flags;
-+ kdata.inode = NULL;
-+ kdata.left = NULL;
-+ kdata.right = NULL;
-+ kdata.buf = NULL;
-+ /* ... and call it. */
-+ result = node_plugin_by_node(parent)->cut_and_kill(&kdata,
-+ &info);
-+ }
-+ doing->restartable = 0;
-+
-+ /* check whether root should be killed violently */
-+ if (znode_is_root(parent) &&
-+ /* don't kill roots at and lower than twig level */
-+ znode_get_level(parent) > REISER4_MIN_TREE_HEIGHT &&
-+ node_num_items(parent) == 1) {
-+ result = reiser4_kill_tree_root(coord.node);
-+ }
-+
-+ return result < 0 ? : 0;
-+}
-+
-+/* implements COP_CUT opration
-+
-+ Cuts part or whole content of node.
-+
-+*/
-+static int carry_cut(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing /* current carry level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ int result;
-+ carry_plugin_info info;
-+ node_plugin *nplug;
-+
-+ assert("nikita-896", op != NULL);
-+ assert("nikita-897", todo != NULL);
-+ assert("nikita-898", op->op == COP_CUT);
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ nplug = node_plugin_by_node(reiser4_carry_real(op->node));
-+ if (op->u.cut_or_kill.is_cut)
-+ result = nplug->cut(op->u.cut_or_kill.u.cut, &info);
-+ else
-+ result = nplug->cut_and_kill(op->u.cut_or_kill.u.kill, &info);
-+
-+ doing->restartable = 0;
-+ return result < 0 ? : 0;
-+}
-+
-+/* helper function for carry_paste(): returns true if @op can be continued as
-+ paste */
-+static int
-+can_paste(coord_t * icoord, const reiser4_key * key,
-+ const reiser4_item_data * data)
-+{
-+ coord_t circa;
-+ item_plugin *new_iplug;
-+ item_plugin *old_iplug;
-+ int result = 0; /* to keep gcc shut */
-+
-+ assert("", icoord->between != AT_UNIT);
-+
-+ /* obviously, one cannot paste when node is empty---there is nothing
-+ to paste into. */
-+ if (node_is_empty(icoord->node))
-+ return 0;
-+ /* if insertion point is at the middle of the item, then paste */
-+ if (!coord_is_between_items(icoord))
-+ return 1;
-+ coord_dup(&circa, icoord);
-+ circa.between = AT_UNIT;
-+
-+ old_iplug = item_plugin_by_coord(&circa);
-+ new_iplug = data->iplug;
-+
-+ /* check whether we can paste to the item @icoord is "at" when we
-+ ignore ->between field */
-+ if (old_iplug == new_iplug && item_can_contain_key(&circa, key, data)) {
-+ result = 1;
-+ } else if (icoord->between == BEFORE_UNIT
-+ || icoord->between == BEFORE_ITEM) {
-+ /* otherwise, try to glue to the item at the left, if any */
-+ coord_dup(&circa, icoord);
-+ if (coord_set_to_left(&circa)) {
-+ result = 0;
-+ coord_init_before_item(icoord);
-+ } else {
-+ old_iplug = item_plugin_by_coord(&circa);
-+ result = (old_iplug == new_iplug)
-+ && item_can_contain_key(icoord, key, data);
-+ if (result) {
-+ coord_dup(icoord, &circa);
-+ icoord->between = AFTER_UNIT;
-+ }
-+ }
-+ } else if (icoord->between == AFTER_UNIT
-+ || icoord->between == AFTER_ITEM) {
-+ coord_dup(&circa, icoord);
-+ /* otherwise, try to glue to the item at the right, if any */
-+ if (coord_set_to_right(&circa)) {
-+ result = 0;
-+ coord_init_after_item(icoord);
-+ } else {
-+ int (*cck) (const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+
-+ old_iplug = item_plugin_by_coord(&circa);
-+
-+ cck = old_iplug->b.can_contain_key;
-+ if (cck == NULL)
-+ /* item doesn't define ->can_contain_key
-+ method? So it is not expandable. */
-+ result = 0;
-+ else {
-+ result = (old_iplug == new_iplug)
-+ && cck(&circa /*icoord */ , key, data);
-+ if (result) {
-+ coord_dup(icoord, &circa);
-+ icoord->between = BEFORE_UNIT;
-+ }
-+ }
-+ }
-+ } else
-+ impossible("nikita-2513", "Nothing works");
-+ if (result) {
-+ if (icoord->between == BEFORE_ITEM) {
-+ assert("vs-912", icoord->unit_pos == 0);
-+ icoord->between = BEFORE_UNIT;
-+ } else if (icoord->between == AFTER_ITEM) {
-+ coord_init_after_item_end(icoord);
-+ }
-+ }
-+ return result;
-+}
-+
-+/* implements COP_PASTE operation
-+
-+ Paste data into existing item. This is complicated by the fact that after
-+ we shifted something to the left or right neighbors trying to free some
-+ space, item we were supposed to paste into can be in different node than
-+ insertion coord. If so, we are no longer doing paste, but insert. See
-+ comments in insert_paste_common().
-+
-+*/
-+static int carry_paste(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing UNUSED_ARG /* current carry
-+ * level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ znode *node;
-+ carry_insert_data cdata;
-+ coord_t dcoord;
-+ reiser4_item_data data;
-+ int result;
-+ int real_size;
-+ item_plugin *iplug;
-+ carry_plugin_info info;
-+ coord_t *coord;
-+
-+ assert("nikita-982", op != NULL);
-+ assert("nikita-983", todo != NULL);
-+ assert("nikita-984", op->op == COP_PASTE);
-+
-+ coord_init_zero(&dcoord);
-+
-+ result = insert_paste_common(op, doing, todo, &cdata, &dcoord, &data);
-+ if (result != 0)
-+ return result;
-+
-+ coord = op->u.insert.d->coord;
-+
-+ /* handle case when op -> u.insert.coord doesn't point to the item
-+ of required type. restart as insert. */
-+ if (!can_paste(coord, op->u.insert.d->key, op->u.insert.d->data)) {
-+ op->op = COP_INSERT;
-+ op->u.insert.type = COPT_PASTE_RESTARTED;
-+ result = op_dispatch_table[COP_INSERT].handler(op, doing, todo);
-+
-+ return result;
-+ }
-+
-+ node = coord->node;
-+ iplug = item_plugin_by_coord(coord);
-+ assert("nikita-992", iplug != NULL);
-+
-+ assert("nikita-985", node != NULL);
-+ assert("nikita-986", node_plugin_by_node(node) != NULL);
-+
-+ assert("nikita-987",
-+ space_needed_for_op(node, op) <= znode_free_space(node));
-+
-+ assert("nikita-1286", coord_is_existing_item(coord));
-+
-+ /*
-+ * if item is expanded as a result of this operation, we should first
-+ * change item size, than call ->b.paste item method. If item is
-+ * shrunk, it should be done other way around: first call ->b.paste
-+ * method, then reduce item size.
-+ */
-+
-+ real_size = space_needed_for_op(node, op);
-+ if (real_size > 0)
-+ node->nplug->change_item_size(coord, real_size);
-+
-+ doing->restartable = 0;
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ result = iplug->b.paste(coord, op->u.insert.d->data, &info);
-+
-+ if (real_size < 0)
-+ node->nplug->change_item_size(coord, real_size);
-+
-+ /* if we pasted at the beginning of the item, update item's key. */
-+ if (coord->unit_pos == 0 && coord->between != AFTER_UNIT)
-+ node->nplug->update_item_key(coord, op->u.insert.d->key, &info);
-+
-+ znode_make_dirty(node);
-+ return result;
-+}
-+
-+/* handle carry COP_EXTENT operation. */
-+static int carry_extent(carry_op * op /* operation to perform */ ,
-+ carry_level * doing /* queue of operations @op
-+ * is part of */ ,
-+ carry_level * todo /* queue where new operations
-+ * are accumulated */ )
-+{
-+ znode *node;
-+ carry_insert_data cdata;
-+ coord_t coord;
-+ reiser4_item_data data;
-+ carry_op *delete_dummy;
-+ carry_op *insert_extent;
-+ int result;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1751", op != NULL);
-+ assert("nikita-1752", todo != NULL);
-+ assert("nikita-1753", op->op == COP_EXTENT);
-+
-+ /* extent insertion overview:
-+
-+ extents live on the TWIG LEVEL, which is level one above the leaf
-+ one. This complicates extent insertion logic somewhat: it may
-+ happen (and going to happen all the time) that in logical key
-+ ordering extent has to be placed between items I1 and I2, located
-+ at the leaf level, but I1 and I2 are in the same formatted leaf
-+ node N1. To insert extent one has to
-+
-+ (1) reach node N1 and shift data between N1, its neighbors and
-+ possibly newly allocated nodes until I1 and I2 fall into different
-+ nodes. Since I1 and I2 are still neighboring items in logical key
-+ order, they will be necessary utmost items in their respective
-+ nodes.
-+
-+ (2) After this new extent item is inserted into node on the twig
-+ level.
-+
-+ Fortunately this process can reuse almost all code from standard
-+ insertion procedure (viz. make_space() and insert_paste_common()),
-+ due to the following observation: make_space() only shifts data up
-+ to and excluding or including insertion point. It never
-+ "over-moves" through insertion point. Thus, one can use
-+ make_space() to perform step (1). All required for this is just to
-+ instruct free_space_shortage() to keep make_space() shifting data
-+ until insertion point is at the node border.
-+
-+ */
-+
-+ /* perform common functionality of insert and paste. */
-+ result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
-+ if (result != 0)
-+ return result;
-+
-+ node = op->u.extent.d->coord->node;
-+ assert("nikita-1754", node != NULL);
-+ assert("nikita-1755", node_plugin_by_node(node) != NULL);
-+ assert("nikita-1700", coord_wrt(op->u.extent.d->coord) != COORD_INSIDE);
-+
-+ /* NOTE-NIKITA add some checks here. Not assertions, -EIO. Check that
-+ extent fits between items. */
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ /* there is another complication due to placement of extents on the
-+ twig level: extents are "rigid" in the sense that key-range
-+ occupied by extent cannot grow indefinitely to the right as it is
-+ for the formatted leaf nodes. Because of this when search finds two
-+ adjacent extents on the twig level, it has to "drill" to the leaf
-+ level, creating new node. Here we are removing this node.
-+ */
-+ if (node_is_empty(node)) {
-+ delete_dummy = node_post_carry(&info, COP_DELETE, node, 1);
-+ if (IS_ERR(delete_dummy))
-+ return PTR_ERR(delete_dummy);
-+ delete_dummy->u.delete.child = NULL;
-+ delete_dummy->u.delete.flags = DELETE_RETAIN_EMPTY;
-+ ZF_SET(node, JNODE_HEARD_BANSHEE);
-+ }
-+
-+ /* proceed with inserting extent item into parent. We are definitely
-+ inserting rather than pasting if we get that far. */
-+ insert_extent = node_post_carry(&info, COP_INSERT, node, 1);
-+ if (IS_ERR(insert_extent))
-+ /* @delete_dummy will be automatically destroyed on the level
-+ exiting */
-+ return PTR_ERR(insert_extent);
-+ /* NOTE-NIKITA insertion by key is simplest option here. Another
-+ possibility is to insert on the left or right of already existing
-+ item.
-+ */
-+ insert_extent->u.insert.type = COPT_KEY;
-+ insert_extent->u.insert.d = op->u.extent.d;
-+ assert("nikita-1719", op->u.extent.d->key != NULL);
-+ insert_extent->u.insert.d->data->arg = op->u.extent.d->coord;
-+ insert_extent->u.insert.flags =
-+ znode_get_tree(node)->carry.new_extent_flags;
-+
-+ /*
-+ * if carry was asked to track lock handle we should actually track
-+ * lock handle on the twig node rather than on the leaf where
-+ * operation was started from. Transfer tracked lock handle.
-+ */
-+ if (doing->track_type) {
-+ assert("nikita-3242", doing->tracked != NULL);
-+ assert("nikita-3244", todo->tracked == NULL);
-+ todo->tracked = doing->tracked;
-+ todo->track_type = CARRY_TRACK_NODE;
-+ doing->tracked = NULL;
-+ doing->track_type = 0;
-+ }
-+
-+ return 0;
-+}
-+
-+/* update key in @parent between pointers to @left and @right.
-+
-+ Find coords of @left and @right and update delimiting key between them.
-+ This is helper function called by carry_update(). Finds position of
-+ internal item involved. Updates item key. Updates delimiting keys of child
-+ nodes involved.
-+*/
-+static int update_delimiting_key(znode * parent /* node key is updated
-+ * in */ ,
-+ znode * left /* child of @parent */ ,
-+ znode * right /* child of @parent */ ,
-+ carry_level * doing /* current carry
-+ * level */ ,
-+ carry_level * todo /* parent carry
-+ * level */ ,
-+ const char **error_msg /* place to
-+ * store error
-+ * message */ )
-+{
-+ coord_t left_pos;
-+ coord_t right_pos;
-+ int result;
-+ reiser4_key ldkey;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1177", right != NULL);
-+ /* find position of right left child in a parent */
-+ result = find_child_ptr(parent, right, &right_pos);
-+ if (result != NS_FOUND) {
-+ *error_msg = "Cannot find position of right child";
-+ return result;
-+ }
-+
-+ if ((left != NULL) && !coord_is_leftmost_unit(&right_pos)) {
-+ /* find position of the left child in a parent */
-+ result = find_child_ptr(parent, left, &left_pos);
-+ if (result != NS_FOUND) {
-+ *error_msg = "Cannot find position of left child";
-+ return result;
-+ }
-+ assert("nikita-1355", left_pos.node != NULL);
-+ } else
-+ left_pos.node = NULL;
-+
-+ /* check that they are separated by exactly one key and are basically
-+ sane */
-+ if (REISER4_DEBUG) {
-+ if ((left_pos.node != NULL)
-+ && !coord_is_existing_unit(&left_pos)) {
-+ *error_msg = "Left child is bastard";
-+ return RETERR(-EIO);
-+ }
-+ if (!coord_is_existing_unit(&right_pos)) {
-+ *error_msg = "Right child is bastard";
-+ return RETERR(-EIO);
-+ }
-+ if (left_pos.node != NULL &&
-+ !coord_are_neighbors(&left_pos, &right_pos)) {
-+ *error_msg = "Children are not direct siblings";
-+ return RETERR(-EIO);
-+ }
-+ }
-+ *error_msg = NULL;
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ /*
-+ * If child node is not empty, new key of internal item is a key of
-+ * leftmost item in the child node. If the child is empty, take its
-+ * right delimiting key as a new key of the internal item. Precise key
-+ * in the latter case is not important per se, because the child (and
-+ * the internal item) are going to be killed shortly anyway, but we
-+ * have to preserve correct order of keys in the parent node.
-+ */
-+
-+ if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE))
-+ leftmost_key_in_node(right, &ldkey);
-+ else {
-+ read_lock_dk(znode_get_tree(parent));
-+ ldkey = *znode_get_rd_key(right);
-+ read_unlock_dk(znode_get_tree(parent));
-+ }
-+ node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info);
-+ doing->restartable = 0;
-+ znode_make_dirty(parent);
-+ return 0;
-+}
-+
-+/* implements COP_UPDATE operation
-+
-+   Updates the key of the internal item that points to the child node
-+   referenced by @op->node. Returns 0 on success; on failure a warning is
-+   logged and the error code is returned.
-+
-+*/
-+static int carry_update(carry_op * op /* operation to be performed */ ,
-+			carry_level * doing /* current carry level */ ,
-+			carry_level * todo /* next carry level */ )
-+{
-+	const char *error_msg;
-+	carry_node *missing UNUSED_ARG;
-+	carry_node *lchild;
-+	carry_node *rchild;
-+	reiser4_tree *tree;
-+	znode *left;
-+	znode *parent;
-+	int result;
-+
-+	/*
-+	 * This operation is called to update the key of an internal item.
-+	 * This is necessary when carry shifted or cut data on the child
-+	 * level. Arguments of this operation are:
-+	 *
-+	 *     @op->node --- child node. Operation should update the key of
-+	 *     the internal item pointing to it.
-+	 *
-+	 *     @op->u.update.left --- left neighbor of the child. Optional.
-+	 */
-+
-+	assert("nikita-902", op != NULL);
-+	assert("nikita-903", todo != NULL);
-+	assert("nikita-904", op->op == COP_UPDATE);
-+
-+	lchild = op->u.update.left;
-+	rchild = op->node;
-+
-+	left = NULL;
-+	if (lchild != NULL) {
-+		assert("nikita-1001", lchild->parent);
-+		assert("nikita-1003", !lchild->left);
-+		left = reiser4_carry_real(lchild);
-+	}
-+
-+	/* look up the parent of the child under the tree lock */
-+	tree = znode_get_tree(rchild->node);
-+	read_lock_tree(tree);
-+	parent = znode_parent(rchild->node);
-+	read_unlock_tree(tree);
-+
-+	if (parent == NULL) {
-+		error_msg = "Cannot find node to update key in";
-+		result = RETERR(-EIO);
-+	} else {
-+		/* operation will be reposted to the next level by the
-+		   ->update_item_key() method of node plugin, if necessary. */
-+		result = update_delimiting_key(parent,
-+					       lchild ? lchild->node : NULL,
-+					       rchild->node,
-+					       doing, todo, &error_msg);
-+	}
-+
-+	if (result != 0)
-+		warning("nikita-999", "Error updating delimiting key: %s (%i)",
-+			error_msg ? : "", result);
-+	return result;
-+}
-+
-+/* shift items between insert_coord->node and @node during carry.
-+
-+   Calls the ->shift() method of @node's node plugin in the direction given
-+   by @side. If a positive amount was shifted, both nodes are marked dirty
-+   and @doing is marked non-restartable.
-+
-+   NOTE(review): this function always returns 0, even when ->shift()
-+   returned -ENOMEM (the assertion below allows that value). Confirm that
-+   callers do not need the error propagated.
-+
-+   NOTE(review): the local named "source" is insert_coord->node, which
-+   suggests @node is the shift target rather than the node data are moved
-+   from --- confirm against the ->shift() contract.
-+*/
-+static int carry_shift_data(sideof side /* in what direction to move data */ ,
-+ coord_t * insert_coord /* coord where new item
-+ * is to be inserted */ ,
-+ znode * node /* node which data are moved from */ ,
-+ carry_level * doing /* active carry queue */ ,
-+ carry_level * todo /* carry queue where new
-+ * operations are to be put
-+ * in */ ,
-+ unsigned int including_insert_coord_p /* true if
-+ * @insert_coord
-+ * can be moved */ )
-+{
-+ int result;
-+ znode *source;
-+ carry_plugin_info info;
-+ node_plugin *nplug;
-+
-+ /* remember the node of the insertion coord so it can be dirtied below */
-+ source = insert_coord->node;
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ nplug = node_plugin_by_node(node);
-+ result = nplug->shift(insert_coord, node,
-+ (side == LEFT_SIDE) ? SHIFT_LEFT : SHIFT_RIGHT, 0,
-+ (int)including_insert_coord_p, &info);
-+ /* the only error ->shift() method of node plugin can return is
-+ -ENOMEM due to carry node/operation allocation. */
-+ assert("nikita-915", result >= 0 || result == -ENOMEM);
-+ if (result > 0) {
-+ /*
-+ * if some number of bytes was actually shifted, mark nodes
-+ * dirty, and carry level as non-restartable.
-+ */
-+ doing->restartable = 0;
-+ znode_make_dirty(source);
-+ znode_make_dirty(node);
-+ }
-+
-+ assert("nikita-2077", coord_check(insert_coord));
-+ /* result of ->shift() is intentionally not returned; see NOTE above */
-+ return 0;
-+}
-+
-+typedef carry_node *(*carry_iterator) (carry_node * node);
-+static carry_node *find_dir_carry(carry_node * node, carry_level * level,
-+ carry_iterator iterator);
-+
-+/* return the carry node linked before @node on its level list. No check
-+   for the list head is made here; callers test with carry_node_end(). */
-+static carry_node *pool_level_list_prev(carry_node *node)
-+{
-+	struct list_head *link;
-+
-+	link = node->header.level_linkage.prev;
-+	return list_entry(link, carry_node, header.level_linkage);
-+}
-+
-+/* find the left neighbor of the given carry node in a carry queue.
-+
-+   Walks @level backwards from @node via find_dir_carry(). Returns NULL
-+   when the head of the list is reached.
-+
-+   This is used by find_left_neighbor(), but I am not sure that this
-+   really gives any advantage. More statistics required.
-+
-+*/
-+carry_node *find_left_carry(carry_node * node /* node to find left neighbor
-+					       * of */ ,
-+			    carry_level * level /* level to scan */ )
-+{
-+	carry_iterator step_left;
-+
-+	step_left = (carry_iterator) pool_level_list_prev;
-+	return find_dir_carry(node, level, step_left);
-+}
-+
-+/* return the carry node linked after @node on its level list. No check
-+   for the list head is made here; callers test with carry_node_end(). */
-+static carry_node *pool_level_list_next(carry_node *node)
-+{
-+	struct list_head *link;
-+
-+	link = node->header.level_linkage.next;
-+	return list_entry(link, carry_node, header.level_linkage);
-+}
-+
-+/* find the right neighbor of the given carry node in a carry queue.
-+
-+   Walks @level forwards from @node via find_dir_carry(). Returns NULL
-+   when the head of the list is reached.
-+
-+   This is used by find_right_neighbor(), but I am not sure that this
-+   really gives any advantage. More statistics required.
-+
-+*/
-+carry_node *find_right_carry(carry_node * node /* node to find right neighbor
-+						* of */ ,
-+			     carry_level * level /* level to scan */ )
-+{
-+	carry_iterator step_right;
-+
-+	step_right = (carry_iterator) pool_level_list_next;
-+	return find_dir_carry(node, level, step_right);
-+}
-+
-+/* find the left or right neighbor of the given carry node in a carry
-+   queue.
-+
-+   Helper function used by find_{left|right}_carry(). Returns the first
-+   carry node in the @iterator direction whose real znode differs from that
-+   of @node, or NULL if the list head is reached first.
-+*/
-+static carry_node *find_dir_carry(carry_node * node /* node to start scanning
-+						     * from */ ,
-+				  carry_level * level /* level to scan */ ,
-+				  carry_iterator iterator /* operation to
-+							   * move to the next
-+							   * node */ )
-+{
-+	carry_node *neighbor;
-+
-+	assert("nikita-1059", node != NULL);
-+	assert("nikita-1060", level != NULL);
-+
-+	/* step along the level list, skipping every carry node that
-+	   references the same znode as @node; stop at the list head. */
-+	for (neighbor = iterator(node);
-+	     !carry_node_end(level, neighbor);
-+	     neighbor = iterator(neighbor)) {
-+		if (reiser4_carry_real(neighbor) != reiser4_carry_real(node))
-+			return neighbor;
-+	}
-+	/* list head is reached */
-+	return NULL;
-+}
-+
-+/*
-+ * Memory reservation estimation.
-+ *
-+ * Carry process proceeds through tree levels upwards. Carry assumes that it
-+ * takes tree in consistent state (e.g., that search tree invariants hold),
-+ * and leaves tree consistent after it finishes. This means that when some
-+ * error occurs carry cannot simply return if there are pending carry
-+ * operations. Generic solution for this problem is carry-undo either as
-+ * transaction manager feature (requiring checkpoints and isolation), or
-+ * through some carry specific mechanism.
-+ *
-+ * Our current approach is to panic if carry hits an error while tree is
-+ * inconsistent. Unfortunately -ENOMEM can easily be triggered. To work around
-+ * this "memory reservation" mechanism was added.
-+ *
-+ * Memory reservation is implemented by perthread-pages.diff patch from
-+ * core-patches. Its API is defined in <linux/gfp.h>
-+ *
-+ * int perthread_pages_reserve(int nrpages, gfp_t gfp);
-+ * void perthread_pages_release(int nrpages);
-+ * int perthread_pages_count(void);
-+ *
-+ * carry estimates its worst case memory requirements at the entry, reserves
-+ * enough memory, and releases unused pages before returning.
-+ *
-+ * Code below estimates worst case memory requirements for a given carry
-+ * queue. This is done by summing worst case memory requirements for each
-+ * operation in the queue.
-+ *
-+ */
-+
-+/*
-+ * Memory requirements of many operations depend on the tree
-+ * height. For example, item insertion requires new node to be inserted at
-+ * each tree level in the worst case. What tree height should be used for
-+ * estimation? Current tree height is wrong, because tree height can change
-+ * between the time when estimation was done and the time when operation is
-+ * actually performed. Maximal possible tree height (REISER4_MAX_ZTREE_HEIGHT)
-+ * is also not desirable, because it would lead to the huge over-estimation
-+ * all the time. Plausible solution is "capped tree height": if current tree
-+ * height is less than some TREE_HEIGHT_CAP constant, capped tree height is
-+ * TREE_HEIGHT_CAP, otherwise it's current tree height. Idea behind this is
-+ * that if tree height is TREE_HEIGHT_CAP or larger, it's extremely unlikely
-+ * to be increased even more during short interval of time.
-+ */
-+#define TREE_HEIGHT_CAP (5)
-+
-+/* return the height of @tree, raised to TREE_HEIGHT_CAP when the tree is
-+   shorter than that. See the comment above TREE_HEIGHT_CAP. */
-+static int cap_tree_height(reiser4_tree * tree)
-+{
-+	int height;
-+
-+	height = tree->height;
-+	return max_t(int, height, TREE_HEIGHT_CAP);
-+}
-+
-+/* capped height (see cap_tree_height()) of the tree of the current
-+   super block. */
-+static int capped_height(void)
-+{
-+	reiser4_tree *tree;
-+
-+	tree = current_tree;
-+	return cap_tree_height(tree);
-+}
-+
-+/* return number of pages required to store given number of bytes.
-+
-+   @bytes is rounded up to the next multiple of PAGE_CACHE_SIZE before the
-+   shift, so any non-zero remainder costs one extra page. */
-+static int bytes_to_pages(int bytes)
-+{
-+ return (bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-+}
-+
-+/* number of pages needed to allocate znodes during item insertion. */
-+static int carry_estimate_znodes(void)
-+{
-+	int bytes;
-+
-+	/*
-+	 * Note that there is a problem here: there is no way to reserve
-+	 * pages specifically for the given slab. This means that these
-+	 * pages can be hijacked for some other end.
-+	 */
-+
-+	/* in the worst case we need 3 new znodes on each tree level */
-+	bytes = capped_height() * sizeof(znode) * 3;
-+	return bytes_to_pages(bytes);
-+}
-+
-+/*
-+ * number of pages needed to load bitmaps. One bitmap per level. Zero unless
-+ * the super block has REISER4_DONT_LOAD_BITMAP set.
-+ */
-+static int carry_estimate_bitmaps(void)
-+{
-+	int bytes;
-+
-+	if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_LOAD_BITMAP))
-+		/* bitmaps were pre-loaded during mount */
-+		return 0;
-+
-+	/* bnode should be added, but it is private to bitmap.c, skip for
-+	 * now (hence the 0 term). Count working and commit jnodes. */
-+	bytes = capped_height() * (0 + 2 * sizeof(jnode));
-+	/* jnodes themselves, plus 2 pages for their contents */
-+	return bytes_to_pages(bytes) + 2;
-+}
-+
-+/* worst case memory requirement, in pages, of item insertion. @op and
-+   @level are not consulted. */
-+static int carry_estimate_insert(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps() + carry_estimate_znodes();
-+	pages += 1;			/* new atom */
-+	pages += capped_height();	/* new block on each level */
-+	pages += 1;	/* and possibly extra new block at the leaf level */
-+	pages += 3;			/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirement, in pages, of item deletion. @op and
-+   @level are not consulted. */
-+static int carry_estimate_delete(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps() + carry_estimate_znodes();
-+	pages += 1;		/* new atom */
-+	pages += 3;		/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirement, in pages, of a tree cut. @op and @level
-+   are not consulted. */
-+static int carry_estimate_cut(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps() + carry_estimate_znodes();
-+	pages += 1;		/* new atom */
-+	pages += 3;		/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirement, in pages, of pasting into an item. @op
-+   and @level are not consulted. */
-+static int carry_estimate_paste(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps() + carry_estimate_znodes();
-+	pages += 1;			/* new atom */
-+	pages += capped_height();	/* new block on each level */
-+	pages += 1;	/* and possibly extra new block at the leaf level */
-+	pages += 3;			/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirement, in pages, of extent insertion: the sum of
-+   one insertion estimate and one deletion estimate. */
-+static int carry_estimate_extent(carry_op * op, carry_level * level)
-+{
-+	int insert_pages;
-+	int kill_pages;
-+
-+	insert_pages = carry_estimate_insert(op, level);	/* insert extent */
-+	kill_pages = carry_estimate_delete(op, level);	/* kill leaf */
-+	return insert_pages + kill_pages;
-+}
-+
-+/* worst case memory requirements of key update: always estimated as zero
-+   pages. @op and @level are not consulted. */
-+static int carry_estimate_update(carry_op * op, carry_level * level)
-+{
-+ return 0;
-+}
-+
-+/* worst case memory requirement, in pages, of flow insertion. The number
-+   of pages covering the flow length is capped at
-+   CARRY_FLOW_NEW_NODES_LIMIT and multiplied by the single-insertion
-+   estimate. */
-+static int carry_estimate_insert_flow(carry_op * op, carry_level * level)
-+{
-+	int flow_pages;
-+	int newnodes;
-+
-+	flow_pages = bytes_to_pages(op->u.insert_flow.flow->length);
-+	newnodes = min(flow_pages, CARRY_FLOW_NEW_NODES_LIMIT);
-+	/*
-+	 * roughly estimate insert_flow as a sequence of insertions.
-+	 */
-+	return newnodes * carry_estimate_insert(op, level);
-+}
-+
-+/* This is the dispatch table for carry operations, indexed by COP_*
-+   operation code. Each entry pairs the handler that performs the operation
-+   with the function that estimates its worst case memory requirements.
-+   It can be trivially abstracted into a useful plugin: tunable balancing
-+   policy is a good thing. */
-+carry_op_handler op_dispatch_table[COP_LAST_OP] = {
-+ [COP_INSERT] = {
-+ .handler = carry_insert,
-+ .estimate = carry_estimate_insert}
-+ ,
-+ [COP_DELETE] = {
-+ .handler = carry_delete,
-+ .estimate = carry_estimate_delete}
-+ ,
-+ [COP_CUT] = {
-+ .handler = carry_cut,
-+ .estimate = carry_estimate_cut}
-+ ,
-+ [COP_PASTE] = {
-+ .handler = carry_paste,
-+ .estimate = carry_estimate_paste}
-+ ,
-+ [COP_EXTENT] = {
-+ .handler = carry_extent,
-+ .estimate = carry_estimate_extent}
-+ ,
-+ [COP_UPDATE] = {
-+ .handler = carry_update,
-+ .estimate = carry_estimate_update}
-+ ,
-+ [COP_INSERT_FLOW] = {
-+ .handler = carry_insert_flow,
-+ .estimate = carry_estimate_insert_flow}
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/carry_ops.h linux-2.6.20/fs/reiser4/carry_ops.h
---- linux-2.6.20.orig/fs/reiser4/carry_ops.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/carry_ops.h 2007-05-06 14:50:43.694974475 +0400
-@@ -0,0 +1,42 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* implementation of carry operations. See carry_ops.c for details. */
-+
-+#if !defined( __CARRY_OPS_H__ )
-+#define __CARRY_OPS_H__
-+
-+#include "forward.h"
-+#include "znode.h"
-+#include "carry.h"
-+
-+/* carry operation handlers: one entry of op_dispatch_table[], pairing the
-+   function that performs a carry operation with the one that estimates its
-+   memory requirements. */
-+typedef struct carry_op_handler {
-+ /* perform operation @op; @doing is the carry level being processed,
-+ @todo is the level where follow-up operations are queued */
-+ int (*handler) (carry_op * op, carry_level * doing, carry_level * todo);
-+ /* estimate memory requirements for @op */
-+ int (*estimate) (carry_op * op, carry_level * level);
-+} carry_op_handler;
-+
-+/* This is dispatch table for carry operations. It can be trivially
-+ abstracted into useful plugin: tunable balancing policy is a good
-+ thing. */
-+extern carry_op_handler op_dispatch_table[COP_LAST_OP];
-+
-+unsigned int space_needed(const znode * node, const coord_t * coord,
-+ const reiser4_item_data * data, int inserting);
-+extern carry_node *find_left_carry(carry_node * node, carry_level * level);
-+extern carry_node *find_right_carry(carry_node * node, carry_level * level);
-+
-+/* __CARRY_OPS_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/context.c linux-2.6.20/fs/reiser4/context.c
---- linux-2.6.20.orig/fs/reiser4/context.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/context.c 2007-05-06 14:50:43.694974475 +0400
-@@ -0,0 +1,288 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Manipulation of reiser4_context */
-+
-+/*
-+ * global context used during system call. Variable of this type is allocated
-+ * on the stack at the beginning of the reiser4 part of the system call and
-+ * pointer to it is stored in the current->fs_context. This allows us to avoid
-+ * passing pointer to current transaction and current lockstack (both in
-+ * one-to-one mapping with threads) all over the call chain.
-+ *
-+ * It's kind of like those global variables the prof used to tell you not to
-+ * use in CS1, except thread specific.;-) Nikita, this was a good idea.
-+ *
-+ * In some situations it is desirable to have ability to enter reiser4_context
-+ * more than once for the same thread (nested contexts). For example, there
-+ * are some functions that can be called either directly from VFS/VM or from
-+ * already active reiser4 context (->writepage, for example).
-+ *
-+ * In such situations "child" context acts like dummy: all activity is
-+ * actually performed in the top level context, and get_current_context()
-+ * always returns top level context.
-+ * Of course, reiser4_init_context()/reiser4_done_context() have to be properly
-+ * nested any way.
-+ *
-+ * Note that there is an important difference between reiser4 uses
-+ * ->fs_context and the way other file systems use it. Other file systems
-+ * (ext3 and reiserfs) use ->fs_context only for the duration of _transaction_
-+ * (this is why ->fs_context was initially called ->journal_info). This means,
-+ * that when ext3 or reiserfs finds that ->fs_context is not NULL on the entry
-+ * to the file system, they assume that some transaction is already underway,
-+ * and usually bail out, because starting nested transaction would most likely
-+ * lead to the deadlock. This gives false positives with reiser4, because we
-+ * set ->fs_context before starting transaction.
-+ */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "context.h"
-+
-+#include <linux/writeback.h> /* balance_dirty_pages() */
-+#include <linux/hardirq.h>
-+
-+/* Common initialization shared by reiser4_init_context() and
-+   init_stack_context(): zero @context, bind it to @super, install it as
-+   current->journal_info (remembering the previous value in ->outer), and
-+   set up its lock stack, transaction handle and tap list. */
-+static void _reiser4_init_context(reiser4_context * context,
-+ struct super_block *super)
-+{
-+ memset(context, 0, sizeof(*context));
-+
-+ context->super = super;
-+ context->magic = context_magic;
-+ /* save whatever was in ->journal_info so that it can be restored when
-+ this context is released */
-+ context->outer = current->journal_info;
-+ current->journal_info = (void *)context;
-+ context->nr_children = 0;
-+ context->gfp_mask = GFP_KERNEL;
-+
-+ init_lock_stack(&context->stack);
-+
-+ reiser4_txn_begin(context);
-+
-+ /* initialize head of tap list */
-+ INIT_LIST_HEAD(&context->taps);
-+#if REISER4_DEBUG
-+ context->task = current;
-+#endif
-+ /* grab_space_enable() goes through get_current_context(), so it must run
-+ after ->journal_info and ->magic have been set above */
-+ grab_space_enable();
-+}
-+
-+/* initialize context and bind it to the current thread
-+
-+   This function should be called at the beginning of reiser4 part of
-+   syscall.
-+
-+   If the thread already has a reiser4 context for the same @super, that
-+   context is reused and its ->nr_children is incremented. Otherwise a new
-+   context is allocated with kmalloc() and initialized. Returns the context,
-+   or ERR_PTR(-ENOMEM) if allocation fails.
-+*/
-+reiser4_context * reiser4_init_context(struct super_block * super)
-+{
-+	reiser4_context *context;
-+
-+	assert("nikita-2662", !in_interrupt() && !in_irq());
-+	assert("nikita-3357", super != NULL);
-+	assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
-+
-+	context = get_current_context_check();
-+	if (context == NULL || context->super != super) {
-+		/* no context for this super block yet: create a new one */
-+		context = kmalloc(sizeof(*context), GFP_KERNEL);
-+		if (context == NULL)
-+			return ERR_PTR(RETERR(-ENOMEM));
-+
-+		_reiser4_init_context(context, super);
-+		return context;
-+	}
-+
-+	/* nested entry: account for one more child of the existing context */
-+	context = (reiser4_context *) current->journal_info;
-+	context->nr_children++;
-+	return context;
-+}
-+
-+/* initialize a caller-provided @context and mark it as not kmalloc-ed, so
-+   that reiser4_done_context() will not kfree() it.
-+
-+   this is used in scan_mgr which is called with spinlock held and in
-+   reiser4_fill_super magic. Must not be called from within an existing
-+   reiser4 context. */
-+void init_stack_context(reiser4_context *context, struct super_block *super)
-+{
-+	assert("nikita-2662", !in_interrupt() && !in_irq());
-+	assert("nikita-3357", super != NULL);
-+	assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
-+	assert("vs-12", !is_in_reiser4_context());
-+
-+	_reiser4_init_context(context, super);
-+	/* set after initialization: _reiser4_init_context() zeroes @context */
-+	context->on_stack = 1;
-+}
-+
-+/* map a lock stack embedded into a reiser4 context (its ->stack field)
-+   back to the containing context */
-+reiser4_context *get_context_by_lock_stack(lock_stack * owner)
-+{
-+	reiser4_context *ctx;
-+
-+	ctx = container_of(owner, reiser4_context, stack);
-+	return ctx;
-+}
-+
-+/* true if there is already _any_ reiser4 context for the current thread,
-+   i.e. current->journal_info is non-NULL and carries context_magic */
-+int is_in_reiser4_context(void)
-+{
-+	reiser4_context *ctx = current->journal_info;
-+
-+	if (ctx == NULL)
-+		return 0;
-+	return ((unsigned long)ctx->magic) == context_magic;
-+}
-+
-+/*
-+ * call balance dirty pages for the current context.
-+ *
-+ * File system is expected to call balance_dirty_pages_ratelimited() whenever
-+ * it dirties a page. reiser4 does this for unformatted nodes (that is, during
-+ * write---this covers vast majority of all dirty traffic), but we cannot do
-+ * this immediately when formatted node is dirtied, because long term lock is
-+ * usually held at that time. To work around this, dirtying of formatted node
-+ * simply increases ->nr_marked_dirty counter in the current reiser4
-+ * context. When we are about to leave this context,
-+ * balance_dirty_pages_ratelimited() is called, if necessary.
-+ *
-+ * This introduces another problem: sometimes we do not want to run
-+ * balance_dirty_pages_ratelimited() when leaving a context, for example
-+ * because some important lock (like ->i_mutex on the parent directory) is
-+ * held. To achieve this, ->nobalance flag can be set in the current context.
-+ */
-+static void balance_dirty_pages_at(reiser4_context *context)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(context->super);
-+
-+	/*
-+	 * call balance_dirty_pages_ratelimited() to process formatted nodes
-+	 * dirtied during this system call. Each early return below is one
-+	 * of the cases where that must not be done.
-+	 */
-+
-+	/* we are in mount: no super info or no fake inode yet */
-+	if (sbinfo == NULL || sbinfo->fake == NULL)
-+		return;
-+	/* no nodes were dirtied in this context */
-+	if (context->nr_marked_dirty == 0)
-+		return;
-+	/* we are in writepage (avoid deadlock) */
-+	if (current->flags & PF_MEMALLOC)
-+		return;
-+	/* we are in pdflush */
-+	if (current_is_pdflush())
-+		return;
-+
-+	balance_dirty_pages_ratelimited(sbinfo->fake->i_mapping);
-+}
-+
-+/* release resources associated with context.
-+
-+ This function should be called at the end of "session" with reiser4,
-+ typically just before leaving reiser4 driver back to VFS.
-+
-+ This is a good place to put some debugging consistency checks, like that
-+ thread released all locks and closed transcrash etc.
-+
-+*/
-+static void reiser4_done_context(reiser4_context * context /* context being released */ )
-+{
-+ assert("nikita-860", context != NULL);
-+ assert("nikita-859", context->magic == context_magic);
-+ assert("vs-646", (reiser4_context *) current->journal_info == context);
-+ assert("zam-686", !in_interrupt() && !in_irq());
-+
-+ /* only do anything when leaving top-level reiser4 context. All nested
-+ * contexts are just dummies. */
-+ if (context->nr_children == 0) {
-+ /* consistency checks: transaction closed, no locks, counters or taps
-+ * left behind, and delete mutex not owned by this thread */
-+ assert("jmacd-673", context->trans == NULL);
-+ assert("jmacd-1002", lock_stack_isclean(&context->stack));
-+ assert("nikita-1936", reiser4_no_counters_are_held());
-+ assert("nikita-2626", list_empty_careful(reiser4_taps_list()));
-+ assert("zam-1004", ergo(get_super_private(context->super),
-+ get_super_private(context->super)->delete_mutex_owner !=
-+ current));
-+
-+ /* release all grabbed but as yet unused blocks */
-+ if (context->grabbed_blocks != 0)
-+ all_grabbed2free();
-+
-+ /*
-+ * synchronize against longterm_unlock_znode():
-+ * wake_up_requestor() wakes up requestors without holding
-+ * zlock (otherwise they will immediately bump into that lock
-+ * after wake up on another CPU). To work around (rare)
-+ * situation where requestor has been woken up asynchronously
-+ * and managed to run until completion (and destroy its
-+ * context and lock stack) before wake_up_requestor() called
-+ * wake_up() on it, wake_up_requestor() synchronize on lock
-+ * stack spin lock. It has actually been observed that spin
-+ * lock _was_ locked at this point, because
-+ * wake_up_requestor() took interrupt.
-+ */
-+ spin_lock_stack(&context->stack);
-+ spin_unlock_stack(&context->stack);
-+
-+ assert("zam-684", context->nr_children == 0);
-+ /* restore original ->fs_context value */
-+ current->journal_info = context->outer;
-+ /* contexts set up by init_stack_context() have ->on_stack set and are
-+ * owned by the caller; only kmalloc-ed ones are freed here */
-+ if (context->on_stack == 0)
-+ kfree(context);
-+ } else {
-+ /* leaving a nested (dummy) context: just drop the child count */
-+ context->nr_children--;
-+#if REISER4_DEBUG
-+ assert("zam-685", context->nr_children >= 0);
-+#endif
-+ }
-+}
-+
-+/*
-+ * exit reiser4 context. Call balance_dirty_pages_at() if necessary. Close
-+ * transaction. Call done_context() to do context related book-keeping.
-+ */
-+void reiser4_exit_context(reiser4_context * context)
-+{
-+ assert("nikita-3021", reiser4_schedulable());
-+
-+ /* transaction work is done only when leaving the top-level context;
-+ nested contexts fall straight through to reiser4_done_context() */
-+ if (context->nr_children == 0) {
-+ /* ->nobalance is set by context_set_commit_async() */
-+ if (!context->nobalance) {
-+ reiser4_txn_restart(context);
-+ balance_dirty_pages_at(context);
-+ }
-+
-+ /* if filesystem is mounted with -o sync or -o dirsync - commit
-+ transaction. FIXME: TXNH_DONT_COMMIT is used to avoid
-+ committing on exit_context when inode semaphore is held and
-+ to have ktxnmgrd to do commit instead to get better
-+ concurrent filesystem accesses. But, when one mounts with -o
-+ sync, he cares more about reliability than about
-+ performance. So, for now we have this simple mount -o sync
-+ support. */
-+ if (context->super->s_flags & (MS_SYNCHRONOUS | MS_DIRSYNC)) {
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked_nocheck();
-+ if (atom) {
-+ /* force commit and undo any TXNH_DONT_COMMIT request */
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+ context->trans->flags &= ~TXNH_DONT_COMMIT;
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+ reiser4_txn_end(context);
-+ }
-+ reiser4_done_context(context);
-+}
-+
-+/* choose the allocation mask of the current context: GFP_KERNEL only when
-+   the thread is not an ent thread, holds no locks on its lock stack and its
-+   transaction handle has no atom; GFP_NOFS otherwise. */
-+void reiser4_ctx_gfp_mask_set(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	if (ctx->entd != 0 ||
-+	    !list_empty(&ctx->stack.locks) ||
-+	    ctx->trans->atom != NULL)
-+		ctx->gfp_mask = GFP_NOFS;
-+	else
-+		ctx->gfp_mask = GFP_KERNEL;
-+}
-+
-+/* unconditionally set the allocation mask of the current context to @mask */
-+void reiser4_ctx_gfp_mask_force (gfp_t mask)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	assert("edward-1454", ctx != NULL);
-+
-+	ctx->gfp_mask = mask;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/context.h linux-2.6.20/fs/reiser4/context.h
---- linux-2.6.20.orig/fs/reiser4/context.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/context.h 2007-05-06 14:50:43.698975725 +0400
-@@ -0,0 +1,228 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Reiser4 context. See context.c for details. */
-+
-+#if !defined( __REISER4_CONTEXT_H__ )
-+#define __REISER4_CONTEXT_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tap.h"
-+#include "lock.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* reiser4 per-thread context. A pointer to it is kept in
-+   current->journal_info while the thread is inside reiser4; see context.c. */
-+struct reiser4_context {
-+ /* magic constant. For identification of reiser4 contexts. */
-+ __u32 magic;
-+
-+ /* current lock stack. See lock.[ch]. This is where list of all
-+ locks taken by current thread is kept. This is also used in
-+ deadlock detection. */
-+ lock_stack stack;
-+
-+ /* current transcrash. */
-+ txn_handle *trans;
-+ /* transaction handle embedded into reiser4_context. ->trans points
-+ * here by default. */
-+ txn_handle trans_in_ctx;
-+
-+ /* super block we are working with. To get the current tree
-+ use &get_super_private (reiser4_get_current_sb ())->tree. */
-+ struct super_block *super;
-+
-+ /* parent fs activation: value of current->journal_info saved when this
-+ context was initialized and restored when it is released */
-+ struct fs_activation *outer;
-+
-+ /* per-thread grabbed (for further allocation) blocks counter */
-+ reiser4_block_nr grabbed_blocks;
-+
-+ /* list of taps currently monitored. See tap.c */
-+ struct list_head taps;
-+
-+ /* grabbing space is enabled */
-+ unsigned int grab_enabled:1;
-+ /* should be set when we are writing dirty nodes to disk in jnode_flush or
-+ * reiser4_write_logs() */
-+ unsigned int writeout_mode:1;
-+ /* true, if current thread is an ent thread */
-+ unsigned int entd:1;
-+ /* true, if balance_dirty_pages() should not be run when leaving this
-+ * context. This is used to avoid lengthy balance_dirty_pages()
-+ * operation when holding some important resource, like directory
-+ * ->i_mutex */
-+ unsigned int nobalance:1;
-+
-+ /* this bit is used on reiser4_done_context to decide whether context is
-+ kmalloc-ed and has to be kfree-ed */
-+ unsigned int on_stack:1;
-+
-+ /* count non-trivial jnode_set_dirty() calls */
-+ unsigned long nr_marked_dirty;
-+
-+ /* reiser4_sync_inodes calls (via generic_sync_sb_inodes)
-+ * reiser4_writepages for each of dirty inodes. Reiser4_writepages
-+ * captures pages. When number of pages captured in one
-+ * reiser4_sync_inodes reaches some threshold - some atoms get
-+ * flushed */
-+ int nr_captured;
-+ int nr_children; /* number of child contexts */
-+#if REISER4_DEBUG
-+ /* debugging information about reiser4 locks held by the current
-+ * thread */
-+ reiser4_lock_counters_info locks;
-+ struct task_struct *task; /* so we can easily find owner of the stack */
-+
-+ /*
-+ * disk space grabbing debugging support
-+ */
-+ /* how many disk blocks were grabbed by the first call to
-+ * reiser4_grab_space() in this context */
-+ reiser4_block_nr grabbed_initially;
-+
-+ /* list of all threads doing flush currently */
-+ struct list_head flushers_link;
-+ /* information about last error encountered by reiser4 */
-+ err_site err;
-+#endif
-+ /* NOTE(review): purpose of ->vp is not evident from this file */
-+ void *vp;
-+ /* allocation mask for this context; read through
-+ reiser4_ctx_gfp_mask_get(), changed by reiser4_ctx_gfp_mask_set() and
-+ reiser4_ctx_gfp_mask_force() */
-+ gfp_t gfp_mask;
-+};
-+
-+extern reiser4_context *get_context_by_lock_stack(lock_stack *);
-+
-+/* Debugging helps. */
-+#if REISER4_DEBUG
-+extern void print_contexts(void);
-+#endif
-+
-+#define current_tree (&(get_super_private(reiser4_get_current_sb())->tree))
-+#define current_blocksize reiser4_get_current_sb()->s_blocksize
-+#define current_blocksize_bits reiser4_get_current_sb()->s_blocksize_bits
-+
-+extern reiser4_context *reiser4_init_context(struct super_block *);
-+extern void init_stack_context(reiser4_context *, struct super_block *);
-+extern void reiser4_exit_context(reiser4_context *);
-+
-+/* magic constant we store in reiser4_context allocated at the stack. Used to
-+ catch accesses to stale or uninitialized contexts. */
-+#define context_magic ((__u32) 0x4b1b5d0b)
-+
-+extern int is_in_reiser4_context(void);
-+
-+/*
-+ * return reiser4_context for the thread @tsk, i.e. @tsk->journal_info cast
-+ * to a context pointer. The assertion dereferences ->journal_info, so the
-+ * caller must already know that @tsk is inside a reiser4 context; use
-+ * get_current_context_check() when that is not guaranteed.
-+ */
-+static inline reiser4_context *get_context(const struct task_struct *tsk)
-+{
-+ assert("vs-1682",
-+ ((reiser4_context *) tsk->journal_info)->magic == context_magic);
-+ return (reiser4_context *) tsk->journal_info;
-+}
-+
-+/*
-+ * reiser4 context of the current thread, or NULL when the thread is not
-+ * inside reiser4 (see is_in_reiser4_context()).
-+ */
-+static inline reiser4_context *get_current_context_check(void)
-+{
-+	return is_in_reiser4_context() ? get_context(current) : NULL;
-+}
-+
-+static inline reiser4_context *get_current_context(void); /* __attribute__((const)); */
-+
-+/* context of the current thread; unlike get_current_context_check() this
-+   does not test whether a context exists */
-+static inline reiser4_context *get_current_context(void)
-+{
-+	reiser4_context *ctx;
-+
-+	ctx = get_context(current);
-+	return ctx;
-+}
-+
-+/* allocation mask of the current context, or GFP_KERNEL when the thread has
-+   no reiser4 context */
-+static inline gfp_t reiser4_ctx_gfp_mask_get(void)
-+{
-+	reiser4_context *ctx = get_current_context_check();
-+
-+	if (ctx == NULL)
-+		return GFP_KERNEL;
-+	return ctx->gfp_mask;
-+}
-+
-+void reiser4_ctx_gfp_mask_set(void);
-+void reiser4_ctx_gfp_mask_force (gfp_t mask);
-+
-+/*
-+ * non-zero if the current thread is in write-out mode. A thread enters
-+ * write-out mode during jnode_flush and reiser4_write_logs().
-+ */
-+static inline int is_writeout_mode(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	return ctx->writeout_mode;
-+}
-+
-+/*
-+ * enter write-out mode; the mode must not already be set
-+ */
-+static inline void writeout_mode_enable(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	assert("zam-941", !get_current_context()->writeout_mode);
-+	ctx->writeout_mode = 1;
-+}
-+
-+/*
-+ * leave write-out mode; the mode must currently be set
-+ */
-+static inline void writeout_mode_disable(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	assert("zam-942", get_current_context()->writeout_mode);
-+	ctx->writeout_mode = 0;
-+}
-+
-+/* allow space grabbing in the current context */
-+static inline void grab_space_enable(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	ctx->grab_enabled = 1;
-+}
-+
-+/* forbid space grabbing in the current context */
-+static inline void grab_space_disable(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	ctx->grab_enabled = 0;
-+}
-+
-+/* set the space-grabbing flag of the current context: any non-zero
-+   @enabled enables grabbing, zero disables it */
-+static inline void grab_space_set_enabled(int enabled)
-+{
-+	/*
-+	 * ->grab_enabled is a 1-bit unsigned bit-field. Storing a plain int
-+	 * keeps only its lowest bit, so an even non-zero value (e.g. 2) would
-+	 * silently disable grabbing. Normalize to 0/1 first.
-+	 */
-+	get_current_context()->grab_enabled = !!enabled;
-+}
-+
-+/* non-zero if space grabbing is enabled in @ctx */
-+static inline int is_grab_enabled(reiser4_context * ctx)
-+{
-+	int enabled = ctx->grab_enabled;
-+
-+	return enabled;
-+}
-+
-+/* mark the transaction handle of @context as TXNH_DONT_COMMIT, so that no
-+ * commit or flush is performed when it is closed, and set ->nobalance so
-+ * that dirty page balancing is skipped on exit. This is necessary when the
-+ * handle has to be closed under some coarse semaphore, like i_mutex of
-+ * directory. Commit will be performed by ktxnmgrd. */
-+static inline void context_set_commit_async(reiser4_context * context)
-+{
-+	txn_handle *handle = context->trans;
-+
-+	handle->flags |= TXNH_DONT_COMMIT;
-+	context->nobalance = 1;
-+}
-+
-+/* __REISER4_CONTEXT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/coord.c linux-2.6.20/fs/reiser4/coord.c
---- linux-2.6.20.orig/fs/reiser4/coord.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/coord.c 2007-05-06 14:50:43.698975725 +0400
-@@ -0,0 +1,935 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tree.h"
-+#include "plugin/item/item.h"
-+#include "znode.h"
-+#include "coord.h"
-+
-+/* Internal constructor. */
-+static inline void
-+coord_init_values(coord_t * coord, const znode * node, pos_in_node_t item_pos,
-+ pos_in_node_t unit_pos, between_enum between)
-+{
-+ coord->node = (znode *) node;
-+ coord_set_item_pos(coord, item_pos);
-+ coord->unit_pos = unit_pos;
-+ coord->between = between;
-+ ON_DEBUG(coord->plug_v = 0);
-+ ON_DEBUG(coord->body_v = 0);
-+
-+ /*ON_TRACE (TRACE_COORDS, "init coord %p node %p: %u %u %s\n", coord, node, item_pos, unit_pos, coord_tween_tostring (between)); */
-+}
-+
-+/* after shifting of node content, coord previously set properly may become
-+ invalid, try to "normalize" it. */
-+void coord_normalize(coord_t * coord)
-+{
-+ znode *node;
-+
-+ node = coord->node;
-+ assert("vs-683", node);
-+
-+ coord_clear_iplug(coord);
-+
-+ if (node_is_empty(node)) {
-+ coord_init_first_unit(coord, node);
-+ } else if ((coord->between == AFTER_ITEM)
-+ || (coord->between == AFTER_UNIT)) {
-+ return;
-+ } else if (coord->item_pos == coord_num_items(coord)
-+ && coord->between == BEFORE_ITEM) {
-+ coord_dec_item_pos(coord);
-+ coord->between = AFTER_ITEM;
-+ } else if (coord->unit_pos == coord_num_units(coord)
-+ && coord->between == BEFORE_UNIT) {
-+ coord->unit_pos--;
-+ coord->between = AFTER_UNIT;
-+ } else if (coord->item_pos == coord_num_items(coord)
-+ && coord->unit_pos == 0 && coord->between == BEFORE_UNIT) {
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+ }
-+}
-+
-+/* Copy a coordinate. */
-+void coord_dup(coord_t * coord, const coord_t * old_coord)
-+{
-+ assert("jmacd-9800", coord_check(old_coord));
-+ coord_dup_nocheck(coord, old_coord);
-+}
-+
-+/* Copy a coordinate without check. Useful when old_coord->node is not
-+ loaded. As in cbk_tree_lookup -> connect_znode -> connect_one_side */
-+void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord)
-+{
-+ coord->node = old_coord->node;
-+ coord_set_item_pos(coord, old_coord->item_pos);
-+ coord->unit_pos = old_coord->unit_pos;
-+ coord->between = old_coord->between;
-+ coord->iplugid = old_coord->iplugid;
-+ ON_DEBUG(coord->plug_v = old_coord->plug_v);
-+ ON_DEBUG(coord->body_v = old_coord->body_v);
-+}
-+
-+/* Initialize an invalid coordinate. */
-+void coord_init_invalid(coord_t * coord, const znode * node)
-+{
-+ coord_init_values(coord, node, 0, 0, INVALID_COORD);
-+}
-+
-+void coord_init_first_unit_nocheck(coord_t * coord, const znode * node)
-+{
-+ coord_init_values(coord, node, 0, 0, AT_UNIT);
-+}
-+
-+/* Initialize a coordinate to point at the first unit of the first item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+void coord_init_first_unit(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node, 0, 0, (is_empty ? EMPTY_NODE : AT_UNIT));
-+
-+ assert("jmacd-9801", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to point at the last unit of the last item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+void coord_init_last_unit(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node,
-+ (is_empty ? 0 : node_num_items(node) - 1), 0,
-+ (is_empty ? EMPTY_NODE : AT_UNIT));
-+ if (!is_empty)
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ assert("jmacd-9802", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to before the first item. If the node is empty, it is
-+ positioned at the EMPTY_NODE. */
-+void coord_init_before_first_item(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node, 0, 0,
-+ (is_empty ? EMPTY_NODE : BEFORE_UNIT));
-+
-+ assert("jmacd-9803", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to after the last item. If the node is empty, it is positioned
-+ at the EMPTY_NODE. */
-+void coord_init_after_last_item(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node,
-+ (is_empty ? 0 : node_num_items(node) - 1), 0,
-+ (is_empty ? EMPTY_NODE : AFTER_ITEM));
-+
-+ assert("jmacd-9804", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to after last unit in the item. Coord must be set
-+ already to existing item */
-+void coord_init_after_item_end(coord_t * coord)
-+{
-+ coord->between = AFTER_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+}
-+
-+/* Initialize a coordinate to before the item. Coord must be set already to existing item */
-+void coord_init_before_item(coord_t * coord)
-+{
-+ coord->unit_pos = 0;
-+ coord->between = BEFORE_ITEM;
-+}
-+
-+/* Initialize a coordinate to after the item. Coord must be set already to existing item */
-+void coord_init_after_item(coord_t * coord)
-+{
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+}
-+
-+/* Initialize a coordinate by 0s. Used in places where init_coord was used and
-+ it was not clear how actually */
-+void coord_init_zero(coord_t * coord)
-+{
-+ memset(coord, 0, sizeof(*coord));
-+}
-+
-+/* Return the number of units at the present item. Asserts coord_is_existing_item(). */
-+unsigned coord_num_units(const coord_t * coord)
-+{
-+ assert("jmacd-9806", coord_is_existing_item(coord));
-+
-+ return item_plugin_by_coord(coord)->b.nr_units(coord);
-+}
-+
-+/* Returns true if the coord was initializewd by coord_init_invalid (). */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_invalid(const coord_t * coord)
-+{
-+ return coord->between == INVALID_COORD;
-+}
-+
-+/* Returns true if the coordinate is positioned at an existing item, not before or after
-+ an item. It may be placed at, before, or after any unit within the item, whether
-+ existing or not. */
-+int coord_is_existing_item(const coord_t * coord)
-+{
-+ switch (coord->between) {
-+ case EMPTY_NODE:
-+ case BEFORE_ITEM:
-+ case AFTER_ITEM:
-+ case INVALID_COORD:
-+ return 0;
-+
-+ case BEFORE_UNIT:
-+ case AT_UNIT:
-+ case AFTER_UNIT:
-+ return coord->item_pos < coord_num_items(coord);
-+ }
-+
-+ impossible("jmacd-9900", "unreachable coord: %p", coord);
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned at an existing unit, not before or after a
-+ unit. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_existing_unit(const coord_t * coord)
-+{
-+ switch (coord->between) {
-+ case EMPTY_NODE:
-+ case BEFORE_UNIT:
-+ case AFTER_UNIT:
-+ case BEFORE_ITEM:
-+ case AFTER_ITEM:
-+ case INVALID_COORD:
-+ return 0;
-+
-+ case AT_UNIT:
-+ return (coord->item_pos < coord_num_items(coord)
-+ && coord->unit_pos < coord_num_units(coord));
-+ }
-+
-+ impossible("jmacd-9902", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned at the first unit of the first item. Not
-+ true for empty nodes nor coordinates positioned before the first item. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_leftmost_unit(const coord_t * coord)
-+{
-+ return (coord->between == AT_UNIT && coord->item_pos == 0
-+ && coord->unit_pos == 0);
-+}
-+
-+#if REISER4_DEBUG
-+/* For assertions only, checks for a valid coordinate. */
-+int coord_check(const coord_t * coord)
-+{
-+ if (coord->node == NULL) {
-+ return 0;
-+ }
-+ if (znode_above_root(coord->node))
-+ return 1;
-+
-+ switch (coord->between) {
-+ default:
-+ case INVALID_COORD:
-+ return 0;
-+ case EMPTY_NODE:
-+ if (!node_is_empty(coord->node)) {
-+ return 0;
-+ }
-+ return coord->item_pos == 0 && coord->unit_pos == 0;
-+
-+ case BEFORE_UNIT:
-+ case AFTER_UNIT:
-+ if (node_is_empty(coord->node) && (coord->item_pos == 0)
-+ && (coord->unit_pos == 0))
-+ return 1;
-+ case AT_UNIT:
-+ break;
-+ case AFTER_ITEM:
-+ case BEFORE_ITEM:
-+ /* before/after item should not set unit_pos. */
-+ if (coord->unit_pos != 0) {
-+ return 0;
-+ }
-+ break;
-+ }
-+
-+ if (coord->item_pos >= node_num_items(coord->node)) {
-+ return 0;
-+ }
-+
-+ /* FIXME-VS: we are going to check unit_pos. This makes no sense when
-+ between is set either AFTER_ITEM or BEFORE_ITEM */
-+ if (coord->between == AFTER_ITEM || coord->between == BEFORE_ITEM)
-+ return 1;
-+
-+ if (coord_is_iplug_set(coord) &&
-+ coord->unit_pos >
-+ item_plugin_by_coord(coord)->b.nr_units(coord) - 1) {
-+ return 0;
-+ }
-+ return 1;
-+}
-+#endif
-+
-+/* Adjust coordinate boundaries based on the number of items prior to coord_next/prev.
-+ Returns 1 if the new position is does not exist. */
-+static int coord_adjust_items(coord_t * coord, unsigned items, int is_next)
-+{
-+ /* If the node is invalid, leave it. */
-+ if (coord->between == INVALID_COORD) {
-+ return 1;
-+ }
-+
-+ /* If the node is empty, set it appropriately. */
-+ if (items == 0) {
-+ coord->between = EMPTY_NODE;
-+ coord_set_item_pos(coord, 0);
-+ coord->unit_pos = 0;
-+ return 1;
-+ }
-+
-+ /* If it was empty and it no longer is, set to BEFORE/AFTER_ITEM. */
-+ if (coord->between == EMPTY_NODE) {
-+ coord->between = (is_next ? BEFORE_ITEM : AFTER_ITEM);
-+ coord_set_item_pos(coord, 0);
-+ coord->unit_pos = 0;
-+ return 0;
-+ }
-+
-+ /* If the item_pos is out-of-range, set it appropriatly. */
-+ if (coord->item_pos >= items) {
-+ coord->between = AFTER_ITEM;
-+ coord_set_item_pos(coord, items - 1);
-+ coord->unit_pos = 0;
-+ /* If is_next, return 1 (can't go any further). */
-+ return is_next;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one unit to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is an
-+ existing unit. */
-+int coord_next_unit(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case BEFORE_UNIT:
-+ /* Now it is positioned at the same unit. */
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ case AT_UNIT:
-+ /* If it was at or after a unit and there are more units in this item,
-+ advance to the next one. */
-+ if (coord->unit_pos < coord_last_unit_pos(coord)) {
-+ coord->unit_pos += 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ /* Otherwise, it is crossing an item boundary and treated as if it was
-+ after the current item. */
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ /* FALLTHROUGH */
-+
-+ case AFTER_ITEM:
-+ /* Check for end-of-node. */
-+ if (coord->item_pos == items - 1) {
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ /* The adjust_items checks ensure that we are valid here. */
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ /* Handled in coord_adjust_items(). */
-+ break;
-+ }
-+
-+ impossible("jmacd-9902", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one item to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing item. */
-+int coord_next_item(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AFTER_UNIT:
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ case AFTER_ITEM:
-+ /* Check for end-of-node. */
-+ if (coord->item_pos == items - 1) {
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ coord_clear_iplug(coord);
-+ return 1;
-+ }
-+
-+ /* Anywhere in an item, go to the next one. */
-+ coord->between = AT_UNIT;
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ /* The out-of-range check ensures that we are valid here. */
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ /* Handled in coord_adjust_items(). */
-+ break;
-+ }
-+
-+ impossible("jmacd-9903", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one unit to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing unit. */
-+int coord_prev_unit(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ if (coord->unit_pos > 0) {
-+ coord->unit_pos -= 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ /* What if unit_pos is out-of-range? */
-+ assert("jmacd-5442",
-+ coord->unit_pos <= coord_last_unit_pos(coord));
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ if (coord->item_pos == 0) {
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ /* FALLTHROUGH */
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ break;
-+ }
-+
-+ impossible("jmacd-9904", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one item to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing item. */
-+int coord_prev_item(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ case AFTER_UNIT:
-+ case BEFORE_UNIT:
-+ case BEFORE_ITEM:
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ coord->unit_pos = 0;
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = 0;
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ break;
-+ }
-+
-+ impossible("jmacd-9905", "unreachable");
-+ return 0;
-+}
-+
-+/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */
-+void coord_init_sideof_unit(coord_t * coord, const znode * node, sideof dir)
-+{
-+ assert("jmacd-9821", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ coord_init_first_unit(coord, node);
-+ } else {
-+ coord_init_last_unit(coord, node);
-+ }
-+}
-+
-+/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof
-+ argument. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_after_sideof_unit(coord_t * coord, sideof dir)
-+{
-+ assert("jmacd-9822", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ return coord_is_before_leftmost(coord);
-+ } else {
-+ return coord_is_after_rightmost(coord);
-+ }
-+}
-+
-+/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. */
-+/* Audited by: green(2002.06.15) */
-+int coord_sideof_unit(coord_t * coord, sideof dir)
-+{
-+ assert("jmacd-9823", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ return coord_prev_unit(coord);
-+ } else {
-+ return coord_next_unit(coord);
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+int coords_equal(const coord_t * c1, const coord_t * c2)
-+{
-+ assert("nikita-2840", c1 != NULL);
-+ assert("nikita-2841", c2 != NULL);
-+
-+ return
-+ c1->node == c2->node &&
-+ c1->item_pos == c2->item_pos &&
-+ c1->unit_pos == c2->unit_pos && c1->between == c2->between;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost
-+ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */
-+/* Audited by: green(2002.06.15) */
-+coord_wrt_node coord_wrt(const coord_t * coord)
-+{
-+ if (coord_is_before_leftmost(coord)) {
-+ return COORD_ON_THE_LEFT;
-+ }
-+
-+ if (coord_is_after_rightmost(coord)) {
-+ return COORD_ON_THE_RIGHT;
-+ }
-+
-+ return COORD_INSIDE;
-+}
-+
-+/* Returns true if the coordinate is positioned after the last item or after the last unit
-+ of the last item or it is an empty node. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_after_rightmost(const coord_t * coord)
-+{
-+ assert("jmacd-7313", coord_check(coord));
-+
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ case BEFORE_ITEM:
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case AFTER_ITEM:
-+ return (coord->item_pos == node_num_items(coord->node) - 1);
-+
-+ case AFTER_UNIT:
-+ return ((coord->item_pos == node_num_items(coord->node) - 1) &&
-+ coord->unit_pos == coord_last_unit_pos(coord));
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned before the first item or it is an empty
-+ node. */
-+int coord_is_before_leftmost(const coord_t * coord)
-+{
-+ /* FIXME-VS: coord_check requires node to be loaded whereas it is not
-+ necessary to check if coord is set before leftmost
-+ assert ("jmacd-7313", coord_check (coord)); */
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ case AFTER_ITEM:
-+ case AFTER_UNIT:
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case BEFORE_ITEM:
-+ case BEFORE_UNIT:
-+ return (coord->item_pos == 0) && (coord->unit_pos == 0);
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned after a item, before a item, after the
-+ last unit of an item, before the first unit of an item, or at an empty node. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_between_items(const coord_t * coord)
-+{
-+ assert("jmacd-7313", coord_check(coord));
-+
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ case BEFORE_ITEM:
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case BEFORE_UNIT:
-+ return coord->unit_pos == 0;
-+
-+ case AFTER_UNIT:
-+ return coord->unit_pos == coord_last_unit_pos(coord);
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* Returns true if the coordinates are positioned at adjacent units, regardless of
-+ before-after or item boundaries. */
-+int coord_are_neighbors(coord_t * c1, coord_t * c2)
-+{
-+ coord_t *left;
-+ coord_t *right;
-+
-+ assert("nikita-1241", c1 != NULL);
-+ assert("nikita-1242", c2 != NULL);
-+ assert("nikita-1243", c1->node == c2->node);
-+ assert("nikita-1244", coord_is_existing_unit(c1));
-+ assert("nikita-1245", coord_is_existing_unit(c2));
-+
-+ left = right = NULL;
-+ switch (coord_compare(c1, c2)) {
-+ case COORD_CMP_ON_LEFT:
-+ left = c1;
-+ right = c2;
-+ break;
-+ case COORD_CMP_ON_RIGHT:
-+ left = c2;
-+ right = c1;
-+ break;
-+ case COORD_CMP_SAME:
-+ return 0;
-+ default:
-+ wrong_return_value("nikita-1246", "compare_coords()");
-+ }
-+ assert("vs-731", left && right);
-+ if (left->item_pos == right->item_pos) {
-+ return left->unit_pos + 1 == right->unit_pos;
-+ } else if (left->item_pos + 1 == right->item_pos) {
-+ return (left->unit_pos == coord_last_unit_pos(left))
-+ && (right->unit_pos == 0);
-+ } else {
-+ return 0;
-+ }
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* Assuming two coordinates are positioned in the same node, return COORD_CMP_ON_RIGHT,
-+ COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's position relative to c2. */
-+/* Audited by: green(2002.06.15) */
-+coord_cmp coord_compare(coord_t * c1, coord_t * c2)
-+{
-+ assert("vs-209", c1->node == c2->node);
-+ assert("vs-194", coord_is_existing_unit(c1)
-+ && coord_is_existing_unit(c2));
-+
-+ if (c1->item_pos > c2->item_pos)
-+ return COORD_CMP_ON_RIGHT;
-+ if (c1->item_pos < c2->item_pos)
-+ return COORD_CMP_ON_LEFT;
-+ if (c1->unit_pos > c2->unit_pos)
-+ return COORD_CMP_ON_RIGHT;
-+ if (c1->unit_pos < c2->unit_pos)
-+ return COORD_CMP_ON_LEFT;
-+ return COORD_CMP_SAME;
-+}
-+
-+/* If the coordinate is between items, shifts it to the right. Returns 0 on success and
-+ non-zero if there is no position to the right. */
-+int coord_set_to_right(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ case BEFORE_UNIT:
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ if (coord->unit_pos < coord_last_unit_pos(coord)) {
-+ coord->unit_pos += 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ } else {
-+
-+ coord->unit_pos = 0;
-+
-+ if (coord->item_pos == items - 1) {
-+ coord->between = AFTER_ITEM;
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ case AFTER_ITEM:
-+ if (coord->item_pos == items - 1) {
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case INVALID_COORD:
-+ break;
-+ }
-+
-+ impossible("jmacd-9920", "unreachable");
-+ return 0;
-+}
-+
-+/* If the coordinate is between items, shifts it to the left. Returns 0 on success and
-+ non-zero if there is no position to the left. */
-+int coord_set_to_left(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ return 0;
-+
-+ case BEFORE_UNIT:
-+ if (coord->unit_pos > 0) {
-+ coord->unit_pos -= 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ } else {
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ return 1;
-+ }
-+
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord_dec_item_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ case BEFORE_ITEM:
-+ if (coord->item_pos == 0) {
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case INVALID_COORD:
-+ break;
-+ }
-+
-+ impossible("jmacd-9920", "unreachable");
-+ return 0;
-+}
-+
-+static const char *coord_tween_tostring(between_enum n)
-+{
-+ switch (n) {
-+ case BEFORE_UNIT:
-+ return "before unit";
-+ case BEFORE_ITEM:
-+ return "before item";
-+ case AT_UNIT:
-+ return "at unit";
-+ case AFTER_UNIT:
-+ return "after unit";
-+ case AFTER_ITEM:
-+ return "after item";
-+ case EMPTY_NODE:
-+ return "empty node";
-+ case INVALID_COORD:
-+ return "invalid";
-+ default:
-+ {
-+ static char buf[30];
-+
-+ sprintf(buf, "unknown: %i", n);
-+ return buf;
-+ }
-+ }
-+}
-+
-+void print_coord(const char *mes, const coord_t * coord, int node)
-+{
-+ if (coord == NULL) {
-+ printk("%s: null\n", mes);
-+ return;
-+ }
-+ printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n",
-+ mes, coord->item_pos, coord->unit_pos,
-+ coord_tween_tostring(coord->between), coord->iplugid);
-+}
-+
-+int
-+item_utmost_child_real_block(const coord_t * coord, sideof side,
-+ reiser4_block_nr * blk)
-+{
-+ return item_plugin_by_coord(coord)->f.utmost_child_real_block(coord,
-+ side,
-+ blk);
-+}
-+
-+int item_utmost_child(const coord_t * coord, sideof side, jnode ** child)
-+{
-+ return item_plugin_by_coord(coord)->f.utmost_child(coord, side, child);
-+}
-+
-+/* @count bytes of flow @f got written, update correspondingly f->length,
-+ f->data and f->key */
-+void move_flow_forward(flow_t * f, unsigned count)
-+{
-+ if (f->data)
-+ f->data += count;
-+ f->length -= count;
-+ set_key_offset(&f->key, get_key_offset(&f->key) + count);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/coord.h linux-2.6.20/fs/reiser4/coord.h
---- linux-2.6.20.orig/fs/reiser4/coord.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/coord.h 2007-05-06 14:50:43.698975725 +0400
-@@ -0,0 +1,389 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Coords */
-+
-+#if !defined( __REISER4_COORD_H__ )
-+#define __REISER4_COORD_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+
-+/* insertions happen between coords in the tree, so we need some means
-+ of specifying the sense of betweenness. */
-+typedef enum {
-+ BEFORE_UNIT, /* Note: we/init_coord depends on this value being zero. */
-+ AT_UNIT,
-+ AFTER_UNIT,
-+ BEFORE_ITEM,
-+ AFTER_ITEM,
-+ INVALID_COORD,
-+ EMPTY_NODE,
-+} between_enum;
-+
-+/* location of coord w.r.t. its node */
-+typedef enum {
-+ COORD_ON_THE_LEFT = -1,
-+ COORD_ON_THE_RIGHT = +1,
-+ COORD_INSIDE = 0
-+} coord_wrt_node;
-+
-+typedef enum {
-+ COORD_CMP_SAME = 0, COORD_CMP_ON_LEFT = -1, COORD_CMP_ON_RIGHT = +1
-+} coord_cmp;
-+
-+struct coord {
-+ /* node in a tree */
-+ /* 0 */ znode *node;
-+
-+ /* position of item within node */
-+ /* 4 */ pos_in_node_t item_pos;
-+ /* position of unit within item */
-+ /* 6 */ pos_in_node_t unit_pos;
-+ /* optimization: plugin of item is stored in coord_t. Until this was
-+ implemented, item_plugin_by_coord() was major CPU consumer. ->iplugid
-+ is invalidated (set to 0xff) on each modification of ->item_pos,
-+ and all such modifications are funneled through coord_*_item_pos()
-+ functions below.
-+ */
-+ /* 8 */ char iplugid;
-+ /* position of coord w.r.t. to neighboring items and/or units.
-+ Values are taken from &between_enum above.
-+ */
-+ /* 9 */ char between;
-+ /* padding. It will be added by the compiler anyway to conform to the
-+ * C language alignment requirements. We keep it here to be on the
-+ * safe side and to have a clear picture of the memory layout of this
-+ * structure. */
-+ /* 10 */ __u16 pad;
-+ /* 12 */ int offset;
-+#if REISER4_DEBUG
-+ unsigned long plug_v;
-+ unsigned long body_v;
-+#endif
-+};
-+
-+#define INVALID_PLUGID ((char)((1 << 8) - 1))
-+#define INVALID_OFFSET -1
-+
-+static inline void coord_clear_iplug(coord_t * coord)
-+{
-+ assert("nikita-2835", coord != NULL);
-+ coord->iplugid = INVALID_PLUGID;
-+ coord->offset = INVALID_OFFSET;
-+}
-+
-+static inline int coord_is_iplug_set(const coord_t * coord)
-+{
-+ assert("nikita-2836", coord != NULL);
-+ return coord->iplugid != INVALID_PLUGID;
-+}
-+
-+static inline void coord_set_item_pos(coord_t * coord, pos_in_node_t pos)
-+{
-+ assert("nikita-2478", coord != NULL);
-+ coord->item_pos = pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_dec_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2480", coord != NULL);
-+ --coord->item_pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_inc_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2481", coord != NULL);
-+ ++coord->item_pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_add_item_pos(coord_t * coord, int delta)
-+{
-+ assert("nikita-2482", coord != NULL);
-+ coord->item_pos += delta;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_invalid_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2832", coord != NULL);
-+ coord->item_pos = (unsigned short)~0;
-+ coord_clear_iplug(coord);
-+}
-+
-+/* Reverse a direction. */
-+static inline sideof sideof_reverse(sideof side)
-+{
-+ return side == LEFT_SIDE ? RIGHT_SIDE : LEFT_SIDE;
-+}
-+
-+/* NOTE: There is a somewhat odd mixture of the following opposed terms:
-+
-+ "first" and "last"
-+ "next" and "prev"
-+ "before" and "after"
-+ "leftmost" and "rightmost"
-+
-+ But I think the chosen names are decent the way they are.
-+*/
-+
-+/* COORD INITIALIZERS */
-+
-+/* Initialize an invalid coordinate. */
-+extern void coord_init_invalid(coord_t * coord, const znode * node);
-+
-+extern void coord_init_first_unit_nocheck(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to point at the first unit of the first item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+extern void coord_init_first_unit(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to point at the last unit of the last item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+extern void coord_init_last_unit(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to before the first item. If the node is empty, it is
-+ positioned at the EMPTY_NODE. */
-+extern void coord_init_before_first_item(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to after the last item. If the node is empty, it is positioned
-+ at the EMPTY_NODE. */
-+extern void coord_init_after_last_item(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to after last unit in the item. Coord must be set
-+ already to existing item */
-+void coord_init_after_item_end(coord_t * coord);
-+
-+/* Initialize a coordinate to before the item. Coord must be set already to existing item */
-+void coord_init_before_item(coord_t *);
-+/* Initialize a coordinate to after the item. Coord must be set already to existing item */
-+void coord_init_after_item(coord_t *);
-+
-+/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */
-+extern void coord_init_sideof_unit(coord_t * coord, const znode * node,
-+ sideof dir);
-+
-+/* Initialize a coordinate by 0s. Used in places where init_coord was used and
-+ it was not clear how actually
-+ FIXME-VS: added by vs (2002, june, 8) */
-+extern void coord_init_zero(coord_t * coord);
-+
-+/* COORD METHODS */
-+
-+/* after shifting of node content, coord previously set properly may become
-+ invalid, try to "normalize" it. */
-+void coord_normalize(coord_t * coord);
-+
-+/* Copy a coordinate. */
-+extern void coord_dup(coord_t * coord, const coord_t * old_coord);
-+
-+/* Copy a coordinate without check. */
-+void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord);
-+
-+unsigned coord_num_units(const coord_t * coord);
-+
-+/* Return the last valid unit number at the present item (i.e.,
-+ coord_num_units() - 1). */
-+static inline unsigned coord_last_unit_pos(const coord_t * coord)
-+{
-+ return coord_num_units(coord) - 1;
-+}
-+
-+#if REISER4_DEBUG
-+/* For assertions only, checks for a valid coordinate. */
-+extern int coord_check(const coord_t * coord);
-+
-+extern unsigned long znode_times_locked(const znode * z);
-+
-+static inline void coord_update_v(coord_t * coord)
-+{
-+ coord->plug_v = coord->body_v = znode_times_locked(coord->node);
-+}
-+#endif
-+
-+extern int coords_equal(const coord_t * c1, const coord_t * c2);
-+
-+extern void print_coord(const char *mes, const coord_t * coord, int print_node);
-+
-+/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost
-+ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */
-+extern coord_wrt_node coord_wrt(const coord_t * coord);
-+
-+/* Returns true if the coordinates are positioned at adjacent units, regardless of
-+ before-after or item boundaries. */
-+extern int coord_are_neighbors(coord_t * c1, coord_t * c2);
-+
-+/* Assuming two coordinates are positioned in the same node, return NCOORD_CMP_ON_RIGHT,
-+ NCOORD_CMP_ON_LEFT, or NCOORD_CMP_SAME depending on c1's position relative to c2. */
-+extern coord_cmp coord_compare(coord_t * c1, coord_t * c2);
-+
-+/* COORD PREDICATES */
-+
-+/* Returns true if the coord was initializewd by coord_init_invalid (). */
-+extern int coord_is_invalid(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an existing item, not before or after
-+ an item. It may be placed at, before, or after any unit within the item, whether
-+ existing or not. If this is true you can call methods of the item plugin. */
-+extern int coord_is_existing_item(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned after an item, before an item, after the
-+ last unit of an item, before the first unit of an item, or at an empty node. */
-+extern int coord_is_between_items(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an existing unit, not before or after a
-+ unit. */
-+extern int coord_is_existing_unit(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an empty node. */
-+extern int coord_is_empty(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at the first unit of the first item. Not
-+ true for empty nodes nor coordinates positioned before the first item. */
-+extern int coord_is_leftmost_unit(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned after the last item or after the last unit
-+ of the last item or it is an empty node. */
-+extern int coord_is_after_rightmost(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned before the first item or it is an empty
-+ node. */
-+extern int coord_is_before_leftmost(const coord_t * coord);
-+
-+/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof
-+ argument. */
-+extern int coord_is_after_sideof_unit(coord_t * coord, sideof dir);
-+
-+/* COORD MODIFIERS */
-+
-+/* Advances the coordinate by one unit to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing unit. */
-+extern int coord_next_unit(coord_t * coord);
-+
-+/* Advances the coordinate by one item to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing item. */
-+extern int coord_next_item(coord_t * coord);
-+
-+/* Advances the coordinate by one unit to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing unit. */
-+extern int coord_prev_unit(coord_t * coord);
-+
-+/* Advances the coordinate by one item to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing item. */
-+extern int coord_prev_item(coord_t * coord);
-+
-+/* If the coordinate is between items, shifts it to the right. Returns 0 on success and
-+ non-zero if there is no position to the right. */
-+extern int coord_set_to_right(coord_t * coord);
-+
-+/* If the coordinate is between items, shifts it to the left. Returns 0 on success and
-+ non-zero if there is no position to the left. */
-+extern int coord_set_to_left(coord_t * coord);
-+
-+/* If the coordinate is at an existing unit, set to after that unit. Returns 0 on success
-+ and non-zero if the unit did not exist. */
-+extern int coord_set_after_unit(coord_t * coord);
-+
-+/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. */
-+extern int coord_sideof_unit(coord_t * coord, sideof dir);
-+
-+/* iterate over all units in @node; @coord is advanced by the loop condition itself */
-+#define for_all_units( coord, node ) \
-+ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \
-+ coord_next_unit( coord ) == 0 ; )
-+
-+/* iterate over all items in @node; @coord is advanced by the loop condition itself */
-+#define for_all_items( coord, node ) \
-+ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \
-+ coord_next_item( coord ) == 0 ; )
-+
-+/* COORD/ITEM METHODS */
-+
-+extern int item_utmost_child_real_block(const coord_t * coord, sideof side,
-+ reiser4_block_nr * blk);
-+extern int item_utmost_child(const coord_t * coord, sideof side,
-+ jnode ** child);
-+
-+/* A flow is a sequence of bytes being written to or read from the tree. The
-+ tree will slice the flow into items while storing it into nodes, but all of
-+ that is hidden from anything outside the tree. */
-+
-+struct flow {
-+ reiser4_key key; /* key of start of flow's sequence of bytes */
-+ loff_t length; /* length of flow's sequence of bytes */
-+ char *data; /* start of flow's sequence of bytes */
-+ int user; /* 1 if @data points to user space, 0 if kernel space */
-+ rw_op op; /* presumably the read/write direction - TODO confirm. NIKITA-FIXME-HANS: comment is where? */
-+};
-+
-+void move_flow_forward(flow_t * f, unsigned count);
-+
-+/* &reiser4_item_data - description of data to be inserted or pasted
-+
-+ Q: articulate the reasons for the difference between this and flow.
-+
-+ A: Besides flow we insert into tree other things: stat data, directory
-+ entry, etc. To insert them into tree one has to provide this structure. If
-+ one is going to insert flow - one can use insert_flow, where this structure
-+ does not have to be created
-+*/
-+struct reiser4_item_data {
-+ /* actual data to be inserted. If NULL, ->create_item() will not
-+ do xmemcpy itself, leaving this up to the caller. This can
-+ save some amount of unnecessary memory copying, for example,
-+ during insertion of stat data.
-+
-+ */
-+ char *data;
-+ /* 1 if 'char * data' contains pointer to user space and 0 if it is
-+ kernel space */
-+ int user;
-+ /* amount of data we are going to insert or paste */
-+ int length;
-+ /* "Arg" is opaque data that is passed down to the
-+ ->create_item() method of node layout, which in turn
-+ hands it to the ->create_hook() of item being created. This
-+ arg is currently used by:
-+
-+ . ->create_hook() of internal item
-+ (fs/reiser4/plugin/item/internal.c:internal_create_hook()),
-+ . ->paste() method of directory item.
-+ . ->create_hook() of extent item
-+
-+ For internal item, this is left "brother" of new node being
-+ inserted and it is used to add new node into sibling list
-+ after parent to it was just inserted into parent.
-+
-+ While ->arg does look like a somewhat unnecessary complication,
-+ it actually saves a lot of headache in many places, because
-+ all data necessary to insert or paste new data into tree are
-+ collected in one place, and this eliminates a lot of extra
-+ argument passing and storing everywhere.
-+
-+ */
-+ void *arg;
-+ /* plugin of item we are inserting */
-+ item_plugin *iplug;
-+};
-+
-+/* __REISER4_COORD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/debug.c linux-2.6.20/fs/reiser4/debug.c
---- linux-2.6.20.orig/fs/reiser4/debug.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/debug.c 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,308 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Debugging facilities. */
-+
-+/*
-+ * This file contains generic debugging functions used by reiser4. Roughly
-+ * following:
-+ *
-+ * panicking: reiser4_do_panic(), reiser4_print_prefix().
-+ *
-+ * locking:
-+ * reiser4_schedulable(), reiser4_lock_counters(), print_lock_counters(),
-+ * reiser4_no_counters_are_held(), reiser4_commit_check_locks()
-+ *
-+ * error code monitoring (see comment before RETERR macro):
-+ * reiser4_return_err(), reiser4_report_err().
-+ *
-+ * stack back-tracing: fill_backtrace()
-+ *
-+ * miscellaneous: reiser4_preempt_point(), call_on_each_assert(),
-+ * reiser4_debugtrap().
-+ *
-+ */
-+
-+#include "reiser4.h"
-+#include "context.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+
-+#include <linux/sysfs.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/spinlock.h>
-+#include <linux/kallsyms.h>
-+#include <linux/vmalloc.h>
-+#include <linux/ctype.h>
-+#include <linux/sysctl.h>
-+#include <linux/hardirq.h>
-+
-+#if 0
-+#if REISER4_DEBUG
-+static void reiser4_report_err(void);
-+#else
-+#define reiser4_report_err() noop
-+#endif
-+#endif /* 0 */
-+
-+/*
-+ * global buffer where message given to reiser4_panic is formatted.
-+ */
-+static char panic_buf[REISER4_PANIC_MSG_BUFFER_SIZE];
-+
-+/*
-+ * lock protecting consistency of panic_buf under concurrent panics
-+ */
-+static DEFINE_SPINLOCK(panic_guard);
-+
-+/* Format the message into panic_buf, log it at KERN_EMERG and panic(). Never
-+ returns. This is called by fs/reiser4/debug.h:reiser4_panic(). */
-+void reiser4_do_panic(const char *format /* format string */ , ... /* rest */ )
-+{
-+ static int in_panic = 0;
-+ va_list args;
-+
-+ /*
-+ * check for recursive panic: only the first entry formats and logs.
-+ */
-+ if (in_panic == 0) {
-+ in_panic = 1;
-+
-+ spin_lock(&panic_guard);
-+ va_start(args, format);
-+ vsnprintf(panic_buf, sizeof(panic_buf), format, args);
-+ va_end(args);
-+ printk(KERN_EMERG "reiser4 panicked cowardly: %s", panic_buf);
-+ spin_unlock(&panic_guard);
-+
-+ /*
-+ * if kernel debugger is configured---drop in. Early dropping
-+ * into kgdb is not always convenient, because panic message
-+ * is not yet printed most of the times. But:
-+ *
-+ * (1) message can be extracted from printk_buf[]
-+ * (declared static inside of printk()), and
-+ *
-+ * (2) sometimes serial/kgdb combo dies while printing
-+ * long panic message, so it's more prudent to break into
-+ * debugger earlier.
-+ *
-+ */
-+ DEBUGON(1);
-+ }
-+ /* always reached, also on recursive entry: keeps the noreturn promise to gcc */
-+ panic("%s", panic_buf);
-+}
-+
-+#if 0
-+void
-+reiser4_print_prefix(const char *level, int reperr, const char *mid,
-+ const char *function, const char *file, int lineno)
-+{
-+ const char *comm;
-+ int pid;
-+
-+ if (unlikely(in_interrupt() || in_irq())) {
-+ comm = "interrupt";
-+ pid = 0;
-+ } else {
-+ comm = current->comm;
-+ pid = current->pid;
-+ }
-+ printk("%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n",
-+ level, comm, pid, function, file, lineno, mid);
-+ if (reperr)
-+ reiser4_report_err();
-+}
-+#endif /* 0 */
-+
-+/* Preemption point: call periodically during long running operations (carry,
-+ allocate, squeeze). Returns signal_pending(current) after cond_resched(). */
-+int reiser4_preempt_point(void)
-+{
-+ assert("nikita-3008", reiser4_schedulable());
-+ cond_resched();
-+ return signal_pending(current);
-+}
-+
-+#if REISER4_DEBUG
-+/* Debugging aid: return struct where information about locks taken by current
-+ thread is accumulated. This can be used to formulate lock ordering
-+ constraints and various assertions.
-+ Asserts that get_current_context() is non-NULL; returns &ctx->locks.
-+*/
-+reiser4_lock_counters_info *reiser4_lock_counters(void)
-+{
-+ reiser4_context *ctx = get_current_context();
-+ assert("jmacd-1123", ctx != NULL);
-+ return &ctx->locks;
-+}
-+
-+/*
-+ * printk() every lock counter in @info on a few lines, prefixed with @prefix.
-+ */
-+static void print_lock_counters(const char *prefix,
-+ const reiser4_lock_counters_info * info)
-+{
-+ printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n"
-+ "jload: %i, "
-+ "txnh: %i, atom: %i, stack: %i, txnmgr: %i, "
-+ "ktxnmgrd: %i, fq: %i\n"
-+ "inode: %i, "
-+ "cbk_cache: %i (r:%i,w:%i), "
-+ "eflush: %i, "
-+ "zlock: %i,\n"
-+ "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n"
-+ "d: %i, x: %i, t: %i\n", prefix,
-+ info->spin_locked_jnode,
-+ info->rw_locked_tree, info->read_locked_tree,
-+ info->write_locked_tree,
-+ info->rw_locked_dk, info->read_locked_dk, info->write_locked_dk,
-+ info->spin_locked_jload,
-+ info->spin_locked_txnh,
-+ info->spin_locked_atom, info->spin_locked_stack,
-+ info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd,
-+ info->spin_locked_fq,
-+ info->spin_locked_inode,
-+ info->rw_locked_cbk_cache,
-+ info->read_locked_cbk_cache,
-+ info->write_locked_cbk_cache,
-+ info->spin_locked_super_eflush,
-+ info->spin_locked_zlock,
-+ info->spin_locked,
-+ info->long_term_locked_znode,
-+ info->inode_sem_r, info->inode_sem_w,
-+ info->d_refs, info->x_refs, info->t_refs);
-+}
-+
-+/* check that no spinlocks are held: 0 (plus a lock-counter dump) if some are, else 1 */
-+int reiser4_schedulable(void)
-+{
-+ if (get_current_context_check() != NULL) {
-+ if (!LOCK_CNT_NIL(spin_locked)) {
-+ print_lock_counters("in atomic", reiser4_lock_counters());
-+ return 0;
-+ }
-+ }
-+ might_sleep();
-+ return 1;
-+}
-+/* return true iff every counter tested below is zero. The cbk_cache, jload,
-+ * ktxnmgrd, fq and super_eflush counters and x_refs/t_refs are not tested.
-+ */
-+int reiser4_no_counters_are_held(void)
-+{
-+ reiser4_lock_counters_info *counters;
-+
-+ counters = reiser4_lock_counters();
-+ return
-+ (counters->spin_locked_zlock == 0) &&
-+ (counters->spin_locked_jnode == 0) &&
-+ (counters->rw_locked_tree == 0) &&
-+ (counters->read_locked_tree == 0) &&
-+ (counters->write_locked_tree == 0) &&
-+ (counters->rw_locked_dk == 0) &&
-+ (counters->read_locked_dk == 0) &&
-+ (counters->write_locked_dk == 0) &&
-+ (counters->spin_locked_txnh == 0) &&
-+ (counters->spin_locked_atom == 0) &&
-+ (counters->spin_locked_stack == 0) &&
-+ (counters->spin_locked_txnmgr == 0) &&
-+ (counters->spin_locked_inode == 0) &&
-+ (counters->spin_locked == 0) &&
-+ (counters->long_term_locked_znode == 0) &&
-+ (counters->inode_sem_r == 0) &&
-+ (counters->inode_sem_w == 0) && (counters->d_refs == 0);
-+}
-+
-+/*
-+ * Commit is allowed only when the thread holds no reiser4 lock other than the
-+ * inode read/write semaphore. Returns non-zero when that is the case.
-+ */
-+int reiser4_commit_check_locks(void)
-+{
-+ reiser4_lock_counters_info *info = reiser4_lock_counters();
-+ const int saved_r = info->inode_sem_r;
-+ const int saved_w = info->inode_sem_w;
-+ int no_other_locks;
-+
-+ /*
-+ * Hide the inode semaphore counts for the duration of the check, so
-+ * that only the remaining counters decide the outcome.
-+ */
-+
-+ info->inode_sem_w = 0;
-+ info->inode_sem_r = 0;
-+
-+ no_other_locks = reiser4_no_counters_are_held();
-+
-+ /* put the real semaphore counts back */
-+ info->inode_sem_w = saved_w;
-+ info->inode_sem_r = saved_r;
-+ return no_other_locks;
-+}
-+
-+/*
-+ * Record the "error site" (code, file, line) in the current reiser4 context.
-+ * Only negative codes are recorded. See comment before RETERR macro.
-+ */
-+void reiser4_return_err(int code, const char *file, int line)
-+{
-+ reiser4_context *ctx;
-+ if (code >= 0 || !is_in_reiser4_context())
-+ return;
-+ ctx = get_current_context();
-+ if (ctx == NULL)
-+ return;
-+ ctx->err.code = code;
-+ ctx->err.file = file;
-+ ctx->err.line = line;
-+}
-+
-+#if 0
-+/*
-+ * report error information recorded by reiser4_return_err().
-+ */
-+static void reiser4_report_err(void)
-+{
-+ reiser4_context *ctx = get_current_context_check();
-+
-+ if (ctx != NULL) {
-+ if (ctx->err.code != 0) {
-+ printk("code: %i at %s:%i\n",
-+ ctx->err.code, ctx->err.file, ctx->err.line);
-+ }
-+ }
-+}
-+#endif /* 0 */
-+
-+#endif /* REISER4_DEBUG */
-+
-+#if KERNEL_DEBUGGER
-+
-+/*
-+ * this function just drops into the kernel debugger. It is a convenient place
-+ * to put a breakpoint in.
-+ */
-+void reiser4_debugtrap(void)
-+{
-+ /* empty unless CONFIG_KGDB is set and reiser4 is not built as a module */
-+#if defined(CONFIG_KGDB) && !defined(CONFIG_REISER4_FS_MODULE)
-+ extern void breakpoint(void);
-+ breakpoint();
-+#endif
-+}
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/debug.h linux-2.6.20/fs/reiser4/debug.h
---- linux-2.6.20.orig/fs/reiser4/debug.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/debug.h 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,350 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declarations of debug macros. */
-+
-+#if !defined( __FS_REISER4_DEBUG_H__ )
-+#define __FS_REISER4_DEBUG_H__
-+
-+#include "forward.h"
-+#include "reiser4.h"
-+
-+/* generic macro to produce formatted output, decorating it with
-+ whatever standard prefixes/postfixes we want. "Fun" is a function
-+ that will be actually called, can be printk, panic etc. @reperr is
-+ accepted but unused. For use by other debugging macros, not by users. */
-+#define DCALL(lev, fun, reperr, label, format, ...) \
-+({ \
-+ fun(lev "reiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \
-+ current->comm, current->pid, __FUNCTION__, \
-+ __FILE__, __LINE__, label, ## __VA_ARGS__); \
-+})
-+
-+/*
-+ * cause kernel to crash
-+ */
-+#define reiser4_panic(mid, format, ...) \
-+ DCALL("", reiser4_do_panic, 1, mid, format , ## __VA_ARGS__)
-+
-+/* print message with indication of current process, file, line and
-+ function */
-+#define reiser4_log(label, format, ...) \
-+ DCALL(KERN_DEBUG, printk, 0, label, format , ## __VA_ARGS__)
-+
-+/* Assertion checked during compilation.
-+ If "cond" is false (0) we get duplicate case label in switch.
-+ Use this to check something like famous
-+ cassert (sizeof(struct reiserfs_journal_commit) == 4096) ;
-+ in 3.x journal.c. If cassertion fails you get compiler error,
-+ so no "maintainer-id".
-+*/
-+#define cassert(cond) ({ switch(-1) { case (cond): case 0: break; } })
-+
-+#define noop do {;} while(0)
-+
-+#if REISER4_DEBUG
-+/* printk() wrapper that only prints anything when _d_ebugging
-+ is on */
-+#define dinfo(format, ...) printk(format , ## __VA_ARGS__)
-+/* macro to catch logical errors. Put it into `default' clause of
-+ switch() statement. */
-+#define impossible(label, format, ...) \
-+ reiser4_panic(label, "impossible: " format , ## __VA_ARGS__)
-+/* assert assures that @cond is true. If it is not, reiser4_panic() is
-+ called. Use this for checking logical consistency and _never_ call
-+ this to check correctness of external data: disk blocks and user-input . */
-+#define assert(label, cond) \
-+({ \
-+ /* call_on_each_assert(); */ \
-+ if (cond) { \
-+ /* put negated check to avoid using !(cond) that would lose \
-+ * warnings for things like assert(a = b); */ \
-+ ; \
-+ } else { \
-+ DEBUGON(1); \
-+ reiser4_panic(label, "assertion failed: %s", #cond); \
-+ } \
-+})
-+
-+/* like assertion, but @expr is evaluated even if REISER4_DEBUG is off. */
-+#define check_me( label, expr ) assert( label, ( expr ) )
-+
-+#define ON_DEBUG( exp ) exp
-+
-+extern int reiser4_schedulable(void);
-+extern void call_on_each_assert(void);
-+
-+#else
-+/* non-debug build: diagnostics compile away; check_me() still evaluates @expr */
-+#define dinfo( format, args... ) noop
-+#define impossible( label, format, args... ) noop
-+#define assert( label, cond ) noop
-+#define check_me( label, expr ) ( ( void ) ( expr ) )
-+#define ON_DEBUG( exp )
-+#define reiser4_schedulable() might_sleep()
-+
-+/* REISER4_DEBUG */
-+#endif
-+
-+#if REISER4_DEBUG
-+/* per-thread information about lock acquired by this thread. Used by lock
-+ * ordering checking in spin_macros.h */
-+typedef struct reiser4_lock_counters_info {
-+ int rw_locked_tree;
-+ int read_locked_tree;
-+ int write_locked_tree;
-+
-+ int rw_locked_dk;
-+ int read_locked_dk;
-+ int write_locked_dk;
-+
-+ int rw_locked_cbk_cache;
-+ int read_locked_cbk_cache;
-+ int write_locked_cbk_cache;
-+
-+ int spin_locked_zlock;
-+ int spin_locked_jnode;
-+ int spin_locked_jload;
-+ int spin_locked_txnh;
-+ int spin_locked_atom;
-+ int spin_locked_stack;
-+ int spin_locked_txnmgr;
-+ int spin_locked_ktxnmgrd;
-+ int spin_locked_fq;
-+ int spin_locked_inode;
-+ int spin_locked_super_eflush;
-+ int spin_locked;
-+ int long_term_locked_znode;
-+
-+ int inode_sem_r;
-+ int inode_sem_w;
-+
-+ int d_refs;
-+ int x_refs;
-+ int t_refs;
-+} reiser4_lock_counters_info;
-+
-+extern reiser4_lock_counters_info *reiser4_lock_counters(void);
-+#define IN_CONTEXT(a, b) (is_in_reiser4_context() ? (a) : (b))
-+
-+/* increment lock-counter @counter, if present */
-+#define LOCK_CNT_INC(counter) \
-+ IN_CONTEXT(++(reiser4_lock_counters()->counter), 0)
-+
-+/* decrement lock-counter @counter, if present */
-+#define LOCK_CNT_DEC(counter) \
-+ IN_CONTEXT(--(reiser4_lock_counters()->counter), 0)
-+
-+/* check that lock-counter is zero. This is for use in assertions */
-+#define LOCK_CNT_NIL(counter) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter == 0, 1)
-+
-+/* check that lock-counter is greater than zero (GTZ) or less than @n (LT). For
-+ * use in assertions: both evaluate to 1 outside of a reiser4 context */
-+#define LOCK_CNT_GTZ(counter) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter > 0, 1)
-+#define LOCK_CNT_LT(counter,n) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter < (n), 1)
-+
-+#else /* REISER4_DEBUG */
-+
-+/* no-op versions on the above */
-+
-+typedef struct reiser4_lock_counters_info {
-+} reiser4_lock_counters_info;
-+
-+#define reiser4_lock_counters() ((reiser4_lock_counters_info *)NULL)
-+#define LOCK_CNT_INC(counter) noop
-+#define LOCK_CNT_DEC(counter) noop
-+#define LOCK_CNT_NIL(counter) (1)
-+#define LOCK_CNT_GTZ(counter) (1)
-+#define LOCK_CNT_LT(counter,n) (1)
-+
-+#endif /* REISER4_DEBUG */
-+/* lock-state assertions: all expand to BUG_ON(0), i.e. they ignore @lock and never fire */
-+#define assert_spin_not_locked(lock) BUG_ON(0)
-+#define assert_rw_write_locked(lock) BUG_ON(0)
-+#define assert_rw_read_locked(lock) BUG_ON(0)
-+#define assert_rw_locked(lock) BUG_ON(0)
-+#define assert_rw_not_write_locked(lock) BUG_ON(0)
-+#define assert_rw_not_read_locked(lock) BUG_ON(0)
-+#define assert_rw_not_locked(lock) BUG_ON(0)
-+
-+/* Bit flags controlling debugging behavior. They are set through the
-+ debug_flags=N mount option. */
-+typedef enum {
-+ /* print a lot of information during panic. When this is on all jnodes
-+ * are listed. This can be *very* large output. Usually you don't want
-+ * this. Especially over serial line. */
-+ REISER4_VERBOSE_PANIC = 0x00000001,
-+ /* print a lot of information during umount */
-+ REISER4_VERBOSE_UMOUNT = 0x00000002,
-+ /* print gathered statistics on umount */
-+ REISER4_STATS_ON_UMOUNT = 0x00000004,
-+ /* check node consistency */
-+ REISER4_CHECK_NODE = 0x00000008
-+} reiser4_debug_flags;
-+
-+extern int is_in_reiser4_context(void);
-+
-+/*
-+ * evaluate expression @e only if within a reiser4 context
-+ */
-+#define ON_CONTEXT(e) do { \
-+ if(is_in_reiser4_context()) { \
-+ e; \
-+ } } while(0)
-+
-+/*
-+ * evaluate expression @e only when within a reiser4 context and debugging is
-+ * on.
-+ */
-+#define ON_DEBUG_CONTEXT( e ) ON_DEBUG( ON_CONTEXT( e ) )
-+
-+/*
-+ * complain about unexpected function result and crash. Used in "default"
-+ * branches of switch statements and alike to assert that invalid results are
-+ * not silently ignored. Expands to impossible(), so it is a no-op without REISER4_DEBUG.
-+ */
-+#define wrong_return_value( label, function ) \
-+ impossible( label, "wrong return value from " function )
-+
-+/* Issue different types of reiser4 messages to the console */
-+#define warning( label, format, ... ) \
-+ DCALL( KERN_WARNING, \
-+ printk, 1, label, "WARNING: " format , ## __VA_ARGS__ )
-+#define notice( label, format, ... ) \
-+ DCALL( KERN_NOTICE, \
-+ printk, 1, label, "NOTICE: " format , ## __VA_ARGS__ )
-+
-+/* mark not yet implemented functionality; panics via reiser4_panic() when reached */
-+#define not_yet( label, format, ... ) \
-+ reiser4_panic( label, "NOT YET IMPLEMENTED: " format , ## __VA_ARGS__ )
-+
-+extern void reiser4_do_panic(const char *format, ...)
-+ __attribute__ ((noreturn, format(printf, 1, 2)));
-+
-+extern int reiser4_preempt_point(void);
-+extern void reiser4_print_stats(void);
-+
-+#if REISER4_DEBUG
-+extern int reiser4_no_counters_are_held(void);
-+extern int reiser4_commit_check_locks(void);
-+#else
-+#define reiser4_no_counters_are_held() (1)
-+#define reiser4_commit_check_locks() (1)
-+#endif
-+
-+/* true if @i is a power of two (also true for 0). @i is evaluated once. Useful for rate-limited warnings, etc. */
-+#define IS_POW(i) \
-+({ \
-+ typeof(i) __i; \
-+ \
-+ __i = (i); \
-+ !(__i & (__i - 1)); \
-+})
-+
-+#define KERNEL_DEBUGGER (1) /* hard-wired on, so the #else branch below is never used */
-+
-+#if KERNEL_DEBUGGER
-+
-+extern void reiser4_debugtrap(void);
-+
-+/*
-+ * Check condition @cond and call reiser4_debugtrap() if it's true, which drops
-+ * into the kernel debugger (kgdb). If kgdb is not compiled in, do nothing.
-+ */
-+#define DEBUGON(cond) \
-+({ \
-+ if (unlikely(cond)) \
-+ reiser4_debugtrap(); \
-+})
-+#else
-+#define DEBUGON(cond) noop
-+#endif
-+
-+/*
-+ * Error code tracing facility. (Idea is borrowed from XFS code.)
-+ *
-+ * Suppose some strange and/or unexpected code is returned from some function
-+ * (for example, write(2) returns -EEXIST). It is possible to place a
-+ * breakpoint in the reiser4_write(), but it is too late here. How to find out
-+ * in what particular place -EEXIST was generated first?
-+ *
-+ * In reiser4 all places where actual error codes are produced (that is,
-+ * statements of the form
-+ *
-+ * return -EFOO; // (1), or
-+ *
-+ * result = -EFOO; // (2)
-+ *
-+ * are replaced with
-+ *
-+ * return RETERR(-EFOO); // (1a), and
-+ *
-+ * result = RETERR(-EFOO); // (2a) respectively
-+ *
-+ * RETERR() macro fills a backtrace in reiser4_context. This back-trace is
-+ * printed in error and warning messages. Moreover, it's possible to put a
-+ * conditional breakpoint in reiser4_return_err (low-level function called
-+ * by RETERR() to do the actual work) to break into debugger immediately
-+ * when particular error happens.
-+ *
-+ */
-+
-+#if REISER4_DEBUG
-+
-+/*
-+ * data-type to store information about where error happened ("error site").
-+ */
-+typedef struct err_site {
-+ int code; /* error code */
-+ const char *file; /* source file, filled by __FILE__ */
-+ int line; /* source file line, filled by __LINE__ */
-+} err_site;
-+
-+extern void reiser4_return_err(int code, const char *file, int line);
-+
-+/*
-+ * pass @code, __FILE__ and __LINE__ to reiser4_return_err(); evaluates to @code.
-+ */
-+#define RETERR(code) \
-+({ \
-+ typeof(code) __code; \
-+ \
-+ __code = (code); \
-+ reiser4_return_err(__code, __FILE__, __LINE__); \
-+ __code; \
-+})
-+
-+#else
-+
-+/*
-+ * no-op versions of the above; RETERR() keeps its argument parenthesized
-+ */
-+
-+typedef struct err_site {
-+} err_site;
-+#define RETERR(code) (code)
-+#endif
-+
-+#if REISER4_LARGE_KEY
-+/*
-+ * conditionally compile arguments only if REISER4_LARGE_KEY is on.
-+ */
-+#define ON_LARGE_KEY(...) __VA_ARGS__
-+#else
-+#define ON_LARGE_KEY(...)
-+#endif
-+
-+/* __FS_REISER4_DEBUG_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/dformat.h linux-2.6.20/fs/reiser4/dformat.h
---- linux-2.6.20.orig/fs/reiser4/dformat.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/dformat.h 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,70 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Formats of on-disk data and conversion functions. */
-+
-+/* put all item formats in the files describing the particular items,
-+ our model is, everything you need to do to add an item to reiser4,
-+ (excepting the changes to the plugin that uses the item which go
-+ into the file defining that plugin), you put into one file. */
-+/* Data on disk are stored in little-endian format.
-+ To declare fields of on-disk structures, use d8, d16, d32 and d64.
-+ d??tocpu() and cputod??() to convert. */
-+
-+#if !defined( __FS_REISER4_DFORMAT_H__ )
-+#define __FS_REISER4_DFORMAT_H__
-+
-+#include <asm/byteorder.h>
-+#include <asm/unaligned.h>
-+#include <linux/types.h>
-+
-+typedef __u8 d8;
-+typedef __le16 d16;
-+typedef __le32 d32;
-+typedef __le64 d64;
-+
-+#define PACKED __attribute__((packed))
-+
-+/* data-type for block number */
-+typedef __u64 reiser4_block_nr;
-+
-+/* data-type for block number on disk, disk format */
-+typedef __le64 reiser4_dblock_nr;
-+
-+/**
-+ * disk_addr_eq - compare disk addresses
-+ * @b1: pointer to block number to compare
-+ * @b2: pointer to block number to compare
-+ *
-+ * Returns true if disk addresses are the same
-+ */
-+static inline int disk_addr_eq(const reiser4_block_nr *b1,
-+ const reiser4_block_nr * b2)
-+{
-+ assert("nikita-1033", b1 != NULL);
-+ assert("nikita-1266", b2 != NULL);
-+
-+ return !memcmp(b1, b2, sizeof *b1);
-+}
-+
-+/* structure of master reiser4 super block; multi-byte integers are little-endian */
-+typedef struct reiser4_master_sb {
-+ char magic[16]; /* "ReIsEr4" */
-+ __le16 disk_plugin_id; /* id of disk layout plugin */
-+ __le16 blocksize; /* presumably the block size in bytes - TODO confirm */
-+ char uuid[16]; /* unique id */
-+ char label[16]; /* filesystem label */
-+ __le64 diskmap; /* location of the diskmap. 0 if not present */
-+} reiser4_master_sb;
-+
-+/* __FS_REISER4_DFORMAT_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/dscale.c linux-2.6.20/fs/reiser4/dscale.c
---- linux-2.6.20.orig/fs/reiser4/dscale.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/dscale.c 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,174 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Scalable on-disk integers */
-+
-+/*
-+ * Various on-disk structures contain integer-like structures. Stat-data
-+ * contain [yes, "data" is plural, check the dictionary] file size, link
-+ * count; extent unit contains extent width etc. To accommodate for general
-+ * case enough space is reserved to keep largest possible value. 64 bits in
-+ * all cases above. But in overwhelming majority of cases numbers actually
-+ * stored in these fields will be comparatively small and reserving 8 bytes is
-+ * a waste of precious disk bandwidth.
-+ *
-+ * Scalable integers are one way to solve this problem. dscale_write()
-+ * function stores __u64 value in the given area consuming from 1 to 9 bytes,
-+ * depending on the magnitude of the value supplied. dscale_read() reads value
-+ * previously stored by dscale_write().
-+ *
-+ * dscale_write() produces a format not completely unlike UTF: the two highest
-+ * bits of the first byte are used to store "tag". One of 4 possible tag
-+ * values is chosen depending on the number being encoded:
-+ *
-+ * 0 ... 0x3f => 0 [table 1]
-+ * 0x40 ... 0x3fff => 1
-+ * 0x4000 ... 0x3fffffff => 2
-+ * 0x40000000 ... 0xffffffffffffffff => 3
-+ *
-+ * (see dscale_range() function)
-+ *
-+ * Values in the range 0x40000000 ... 0xffffffffffffffff require 8 full bytes
-+ * to be stored, so in this case there is no place in the first byte to store
-+ * tag. For such values tag is stored in an extra 9th byte.
-+ *
-+ * As _highest_ bits are used for the test (which is natural) scaled integers
-+ * are stored in BIG-ENDIAN format in contrast with the rest of reiser4 which
-+ * uses LITTLE-ENDIAN.
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "dscale.h"
-+
-+/* extract the tag (0..3) of the scaled integer that starts at @address */
-+static int gettag(const unsigned char *address)
-+{
-+ /* the two most significant bits of the leading byte carry the tag */
-+ return address[0] >> 6;
-+}
-+
-+/* Clear the tag embedded into @value. Only dscale_read() calls this, with @tag 0, 1 or 2. */
-+static void cleartag(__u64 * value, int tag)
-+{
-+ /*
-+ * W-w-what ?!
-+ *
-+ * Actually, this is rather simple: @value passed here was read by
-+ * dscale_read(), converted from BIG-ENDIAN, and padded to __u64 by
-+ * zeroes. Tag is still stored in the highest (arithmetically)
-+ * non-zero bits of @value, but relative position of tag within __u64
-+ * depends on @tag.
-+ *
-+ * For example if @tag is 0, it's stored in the 2 highest bits of lowest
-+ * byte, and its offset (counting from lowest bit) is 8 - 2 == 6 bits.
-+ *
-+ * If tag is 1, it's stored in two highest bits of 2nd lowest byte,
-+ * and its offset is (2 * 8) - 2 == 14 bits.
-+ *
-+ * See table 1 above for details.
-+ * The mask is built in __u64 so that tag 2 (shift by 30) does not overflow int.
-+ * All these cases are captured by the formula:
-+ */
-+ *value &= ~((__u64)3 << (((1 << tag) << 3) - 2));
-+ /*
-+ * That is, clear two (3 == 0b11) bits at the offset
-+ *
-+ * 8 * (2 ^ tag) - 2,
-+ *
-+ * that is, two highest bits of (2 ^ tag)-th byte of @value.
-+ */
-+}
-+
-+/* map @value to its tag by finding the smallest range of table 1 that holds it. */
-+static int dscale_range(__u64 value)
-+{
-+ if (value <= 0x3f)
-+ return 0;
-+ if (value <= 0x3fff)
-+ return 1;
-+ if (value <= 0x3fffffff)
-+ return 2;
-+ return 3;
-+}
-+
-+/* restore into @value what dscale_write() stored at @address and return the
-+ * number of bytes consumed (1, 2, 4 or 9) */
-+int dscale_read(unsigned char *address, __u64 * value)
-+{
-+ int tag;
-+
-+ /* read tag */
-+ tag = gettag(address);
-+ switch (tag) {
-+ case 3:
-+ /* In this case tag is stored in an extra byte, skip this byte
-+ * and decode value stored in the next 8 bytes.*/
-+ *value = __be64_to_cpu(get_unaligned((__be64 *)(address + 1)));
-+ /* worst case: 8 bytes for value itself plus one byte for
-+ * tag. */
-+ return 9;
-+ case 0:
-+ *value = get_unaligned(address);
-+ break;
-+ case 1:
-+ *value = __be16_to_cpu(get_unaligned((__be16 *)address));
-+ break;
-+ case 2:
-+ *value = __be32_to_cpu(get_unaligned((__be32 *)address));
-+ break;
-+ default: /* not reachable: gettag() only yields 0..3 */
-+ return RETERR(-EIO);
-+ }
-+ /* clear tag embedded into @value */
-+ cleartag(value, tag);
-+ /* number of bytes consumed is (2 ^ tag)---see table 1. */
-+ return 1 << tag;
-+}
-+
-+/* store @value at @address and return number of bytes consumed */
-+int dscale_write(unsigned char *address, __u64 value)
-+{
-+	int tag = dscale_range(value);
-+	int shift = (tag == 3) ? 1 : 0;
-+	__be64 v = __cpu_to_be64(value);
-+
-+	/* 8-byte values carry the tag in a byte of their own: start it from
-+	 * zero so its six unused low bits do not keep stale buffer contents */
-+	if (shift)
-+		*address = 0;
-+	memcpy(address + shift, (unsigned char *)&v + sizeof v - (1 << tag),
-+	       1 << tag);
-+	*address |= (tag << 6);
-+	return shift + (1 << tag);
-+}
-+
-+/* number of bytes required to store @value: (2 ^ tag) bytes, plus one
-+ * extra byte when the value needs all 8 bytes */
-+int dscale_bytes(__u64 value)
-+{
-+	int tag;
-+
-+	tag = dscale_range(value);
-+	/* tag 3 is the only one whose payload is 8 bytes long */
-+	return (tag == 3) ? 9 : (1 << tag);
-+}
-+
-+/* returns true if @value and @other fall into the same dscale_range() tag,
-+ * i.e. require the same number of bytes to be stored. Used to detect when a
-+ * data structure (like stat-data) has to be expanded or contracted. */
-+int dscale_fit(__u64 value, __u64 other)
-+{
-+ return dscale_range(value) == dscale_range(other);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/dscale.h linux-2.6.20/fs/reiser4/dscale.h
---- linux-2.6.20.orig/fs/reiser4/dscale.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/dscale.h 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,27 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Scalable on-disk integers. See dscale.c for details. */
-+
-+#if !defined( __FS_REISER4_DSCALE_H__ )
-+#define __FS_REISER4_DSCALE_H__
-+
-+#include "dformat.h"
-+
-+extern int dscale_read(unsigned char *address, __u64 * value);
-+extern int dscale_write(unsigned char *address, __u64 value);
-+extern int dscale_bytes(__u64 value);
-+extern int dscale_fit(__u64 value, __u64 other);
-+
-+/* __FS_REISER4_DSCALE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/entd.c linux-2.6.20/fs/reiser4/entd.c
---- linux-2.6.20.orig/fs/reiser4/entd.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/entd.c 2007-05-06 14:50:43.702976975 +0400
-@@ -0,0 +1,335 @@
-+/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Ent daemon. */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "tree.h"
-+#include "entd.h"
-+#include "super.h"
-+#include "context.h"
-+#include "reiser4.h"
-+#include "vfs_ops.h"
-+#include "page_cache.h"
-+#include "inode.h"
-+
-+#include <linux/sched.h> /* struct task_struct */
-+#include <linux/suspend.h>
-+#include <linux/kernel.h>
-+#include <linux/writeback.h>
-+#include <linux/time.h> /* INITIAL_JIFFIES */
-+#include <linux/backing-dev.h> /* bdi_write_congested */
-+#include <linux/wait.h>
-+#include <linux/kthread.h>
-+#include <linux/freezer.h>
-+
-+#define DEF_PRIORITY 12
-+#define MAX_ENTD_ITERS 10
-+
-+static void entd_flush(struct super_block *, struct wbq *);
-+static int entd(void *arg);
-+
-+/* set ->comm field of ent thread to make its state visible to the user
-+ * level; the expansion refers to a variable named "super" that must exist
-+ * in the calling scope */
-+#define entd_set_comm(state) \
-+ snprintf(current->comm, sizeof(current->comm), \
-+ "ent:%s%s", super->s_id, (state))
-+
-+/**
-+ * reiser4_init_entd - initialize entd context and start kernel daemon
-+ * @super: super block to start ent thread for
-+ *
-+ * Zeroes the entd context of @super, sets up its lock, wait queue and
-+ * request lists, then launches the "ent:<s_id>" kernel thread. Returns 0 or
-+ * the error encoded in the pointer returned by kthread_run().
-+ */
-+int reiser4_init_entd(struct super_block *super)
-+{
-+	entd_context *ent;
-+
-+	assert("nikita-3104", super != NULL);
-+
-+	ent = get_entd_context(super);
-+	memset(ent, 0, sizeof(*ent));
-+
-+	/* lists of writepage requests */
-+	INIT_LIST_HEAD(&ent->todo_list);
-+	INIT_LIST_HEAD(&ent->done_list);
-+#if REISER4_DEBUG
-+	INIT_LIST_HEAD(&ent->flushers_list);
-+#endif
-+	init_waitqueue_head(&ent->wait);
-+	spin_lock_init(&ent->guard);
-+
-+	/* context is fully set up: start entd */
-+	ent->tsk = kthread_run(entd, super, "ent:%s", super->s_id);
-+	return IS_ERR(ent->tsk) ? PTR_ERR(ent->tsk) : 0;
-+}
-+
-+static void put_wbq(struct wbq *rq) /* finish a request: iput, then wake waiter */
-+{
-+ iput(rq->mapping->host); /* pairs with igrab() in write_page_by_ent() */
-+ complete(&rq->completion); /* write_page_by_ent() waits on this completion */
-+}
-+
-+/* unlink and return the request at the head of todo_list, or NULL if the
-+ * list is empty; ent should be locked */
-+static struct wbq *__get_wbq(entd_context * ent)
-+{
-+	struct wbq *head = NULL;
-+
-+	if (!list_empty(&ent->todo_list)) {
-+		head = list_entry(ent->todo_list.next, struct wbq, link);
-+		list_del_init(&head->link);
-+		ent->nr_todo_reqs--;
-+	}
-+	return head;
-+}
-+
-+/* ent thread function: serves queued requests until kthread_should_stop() */
-+static int entd(void *arg)
-+{
-+ struct super_block *super;
-+ entd_context *ent;
-+ int done = 0;
-+
-+ super = arg;
-+ /* do_fork() just copies task_struct into the new
-+ thread. ->fs_context shouldn't be copied of course. This shouldn't
-+ be a problem for the rest of the code though.
-+ */
-+ current->journal_info = NULL;
-+
-+ ent = get_entd_context(super);
-+
-+ while (!done) {
-+ try_to_freeze();
-+
-+ spin_lock(&ent->guard);
-+ while (ent->nr_todo_reqs != 0) {
-+ struct wbq *rq;
-+
-+ assert("", list_empty(&ent->done_list));
-+
-+ /* take request from the queue head */
-+ rq = __get_wbq(ent);
-+ assert("", rq != NULL);
-+ ent->cur_request = rq;
-+ spin_unlock(&ent->guard); /* flush runs without the spinlock */
-+
-+ entd_set_comm("!"); /* "!" suffix: a request is being served */
-+ entd_flush(super, rq);
-+
-+ put_wbq(rq); /* iput the inode and wake the requestor */
-+
-+ /*
-+ * wakeup all requestors and iput their inodes
-+ */
-+ spin_lock(&ent->guard);
-+ while (!list_empty(&ent->done_list)) {
-+ rq = list_entry(ent->done_list.next, struct wbq, link);
-+ list_del_init(&rq->link);
-+ ent->nr_done_reqs --;
-+ spin_unlock(&ent->guard); /* put_wbq() is called unlocked */
-+ assert("", rq->written == 1);
-+ put_wbq(rq);
-+ spin_lock(&ent->guard);
-+ }
-+ }
-+ spin_unlock(&ent->guard);
-+
-+ entd_set_comm("."); /* "." suffix: no request is being served */
-+
-+ {
-+ DEFINE_WAIT(__wait);
-+
-+ do {
-+ prepare_to_wait(&ent->wait, &__wait, TASK_INTERRUPTIBLE);
-+ if (kthread_should_stop()) {
-+ done = 1;
-+ break;
-+ }
-+ if (ent->nr_todo_reqs != 0) /* read without ent->guard */
-+ break;
-+ schedule();
-+ } while (0); /* single pass: the outer loop re-checks the queue */
-+ finish_wait(&ent->wait, &__wait);
-+ }
-+ }
-+ BUG_ON(ent->nr_todo_reqs != 0);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_entd - stop entd kernel thread
-+ * @super: super block to stop ent thread for
-+ *
-+ * It is called on umount. Calls kthread_stop() on the ent thread that
-+ * reiser4_init_entd() stored in the entd context of @super.
-+ */
-+void reiser4_done_entd(struct super_block *super)
-+{
-+ entd_context *ent;
-+
-+ assert("nikita-3103", super != NULL);
-+
-+ ent = get_entd_context(super);
-+ assert("zam-1055", ent->tsk != NULL);
-+ kthread_stop(ent->tsk);
-+}
-+
-+/* called at the beginning of jnode_flush to register flusher thread with ent
-+ * daemon: bumps ent->flushers under ent->guard */
-+void reiser4_enter_flush(struct super_block *super)
-+{
-+ entd_context *ent;
-+
-+ assert("zam-1029", super != NULL);
-+ ent = get_entd_context(super);
-+
-+ assert("zam-1030", ent != NULL);
-+
-+ spin_lock(&ent->guard);
-+ ent->flushers++;
-+#if REISER4_DEBUG
-+ list_add(&get_current_context()->flushers_link, &ent->flushers_list);
-+#endif
-+ spin_unlock(&ent->guard);
-+}
-+
-+/* called at the end of jnode_flush; wakes ent if no flushers are left */
-+void reiser4_leave_flush(struct super_block *super)
-+{
-+	entd_context *ent;
-+	int last_flusher;
-+	int pending;
-+
-+	assert("zam-1027", super != NULL);
-+	ent = get_entd_context(super);
-+	assert("zam-1028", ent != NULL);
-+
-+	spin_lock(&ent->guard);
-+#if REISER4_DEBUG
-+	list_del_init(&get_current_context()->flushers_link);
-+#endif
-+	last_flusher = (--ent->flushers == 0);
-+	pending = ent->nr_todo_reqs;
-+	spin_unlock(&ent->guard);
-+	if (last_flusher && pending != 0)
-+		wake_up(&ent->wait);
-+}
-+
-+#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX
-+
-+/* serve write-back request @rq inside a stack context created for @super */
-+static void entd_flush(struct super_block *super, struct wbq *rq)
-+{
-+	reiser4_context ctx;
-+
-+	init_stack_context(&ctx, super);
-+	ctx.entd = 1;
-+	ctx.gfp_mask = GFP_NOFS;
-+
-+	/* first write a burst of pages starting at the requested page */
-+	rq->wbc->range_start = page_offset(rq->page);
-+	rq->wbc->range_end = rq->wbc->range_start +
-+		(ENTD_CAPTURE_APAGE_BURST << PAGE_CACHE_SHIFT);
-+	rq->mapping->a_ops->writepages(rq->mapping, rq->wbc);
-+
-+	if (rq->wbc->nr_to_write > 0) {
-+		rq->wbc->range_start = 0;
-+		rq->wbc->range_end = LLONG_MAX;
-+		generic_sync_sb_inodes(super, rq->wbc);
-+	}
-+	rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST;
-+	reiser4_writeout(super, rq->wbc);
-+
-+	context_set_commit_async(&ctx);
-+	reiser4_exit_context(&ctx);
-+}
-+
-+/**
-+ * write_page_by_ent - ask entd thread to flush this page as part of slum
-+ * @page: page to be written
-+ * @wbc: writeback control passed to reiser4_writepage
-+ *
-+ * Creates a request, puts it on entd list of requests, wakeups entd if
-+ * necessary, waits until entd completes with the request. Always returns 0.
-+ */
-+int write_page_by_ent(struct page *page, struct writeback_control *wbc)
-+{
-+	struct super_block *sb;
-+	struct inode *inode;
-+	entd_context *ent;
-+	struct wbq rq;
-+
-+	assert("", PageLocked(page));
-+	assert("", page->mapping != NULL);
-+
-+	sb = page->mapping->host->i_sb;
-+	ent = get_entd_context(sb);
-+	assert("", ent && ent->done == 0);
-+
-+	/*
-+	 * we are going to unlock page and ask ent thread to write the
-+	 * page. Re-dirty page before unlocking so that if ent thread fails to
-+	 * write it - it will remain dirty
-+	 */
-+	reiser4_set_page_dirty_internal(page);
-+
-+	/*
-+	 * pin inode in memory, unlock page, put_wbq() will iput. We can not
-+	 * iput here because we can not allow delete_inode to be called here
-+	 */
-+	inode = igrab(page->mapping->host);
-+	unlock_page(page);
-+	if (inode == NULL)
-+		/* inode is getting freed */
-+		return 0;
-+
-+	/* init wbq */
-+	INIT_LIST_HEAD(&rq.link);
-+	rq.magic = WBQ_MAGIC;
-+	rq.wbc = wbc;
-+	rq.page = page;
-+	rq.mapping = inode->i_mapping;
-+	rq.node = NULL;
-+	rq.written = 0;
-+	init_completion(&rq.completion);
-+
-+	/* add request to entd's list of writepage requests */
-+	spin_lock(&ent->guard);
-+	ent->nr_todo_reqs++;
-+	list_add_tail(&rq.link, &ent->todo_list);
-+	if (ent->nr_todo_reqs == 1)
-+		wake_up(&ent->wait);
-+
-+	spin_unlock(&ent->guard);
-+
-+	/* wait until entd finishes */
-+	wait_for_completion(&rq.completion);
-+
-+	/* 0 is returned whether or not entd has written the page
-+	 * (rq.written): it was re-dirtied above, so it stays dirty if not */
-+	return 0;
-+}
-+
-+/* number of requests on the todo list of the current super block's entd */
-+int wbq_available(void)
-+{
-+	/* the counter is read without taking ent->guard */
-+	return get_entd_context(reiser4_get_current_sb())->nr_todo_reqs;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/entd.h linux-2.6.20/fs/reiser4/entd.h
---- linux-2.6.20.orig/fs/reiser4/entd.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/entd.h 2007-05-06 14:50:43.706978224 +0400
-@@ -0,0 +1,90 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Ent daemon. */
-+
-+#ifndef __ENTD_H__
-+#define __ENTD_H__
-+
-+#include "context.h"
-+
-+#include <linux/fs.h>
-+#include <linux/completion.h>
-+#include <linux/wait.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+#define WBQ_MAGIC 0x7876dc76
-+
-+/* write-back request, filled in and waited on by write_page_by_ent(). */
-+struct wbq {
-+ int magic; /* set to WBQ_MAGIC by write_page_by_ent() */
-+ struct list_head link; /* list head of this list is in entd context */
-+ struct writeback_control *wbc; /* control block passed by the requestor */
-+ struct page *page; /* page the requestor wants written */
-+ struct address_space *mapping; /* mapping of the inode pinned by igrab() */
-+ struct completion completion; /* completed by put_wbq() */
-+ jnode *node; /* set if ent thread captured requested page */
-+ int written; /* set if ent thread wrote requested page */
-+};
-+
-+/* ent-thread context. This is used to synchronize starting/stopping ent
-+ * threads and to queue write-back requests for them. */
-+typedef struct entd_context {
-+ /* wait queue that ent thread waits on for more work. It's
-+ * signaled by write_page_by_ent() and reiser4_leave_flush(). */
-+ wait_queue_head_t wait;
-+ /* spinlock protecting other fields */
-+ spinlock_t guard;
-+ /* ent thread, as returned by kthread_run() */
-+ struct task_struct *tsk;
-+ /* set to indicate that ent thread should leave. NOTE(review): confirm still used */
-+ int done;
-+ /* counter of active flushers */
-+ int flushers;
-+ /*
-+ * when reiser4_writepage asks entd to write a page - it adds struct
-+ * wbq to this list
-+ */
-+ struct list_head todo_list;
-+ /* number of elements on the above list */
-+ int nr_todo_reqs;
-+
-+ struct wbq *cur_request; /* request most recently taken off todo_list */
-+ /*
-+ * when entd writes a page it moves write-back request from todo_list
-+ * to done_list. This list is used at the end of entd iteration to
-+ * wakeup requestors and iput inodes.
-+ */
-+ struct list_head done_list;
-+ /* number of elements on the above list */
-+ int nr_done_reqs;
-+
-+#if REISER4_DEBUG
-+ /* list of all active flushers */
-+ struct list_head flushers_list;
-+#endif
-+} entd_context;
-+
-+extern int reiser4_init_entd(struct super_block *);
-+extern void reiser4_done_entd(struct super_block *);
-+
-+extern void reiser4_enter_flush(struct super_block *);
-+extern void reiser4_leave_flush(struct super_block *);
-+
-+extern int write_page_by_ent(struct page *, struct writeback_control *);
-+extern int wbq_available(void);
-+extern void ent_writes_page(struct super_block *, struct page *);
-+
-+extern jnode *get_jnode_by_wbq(struct super_block *, struct wbq *);
-+/* __ENTD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/eottl.c linux-2.6.20/fs/reiser4/eottl.c
---- linux-2.6.20.orig/fs/reiser4/eottl.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/eottl.c 2007-05-06 14:50:43.706978224 +0400
-@@ -0,0 +1,509 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree_mod.h"
-+#include "carry.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/*
-+ * Extents on the twig level (EOTTL) handling.
-+ *
-+ * EOTTL poses some problems to the tree traversal, that are better explained
-+ * by example.
-+ *
-+ * Suppose we have block B1 on the twig level with the following items:
-+ *
-+ * 0. internal item I0 with key (0:0:0:0) (locality, key-type, object-id,
-+ * offset)
-+ * 1. extent item E1 with key (1:4:100:0), having 10 blocks of 4k each
-+ * 2. internal item I2 with key (10:0:0:0)
-+ *
-+ * We are trying to insert item with key (5:0:0:0). Lookup finds node B1, and
-+ * then intra-node lookup is done. This lookup finished on the E1, because the
-+ * key we are looking for is larger than the key of E1 and is smaller than
-+ * the key of I2.
-+ *
-+ * Here search is stuck.
-+ *
-+ * After some thought it is clear what is wrong here: extents on the twig level
-+ * break some basic property of the *search* tree (on the pretext, that they
-+ * restore property of balanced tree).
-+ *
-+ * Said property is the following: if in the internal node of the search tree
-+ * we have [ ... Key1 Pointer Key2 ... ] then, all data that are or will be
-+ * keyed in the tree with the Key such that Key1 <= Key < Key2 are accessible
-+ * through the Pointer.
-+ *
-+ * This is not true, when Pointer is Extent-Pointer, simply because extent
-+ * cannot expand indefinitely to the right to include any item with
-+ *
-+ * Key1 <= Key <= Key2.
-+ *
-+ * For example, our E1 extent is only responsible for the data with keys
-+ *
-+ * (1:4:100:0) <= key <= (1:4:100:0xffffffffffffffff), and
-+ *
-+ * so, key range
-+ *
-+ * ( (1:4:100:0xffffffffffffffff), (10:0:0:0) )
-+ *
-+ * is orphaned: there is no way to get there from the tree root.
-+ *
-+ * In other words, extent pointers are different than normal child pointers as
-+ * far as search tree is concerned, and this creates such problems.
-+ *
-+ * Possible solution for this problem is to insert our item into node pointed
-+ * to by I2. There are some problems though:
-+ *
-+ * (1) I2 can be in a different node.
-+ * (2) E1 can be immediately followed by another extent E2.
-+ *
-+ * (1) is solved by calling reiser4_get_right_neighbor() and accounting
-+ * for locks/coords as necessary.
-+ *
-+ * (2) is more complex. Solution here is to insert new empty leaf node and
-+ * insert internal item between E1 and E2 pointing to said leaf node. This is
-+ * further complicated by possibility that E2 is in a different node, etc.
-+ *
-+ * Problems:
-+ *
-+ * (1) if there was internal item I2 immediately on the right of an extent E1
-+ * and we decided to insert new item S1 into node N2 pointed to by I2, then
-+ * key of S1 will be less than smallest key in the N2. Normally, search key
-+ * checks that key we are looking for is in the range of keys covered by the
-+ * node key is being looked in. To work around of this situation, while
-+ * preserving useful consistency check new flag CBK_TRUST_DK was added to the
-+ * cbk flags bitmask. This flag is automatically set on entrance to the
-+ * coord_by_key() and is only cleared when we are about to enter situation
-+ * described above.
-+ *
-+ * (2) If extent E1 is immediately followed by another extent E2 and we are
-+ * searching for the key that is between E1 and E2 we only have to insert new
-+ * empty leaf node when coord_by_key was called for insertion, rather than just
-+ * for lookup. To distinguish these cases, new flag CBK_FOR_INSERT was added to
-+ * the cbk flags bitmask. This flag is automatically set by coord_by_key calls
-+ * performed by insert_by_key() and friends.
-+ *
-+ * (3) Insertion of new empty leaf node (possibly) requires balancing. In any
-+ * case it requires modification of node content which is only possible under
-+ * write lock. It may well happen that we only have read lock on the node where
-+ * new internal pointer is to be inserted (common case: lookup of non-existent
-+ * stat-data that falls between two extents). If only read lock is held, tree
-+ * traversal is restarted with lock_level modified so that next time we hit
-+ * this problem, write lock will be held. Once we have write lock, balancing
-+ * will be performed.
-+ */
-+
-+/**
-+ * is_next_item_internal - check whether next item is internal
-+ * @coord: coordinate of extent item in twig node
-+ * @key: search key
-+ * @lh: twig node lock handle
-+ *
-+ * Looks at the unit next to @coord. If it is an internal one - 1 is returned,
-+ * @coord is set to that unit. If that unit is in right neighbor, @lh is moved
-+ * to that node, @coord is set to its first unit. If next item is not internal
-+ * or does not exist then 0 is returned, @coord and @lh are left unchanged. 2
-+ * is returned if search restart has to be done, negative error code on failure.
-+ */
-+static int
-+is_next_item_internal(coord_t *coord, const reiser4_key *key,
-+ lock_handle *lh)
-+{
-+ coord_t next;
-+ lock_handle rn;
-+ int result;
-+
-+ coord_dup(&next, coord);
-+ if (coord_next_unit(&next) == 0) {
-+ /* next unit is in this node */
-+ if (item_is_internal(&next)) {
-+ coord_dup(coord, &next);
-+ return 1;
-+ }
-+ assert("vs-3", item_is_extent(&next));
-+ return 0;
-+ }
-+
-+ /*
-+ * next unit either does not exist or is in right neighbor. If it is in
-+ * right neighbor we have to check right delimiting key because
-+ * concurrent thread could get there first and insert item with a key
-+ * smaller than @key
-+ */
-+ read_lock_dk(current_tree);
-+ result = keycmp(key, znode_get_rd_key(coord->node));
-+ read_unlock_dk(current_tree);
-+ assert("vs-6", result != EQUAL_TO);
-+ if (result == GREATER_THAN)
-+ return 2;
-+
-+ /* lock right neighbor, in the same mode @coord->node is locked in */
-+ init_lh(&rn);
-+ result = reiser4_get_right_neighbor(&rn, coord->node,
-+ znode_is_wlocked(coord->node) ?
-+ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == -E_NO_NEIGHBOR) {
-+ /* we are on the rightmost edge of the tree */
-+ done_lh(&rn);
-+ return 0;
-+ }
-+
-+ if (result) {
-+ assert("vs-4", result < 0);
-+ done_lh(&rn);
-+ return result;
-+ }
-+
-+ /*
-+ * check whether concurrent thread managed to insert item with a key
-+ * smaller than @key
-+ */
-+ read_lock_dk(current_tree);
-+ result = keycmp(key, znode_get_ld_key(rn.node));
-+ read_unlock_dk(current_tree);
-+ assert("vs-6", result != EQUAL_TO);
-+ if (result == GREATER_THAN) {
-+ done_lh(&rn);
-+ return 2;
-+ }
-+
-+ result = zload(rn.node);
-+ if (result) {
-+ assert("vs-5", result < 0);
-+ done_lh(&rn);
-+ return result;
-+ }
-+
-+ coord_init_first_unit(&next, rn.node);
-+ if (item_is_internal(&next)) {
-+ /*
-+ * next unit is in right neighbor and it is an unit of internal
-+ * item. Unlock coord->node. Move @lh to right neighbor. @coord
-+ * is set to the first unit of right neighbor.
-+ */
-+ coord_dup(coord, &next);
-+ zrelse(rn.node);
-+ done_lh(lh);
-+ move_lh(lh, &rn);
-+ return 1;
-+ }
-+
-+ /*
-+ * next unit is unit of extent item. Return without changing @lh and
-+ * @coord.
-+ */
-+ assert("vs-6", item_is_extent(&next));
-+ zrelse(rn.node);
-+ done_lh(&rn);
-+ return 0;
-+}
-+
-+/**
-+ * rd_key - calculate key of an item next to the given one
-+ * @coord: position in a node, between items or after the last item
-+ * @key: storage for result key
-+ *
-+ * Stores in @key the key of the unit to the right of @coord, falling back to
-+ * the right delimiting key of @coord->node, and returns @key.
-+ */
-+static reiser4_key *rd_key(const coord_t *coord, reiser4_key *key)
-+{
-+	coord_t right;
-+
-+	assert("nikita-2281", coord_is_between_items(coord));
-+	coord_dup(&right, coord);
-+
-+	if (coord_set_to_right(&right) != 0) {
-+		/*
-+		 * next item either does not exist or is in right
-+		 * neighbor. Return znode's right delimiting key. */
-+		read_lock_dk(current_tree);
-+		*key = *znode_get_rd_key(coord->node);
-+		read_unlock_dk(current_tree);
-+		return key;
-+	}
-+
-+	/* next item is in this node. Return its key. */
-+	unit_key_by_coord(&right, key);
-+	return key;
-+}
-+
-+/**
-+ * add_empty_leaf - insert empty leaf between two extents
-+ * @insert_coord: position in twig node between two extents
-+ * @lh: twig node lock handle
-+ * @key: left delimiting key of new node
-+ * @rdkey: right delimiting key of new node
-+ *
-+ * Inserts empty leaf node between two extent items (items on the twig level)
-+ * so that an item can be inserted on leaf level between them. Returns 0 with
-+ * @lh and @insert_coord moved to the new leaf, or an error code.
-+ */
-+static int
-+add_empty_leaf(coord_t *insert_coord, lock_handle *lh,
-+	       const reiser4_key *key, const reiser4_key *rdkey)
-+{
-+	int result;
-+	carry_pool *pool;
-+	carry_level *todo;
-+	reiser4_item_data *item;
-+	carry_insert_data *cdata;
-+	carry_op *op;
-+	znode *node;
-+	reiser4_tree *tree;
-+
-+	assert("vs-49827", znode_contains_key_lock(insert_coord->node, key));
-+	tree = znode_get_tree(insert_coord->node);
-+	node = reiser4_new_node(insert_coord->node, LEAF_LEVEL);
-+	if (IS_ERR(node))
-+		return PTR_ERR(node);
-+
-+	/* setup delimiting keys for node being inserted */
-+	write_lock_dk(tree);
-+	znode_set_ld_key(node, key);
-+	znode_set_rd_key(node, rdkey);
-+	ON_DEBUG(node->creator = current);
-+	ON_DEBUG(node->first_key = *key);
-+	write_unlock_dk(tree);
-+
-+	ZF_SET(node, JNODE_ORPHAN);
-+
-+	/* allocate carry_pool, 3 carry_level-s, reiser4_item_data and
-+	 * carry_insert_data; on failure release @node like the exit path does */
-+	pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
-+			       sizeof(*item) + sizeof(*cdata));
-+	if (IS_ERR(pool)) {
-+		zput(node);
-+		return PTR_ERR(pool);
-+	}
-+	todo = (carry_level *) (pool + 1);
-+	init_carry_level(todo, pool);
-+
-+	item = (reiser4_item_data *) (todo + 3);
-+	cdata = (carry_insert_data *) (item + 1);
-+
-+	op = reiser4_post_carry(todo, COP_INSERT, insert_coord->node, 0);
-+	if (!IS_ERR(op)) {
-+		cdata->coord = insert_coord;
-+		cdata->key = key;
-+		cdata->data = item;
-+		op->u.insert.d = cdata;
-+		op->u.insert.type = COPT_ITEM_DATA;
-+		build_child_ptr_data(node, item);
-+		item->arg = NULL;
-+		/* have @insert_coord to be set at inserted item after
-+		   insertion is done */
-+		todo->track_type = CARRY_TRACK_CHANGE;
-+		todo->tracked = lh;
-+
-+		result = reiser4_carry(todo, NULL);
-+		if (result == 0) {
-+			/*
-+			 * pin node in memory. This is necessary for
-+			 * znode_make_dirty() below.
-+			 */
-+			result = zload(node);
-+			if (result == 0) {
-+				lock_handle local_lh;
-+
-+				/*
-+				 * if we inserted new child into tree we have
-+				 * to mark it dirty so that flush will be able
-+				 * to process it.
-+				 */
-+				init_lh(&local_lh);
-+				result = longterm_lock_znode(&local_lh, node,
-+							     ZNODE_WRITE_LOCK,
-+							     ZNODE_LOCK_LOPRI);
-+				if (result == 0) {
-+					znode_make_dirty(node);
-+
-+					/*
-+					 * when internal item pointing to @node
-+					 * was inserted into twig node
-+					 * create_hook_internal did not connect
-+					 * it properly because its right
-+					 * neighbor was not known. Do it
-+					 * here
-+					 */
-+					write_lock_tree(tree);
-+					assert("nikita-3312",
-+					       znode_is_right_connected(node));
-+					assert("nikita-2984",
-+					       node->right == NULL);
-+					ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+					write_unlock_tree(tree);
-+					result =
-+					    connect_znode(insert_coord, node);
-+					ON_DEBUG(if (result == 0) check_dkeys(node););
-+
-+					done_lh(lh);
-+					move_lh(lh, &local_lh);
-+					assert("vs-1676", node_is_empty(node));
-+					coord_init_first_unit(insert_coord,
-+							      node);
-+				} else {
-+					warning("nikita-3136",
-+						"Cannot lock child");
-+				}
-+				done_lh(&local_lh);
-+				zrelse(node);
-+			}
-+		}
-+	} else
-+		result = PTR_ERR(op);
-+	zput(node);
-+	done_carry_pool(pool);
-+	return result;
-+}
-+
-+/**
-+ * handle_eottl - handle extent-on-the-twig-level cases in tree traversal
-+ * @h: search handle
-+ * @outcome: flag saying whether search has to restart or is done
-+ *
-+ * Handles search on twig level. Returns 0 if search has to go one level
-+ * down. Returns 1 if this level ends the search: @outcome is then either
-+ * LOOKUP_REST (restart the search) or LOOKUP_DONE, with the search result or
-+ * the error code saved in @h->result.
-+ */
-+int handle_eottl(cbk_handle *h, int *outcome)
-+{
-+	int result;
-+	reiser4_key key;
-+	coord_t *coord;
-+
-+	coord = h->coord;
-+
-+	if (h->level != TWIG_LEVEL ||
-+	    (coord_is_existing_item(coord) && item_is_internal(coord))) {
-+		/* Continue to traverse tree downward. */
-+		return 0;
-+	}
-+
-+	/*
-+	 * make sure that @h->coord is set to twig node and that it is either
-+	 * set to extent item or after extent item
-+	 */
-+	assert("vs-356", h->level == TWIG_LEVEL);
-+	assert("vs-357", ( {
-+			  coord_t lcoord;
-+			  coord_dup(&lcoord, coord);
-+			  check_me("vs-733", coord_set_to_left(&lcoord) == 0);
-+			  item_is_extent(&lcoord);
-+			  }
-+	       ));
-+
-+	if (*outcome == NS_FOUND) {
-+		/* we have found desired key on twig level in extent item */
-+		h->result = CBK_COORD_FOUND;
-+		*outcome = LOOKUP_DONE;
-+		return 1;
-+	}
-+
-+	if (!(h->flags & CBK_FOR_INSERT)) {
-+		/* tree traversal is not for insertion. Just return
-+		   CBK_COORD_NOTFOUND. */
-+		h->result = CBK_COORD_NOTFOUND;
-+		*outcome = LOOKUP_DONE;
-+		return 1;
-+	}
-+
-+	/* take a look at the item to the right of h -> coord */
-+	result = is_next_item_internal(coord, h->key, h->active_lh);
-+	if (unlikely(result < 0)) {
-+		h->error = "get_right_neighbor failed";
-+		h->result = result;
-+		*outcome = LOOKUP_DONE;
-+		return 1;
-+	}
-+	if (result == 0) {
-+		/*
-+		 * item to the right is also an extent one. Allocate a new node
-+		 * and insert pointer to it after item h -> coord.
-+		 *
-+		 * This is a result of extents being located at the twig
-+		 * level. For explanation, see comment just above
-+		 * is_next_item_internal().
-+		 */
-+		if (cbk_lock_mode(h->level, h) != ZNODE_WRITE_LOCK) {
-+			/*
-+			 * we got node read locked, restart coord_by_key to
-+			 * have write lock on twig level
-+			 */
-+			h->lock_level = TWIG_LEVEL;
-+			h->lock_mode = ZNODE_WRITE_LOCK;
-+			*outcome = LOOKUP_REST;
-+			return 1;
-+		}
-+
-+		/*
-+		 * twig node is write locked at this point, as required to
-+		 * insert a pointer to the new leaf into it
-+		 */
-+		result = add_empty_leaf(coord, h->active_lh, h->key,
-+					rd_key(coord, &key));
-+		if (result) {
-+			h->error = "could not add empty leaf";
-+			h->result = result;
-+			*outcome = LOOKUP_DONE;
-+			return 1;
-+		}
-+		/* added empty leaf is locked (h->active_lh), its parent node
-+		   is unlocked, h->coord is set as EMPTY */
-+		assert("vs-13", coord->between == EMPTY_NODE);
-+		assert("vs-14", znode_is_write_locked(coord->node));
-+		assert("vs-15",
-+		       WITH_DATA(coord->node, node_is_empty(coord->node)));
-+		assert("vs-16", jnode_is_leaf(ZJNODE(coord->node)));
-+		assert("vs-17", coord->node == h->active_lh->node);
-+		*outcome = LOOKUP_DONE;
-+		h->result = CBK_COORD_NOTFOUND;
-+		return 1;
-+	} else if (result == 1) {
-+		/*
-+		 * this is special case mentioned in the comment on
-+		 * tree.h:cbk_flags. We have found internal item immediately on
-+		 * the right of extent, and we are going to insert new item
-+		 * there. Key of item we are going to insert is smaller than
-+		 * leftmost key in the node pointed to by said internal item
-+		 * (otherwise search wouldn't come to the extent in the first
-+		 * place).
-+		 *
-+		 * This is a result of extents being located at the twig
-+		 * level. For explanation, see comment just above
-+		 * is_next_item_internal().
-+		 */
-+		h->flags &= ~CBK_TRUST_DK;
-+	} else {
-+		assert("vs-8", result == 2);
-+		*outcome = LOOKUP_REST;
-+		return 1;
-+	}
-+	assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord)));
-+	return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/estimate.c linux-2.6.20/fs/reiser4/estimate.c
---- linux-2.6.20.orig/fs/reiser4/estimate.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/estimate.c 2007-05-06 14:50:43.706978224 +0400
-@@ -0,0 +1,111 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tree.h"
-+#include "carry.h"
-+#include "inode.h"
-+#include "plugin/cluster.h"
-+#include "plugin/item/ctail.h"
-+
-+/* this returns how many nodes might get dirty and added nodes if @childen nodes are dirtied
-+
-+   Amount of internals which will get dirty or get allocated we estimate as 103/1024 (about 10%) of the childs + 1
-+   balancing. 1 balancing is 2 neighbours, 2 new blocks and the current block on the leaf level, 2 neighbour nodes + the
-+   current (or 1 neighbour and 1 new and the current) on twig level, 2 neighbour nodes on upper levels and 1 for a new
-+   root. So 5 for leaf level, 3 for twig level, 2 on upper + 1 for root.
-+
-+   Do not calculate the current node of the lowest level here - this is overhead only.
-+
-+   children is almost always 1 here. Exception is flow insertion
-+*/
-+static reiser4_block_nr
-+max_balance_overhead(reiser4_block_nr childen, tree_level tree_height)
-+{
-+	/* If we have too many balancings at the time, tree height can raise on more
-+	   then 1. Assume that if tree_height is 5, it can raise on 1 only. */
-+	tree_level levels = (tree_height < 5) ? 5 : tree_height;
-+	reiser4_block_nr share;
-+
-+	share = (103 * childen) >> 10;
-+	return levels * 2 + (4 + share);
-+}
-+
-+/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to
-+ perform insertion of one item into the tree of the given @height */
-+/* it is only called when tree height changes, or gets initialized */
-+reiser4_block_nr calc_estimate_one_insert(tree_level height)
-+{
-+ return 1 + max_balance_overhead(1, height); /* one node plus the balancing overhead for a single child */
-+}
-+
-+reiser4_block_nr estimate_one_insert_item(reiser4_tree * tree)
-+{
-+ return tree->estimate_one_insert; /* value stored in @tree; presumably set from calc_estimate_one_insert() - confirm */
-+}
-+
-+/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to
-+ perform insertion of one unit into an item in the tree */
-+reiser4_block_nr estimate_one_insert_into_item(reiser4_tree * tree)
-+{
-+ /* estimate insert into item just like item insertion: same stored value as estimate_one_insert_item() */
-+ return tree->estimate_one_insert;
-+}
-+
-+reiser4_block_nr estimate_one_item_removal(reiser4_tree * tree)
-+{
-+ /* on item removal reiser4 does not try to pack nodes more complact, so, only one node may be dirtied on leaf
-+ level */
-+ return tree->estimate_one_insert;
-+}
-+
-+/* on leaf level insert_flow may add CARRY_FLOW_NEW_NODES_LIMIT new nodes and dirty 3 existing nodes (insert point and
-+ both its neighbors). Max_balance_overhead should estimate number of blocks which may change/get added on internal
-+ levels */
-+reiser4_block_nr estimate_insert_flow(tree_level height)
-+{
-+ return 3 + CARRY_FLOW_NEW_NODES_LIMIT + max_balance_overhead(3 +
-+ CARRY_FLOW_NEW_NODES_LIMIT,
-+ height);
-+}
-+
-+/* returnes max number of nodes can be occupied by disk cluster */
-+static reiser4_block_nr estimate_cluster(struct inode * inode, int unprepped)
-+{
-+ int per_cluster;
-+ per_cluster = (unprepped ? 1 : cluster_nrpages(inode));
-+ return 3 + per_cluster +
-+ max_balance_overhead(3 + per_cluster,
-+ REISER4_MAX_ZTREE_HEIGHT);
-+}
-+
-+/* how many nodes might get dirty and added
-+ during insertion of a disk cluster */
-+reiser4_block_nr estimate_insert_cluster(struct inode * inode)
-+{
-+ return estimate_cluster(inode, 1); /* 24 */
-+}
-+
-+/* how many nodes might get dirty and added
-+ during update of a (prepped or unprepped) disk cluster */
-+reiser4_block_nr estimate_update_cluster(struct inode * inode)
-+{
-+ return estimate_cluster(inode, 0); /* 44, for 64K-cluster */
-+}
-+
-+/* how many nodes occupied by a disk cluster might get dirty */
-+reiser4_block_nr estimate_dirty_cluster(struct inode * inode)
-+{
-+ return cluster_nrpages(inode) + 4;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/export_ops.c linux-2.6.20/fs/reiser4/export_ops.c
---- linux-2.6.20.orig/fs/reiser4/export_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/export_ops.c 2007-05-06 14:50:43.706978224 +0400
-@@ -0,0 +1,295 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "inode.h"
-+#include "plugin/plugin.h"
-+
-+/*
-+ * Supported file-handle types
-+ */
-+typedef enum {
-+ FH_WITH_PARENT = 0x10, /* file handle with parent */
-+ FH_WITHOUT_PARENT = 0x11 /* file handle without parent */
-+} reiser4_fhtype;
-+
-+#define NFSERROR (255)
-+
-+/* initialize place-holder for object */
-+static void object_on_wire_init(reiser4_object_on_wire *o)
-+{
-+ o->plugin = NULL;
-+}
-+
-+/* finish with @o */
-+static void object_on_wire_done(reiser4_object_on_wire *o)
-+{
-+ if (o->plugin != NULL)
-+ o->plugin->wire.done(o);
-+}
-+
-+/*
-+ * read serialized object identity from @addr and store information about
-+ * object in @obj. This is dual to encode_inode().
-+ */
-+static char *decode_inode(struct super_block *s, char *addr,
-+ reiser4_object_on_wire * obj)
-+{
-+ file_plugin *fplug;
-+
-+ /* identifier of object plugin is stored in the first two bytes,
-+ * followed by... */
-+ fplug = file_plugin_by_disk_id(reiser4_get_tree(s), (d16 *) addr);
-+ if (fplug != NULL) {
-+ addr += sizeof(d16);
-+ obj->plugin = fplug;
-+ assert("nikita-3520", fplug->wire.read != NULL);
-+ /* plugin specific encoding of object identity. */
-+ addr = fplug->wire.read(addr, obj);
-+ } else
-+ addr = ERR_PTR(RETERR(-EINVAL));
-+ return addr;
-+}
-+
-+/**
-+ * reiser4_decode_fh - decode_fh of export operations
-+ * @super: super block
-+ * @fh: nfsd file handle
-+ * @len: length of file handle
-+ * @fhtype: type of file handle
-+ * @acceptable: acceptability testing function
-+ * @context: argument for @acceptable
-+ *
-+ * Returns dentry referring to the same file as @fh.
-+ */
-+static struct dentry *reiser4_decode_fh(struct super_block *super, __u32 *fh,
-+ int len, int fhtype,
-+ int (*acceptable) (void *context,
-+ struct dentry *de),
-+ void *context)
-+{
-+ reiser4_context *ctx;
-+ reiser4_object_on_wire object;
-+ reiser4_object_on_wire parent;
-+ char *addr;
-+ int with_parent;
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx))
-+ return (struct dentry *)ctx;
-+
-+ assert("vs-1482",
-+ fhtype == FH_WITH_PARENT || fhtype == FH_WITHOUT_PARENT);
-+
-+ with_parent = (fhtype == FH_WITH_PARENT);
-+
-+ addr = (char *)fh;
-+
-+ object_on_wire_init(&object);
-+ object_on_wire_init(&parent);
-+
-+ addr = decode_inode(super, addr, &object);
-+ if (!IS_ERR(addr)) {
-+ if (with_parent)
-+ addr = decode_inode(super, addr, &parent);
-+ if (!IS_ERR(addr)) {
-+ struct dentry *d;
-+ typeof(super->s_export_op->find_exported_dentry) fn;
-+
-+ fn = super->s_export_op->find_exported_dentry;
-+ assert("nikita-3521", fn != NULL);
-+ d = fn(super, &object, with_parent ? &parent : NULL,
-+ acceptable, context);
-+ if (d != NULL && !IS_ERR(d))
-+ /* FIXME check for -ENOMEM */
-+ reiser4_get_dentry_fsdata(d)->stateless = 1;
-+ addr = (char *)d;
-+ }
-+ }
-+
-+ object_on_wire_done(&object);
-+ object_on_wire_done(&parent);
-+
-+ reiser4_exit_context(ctx);
-+ return (void *)addr;
-+}
-+
-+/*
-+ * Object serialization support.
-+ *
-+ * To support knfsd file system provides export_operations that are used to
-+ * construct and interpret NFS file handles. As a generalization of this,
-+ * reiser4 object plugins have serialization support: it provides methods to
-+ * create on-wire representation of identity of reiser4 object, and
-+ * re-create/locate object given its on-wire identity.
-+ *
-+ */
-+
-+/*
-+ * return number of bytes that on-wire representation of @inode's identity
-+ * consumes.
-+ */
-+static int encode_inode_size(struct inode *inode)
-+{
-+ assert("nikita-3514", inode != NULL);
-+ assert("nikita-3515", inode_file_plugin(inode) != NULL);
-+ assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL);
-+
-+ return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16);
-+}
-+
-+/*
-+ * store on-wire representation of @inode's identity at the area beginning at
-+ * @start.
-+ */
-+static char *encode_inode(struct inode *inode, char *start)
-+{
-+ assert("nikita-3517", inode != NULL);
-+ assert("nikita-3518", inode_file_plugin(inode) != NULL);
-+ assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL);
-+
-+ /*
-+ * first, store two-byte identifier of object plugin, then
-+ */
-+ save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)),
-+ (d16 *) start);
-+ start += sizeof(d16);
-+ /*
-+ * call plugin to serialize object's identity
-+ */
-+ return inode_file_plugin(inode)->wire.write(inode, start);
-+}
-+
-+/* this returns number of 32 bit long numbers encoded in @lenp. 255 is
-+ * returned if file handle can not be stored */
-+/**
-+ * reiser4_encode_fh - encode_fh of export operations
-+ * @dentry:
-+ * @fh:
-+ * @lenp:
-+ * @need_parent:
-+ *
-+ */
-+static int
-+reiser4_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp,
-+ int need_parent)
-+{
-+ struct inode *inode;
-+ struct inode *parent;
-+ char *addr;
-+ int need;
-+ int delta;
-+ int result;
-+ reiser4_context *ctx;
-+
-+ /*
-+ * knfsd asks as to serialize object in @dentry, and, optionally its
-+ * parent (if need_parent != 0).
-+ *
-+ * encode_inode() and encode_inode_size() is used to build
-+ * representation of object and its parent. All hard work is done by
-+ * object plugins.
-+ */
-+ inode = dentry->d_inode;
-+ parent = dentry->d_parent->d_inode;
-+
-+ addr = (char *)fh;
-+
-+ need = encode_inode_size(inode);
-+ if (need < 0)
-+ return NFSERROR;
-+ if (need_parent) {
-+ delta = encode_inode_size(parent);
-+ if (delta < 0)
-+ return NFSERROR;
-+ need += delta;
-+ }
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ if (need <= sizeof(__u32) * (*lenp)) {
-+ addr = encode_inode(inode, addr);
-+ if (need_parent)
-+ addr = encode_inode(parent, addr);
-+
-+ /* store in lenp number of 32bit words required for file
-+ * handle. */
-+ *lenp = (need + sizeof(__u32) - 1) >> 2;
-+ result = need_parent ? FH_WITH_PARENT : FH_WITHOUT_PARENT;
-+ } else
-+ /* no enough space in file handle */
-+ result = NFSERROR;
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_get_dentry_parent - get_parent of export operations
-+ * @child:
-+ *
-+ */
-+static struct dentry *reiser4_get_dentry_parent(struct dentry *child)
-+{
-+ struct inode *dir;
-+ dir_plugin *dplug;
-+
-+ assert("nikita-3527", child != NULL);
-+ /* see comment in reiser4_get_dentry() about following assertion */
-+ assert("nikita-3528", is_in_reiser4_context());
-+
-+ dir = child->d_inode;
-+ assert("nikita-3529", dir != NULL);
-+ dplug = inode_dir_plugin(dir);
-+ assert("nikita-3531", ergo(dplug != NULL, dplug->get_parent != NULL));
-+ if (dplug != NULL)
-+ return dplug->get_parent(dir);
-+ else
-+ return ERR_PTR(RETERR(-ENOTDIR));
-+}
-+
-+/**
-+ * reiser4_get_dentry - get_dentry of export operations
-+ * @super:
-+ * @data:
-+ *
-+ *
-+ */
-+static struct dentry *reiser4_get_dentry(struct super_block *super, void *data)
-+{
-+ reiser4_object_on_wire *o;
-+
-+ assert("nikita-3522", super != NULL);
-+ assert("nikita-3523", data != NULL);
-+ /*
-+ * this is only supposed to be called by
-+ *
-+ * reiser4_decode_fh->find_exported_dentry
-+ *
-+ * so, reiser4_context should be here already.
-+ */
-+ assert("nikita-3526", is_in_reiser4_context());
-+
-+ o = (reiser4_object_on_wire *)data;
-+ assert("nikita-3524", o->plugin != NULL);
-+ assert("nikita-3525", o->plugin->wire.get != NULL);
-+
-+ return o->plugin->wire.get(super, o);
-+}
-+
-+struct export_operations reiser4_export_operations = {
-+ .encode_fh = reiser4_encode_fh,
-+ .decode_fh = reiser4_decode_fh,
-+ .get_parent = reiser4_get_dentry_parent,
-+ .get_dentry = reiser4_get_dentry
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/flush.c linux-2.6.20/fs/reiser4/flush.c
---- linux-2.6.20.orig/fs/reiser4/flush.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/flush.c 2007-05-06 14:50:43.000000000 +0400
-@@ -0,0 +1,3622 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* The design document for this file is at http://www.namesys.com/v4/v4.html. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/plugin.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "carry.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "super.h"
-+#include "entd.h"
-+#include "reiser4.h"
-+#include "flush.h"
-+#include "writeout.h"
-+
-+#include <asm/atomic.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/bio.h> /* for struct bio */
-+#include <linux/pagemap.h>
-+#include <linux/blkdev.h>
-+
-+/* IMPLEMENTATION NOTES */
-+
-+/* PARENT-FIRST: Some terminology: A parent-first traversal is a way of assigning a total
-+ order to the nodes of the tree in which the parent is placed before its children, which
-+ are ordered (recursively) in left-to-right order. When we speak of a "parent-first preceder", it
-+ describes the node that "came before in forward parent-first order". When we speak of a
-+ "parent-first follower", it describes the node that "comes next in parent-first
-+ order" (alternatively the node that "came before in reverse parent-first order").
-+
-+ The following pseudo-code prints the nodes of a tree in forward parent-first order:
-+
-+ void parent_first (node)
-+ {
-+ print_node (node);
-+ if (node->level > leaf) {
-+ for (i = 0; i < num_children; i += 1) {
-+ parent_first (node->child[i]);
-+ }
-+ }
-+ }
-+*/
-+
-+/* JUST WHAT ARE WE TRYING TO OPTIMIZE, HERE? The idea is to optimize block allocation so
-+ that a left-to-right scan of the tree's data (i.e., the leaves in left-to-right order)
-+ can be accomplished with sequential reads, which results in reading nodes in their
-+ parent-first order. This is a read-optimization aspect of the flush algorithm, and
-+ there is also a write-optimization aspect, which is that we wish to make large
-+ sequential writes to the disk by allocating or reallocating blocks so that they can be
-+ written in sequence. Sometimes the read-optimization and write-optimization goals
-+ conflict with each other, as we discuss in more detail below.
-+*/
-+
-+/* STATE BITS: The flush code revolves around the state of the jnodes it covers. Here are
-+ the relevant jnode->state bits and their relevence to flush:
-+
-+ JNODE_DIRTY: If a node is dirty, it must be flushed. But in order to be written it
-+ must be allocated first. In order to be considered allocated, the jnode must have
-+ exactly one of { JNODE_OVRWR, JNODE_RELOC } set. These two bits are exclusive, and
-+ all dirtied jnodes eventually have one of these bits set during each transaction.
-+
-+ JNODE_CREATED: The node was freshly created in its transaction and has no previous
-+ block address, so it is unconditionally assigned to be relocated, although this is
-+ mainly for code-convenience. It is not being 'relocated' from anything, but in
-+ almost every regard it is treated as part of the relocate set. The JNODE_CREATED bit
-+ remains set even after JNODE_RELOC is set, so the actual relocate can be
-+ distinguished from the created-and-allocated set easily: relocate-set members
-+ (belonging to the preserve-set) have (JNODE_RELOC) set and created-set members which
-+ have no previous location to preserve have (JNODE_RELOC | JNODE_CREATED) set.
-+
-+ JNODE_OVRWR: The node belongs to atom's overwrite set. The flush algorithm made the
-+ decision to maintain the pre-existing location for this node and it will be written
-+ to the wandered-log.
-+
-+ JNODE_RELOC: The flush algorithm made the decision to relocate this block (if it was
-+ not created, see note above). A block with JNODE_RELOC set is eligible for
-+ early-flushing and may be submitted during flush_empty_queues. When the JNODE_RELOC
-+ bit is set on a znode, the parent node's internal item is modified and the znode is
-+ rehashed.
-+
-+ JNODE_SQUEEZABLE: Before shifting everything left, the flush algorithm scans the node
-+ and calls plugin->f.squeeze() method for its items. By this technology we update disk
-+ clusters of cryptcompress objects. Also if leftmost point that was found by flush scan
-+ has this flag (races with write(), rare case) the flush algorythm makes the decision
-+ to pass it to squalloc() in spite of its flushprepped status for squeezing, not for
-+ repeated allocation.
-+
-+ JNODE_FLUSH_QUEUED: This bit is set when a call to flush enters the jnode into its
-+ flush queue. This means the jnode is not on any clean or dirty list, instead it is
-+ moved to one of the flush queue (see flush_queue.h) object private list. This
-+ prevents multiple concurrent flushes from attempting to start flushing from the
-+ same node.
-+
-+ (DEAD STATE BIT) JNODE_FLUSH_BUSY: This bit was set during the bottom-up
-+ squeeze-and-allocate on a node while its children are actively being squeezed and
-+ allocated. This flag was created to avoid submitting a write request for a node
-+ while its children are still being allocated and squeezed. Then flush queue was
-+ re-implemented to allow unlimited number of nodes be queued. This flag support was
-+ commented out in source code because we decided that there was no reason to submit
-+ queued nodes before jnode_flush() finishes. However, current code calls fq_write()
-+ during a slum traversal and may submit "busy nodes" to disk. Probably we can
-+ re-enable the JNODE_FLUSH_BUSY bit support in future.
-+
-+ With these state bits, we describe a test used frequently in the code below,
-+ jnode_is_flushprepped() (and the spin-lock-taking jnode_check_flushprepped()). The
-+ test for "flushprepped" returns true if any of the following are true:
-+
-+ - The node is not dirty
-+ - The node has JNODE_RELOC set
-+ - The node has JNODE_OVRWR set
-+
-+ If either the node is not dirty or it has already been processed by flush (and assigned
-+ JNODE_OVRWR or JNODE_RELOC), then it is prepped. If jnode_is_flushprepped() returns
-+ true then flush has work to do on that node.
-+*/
-+
-+/* FLUSH_PREP_ONCE_PER_TRANSACTION: Within a single transaction a node is never
-+ flushprepped twice (unless an explicit call to flush_unprep is made as described in
-+ detail below). For example a node is dirtied, allocated, and then early-flushed to
-+ disk and set clean. Before the transaction commits, the page is dirtied again and, due
-+ to memory pressure, the node is flushed again. The flush algorithm will not relocate
-+ the node to a new disk location, it will simply write it to the same, previously
-+ relocated position again.
-+*/
-+
-+/* THE BOTTOM-UP VS. TOP-DOWN ISSUE: This code implements a bottom-up algorithm where we
-+ start at a leaf node and allocate in parent-first order by iterating to the right. At
-+ each step of the iteration, we check for the right neighbor. Before advancing to the
-+ right neighbor, we check if the current position and the right neighbor share the same
-+ parent. If they do not share the same parent, the parent is allocated before the right
-+ neighbor.
-+
-+ This process goes recursively up the tree and squeeze nodes level by level as long as
-+ the right neighbor and the current position have different parents, then it allocates
-+ the right-neighbors-with-different-parents on the way back down. This process is
-+ described in more detail in flush_squalloc_changed_ancestor and the recursive function
-+ squalloc_one_changed_ancestor. But the purpose here is not to discuss the
-+ specifics of the bottom-up approach as it is to contrast the bottom-up and top-down
-+ approaches.
-+
-+ The top-down algorithm was implemented earlier (April-May 2002). In the top-down
-+ approach, we find a starting point by scanning left along each level past dirty nodes,
-+ then going up and repeating the process until the left node and the parent node are
-+ clean. We then perform a parent-first traversal from the starting point, which makes
-+ allocating in parent-first order trivial. After one subtree has been allocated in this
-+ manner, we move to the right, try moving upward, then repeat the parent-first
-+ traversal.
-+
-+ Both approaches have problems that need to be addressed. Both are approximately the
-+ same amount of code, but the bottom-up approach has advantages in the order it acquires
-+ locks which, at the very least, make it the better approach. At first glance each one
-+ makes the other one look simpler, so it is important to remember a few of the problems
-+ with each one.
-+
-+ Main problem with the top-down approach: When you encounter a clean child during the
-+ parent-first traversal, what do you do? You would like to avoid searching through a
-+ large tree of nodes just to find a few dirty leaves at the bottom, and there is not an
-+ obvious solution. One of the advantages of the top-down approach is that during the
-+ parent-first traversal you check every child of a parent to see if it is dirty. In
-+ this way, the top-down approach easily handles the main problem of the bottom-up
-+ approach: unallocated children.
-+
-+ The unallocated children problem is that before writing a node to disk we must make
-+ sure that all of its children are allocated. Otherwise, the writing the node means
-+ extra I/O because the node will have to be written again when the child is finally
-+ allocated.
-+
-+ WE HAVE NOT YET ELIMINATED THE UNALLOCATED CHILDREN PROBLEM. Except for bugs, this
-+ should not cause any file system corruption, it only degrades I/O performance because a
-+ node may be written when it is sure to be written at least one more time in the same
-+ transaction when the remaining children are allocated. What follows is a description
-+ of how we will solve the problem.
-+*/
-+
-+/* HANDLING UNALLOCATED CHILDREN: During flush we may allocate a parent node then,
-+ proceeding in parent first order, allocate some of its left-children, then encounter a
-+ clean child in the middle of the parent. We do not allocate the clean child, but there
-+ may remain unallocated (dirty) children to the right of the clean child. If we were to
-+ stop flushing at this moment and write everything to disk, the parent might still
-+ contain unallocated children.
-+
-+ We could try to allocate all the descendents of every node that we allocate, but this
-+ is not necessary. Doing so could result in allocating the entire tree: if the root
-+ node is allocated then every unallocated node would have to be allocated before
-+ flushing. Actually, we do not have to write a node just because we allocate it. It is
-+ possible to allocate but not write a node during flush, when it still has unallocated
-+ children. However, this approach is probably not optimal for the following reason.
-+
-+ The flush algorithm is designed to allocate nodes in parent-first order in an attempt
-+ to optimize reads that occur in the same order. Thus we are read-optimizing for a
-+ left-to-right scan through all the leaves in the system, and we are hoping to
-+ write-optimize at the same time because those nodes will be written together in batch.
-+ What happens, however, if we assign a block number to a node in its read-optimized
-+ order but then avoid writing it because it has unallocated children? In that
-+ situation, we lose out on the write-optimization aspect because a node will have to be
-+ written again to the its location on the device, later, which likely means seeking back
-+ to that location.
-+
-+ So there are tradeoffs. We can choose either:
-+
-+ A. Allocate all unallocated children to preserve both write-optimization and
-+ read-optimization, but this is not always desirable because it may mean having to
-+ allocate and flush very many nodes at once.
-+
-+ B. Defer writing nodes with unallocated children, keep their read-optimized locations,
-+ but sacrifice write-optimization because those nodes will be written again.
-+
-+ C. Defer writing nodes with unallocated children, but do not keep their read-optimized
-+ locations. Instead, choose to write-optimize them later, when they are written. To
-+ facilitate this, we "undo" the read-optimized allocation that was given to the node so
-+ that later it can be write-optimized, thus "unpreparing" the flush decision. This is a
-+ case where we disturb the FLUSH_PREP_ONCE_PER_TRANSACTION rule described above. By a
-+ call to flush_unprep() we will: if the node was wandered, unset the JNODE_OVRWR bit;
-+ if the node was relocated, unset the JNODE_RELOC bit, non-deferred-deallocate its block
-+ location, and set the JNODE_CREATED bit, effectively setting the node back to an
-+ unallocated state.
-+
-+ We will take the following approach in v4.0: for twig nodes we will always finish
-+ allocating unallocated children (A). For nodes with (level > TWIG) we will defer
-+ writing and choose write-optimization (C).
-+
-+ To summarize, there are several parts to a solution that avoids the problem with
-+ unallocated children:
-+
-+ FIXME-ZAM: Still no one approach is implemented to eliminate the "UNALLOCATED CHILDREN"
-+ problem because there was an experiment which was done showed that we have 1-2 nodes
-+ with unallocated children for thousands of written nodes. The experiment was simple
-+ like coping / deletion of linux kernel sources. However the problem can arise in more
-+ complex tests. I think we have jnode_io_hook to insert a check for unallocated
-+ children and see what kind of problem we have.
-+
-+ 1. When flush reaches a stopping point (e.g., a clean node), it should continue calling
-+ squeeze-and-allocate on any remaining unallocated children. FIXME: Difficulty to
-+ implement: should be simple -- amounts to adding a while loop to jnode_flush, see
-+ comments in that function.
-+
-+ 2. When flush reaches flush_empty_queue(), some of the (level > TWIG) nodes may still
-+ have unallocated children. If the twig level has unallocated children it is an
-+ assertion failure. If a higher-level node has unallocated children, then it should be
-+ explicitly de-allocated by a call to flush_unprep(). FIXME: Difficulty to implement:
-+ should be simple.
-+
-+ 3. (CPU-Optimization) Checking whether a node has unallocated children may consume more
-+ CPU cycles than we would like, and it is possible (but medium complexity) to optimize
-+ this somewhat in the case where large sub-trees are flushed. The following observation
-+ helps: if both the left- and right-neighbor of a node are processed by the flush
-+ algorithm then the node itself is guaranteed to have all of its children allocated.
-+ However, the cost of this check may not be so expensive after all: it is not needed for
-+ leaves and flush can guarantee this property for twigs. That leaves only (level >
-+ TWIG) nodes that have to be checked, so this optimization only helps if at least three
-+ (level > TWIG) nodes are flushed in one pass, and the savings will be very small unless
-+ there are many more (level > TWIG) nodes. But if there are many (level > TWIG) nodes
-+ then the number of blocks being written will be very large, so the savings may be
-+ insignificant. That said, the idea is to maintain both the left and right edges of
-+ nodes that are processed in flush. When flush_empty_queue() is called, a relatively
-+ simple test will tell whether the (level > TWIG) node is on the edge. If it is on the
-+ edge, the slow check is necessary, but if it is in the interior then it can be assumed
-+ to have all of its children allocated. FIXME: medium complexity to implement, but
-+ simple to verify given that we must have a slow check anyway.
-+
-+ 4. (Optional) This part is optional, not for v4.0--flush should work independently of
-+ whether this option is used or not. Called RAPID_SCAN, the idea is to amend the
-+ left-scan operation to take unallocated children into account. Normally, the left-scan
-+ operation goes left as long as adjacent nodes are dirty up until some large maximum
-+ value (FLUSH_SCAN_MAXNODES) at which point it stops and begins flushing. But scan-left
-+ may stop at a position where there are unallocated children to the left with the same
-+ parent. When RAPID_SCAN is enabled, the ordinary scan-left operation stops after
-+ FLUSH_RELOCATE_THRESHOLD, which is much smaller than FLUSH_SCAN_MAXNODES, then procedes
-+ with a rapid scan. The rapid scan skips all the interior children of a node--if the
-+ leftmost child of a twig is dirty, check its left neighbor (the rightmost child of the
-+ twig to the left). If the left neighbor of the leftmost child is also dirty, then
-+ continue the scan at the left twig and repeat. This option will cause flush to
-+ allocate more twigs in a single pass, but it also has the potential to write many more
-+ nodes than would otherwise be written without the RAPID_SCAN option. RAPID_SCAN
-+ was partially implemented, code removed August 12, 2002 by JMACD.
-+*/
-+
-+/* FLUSH CALLED ON NON-LEAF LEVEL. Most of our design considerations assume that the
-+ starting point for flush is a leaf node, but actually the flush code cares very little
-+ about whether or not this is true. It is possible that all the leaf nodes are flushed
-+ and dirty parent nodes still remain, in which case jnode_flush() is called on a
-+ non-leaf argument. Flush doesn't care--it treats the argument node as if it were a
-+ leaf, even when it is not. This is a simple approach, and there may be a more optimal
-+ policy but until a problem with this approach is discovered, simplest is probably best.
-+
-+ NOTE: In this case, the ordering produced by flush is parent-first only if you ignore
-+ the leaves. This is done as a matter of simplicity and there is only one (shaky)
-+ justification. When an atom commits, it flushes all leaf level nodes first, followed
-+ by twigs, and so on. With flushing done in this order, if flush is eventually called
-+ on a non-leaf node it means that (somehow) we reached a point where all leaves are
-+ clean and only internal nodes need to be flushed. If that it the case, then it means
-+ there were no leaves that were the parent-first preceder/follower of the parent. This
-+ is expected to be a rare case, which is why we do nothing special about it. However,
-+ memory pressure may pass an internal node to flush when there are still dirty leaf
-+ nodes that need to be flushed, which could prove our original assumptions
-+ "inoperative". If this needs to be fixed, then scan_left/right should have
-+ special checks for the non-leaf levels. For example, instead of passing from a node to
-+ the left neighbor, it should pass from the node to the left neighbor's rightmost
-+ descendent (if dirty).
-+
-+*/
-+
-+/* UNIMPLEMENTED AS YET: REPACKING AND RESIZING. We walk the tree in 4MB-16MB chunks, dirtying everything and putting
-+ it into a transaction. We tell the allocator to allocate the blocks as far as possible towards one end of the
-+ logical device--the left (starting) end of the device if we are walking from left to right, the right end of the
-+ device if we are walking from right to left. We then make passes in alternating directions, and as we do this the
-+ device becomes sorted such that tree order and block number order fully correlate.
-+
-+ Resizing is done by shifting everything either all the way to the left or all the way
-+ to the right, and then reporting the last block.
-+*/
-+
-+/* RELOCATE DECISIONS: The code makes a decision to relocate in several places. This
-+ descibes the policy from the highest level:
-+
-+ The FLUSH_RELOCATE_THRESHOLD parameter: If we count this many consecutive nodes on the
-+ leaf level during flush-scan (right, left), then we unconditionally decide to relocate
-+ leaf nodes.
-+
-+ Otherwise, there are two contexts in which we make a decision to relocate:
-+
-+ 1. The REVERSE PARENT-FIRST context: Implemented in reverse_relocate_test().
-+ During the initial stages of flush, after scan-right completes, we want to ask the
-+ question: should we relocate this leaf node and thus dirty the parent node. Then if
-+ the node is a leftmost child its parent is its own parent-first preceder, thus we repeat
-+ the question at the next level up, and so on. In these cases we are moving in the
-+ reverse-parent first direction.
-+
-+ There is another case which is considered the reverse direction, which comes at the end
-+ of a twig in reverse_relocate_end_of_twig(). As we finish processing a twig we may
-+ reach a point where there is a clean twig to the right with a dirty leftmost child. In
-+ this case, we may wish to relocate the child by testing if it should be relocated
-+ relative to its parent.
-+
-+ 2. The FORWARD PARENT-FIRST context: Testing for forward relocation is done in
-+ allocate_znode. What distinguishes the forward parent-first case from the
-+ reverse-parent first case is that the preceder has already been allocated in the
-+ forward case, whereas in the reverse case we don't know what the preceder is until we
-+ finish "going in reverse". That simplifies the forward case considerably, and there we
-+ actually use the block allocator to determine whether, e.g., a block closer to the
-+ preceder is available.
-+*/
-+
-+/* SQUEEZE_LEFT_EDGE: Unimplemented idea for future consideration. The idea is, once we
-+ finish scan-left and find a starting point, if the parent's left neighbor is dirty then
-+ squeeze the parent's left neighbor and the parent. This may change the
-+ flush-starting-node's parent. Repeat until the child's parent is stable. If the child
-+ is a leftmost child, repeat this left-edge squeezing operation at the next level up.
-+ Note that we cannot allocate extents during this or they will be out of parent-first
-+ order. There are also some difficult coordinate maintenance issues. We can't do a tree
-+ search to find coordinates again (because we hold locks), we have to determine them
-+ from the two nodes being squeezed. Looks difficult, but has potential to increase
-+ space utilization. */
-+
-+/* Flush-scan helper functions. */
-+static void scan_init(flush_scan * scan);
-+static void scan_done(flush_scan * scan);
-+
-+/* Flush-scan algorithm. */
-+static int scan_left(flush_scan * scan, flush_scan * right, jnode * node,
-+ unsigned limit);
-+static int scan_right(flush_scan * scan, jnode * node, unsigned limit);
-+static int scan_common(flush_scan * scan, flush_scan * other);
-+static int scan_formatted(flush_scan * scan);
-+static int scan_unformatted(flush_scan * scan, flush_scan * other);
-+static int scan_by_coord(flush_scan * scan);
-+
-+/* Initial flush-point ancestor allocation. */
-+static int alloc_pos_and_ancestors(flush_pos_t * pos);
-+static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos);
-+static int set_preceder(const coord_t * coord_in, flush_pos_t * pos);
-+
-+/* Main flush algorithm. Note on abbreviation: "squeeze and allocate" == "squalloc". */
-+static int squalloc(flush_pos_t * pos);
-+
-+/* Flush squeeze implementation. */
-+static int squeeze_right_non_twig(znode * left, znode * right);
-+static int shift_one_internal_unit(znode * left, znode * right);
-+
-+/* Flush reverse parent-first relocation routines. */
-+static int reverse_relocate_if_close_enough(const reiser4_block_nr * pblk,
-+ const reiser4_block_nr * nblk);
-+static int reverse_relocate_test(jnode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int reverse_relocate_check_dirty_parent(jnode * node,
-+ const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+
-+/* Flush allocate write-queueing functions: */
-+static int allocate_znode(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int allocate_znode_update(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int lock_parent_and_allocate_znode(znode *, flush_pos_t *);
-+
-+/* Flush helper functions: */
-+static int jnode_lock_parent_coord(jnode * node,
-+ coord_t * coord,
-+ lock_handle * parent_lh,
-+ load_count * parent_zh,
-+ znode_lock_mode mode, int try);
-+static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side,
-+ znode_lock_mode mode, int check_dirty);
-+static int znode_same_parents(znode * a, znode * b);
-+
-+/* Formatted-node convenience wrapper: run the generic flush-prepped check on
-+ the jnode obtained from @node via ZJNODE() and return its result. */
-+static int znode_check_flushprepped(znode * node)
-+{
-+ jnode *as_jnode = ZJNODE(node);
-+
-+ return jnode_check_flushprepped(as_jnode);
-+}
-+
-+/* Flush position functions */
-+static void pos_init(flush_pos_t * pos);
-+static int pos_valid(flush_pos_t * pos);
-+static void pos_done(flush_pos_t * pos);
-+static int pos_stop(flush_pos_t * pos);
-+
-+/* Debug assertion used with a flush_scan: when @scan is moving LEFT_SIDE, its
-+ * parent coord sits on a TWIG_LEVEL node, scan->node is unformatted and the
-+ * parent extent is unallocated, then scan->node must be the first jnode of
-+ * that extent unit (its index equals the extent unit index). Stated rationale:
-+ * all jnodes of unallocated extent are dirty and of the same atom. */
-+#define checkchild(scan) \
-+assert("nikita-3435", \
-+ ergo(scan->direction == LEFT_SIDE && \
-+ (scan->parent_coord.node->level == TWIG_LEVEL) && \
-+ jnode_is_unformatted(scan->node) && \
-+ extent_is_unallocated(&scan->parent_coord), \
-+ extent_unit_index(&scan->parent_coord) == index_jnode(scan->node)))
-+
-+/* This flush_cnt variable is used to track the number of concurrent flush operations,
-+ useful for debugging. It is initialized in txnmgr.c out of laziness (because flush has
-+ no static initializer function...) */
-+ON_DEBUG(atomic_t flush_cnt;
-+ )
-+
-+/* Report whether the backing device of the current super block is write
-+ congested. The device is looked up through the mapping of the inode returned
-+ by reiser4_get_super_fake(); the value of bdi_write_congested() is returned
-+ as is. */
-+static int check_write_congestion(void)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+ struct backing_dev_info *bdi =
-+ reiser4_get_super_fake(sb)->i_mapping->backing_dev_info;
-+
-+ return bdi_write_congested(bdi);
-+}
-+
-+/* Conditionally write out the flush queue attached to @pos. Nothing is written
-+ (and 0 is returned) unless JNODE_FLUSH_WRITE_BLOCKS is set in pos->flags and
-+ the backing device is not write congested; otherwise the result of
-+ reiser4_write_fq() is returned. */
-+static int write_prepped_nodes(flush_pos_t * pos)
-+{
-+ assert("zam-831", pos);
-+ assert("zam-832", pos->fq);
-+
-+ /* the flag is tested first, so congestion is only queried when a write
-+ was actually requested */
-+ if (!(pos->flags & JNODE_FLUSH_WRITE_BLOCKS) || check_write_congestion())
-+ return 0;
-+
-+ return reiser4_write_fq(pos->fq, pos->nr_written,
-+ WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
-+}
-+
-+/* Properly release all resources held by the flush position, then move the
-+ flush position to the node locked by @new_lock and loaded by @new_load. The
-+ position coord is copied from @new_coord when one is given, otherwise it is
-+ set to the first unit of the new node. A child reference held in pos->child
-+ is dropped. */
-+static void move_flush_pos(flush_pos_t * pos, lock_handle * new_lock,
-+ load_count * new_load, const coord_t * new_coord)
-+{
-+ assert("zam-857", new_lock->node == new_load->node);
-+
-+ if (new_coord == NULL) {
-+ coord_init_first_unit(&pos->coord, new_lock->node);
-+ } else {
-+ assert("zam-858", new_coord->node == new_lock->node);
-+ coord_dup(&pos->coord, new_coord);
-+ }
-+
-+ if (pos->child != NULL) {
-+ jput(pos->child);
-+ pos->child = NULL;
-+ }
-+
-+ /* take over the load count first, then replace the old long term lock
-+ with the new one */
-+ move_load_count(&pos->load, new_load);
-+ done_lh(&pos->lock);
-+ move_lh(&pos->lock, new_lock);
-+}
-+
-+/* Delete an empty node whose link from the parent still exists. The asserts
-+ require @node to be non-NULL, empty and write-locked. Returns the result of
-+ reiser4_delete_node(); the key passed out through &smallest_removed is not
-+ used by this function. */
-+static int delete_empty_node(znode * node)
-+{
-+ reiser4_key smallest_removed;
-+
-+ assert("zam-1019", node != NULL);
-+ assert("zam-1020", node_is_empty(node));
-+ assert("zam-1023", znode_is_wlocked(node));
-+
-+ return reiser4_delete_node(node, &smallest_removed, NULL, 1);
-+}
-+
-+/* Prepare flush position for alloc_pos_and_ancestors() and squalloc().
-+
-+ If @org is a znode, it is write-locked and loaded, and @pos is placed on it
-+ (POS_ON_LEAF or POS_ON_INTERNAL depending on its level). Otherwise the
-+ parent of @org is locked, @pos is placed on the parent coord
-+ (POS_ON_EPOINT) and a reference to @org is stored in pos->child.
-+
-+ Returns 0 on success, -EAGAIN when flush has to be repeated, or the error
-+ returned by the locking/loading helpers. Every exit goes through the common
-+ cleanup at "done", so a lock or load count that was taken but not handed
-+ over to @pos is always released. */
-+static int prepare_flush_pos(flush_pos_t * pos, jnode * org)
-+{
-+ int ret;
-+ load_count load;
-+ lock_handle lock;
-+
-+ init_lh(&lock);
-+ init_load_count(&load);
-+
-+ if (jnode_is_znode(org)) {
-+ ret = longterm_lock_znode(&lock, JZNODE(org),
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
-+ if (ret)
-+ goto done;
-+
-+ ret = incr_load_count_znode(&load, JZNODE(org));
-+ if (ret)
-+ /* the long term lock is already held here: release it
-+ at "done" instead of leaking it with a bare return */
-+ goto done;
-+
-+ pos->state =
-+ (jnode_get_level(org) ==
-+ LEAF_LEVEL) ? POS_ON_LEAF : POS_ON_INTERNAL;
-+ move_flush_pos(pos, &lock, &load, NULL);
-+ } else {
-+ coord_t parent_coord;
-+ ret = jnode_lock_parent_coord(org, &parent_coord, &lock,
-+ &load, ZNODE_WRITE_LOCK, 0);
-+ if (ret)
-+ goto done;
-+ if (!item_is_extent(&parent_coord)) {
-+ /* file was converted to tail, org became HB, we found internal
-+ item */
-+ ret = -EAGAIN;
-+ goto done;
-+ }
-+
-+ pos->state = POS_ON_EPOINT;
-+ move_flush_pos(pos, &lock, &load, &parent_coord);
-+ pos->child = jref(org);
-+ if (extent_is_unallocated(&parent_coord)
-+ && extent_unit_index(&parent_coord) != index_jnode(org)) {
-+ /* @org is not first child of its parent unit. This may happen
-+ because longterm lock of its parent node was released between
-+ scan_left and scan_right. For now work around this having flush to repeat */
-+ ret = -EAGAIN;
-+ }
-+ }
-+
-+ done:
-+ /* common cleanup for every exit path, success included */
-+ done_load_count(&load);
-+ done_lh(&lock);
-+ return ret;
-+}
-+
-+/* TODO LIST (no particular order): */
-+/* I have labelled most of the legitimate FIXME comments in this file with letters to
-+ indicate which issue they relate to. There are a few miscellaneous FIXMEs with
-+ specific names mentioned instead that need to be inspected/resolved. */
-+/* B. There is an issue described in reverse_relocate_test having to do with an
-+ imprecise is_preceder? check having to do with partially-dirty extents. The code that
-+ sets preceder hints and computes the preceder is basically untested. Careful testing
-+ needs to be done that preceder calculations are done correctly, since if it doesn't
-+ affect correctness we will not catch this stuff during regular testing. */
-+/* C. EINVAL, E_DEADLOCK, E_NO_NEIGHBOR, ENOENT handling. It is unclear which of these are
-+ considered expected but unlikely conditions. Flush currently returns 0 (i.e., success
-+ but no progress, i.e., restart) whenever it receives any of these in jnode_flush().
-+ Many of the calls that may produce one of these return values (i.e.,
-+ longterm_lock_znode, reiser4_get_parent, reiser4_get_neighbor, ...) check some of these
-+ values themselves and, for instance, stop flushing instead of resulting in a restart.
-+ If any of these results are true error conditions then flush will go into a busy-loop,
-+ as we noticed during testing when a corrupt tree caused find_child_ptr to return
-+ ENOENT. It needs careful thought and testing of corner conditions.
-+*/
-+/* D. Atomicity of flush_prep against deletion and flush concurrency. Suppose a created
-+ block is assigned a block number then early-flushed to disk. It is dirtied again and
-+ flush is called again. Concurrently, that block is deleted, and the de-allocation of
-+ its block number does not need to be deferred, since it is not part of the preserve set
-+ (i.e., it didn't exist before the transaction). I think there may be a race condition
-+ where flush writes the dirty, created block after the non-deferred deallocated block
-+ number is re-allocated, making it possible to write deleted data on top of non-deleted
-+ data. It's just a theory, but it needs to be thought out. */
-+/* F. bio_alloc() failure is not handled gracefully. */
-+/* G. Unallocated children. */
-+/* H. Add a WANDERED_LIST to the atom to clarify the placement of wandered blocks. */
-+/* I. Rename flush-scan to scan-point, (flush-pos to flush-point?) */
-+
-+/* JNODE_FLUSH: MAIN ENTRY POINT */
-+/* This is the main entry point for flushing a jnode and its dirty neighborhood (dirty
-+ neighborhood is named "slum"). Jnode_flush() is called if reiser4 has to write dirty
-+ blocks to disk, it happens when Linux VM decides to reduce number of dirty pages or as
-+ a part of transaction commit.
-+
-+ Our objective here is to prep and flush the slum the jnode belongs to. We want to
-+ squish the slum together, and allocate the nodes in it as we squish because allocation
-+ of children affects squishing of parents.
-+
-+ The "argument" @node tells flush where to start. From there, flush finds the left edge
-+ of the slum, and calls squalloc (in which nodes are squeezed and allocated). To find a
-+ "better place" to start squalloc first we perform a flush_scan.
-+
-+ Flush-scanning may be performed in both left and right directions, but for different
-+ purposes. When scanning to the left, we are searching for a node that precedes a
-+ sequence of parent-first-ordered nodes which we will then flush in parent-first order.
-+ During flush-scanning, we also take the opportunity to count the number of consecutive
-+ leaf nodes. If this number is past some threshold (FLUSH_RELOCATE_THRESHOLD), then we
-+ make a decision to reallocate leaf nodes (thus favoring write-optimization).
-+
-+ Since the flush argument node can be anywhere in a sequence of dirty leaves, there may
-+ also be dirty nodes to the right of the argument. If the scan-left operation does not
-+ count at least FLUSH_RELOCATE_THRESHOLD nodes then we follow it with a right-scan
-+ operation to see whether there is, in fact, enough nodes to meet the relocate
-+ threshold. Each right- and left-scan operation uses a single flush_scan object.
-+
-+ After left-scan and possibly right-scan, we prepare a flush_position object with the
-+ starting flush point or parent coordinate, which was determined using scan-left.
-+
-+ Next we call the main flush routine, squalloc, which iterates along the
-+ leaf level, squeezing and allocating nodes (and placing them into the flush queue).
-+
-+ After squalloc returns we take extra steps to ensure that all the children
-+ of the final twig node are allocated--this involves repeating squalloc
-+ until we finish at a twig with no unallocated children.
-+
-+ Finally, we call flush_empty_queue to submit write-requests to disk. If we encounter
-+ any above-twig nodes during flush_empty_queue that still have unallocated children, we
-+ flush_unprep them.
-+
-+ Flush treats several "failure" cases as non-failures, essentially causing them to start
-+ over. E_DEADLOCK is one example. FIXME:(C) EINVAL, E_NO_NEIGHBOR, ENOENT: these should
-+ probably be handled properly rather than restarting, but there are a bunch of cases to
-+ audit.
-+*/
-+
-+static int
-+jnode_flush(jnode * node, long nr_to_write, long *nr_written,
-+ flush_queue_t * fq, int flags)
-+{
-+ long ret = 0;
-+ flush_scan *right_scan;
-+ flush_scan *left_scan;
-+ flush_pos_t *flush_pos;
-+ int todo;
-+ struct super_block *sb;
-+ reiser4_super_info_data *sbinfo;
-+ jnode *leftmost_in_slum = NULL;
-+
-+ assert("jmacd-76619", lock_stack_isclean(get_current_lock_stack()));
-+ assert("nikita-3022", reiser4_schedulable());
-+
-+ assert("nikita-3185",
-+ get_current_super_private()->delete_mutex_owner != current);
-+
-+ /* allocate right_scan, left_scan and flush_pos as one kmalloc'ed chunk
-+ (two flush_scan objects followed by one flush_pos_t); the whole chunk
-+ is released by kfree(right_scan) on the way out */
-+ right_scan =
-+ kmalloc(2 * sizeof(*right_scan) + sizeof(*flush_pos),
-+ reiser4_ctx_gfp_mask_get());
-+ if (right_scan == NULL)
-+ return RETERR(-ENOMEM);
-+ left_scan = right_scan + 1;
-+ flush_pos = (flush_pos_t *) (left_scan + 1);
-+
-+ sb = reiser4_get_current_sb();
-+ sbinfo = get_super_private(sb);
-+
-+ /* Flush-concurrency debug code */
-+#if REISER4_DEBUG
-+ atomic_inc(&flush_cnt);
-+#endif
-+
-+ reiser4_enter_flush(sb);
-+
-+ /* Initialize a flush position. */
-+ pos_init(flush_pos);
-+
-+ flush_pos->nr_written = nr_written;
-+ flush_pos->fq = fq;
-+ flush_pos->flags = flags;
-+ flush_pos->nr_to_write = nr_to_write;
-+
-+ scan_init(right_scan);
-+ scan_init(left_scan);
-+
-+ /* First scan left and remember the leftmost scan position. If the leftmost
-+ position is unformatted we remember its parent_coord. We scan until counting
-+ FLUSH_SCAN_MAXNODES.
-+
-+ If starting @node is unformatted, at the beginning of left scan its
-+ parent (twig level node, containing extent item) will be long term
-+ locked and lock handle will be stored in the
-+ @right_scan->parent_lock. This lock is used to start the rightward
-+ scan without redoing the tree traversal (necessary to find parent)
-+ and, hence, is kept during leftward scan. As a result, we have to
-+ use try-lock when taking long term locks during the leftward scan.
-+ */
-+ ret = scan_left(left_scan, right_scan,
-+ node, sbinfo->flush.scan_maxnodes);
-+ if (ret != 0)
-+ goto failed;
-+
-+ leftmost_in_slum = jref(left_scan->node);
-+ scan_done(left_scan);
-+
-+ /* Then possibly go right to decide if we will use a policy of relocating leaves.
-+ This is only done if we did not scan past (and count) enough nodes during the
-+ leftward scan. If we do scan right, we only care to go far enough to establish
-+ that at least FLUSH_RELOCATE_THRESHOLD number of nodes are being flushed. The
-+ scan limit is the difference between left_scan.count and the threshold. */
-+
-+ todo = sbinfo->flush.relocate_threshold - left_scan->count;
-+ /* scan right is inherently deadlock prone, because we are
-+ * (potentially) holding a lock on the twig node at this moment.
-+ * FIXME: this is incorrect comment: lock is not held */
-+ if (todo > 0) {
-+ ret = scan_right(right_scan, node, (unsigned)todo);
-+ if (ret != 0)
-+ goto failed;
-+ }
-+
-+ /* Only the right-scan count is needed, release any rightward locks right away. */
-+ scan_done(right_scan);
-+
-+ /* ... and the answer is: we should relocate leaf nodes if at least
-+ FLUSH_RELOCATE_THRESHOLD nodes were found. */
-+ flush_pos->leaf_relocate = JF_ISSET(node, JNODE_REPACK) ||
-+ (left_scan->count + right_scan->count >=
-+ sbinfo->flush.relocate_threshold);
-+
-+ /* Funny business here. We set the 'point' in the flush_position prior to
-+ starting squalloc regardless of whether the first point is
-+ formatted or unformatted. Without this there would be an invariant, in the
-+ rest of the code, that if the flush_position is unformatted then
-+ flush_position->point is NULL and flush_position->parent_{lock,coord} is set,
-+ and if the flush_position is formatted then flush_position->point is non-NULL
-+ and no parent info is set.
-+
-+ This seems lazy, but it makes the initial calls to reverse_relocate_test
-+ (which ask "is it the pos->point the leftmost child of its parent") much easier
-+ because we know the first child already. Nothing is broken by this, but the
-+ reasoning is subtle. Holding an extra reference on a jnode during flush can
-+ cause us to see nodes with HEARD_BANSHEE during squalloc, because nodes are not
-+ removed from sibling lists until they have zero reference count. Flush would
-+ never observe a HEARD_BANSHEE node on the left-edge of flush, nodes are only
-+ deleted to the right. So if nothing is broken, why fix it?
-+
-+ NOTE-NIKITA actually, flush can meet HEARD_BANSHEE node at any
-+ point and in any moment, because of the concurrent file system
-+ activity (for example, truncate). */
-+
-+ /* Check jnode state after flush_scan completed. Having a lock on this
-+ node or its parent (in case of unformatted) helps us in case of
-+ concurrent flushing. */
-+ if (jnode_check_flushprepped(leftmost_in_slum)
-+ && !jnode_convertible(leftmost_in_slum)) {
-+ ret = 0;
-+ goto failed;
-+ }
-+
-+ /* Now setup flush_pos using scan_left's endpoint. */
-+ ret = prepare_flush_pos(flush_pos, leftmost_in_slum);
-+ if (ret)
-+ goto failed;
-+
-+ if (znode_get_level(flush_pos->coord.node) == LEAF_LEVEL
-+ && node_is_empty(flush_pos->coord.node)) {
-+ znode *empty = flush_pos->coord.node;
-+
-+ assert("zam-1022", !ZF_ISSET(empty, JNODE_HEARD_BANSHEE));
-+ ret = delete_empty_node(empty);
-+ goto failed;
-+ }
-+
-+ if (jnode_check_flushprepped(leftmost_in_slum)
-+ && !jnode_convertible(leftmost_in_slum)) {
-+ ret = 0;
-+ goto failed;
-+ }
-+
-+ /* Set pos->preceder and (re)allocate pos and its ancestors if it is needed */
-+ ret = alloc_pos_and_ancestors(flush_pos);
-+ if (ret)
-+ goto failed;
-+
-+ /* Do the main rightward-bottom-up squeeze and allocate loop. */
-+ ret = squalloc(flush_pos);
-+ pos_stop(flush_pos);
-+ if (ret)
-+ goto failed;
-+
-+ /* FIXME_NFQUCMPD: Here, handle the twig-special case for unallocated children.
-+ First, the pos_stop() and pos_valid() routines should be modified
-+ so that pos_stop() sets a flush_position->stop flag to 1 without
-+ releasing the current position immediately--instead release it in
-+ pos_done(). This is a better implementation than the current one anyway.
-+
-+ It is not clear that all fields of the flush_position should not be released,
-+ but at the very least the parent_lock, parent_coord, and parent_load should
-+ remain held because they hold the last twig when pos_stop() is
-+ called.
-+
-+ When we reach this point in the code, if the parent_coord is set to after the
-+ last item then we know that flush reached the end of a twig (and according to
-+ the new flush queueing design, we will return now). If parent_coord is not
-+ past the last item, we should check if the current twig has any unallocated
-+ children to the right (we are not concerned with unallocated children to the
-+ left--in that case the twig itself should not have been allocated). If the
-+ twig has unallocated children to the right, set the parent_coord to that
-+ position and then repeat the call to squalloc.
-+
-+ Testing for unallocated children may be defined in two ways: if any internal
-+ item has a fake block number, it is unallocated; if any extent item is
-+ unallocated then all of its children are unallocated. But there is a more
-+ aggressive approach: if there are any dirty children of the twig to the right
-+ of the current position, we may wish to relocate those nodes now. Checking for
-+ potential relocation is more expensive as it requires knowing whether there are
-+ any dirty children that are not unallocated. The extent_needs_allocation
-+ should be used after setting the correct preceder.
-+
-+ When we reach the end of a twig at this point in the code, if the flush can
-+ continue (when the queue is ready) it will need some information on the future
-+ starting point. That should be stored away in the flush_handle using a seal, I
-+ believe. Holding a jref() on the future starting point may break other code
-+ that deletes that node.
-+ */
-+
-+ /* FIXME_NFQUCMPD: Also, we don't want to do any flushing when flush is called
-+ above the twig level. If the VM calls flush above the twig level, do nothing
-+ and return (but figure out why this happens). The txnmgr should be modified to
-+ only flush its leaf-level dirty list. This will do all the necessary squeeze
-+ and allocate steps but leave unallocated branches and possibly unallocated
-+ twigs (when the twig's leftmost child is not dirty). After flushing the leaf
-+ level, the remaining unallocated nodes should be given write-optimized
-+ locations. (Possibly, the remaining unallocated twigs should be allocated just
-+ before their leftmost child.)
-+ */
-+
-+ /* Any failure reaches this point; so does normal completion, which falls
-+ through from above with ret == 0. */
-+ failed:
-+
-+ switch (ret) {
-+ case -E_REPEAT:
-+ case -EINVAL:
-+ case -E_DEADLOCK:
-+ case -E_NO_NEIGHBOR:
-+ case -ENOENT:
-+ /* FIXME(C): Except for E_DEADLOCK, these should probably be handled properly
-+ in each case. They already are handled in many cases. */
-+ /* Something bad happened, but difficult to avoid... Try again!
-+ These codes are therefore reported to the caller as 0. */
-+ ret = 0;
-+ }
-+
-+ if (leftmost_in_slum)
-+ jput(leftmost_in_slum);
-+
-+ pos_done(flush_pos);
-+ scan_done(left_scan);
-+ scan_done(right_scan);
-+ kfree(right_scan);
-+
-+ ON_DEBUG(atomic_dec(&flush_cnt));
-+
-+ reiser4_leave_flush(sb);
-+
-+ return ret;
-+}
-+
-+/* "Rapid flush mode" of the reiser4 flush subsystem means that the flusher
-+ * submits all prepped nodes immediately instead of keeping them in flush
-+ * queues for a long time. The reason for rapid flush mode is to free memory
-+ * as fast as possible. */
-+
-+#if REISER4_USE_RAPID_FLUSH
-+
-+/**
-+ * Write out the prepped nodes of @pos when wbq_available() reports true;
-+ * otherwise do nothing and return 0.
-+ */
-+
-+static int rapid_flush(flush_pos_t * pos)
-+{
-+ return wbq_available() ? write_prepped_nodes(pos) : 0;
-+}
-+
-+#else
-+
-+#define rapid_flush(pos) (0)
-+
-+#endif /* REISER4_USE_RAPID_FLUSH */
-+
-+static jnode *find_flush_start_jnode(jnode *start, txn_atom *atom,
-+ flush_queue_t *fq, int *nr_queued,
-+ int flags)
-+{
-+ jnode * node;
-+
-+ if (start != NULL) {
-+ spin_lock_jnode(start);
-+ if (!jnode_is_flushprepped(start)) {
-+ assert("zam-1056", start->atom == atom);
-+ node = start;
-+ goto enter; /* jump into the loop body with @start's spin lock held */
-+ }
-+ spin_unlock_jnode(start);
-+ }
-+ /*
-+ * In this loop we process all already prepped (RELOC or OVRWR) and dirtied again
-+ * nodes. The atom spin lock is not released until all dirty nodes processed or
-+ * not prepped node found in the atom dirty lists.
-+ *
-+ * Nodes under writeback are moved to the tail of the atom's writeback list,
-+ * the above-root znode is handed to jnode_make_wander_nolock(), and RELOC
-+ * nodes are queued on @fq and counted in *nr_queued. The first node that
-+ * matches none of these cases ends the loop and is returned with its jnode
-+ * spin lock still held; NULL is returned when find_first_dirty_jnode() finds
-+ * nothing more.
-+ */
-+ while ((node = find_first_dirty_jnode(atom, flags))) {
-+ spin_lock_jnode(node);
-+ enter:
-+ assert("zam-881", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-898", !JF_ISSET(node, JNODE_OVRWR));
-+
-+ if (JF_ISSET(node, JNODE_WRITEBACK)) {
-+ /* move node to the end of atom's writeback list */
-+ list_move_tail(&node->capture_link, ATOM_WB_LIST(atom));
-+
-+ /*
-+ * jnode is not necessarily on dirty list: if it was dirtied when
-+ * it was on flush queue - it does not get moved to dirty list
-+ */
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node),
-+ WB_LIST, 1));
-+
-+ } else if (jnode_is_znode(node)
-+ && znode_above_root(JZNODE(node))) {
-+ /*
-+ * A special case for znode-above-root. The above-root (fake)
-+ * znode is captured and dirtied when the tree height changes or
-+ * when the root node is relocated. This causes atoms to fuse so
-+ * that changes at the root are serialized. However, this node is
-+ * never flushed. This special case used to be in lock.c to
-+ * prevent the above-root node from ever being captured, but now
-+ * that it is captured we simply prevent it from flushing. The
-+ * log-writer code relies on this to properly log superblock
-+ * modifications of the tree height.
-+ */
-+ jnode_make_wander_nolock(node);
-+ } else if (JF_ISSET(node, JNODE_RELOC)) {
-+ queue_jnode(fq, node);
-+ ++(*nr_queued);
-+ } else
-+ break; /* leaves the loop without unlocking @node */
-+
-+ spin_unlock_jnode(node);
-+ }
-+ return node;
-+}
-+
-+/* Flush some nodes of current atom, usually slum, return -E_REPEAT if there are more nodes
-+ * to flush, return 0 if atom's dirty lists empty and keep current atom locked, return
-+ * other errors as they are.
-+ *
-+ * On entry *atom must be spin locked and must be the atom of the current
-+ * transaction (see the asserts). Except for the "return 0" case above and the
-+ * early return on reiser4_fq_by_atom() failure, the atom is unlocked before
-+ * returning.
-+ *
-+ * NOTE(review): @nr_to_write is unconditionally overwritten with LONG_MAX below,
-+ * so the caller's value is ignored. Also, the value returned by jnode_flush() is
-+ * overwritten by the result of reiser4_write_fq(), so only the latter decides
-+ * the return code here -- confirm that dropping the jnode_flush() result is
-+ * intended. */
-+int
-+flush_current_atom(int flags, long nr_to_write, long *nr_submitted,
-+ txn_atom ** atom, jnode *start)
-+{
-+ reiser4_super_info_data *sinfo = get_current_super_private();
-+ flush_queue_t *fq = NULL;
-+ jnode *node;
-+ int nr_queued;
-+ int ret;
-+
-+ assert("zam-889", atom != NULL && *atom != NULL);
-+ assert_spin_locked(&((*atom)->alock));
-+ assert("zam-892", get_current_context()->trans->atom == *atom);
-+
-+ nr_to_write = LONG_MAX;
-+ while (1) {
-+ ret = reiser4_fq_by_atom(*atom, &fq);
-+ if (ret != -E_REPEAT)
-+ break;
-+ *atom = get_current_atom_locked();
-+ }
-+ if (ret)
-+ return ret;
-+
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ /* parallel flushers limit */
-+ if (sinfo->tmgr.atom_max_flushers != 0) {
-+ while ((*atom)->nr_flushers >= sinfo->tmgr.atom_max_flushers) {
-+ /* A reiser4_atom_send_event() call is inside
-+ reiser4_fq_put_nolock() which is called when flush is
-+ finished and nr_flushers is decremented. */
-+ reiser4_atom_wait_event(*atom);
-+ *atom = get_current_atom_locked();
-+ }
-+ }
-+
-+ /* count ourself as a flusher */
-+ (*atom)->nr_flushers++;
-+
-+ writeout_mode_enable();
-+
-+ nr_queued = 0;
-+ node = find_flush_start_jnode(start, *atom, fq, &nr_queued, flags);
-+
-+ if (node == NULL) {
-+ if (nr_queued == 0) {
-+ (*atom)->nr_flushers--;
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(*atom);
-+ /* current atom remains locked */
-+ writeout_mode_disable();
-+ return 0;
-+ }
-+ spin_unlock_atom(*atom);
-+ } else {
-+ jref(node);
-+ BUG_ON((*atom)->super != node->tree->super);
-+ spin_unlock_atom(*atom);
-+ spin_unlock_jnode(node);
-+ BUG_ON(nr_to_write == 0);
-+ ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags);
-+ jput(node);
-+ }
-+
-+ ret =
-+ reiser4_write_fq(fq, nr_submitted,
-+ WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
-+
-+ *atom = get_current_atom_locked();
-+ (*atom)->nr_flushers--;
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(*atom);
-+ spin_unlock_atom(*atom);
-+
-+ writeout_mode_disable();
-+
-+ if (ret == 0)
-+ ret = -E_REPEAT;
-+
-+ return ret;
-+}
-+
-+/* REVERSE PARENT-FIRST RELOCATION POLICIES */
-+
-+/* The is-it-close-enough-to-its-preceder? test for relocation in the reverse
-+ parent-first relocate context. All that is known here is the preceder block
-+ (@pblk) and the node's block (@nblk). Because we are going in reverse, the
-+ preceder may itself still be relocated, so the block allocator cannot be
-+ asked for a closer block here; that is only done in the _forward_
-+ parent-first relocate context (not here).
-+
-+ Returns 0 (do not relocate) when the two blocks are at most
-+ flush.relocate_distance apart, 1 (relocate) otherwise. */
-+static int
-+reverse_relocate_if_close_enough(const reiser4_block_nr * pblk,
-+ const reiser4_block_nr * nblk)
-+{
-+ reiser4_block_nr gap;
-+
-+ assert("jmacd-7710", *pblk != 0 && *nblk != 0);
-+ assert("jmacd-7711", !reiser4_blocknr_is_fake(pblk));
-+ assert("jmacd-7712", !reiser4_blocknr_is_fake(nblk));
-+
-+ /* absolute distance between the two block numbers */
-+ if (*pblk > *nblk)
-+ gap = *pblk - *nblk;
-+ else
-+ gap = *nblk - *pblk;
-+
-+ /* relocate only when the block lies farther than the configured
-+ relocate distance from its preceder */
-+ return gap > get_current_super_private()->flush.relocate_distance;
-+}
-+
-+/* Relocation predicate for the reverse-parent-first context: should @node be
-+ relocated so that the flush expands by dirtying the parent level (and thus
-+ proceeds to flush that level)? In the forward parent-first direction (not
-+ here) relocation decisions are taken in allocate_znode() and
-+ extent_needs_allocation() instead.
-+
-+ Returns 1 when the node should be relocated and 0 when it should not. */
-+static int
-+reverse_relocate_test(jnode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos)
-+{
-+ reiser4_block_nr preceder_blk;
-+ reiser4_block_nr node_blk;
-+
-+ assert("jmacd-8989", !jnode_is_root(node));
-+
-+ /*
-+ * The only caller is reverse_relocate_check_dirty_parent(), and it calls
-+ * us only when the parent node is clean. A clean parent has a real
-+ * (i.e., not fake) block number, and so does the child, because
-+ * otherwise the parent would be dirty.
-+ */
-+
-+ /* New nodes are treated as if they are being relocated ... */
-+ if (JF_ISSET(node, JNODE_CREATED))
-+ return 1;
-+
-+ /* ... and so are leaves when the leaf-relocate policy is in force. */
-+ if (pos->leaf_relocate && jnode_get_level(node) == LEAF_LEVEL)
-+ return 1;
-+
-+ /* Find the preceder. FIXME(B): When the child is an unformatted, previously
-+ existing node, the coord may be leftmost even though the child is not the
-+ parent-first preceder of the parent. If the first dirty node appears somewhere
-+ in the middle of the first extent unit, this preceder calculation is wrong.
-+ Needs more logic in here. */
-+ preceder_blk = coord_is_leftmost_unit(parent_coord) ?
-+ *znode_get_block(parent_coord->node) : pos->preceder.blk;
-+ check_preceder(preceder_blk);
-+
-+ /* A zero preceder block means the preceder isn't allocated or isn't
-+ known: relocate. */
-+ if (preceder_blk == 0)
-+ return 1;
-+
-+ node_blk = *jnode_get_block(node);
-+
-+ /* child is unallocated, mark parent dirty */
-+ if (reiser4_blocknr_is_fake(&node_blk))
-+ return 1;
-+
-+ return reverse_relocate_if_close_enough(&preceder_blk, &node_blk);
-+}
-+
-+/* Make a reverse-parent-first relocation decision for @node via
-+ reverse_relocate_test() and, when the answer is yes, mark the parent dirty.
-+ Nothing is done when the parent is already dirty. Returns 0, or the negative
-+ value returned by reverse_relocate_test(). */
-+static int
-+reverse_relocate_check_dirty_parent(jnode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos)
-+{
-+ znode *parent = parent_coord->node;
-+ int verdict;
-+ int grabbed;
-+
-+ if (JF_ISSET(ZJNODE(parent), JNODE_DIRTY))
-+ return 0;
-+
-+ verdict = reverse_relocate_test(node, parent_coord, pos);
-+ if (verdict < 0)
-+ return verdict;
-+
-+ /* FIXME-ZAM
-+ if parent is already relocated - we do not want to grab space, right? */
-+ if (verdict != 1)
-+ return 0;
-+
-+ /* remember the grabbed-blocks mark so that whatever is grabbed below
-+ can be handed to grabbed2free_mark() afterwards */
-+ grabbed = get_current_context()->grabbed_blocks;
-+ if (reiser4_grab_space_force((__u64) 1, BA_RESERVED) != 0)
-+ reiser4_panic("umka-1250", "No space left during flush.");
-+
-+ assert("jmacd-18923", znode_is_write_locked(parent));
-+ znode_make_dirty(parent);
-+ grabbed2free_mark(grabbed);
-+
-+ return 0;
-+}
-+
-+/* INITIAL ALLOCATE ANCESTORS STEP (REVERSE PARENT-FIRST ALLOCATION BEFORE FORWARD
-+ PARENT-FIRST LOOP BEGINS) */
-+
-+/* Fetch the leftmost child of the unit at @coord into *@child. Returns the
-+ non-zero result of item_utmost_child() if it fails, the error encoded in
-+ *@child if that is an error pointer, and 0 otherwise. */
-+static int get_leftmost_child_of_unit(const coord_t * coord, jnode ** child)
-+{
-+ int ret = item_utmost_child(coord, LEFT_SIDE, child);
-+
-+ /* the lookup itself may succeed and still hand back an error pointer */
-+ if (ret == 0 && IS_ERR(*child))
-+ ret = PTR_ERR(*child);
-+
-+ return ret;
-+}
-+
-+/* This step occurs after the left- and right-scans are completed, before starting the
-+ forward parent-first traversal. Here we attempt to allocate ancestors of the starting
-+ flush point, which means continuing in the reverse parent-first direction to the
-+ parent, grandparent, and so on (as long as the child is a leftmost child). This
-+ routine calls a recursive process, alloc_one_ancestor, which does the real work,
-+ except there is special-case handling here for the first ancestor, which may be a twig.
-+ At each level (here and alloc_one_ancestor), we check for relocation and then, if
-+ the child is a leftmost child, repeat at the next level. On the way back down (the
-+ recursion), we allocate the ancestors in parent-first order.
-+
-+ Returns 0 immediately when the node locked by @pos is already flush-prepped.
-+ Otherwise returns the first non-zero result of the helpers called below, or
-+ the result of the last step (alloc_one_ancestor() for POS_ON_EPOINT,
-+ allocate_znode() for formatted positions). The local parent lock and load
-+ count are always released at "exit". */
-+static int alloc_pos_and_ancestors(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ lock_handle plock;
-+ load_count pload;
-+ coord_t pcoord;
-+
-+ if (znode_check_flushprepped(pos->lock.node))
-+ return 0;
-+
-+ coord_init_invalid(&pcoord, NULL);
-+ init_lh(&plock);
-+ init_load_count(&pload);
-+
-+ if (pos->state == POS_ON_EPOINT) {
-+ /* a special case for pos on twig level, where we already have
-+ a lock on parent node. */
-+ /* The parent may not be dirty, in which case we should decide
-+ whether to relocate the child now. If decision is made to
-+ relocate the child, the parent is marked dirty. */
-+ ret =
-+ reverse_relocate_check_dirty_parent(pos->child, &pos->coord,
-+ pos);
-+ if (ret)
-+ goto exit;
-+
-+ /* FIXME_NFQUCMPD: We only need to allocate the twig (if child
-+ is leftmost) and the leaf/child, so recursion is not needed.
-+ Levels above the twig will be allocated for
-+ write-optimization before the transaction commits. */
-+
-+ /* Do the recursive step, allocating zero or more of our
-+ * ancestors. */
-+ ret = alloc_one_ancestor(&pos->coord, pos);
-+
-+ } else {
-+ if (!znode_is_root(pos->lock.node)) {
-+ /* all formatted nodes except tree root */
-+ ret =
-+ reiser4_get_parent(&plock, pos->lock.node,
-+ ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto exit;
-+
-+ ret = incr_load_count_znode(&pload, plock.node);
-+ if (ret)
-+ goto exit;
-+
-+ ret =
-+ find_child_ptr(plock.node, pos->lock.node, &pcoord);
-+ if (ret)
-+ goto exit;
-+
-+ ret =
-+ reverse_relocate_check_dirty_parent(ZJNODE
-+ (pos->lock.
-+ node), &pcoord,
-+ pos);
-+ if (ret)
-+ goto exit;
-+
-+ ret = alloc_one_ancestor(&pcoord, pos);
-+ if (ret)
-+ goto exit;
-+ }
-+
-+ ret = allocate_znode(pos->lock.node, &pcoord, pos); /* for the tree root, pcoord is still the invalid coord set above */
-+ }
-+ exit:
-+ done_load_count(&pload);
-+ done_lh(&plock);
-+ return ret;
-+}
-+
-+/* This is the recursive step described in alloc_pos_and_ancestors, above. Ignoring the
-+ call to set_preceder, which is the next function described, this checks if the
-+ child is a leftmost child and returns if it is not. If the child is a leftmost child
-+ it checks for relocation, possibly dirtying the parent. Then it performs the recursive
-+ step.
-+
-+ @coord points into the ancestor being considered (coord->node). Returns 0 when
-+ the ascent stops early, otherwise the first non-zero result of a helper or the
-+ result of allocate_znode() for coord->node. */
-+static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ lock_handle alock;
-+ load_count aload;
-+ coord_t acoord;
-+
-+ /* As we ascend at the left-edge of the region to flush, take this opportunity at
-+ the twig level to find our parent-first preceder unless we have already set
-+ it. */
-+ if (pos->preceder.blk == 0) {
-+ ret = set_preceder(coord, pos);
-+ if (ret != 0)
-+ return ret;
-+ }
-+
-+ /* If the ancestor is clean or already allocated, or if the child is not a
-+ leftmost child, stop going up, even leaving coord->node not flushprepped. */
-+ if (znode_check_flushprepped(coord->node)
-+ || !coord_is_leftmost_unit(coord))
-+ return 0;
-+
-+ /* nothing is locked or loaded before this point, so the early returns
-+ above do not need the cleanup at the exit label */
-+ init_lh(&alock);
-+ init_load_count(&aload);
-+ coord_init_invalid(&acoord, NULL);
-+
-+ /* Only ascend to the next level if it is a leftmost child, but write-lock the
-+ parent in case we will relocate the child. */
-+ if (!znode_is_root(coord->node)) {
-+
-+ ret =
-+ jnode_lock_parent_coord(ZJNODE(coord->node), &acoord,
-+ &alock, &aload, ZNODE_WRITE_LOCK,
-+ 0);
-+ if (ret != 0) {
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ goto exit;
-+ }
-+
-+ ret =
-+ reverse_relocate_check_dirty_parent(ZJNODE(coord->node),
-+ &acoord, pos);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ /* Recursive call. */
-+ if (!znode_check_flushprepped(acoord.node)) {
-+ ret = alloc_one_ancestor(&acoord, pos);
-+ if (ret)
-+ goto exit;
-+ }
-+ }
-+
-+ /* Note: we call allocate with the parent write-locked (except at the root) in
-+ case we relocate the child, in which case it will modify the parent during this
-+ call. At the root acoord is still the invalid coord initialized above. */
-+ ret = allocate_znode(coord->node, &acoord, pos);
-+
-+ exit:
-+ done_load_count(&aload);
-+ done_lh(&alock);
-+ return ret;
-+}
-+
-+/* During the reverse parent-first alloc_pos_and_ancestors process described above there is
-+ a call to this function at the twig level. During alloc_pos_and_ancestors we may ask:
-+ should this node be relocated (in reverse parent-first context)? We repeat this
-+ process as long as the child is the leftmost child, eventually reaching an ancestor of
-+ the flush point that is not a leftmost child. The preceder of that ancestor, which is
-+ not a leftmost child, is actually on the leaf level. The preceder of that block is the
-+ left-neighbor of the flush point. The preceder of that block is the rightmost child of
-+ the twig on the left. So, when alloc_pos_and_ancestors passes upward through the twig
-+ level, it stops momentarily to remember the block of the rightmost child of the twig on
-+ the left and sets it to the flush_position's preceder_hint.
-+
-+ There is one other place where we may set the flush_position's preceder hint, which is
-+ during scan-left.
-+
-+ Works on a private copy of @coord_in. The result is stored in pos->preceder.blk.
-+ Returns 0, or a non-zero error from one of the helpers (see the note on the
-+ left-neighbor lookup below).
-+*/
-+static int set_preceder(const coord_t * coord_in, flush_pos_t * pos)
-+{
-+ int ret;
-+ coord_t coord;
-+ lock_handle left_lock;
-+ load_count left_load;
-+
-+ coord_dup(&coord, coord_in);
-+
-+ init_lh(&left_lock);
-+ init_load_count(&left_load);
-+
-+ /* FIXME(B): Same FIXME as in "Find the preceder" in reverse_relocate_test.
-+ coord_is_leftmost_unit is not the right test if the unformatted child is in the
-+ middle of the first extent unit. */
-+ if (!coord_is_leftmost_unit(&coord)) {
-+ /* the preceding unit lives in the same node */
-+ coord_prev_unit(&coord);
-+ } else {
-+ /* the preceding unit is the last unit of the left neighbor */
-+ ret =
-+ reiser4_get_left_neighbor(&left_lock, coord.node,
-+ ZNODE_READ_LOCK, GN_SAME_ATOM);
-+ if (ret) {
-+ /* If we fail for any reason it doesn't matter because the
-+ preceder is only a hint. We are low-priority at this point, so
-+ this must be the case. */
-+ /* NOTE(review): only the errors listed below are turned
-+ into success; any other error is returned to the
-+ caller as is. */
-+ if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
-+ ret == -ENOENT || ret == -EINVAL
-+ || ret == -E_DEADLOCK) {
-+ ret = 0;
-+ }
-+ goto exit;
-+ }
-+
-+ ret = incr_load_count_znode(&left_load, left_lock.node);
-+ if (ret)
-+ goto exit;
-+
-+ coord_init_last_unit(&coord, left_lock.node);
-+ }
-+
-+ /* record the block of the rightmost child of the chosen unit */
-+ ret =
-+ item_utmost_child_real_block(&coord, RIGHT_SIDE,
-+ &pos->preceder.blk);
-+ exit:
-+ check_preceder(pos->preceder.blk);
-+ done_load_count(&left_load);
-+ done_lh(&left_lock);
-+ return ret;
-+}
-+
-+/* MAIN SQUEEZE AND ALLOCATE LOOP (THREE BIG FUNCTIONS) */
-+
-+/* This procedure implements the outer loop of the flush algorithm. To put this in
-+ context, here is the general list of steps taken by the flush routine as a whole:
-+
-+ 1. Scan-left
-+ 2. Scan-right (maybe)
-+ 3. Allocate initial flush position and its ancestors
-+ 4. <handle extents>
-+ 5. <squeeze and allocate next position and its ancestors to-the-right,
-+ then update position to-the-right>
-+ 6. <repeat from #4 until flush is stopped>
-+
-+ This procedure implements the loop in steps 4 through 6 in the above listing.
-+
-+ Step 4: if the current flush position is an extent item (position on the twig level),
-+ it allocates the extent (allocate_extent_item_in_place) then shifts to the next
-+ coordinate. If the next coordinate's leftmost child needs flushprep, we will continue.
-+ If the next coordinate is an internal item, we descend back to the leaf level,
-+ otherwise we repeat step #4 (labeled ALLOC_EXTENTS below). If the "next coordinate"
-+ brings us past the end of the twig level, then we call
-+ reverse_relocate_end_of_twig to possibly dirty the next (right) twig, prior to
-+ step #5 which moves to the right.
-+
-+ Step 5: calls squalloc_changed_ancestors, which initiates a recursive call up the
-+ tree to allocate any ancestors of the next-right flush position that are not also
-+ ancestors of the current position. Those ancestors (in top-down order) are the next in
-+ parent-first order. We squeeze adjacent nodes on the way up until the right node and
-+ current node share the same parent, then allocate on the way back down. Finally, this
-+ step sets the flush position to the next-right node. Then repeat steps 4 and 5.
-+*/
-+
-+/* SQUEEZE CODE */
-+
-+/* Helper for squeeze_right_twig(): cut the content of the node @to lives in
-+   (to->node), from its first unit up to coord @to / key @to_key. @left is not
-+   consulted by the cut itself. Returns whatever cut_node_content() returns. */
-+static int squalloc_right_twig_cut(coord_t * to, reiser4_key * to_key,
-+				   znode * left)
-+{
-+	reiser4_key first_key;
-+	coord_t first;
-+
-+	/* the range to cut always begins at the very first unit of the node */
-+	coord_init_first_unit(&first, to->node);
-+	item_key_by_coord(&first, &first_key);
-+
-+	return cut_node_content(&first, to, &first_key, to_key, NULL);
-+}
-+
-+/* Copy as many of the leading extents as possible from @right to @left,
-+   allocating unallocated extents as they are copied. Returns
-+   SQUEEZE_TARGET_FULL or SQUEEZE_SOURCE_EMPTY when no more can be shifted. If
-+   the next item is an internal item it calls shift_one_internal_unit and may
-+   then return SUBTREE_MOVED.
-+
-+   A negative result of squalloc_extent() is returned to the caller unchanged;
-+   it is not replaced by one of the SQUEEZE_* codes. When the result is
-+   SQUEEZE_TARGET_FULL the prepped nodes are written out before returning, and
-+   a negative result of that write is returned instead. */
-+static int squeeze_right_twig(znode * left, znode * right, flush_pos_t * pos)
-+{
-+	int ret = SUBTREE_MOVED;
-+	coord_t coord;		/* used to iterate over items */
-+	reiser4_key stop_key;
-+
-+	assert("jmacd-2008", !node_is_empty(right));
-+	coord_init_first_unit(&coord, right);
-+
-+	/* FIXME: can be optimized to cut once */
-+	while (!node_is_empty(coord.node) && item_is_extent(&coord)) {
-+		ON_DEBUG(void *vp);
-+
-+		assert("vs-1468", coord_is_leftmost_unit(&coord));
-+		ON_DEBUG(vp = shift_check_prepare(left, coord.node));
-+
-+		/* stop_key is used to find what was copied and what to cut */
-+		stop_key = *reiser4_min_key();
-+		ret = squalloc_extent(left, &coord, pos, &stop_key);
-+		if (ret != SQUEEZE_CONTINUE) {
-+			ON_DEBUG(kfree(vp));
-+			if (ret < 0)
-+				/* hard failure: report it as is instead of
-+				   letting the code below overwrite it with
-+				   SQUEEZE_TARGET_FULL or with the result of
-+				   shift_one_internal_unit() */
-+				goto out;
-+			break;
-+		}
-+		assert("vs-1465", !keyeq(&stop_key, reiser4_min_key()));
-+
-+		/* Helper function to do the cutting. */
-+		set_key_offset(&stop_key, get_key_offset(&stop_key) - 1);
-+		check_me("vs-1466",
-+			 squalloc_right_twig_cut(&coord, &stop_key, left) == 0);
-+
-+		ON_DEBUG(shift_check(vp, left, coord.node));
-+	}
-+
-+	if (node_is_empty(coord.node))
-+		ret = SQUEEZE_SOURCE_EMPTY;
-+
-+	if (ret == SQUEEZE_TARGET_FULL) {
-+		goto out;
-+	}
-+
-+	if (node_is_empty(right)) {
-+		/* The whole right node was copied into @left. */
-+		assert("vs-464", ret == SQUEEZE_SOURCE_EMPTY);
-+		goto out;
-+	}
-+
-+	coord_init_first_unit(&coord, right);
-+
-+	if (!item_is_internal(&coord)) {
-+		/* we do not want to squeeze anything else to left neighbor because "slum"
-+		   is over */
-+		ret = SQUEEZE_TARGET_FULL;
-+		goto out;
-+	}
-+	assert("jmacd-433", item_is_internal(&coord));
-+
-+	/* Shift an internal unit. The child must be allocated before shifting any more
-+	   extents, so we stop here. */
-+	ret = shift_one_internal_unit(left, right);
-+
-+      out:
-+	assert("jmacd-8612", ret < 0 || ret == SQUEEZE_TARGET_FULL
-+	       || ret == SUBTREE_MOVED || ret == SQUEEZE_SOURCE_EMPTY);
-+
-+	if (ret == SQUEEZE_TARGET_FULL) {
-+		/* We submit prepped nodes here and expect that this @left twig
-+		 * will not be modified again during this jnode_flush() call. */
-+		int ret1;
-+
-+		/* NOTE: seems like io is done under long term locks. */
-+		ret1 = write_prepped_nodes(pos);
-+		if (ret1 < 0)
-+			return ret1;
-+	}
-+
-+	return ret;
-+}
-+
-+#if REISER4_DEBUG
-+/* Debug-only consistency check of the conversion state kept in @pos:
-+   pos->coord must be on an existing item; with chaining data present the
-+   convert plugin must be the plugin of that item and must provide ->convert();
-+   without chaining data pos->child must be NULL. */
-+static void item_convert_invariant(flush_pos_t * pos)
-+{
-+	item_plugin *iplug;
-+
-+	assert("edward-1225", coord_is_existing_item(&pos->coord));
-+
-+	if (!chaining_data_present(pos)) {
-+		assert("edward-1226", pos->child == NULL);
-+		return;
-+	}
-+
-+	iplug = item_convert_plug(pos);
-+	assert("edward-1000",
-+	       iplug == item_plugin_by_coord(&pos->coord));
-+	assert("edward-1001", iplug->f.convert != NULL);
-+}
-+#else
-+
-+/* non-debug builds: the check expands to nothing */
-+#define item_convert_invariant(pos) noop
-+
-+#endif
-+
-+/* Scan node items starting from the first one and apply for each
-+ item its flush ->convert() method (if any). This method may
-+ resize/kill the item so the tree will be changed.
-+
-+ Only leaf-level nodes are processed; for any other level the function
-+ returns 0 without touching the node. On normal completion the
-+ JNODE_CONVERTIBLE flag is cleared and the node is marked dirty. A non-zero
-+ result of ->convert() is returned as is (and trips assertion "edward-1004"
-+ when assertions are enabled).
-+*/
-+static int convert_node(flush_pos_t * pos, znode * node)
-+{
-+ int ret = 0;
-+ item_plugin *iplug;
-+
-+ assert("edward-304", pos != NULL);
-+ assert("edward-305", pos->child == NULL);
-+ assert("edward-475", znode_convertible(node));
-+ assert("edward-669", znode_is_wlocked(node));
-+ assert("edward-1210", !node_is_empty(node));
-+
-+ if (znode_get_level(node) != LEAF_LEVEL)
-+ /* unsupported */
-+ goto exit;
-+
-+ coord_init_first_unit(&pos->coord, node);
-+
-+ while (1) {
-+ ret = 0;
-+ coord_set_to_left(&pos->coord);
-+ item_convert_invariant(pos);
-+
-+ iplug = item_plugin_by_coord(&pos->coord);
-+ assert("edward-844", iplug != NULL);
-+
-+ /* items whose plugin has no ->convert() are skipped */
-+ if (iplug->f.convert) {
-+ ret = iplug->f.convert(pos);
-+ if (ret)
-+ goto exit;
-+ }
-+ assert("edward-307", pos->child == NULL);
-+
-+ /* a non-zero result of coord_next_item() is treated as
-+ "no more items in this node" */
-+ if (coord_next_item(&pos->coord)) {
-+ /* node is over */
-+
-+ if (!chaining_data_present(pos))
-+ /* finished this node */
-+ break;
-+ if (should_chain_next_node(pos)) {
-+ /* go to next node */
-+ move_chaining_data(pos, 0 /* to next node */ );
-+ break;
-+ }
-+ /* repeat this node */
-+ move_chaining_data(pos, 1 /* this node */ );
-+ continue;
-+ }
-+ /* Node is not over.
-+ Check if there is attached convert data.
-+ If so roll one item position back and repeat
-+ on this node
-+ */
-+ if (chaining_data_present(pos)) {
-+
-+ /* the next item belongs to a different plugin: reset
-+ the convert count */
-+ if (iplug != item_plugin_by_coord(&pos->coord))
-+ set_item_convert_count(pos, 0);
-+
-+ ret = coord_prev_item(&pos->coord);
-+ assert("edward-1003", !ret);
-+
-+ move_chaining_data(pos, 1 /* this node */ );
-+ }
-+ }
-+ JF_CLR(ZJNODE(node), JNODE_CONVERTIBLE);
-+ znode_make_dirty(node);
-+ exit:
-+ assert("edward-1004", !ret);
-+ return ret;
-+}
-+
-+/* Squeeze the right neighbor into @left. This is called after @left and its
-+   current children have been squeezed and allocated already; the job here is
-+   to move items from @right to @left. Both nodes must be on the same level
-+   and @right must not be empty.
-+
-+   On the twig level the work is done by squeeze_right_twig(), which allocates
-+   extents as it shifts them. Every other level goes through
-+   squeeze_right_non_twig().
-+
-+   When unit of internal item is moved, squeezing stops and SUBTREE_MOVED is
-+   returned. When all content of @right is squeezed, SQUEEZE_SOURCE_EMPTY is
-+   returned. If nothing can be moved into @left anymore, SQUEEZE_TARGET_FULL
-+   is returned. Negative values are errors.
-+*/
-+
-+static int squeeze_right_neighbor(flush_pos_t * pos, znode * left,
-+				  znode * right)
-+{
-+	int ret;
-+
-+	/* FIXME it is possible to see empty hasn't-heard-banshee node in a
-+	 * tree owing to error (for example, ENOSPC) in write */
-+	/* assert("jmacd-9321", !node_is_empty(left)); */
-+	assert("jmacd-9322", !node_is_empty(right));
-+	assert("jmacd-9323", znode_get_level(left) == znode_get_level(right));
-+
-+	if (znode_get_level(left) == TWIG_LEVEL) {
-+		/* Shift with extent allocating until either an internal item
-+		   is encountered or everything is shifted or no free space
-+		   left in @left */
-+		ret = squeeze_right_twig(left, right, pos);
-+	} else {
-+		/* All other levels can use shift_everything until we
-+		   implement per-item flush plugins. */
-+		ret = squeeze_right_non_twig(left, right);
-+	}
-+
-+	assert("jmacd-2011", (ret < 0 ||
-+			      ret == SQUEEZE_SOURCE_EMPTY
-+			      || ret == SQUEEZE_TARGET_FULL
-+			      || ret == SUBTREE_MOVED));
-+	return ret;
-+}
-+
-+/* Squeeze @right into the twig locked by @pos and reposition pos->coord:
-+   - negative result: error, pos->coord is left untouched;
-+   - positive result: pos->coord is put after the last item of the twig;
-+   - zero: pos->coord is put on the last unit of the twig.
-+   The result of squeeze_right_twig() is returned in every case. */
-+static int squeeze_right_twig_and_advance_coord(flush_pos_t * pos,
-+						znode * right)
-+{
-+	znode *twig = pos->lock.node;
-+	int status;
-+
-+	status = squeeze_right_twig(twig, right, pos);
-+
-+	if (status == 0) {
-+		coord_init_last_unit(&pos->coord, pos->lock.node);
-+		return 0;
-+	}
-+
-+	if (status > 0)
-+		coord_init_after_last_item(&pos->coord, pos->lock.node);
-+
-+	return status;
-+}
-+
-+/* defined further down; needed by the inline wrapper below */
-+static int squalloc_upper_levels(flush_pos_t *, znode *, znode *);
-+
-+/* Cheap front end for squalloc_upper_levels(): when @left and @right already
-+ * have the same parent there is nothing to do and 0 is returned, otherwise
-+ * the result of squalloc_upper_levels() is returned. */
-+static inline int check_parents_and_squalloc_upper_levels(flush_pos_t * pos,
-+							   znode * left,
-+							   znode * right)
-+{
-+	return znode_same_parents(left, right) ?
-+	    0 : squalloc_upper_levels(pos, left, right);
-+}
-+
-+/* Check whether the parent of the given @right node needs to be processed
-+ ((re)allocated) prior to processing of the child. If @left and @right do not
-+ share a parent, the parent of @right is after @left but before @right in
-+ parent-first order, so we have to (re)allocate it before @right gets
-+ (re)allocated.
-+
-+ Returns 0 or a negative error. NOTE(review): a zero result of
-+ squeeze_right_neighbor() also ends up as the return value 0 here. Both parent
-+ locks and load counts are released before returning. */
-+static int squalloc_upper_levels(flush_pos_t * pos, znode * left, znode * right)
-+{
-+ int ret;
-+
-+ lock_handle left_parent_lock;
-+ lock_handle right_parent_lock;
-+
-+ load_count left_parent_load;
-+ load_count right_parent_load;
-+
-+ init_lh(&left_parent_lock);
-+ init_lh(&right_parent_lock);
-+
-+ init_load_count(&left_parent_load);
-+ init_load_count(&right_parent_load);
-+
-+ ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ /* Check for same parents */
-+ /* (ret is 0 here, so this reports success) */
-+ if (left_parent_lock.node == right_parent_lock.node)
-+ goto out;
-+
-+ if (znode_check_flushprepped(right_parent_lock.node)) {
-+ /* Keep parent-first order. In the order, the right parent node stands
-+ before the @right node. If it is already allocated, we set the
-+ preceder (next block search start point) to its block number, @right
-+ node should be allocated after it.
-+
-+ However, preceder is set only if the right parent is on twig level.
-+ The explanation is the following: new branch nodes are allocated over
-+ already allocated children while the tree grows, it is difficult to
-+ keep tree ordered, we assume that only leaves and twigs are correctly
-+ allocated. So, only twigs are used as a preceder for allocating of the
-+ rest of the slum. */
-+ if (znode_get_level(right_parent_lock.node) == TWIG_LEVEL) {
-+ pos->preceder.blk =
-+ *znode_get_block(right_parent_lock.node);
-+ check_preceder(pos->preceder.blk);
-+ }
-+ goto out;
-+ }
-+
-+ ret = incr_load_count_znode(&left_parent_load, left_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&right_parent_load, right_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret =
-+ squeeze_right_neighbor(pos, left_parent_lock.node,
-+ right_parent_lock.node);
-+ /* We stop on error. We also stop if some items/units were shifted
-+ * (ret == 0) and thus @right changed its parent: in that case we do not
-+ * have to process the right_parent node prior to processing of @right.
-+ * Positive return values say that no shifting happened because of the
-+ * "empty source" or "target full" conditions. */
-+ if (ret <= 0)
-+ goto out;
-+
-+ /* parent(@left) and parent(@right) may have different parents also. We
-+ * do a recursive call for checking that. */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos, left_parent_lock.node,
-+ right_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ /* allocate znode when going down */
-+ ret = lock_parent_and_allocate_znode(right_parent_lock.node, pos);
-+
-+ out:
-+ done_load_count(&left_parent_load);
-+ done_load_count(&right_parent_load);
-+
-+ done_lh(&left_parent_lock);
-+ done_lh(&right_parent_lock);
-+
-+ return ret;
-+}
-+
-+/* Report the "flushprepped" status of the leftmost child of the unit at
-+ * @coord. A child that is not found (NULL) counts as flushprepped and yields
-+ * 1. A failure to look the child up is returned as is. */
-+static int leftmost_child_of_unit_check_flushprepped(const coord_t * coord)
-+{
-+	jnode *leftmost;
-+	int status;
-+
-+	status = get_leftmost_child_of_unit(coord, &leftmost);
-+	if (status)
-+		return status;
-+
-+	/* We consider not existing child as a node which slum processing
-+	   should not continue to. Not cached node is clean, so it is
-+	   flushprepped. */
-+	if (leftmost == NULL)
-+		return 1;
-+
-+	status = jnode_check_flushprepped(leftmost);
-+	/* drop the reference on the child obtained by the lookup */
-+	jput(leftmost);
-+
-+	return status;
-+}
-+
-+/* (Re)allocate @node, looking up its parent automatically: the parent is
-+ * write-locked and loaded, the pointer to @node is located in it, and
-+ * allocate_znode() is called with that coord. @node must be write-locked by
-+ * the caller. The first failing step determines the return value; the parent
-+ * lock and load count are always released before returning. */
-+static int lock_parent_and_allocate_znode(znode * node, flush_pos_t * pos)
-+{
-+	coord_t child_ptr;
-+	lock_handle plock;
-+	load_count pload;
-+	int err;
-+
-+	assert("zam-851", znode_is_write_locked(node));
-+
-+	init_lh(&plock);
-+	init_load_count(&pload);
-+
-+	/* each step runs only if all the previous ones succeeded */
-+	err = reiser4_get_parent(&plock, node, ZNODE_WRITE_LOCK);
-+	if (err == 0)
-+		err = incr_load_count_znode(&pload, plock.node);
-+	if (err == 0)
-+		err = find_child_ptr(plock.node, node, &child_ptr);
-+	if (err == 0)
-+		err = allocate_znode(node, &child_ptr, pos);
-+
-+	done_load_count(&pload);
-+	done_lh(&plock);
-+	return err;
-+}
-+
-+/* Process formatted nodes to the right of the current flush position on its
-+ * own level. This is the common worker of handle_pos_on_leaf() and
-+ * handle_pos_on_internal(). Each right neighbor in the slum is converted (if
-+ * needed), squeezed into the current node, its parents are processed, it is
-+ * (re)allocated, and the flush position is moved onto it.
-+ *
-+ * Returns the value that ended the loop: a non-zero result of
-+ * neighbor_in_slum() or of one of the helpers, or whatever ret holds when
-+ * the position is stopped. */
-+static int handle_pos_on_formatted(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle right_lock;
-+ load_count right_load;
-+
-+ init_lh(&right_lock);
-+ init_load_count(&right_load);
-+
-+ if (should_convert_node(pos, pos->lock.node)) {
-+ ret = convert_node(pos, pos->lock.node);
-+ /* nothing is locked or loaded through right_lock/right_load
-+ yet, so a plain return is enough here */
-+ if (ret)
-+ return ret;
-+ }
-+
-+ while (1) {
-+ /* NOTE(review): right_lock.node is read here before
-+ neighbor_in_slum() has taken the lock; presumably it is NULL
-+ at this point - confirm should_convert_next_node() copes
-+ with that. */
-+ ret =
-+ neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE,
-+ ZNODE_WRITE_LOCK,
-+ !should_convert_next_node(pos,
-+ right_lock.
-+ node));
-+ if (ret)
-+ break;
-+
-+ /* we don't prep(allocate) nodes for flushing twice. This can be suboptimal, or it
-+ * can be optimal. For now we choose to live with the risk that it will
-+ * be suboptimal because it would be quite complex to code it to be
-+ * smarter. */
-+ if (znode_check_flushprepped(right_lock.node)
-+ && !znode_convertible(right_lock.node)) {
-+ assert("edward-1005",
-+ !should_convert_next_node(pos, right_lock.node));
-+ pos_stop(pos);
-+ break;
-+ }
-+
-+ ret = incr_load_count_znode(&right_load, right_lock.node);
-+ if (ret)
-+ break;
-+
-+ if (should_convert_node(pos, right_lock.node)) {
-+ ret = convert_node(pos, right_lock.node);
-+ if (ret)
-+ break;
-+ if (node_is_empty(right_lock.node)) {
-+ /* node became empty after converting, repeat */
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ continue;
-+ }
-+ }
-+
-+ /* squeeze _before_ going upward. */
-+ ret =
-+ squeeze_right_neighbor(pos, pos->lock.node,
-+ right_lock.node);
-+ if (ret < 0)
-+ break;
-+
-+ if (znode_check_flushprepped(right_lock.node)) {
-+ if (should_convert_next_node(pos, right_lock.node)) {
-+ /* in spite of flushprepped status of the node,
-+ its right slum neighbor should be converted */
-+ assert("edward-953", convert_data(pos));
-+ assert("edward-954", item_convert_data(pos));
-+
-+ if (node_is_empty(right_lock.node)) {
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ } else
-+ move_flush_pos(pos, &right_lock,
-+ &right_load, NULL);
-+ continue;
-+ }
-+ pos_stop(pos);
-+ break;
-+ }
-+
-+ if (node_is_empty(right_lock.node)) {
-+ /* repeat if right node was squeezed completely */
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ continue;
-+ }
-+
-+ /* parent(right_lock.node) has to be processed before
-+ * (right_lock.node) due to "parent-first" allocation order. */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos, pos->lock.node,
-+ right_lock.node);
-+ if (ret)
-+ break;
-+ /* (re)allocate _after_ going upward */
-+ ret = lock_parent_and_allocate_znode(right_lock.node, pos);
-+ if (ret)
-+ break;
-+
-+ if (should_terminate_squalloc(pos)) {
-+ set_item_convert_count(pos, 0);
-+ break;
-+ }
-+
-+ /* advance the flush position to the right neighbor */
-+ move_flush_pos(pos, &right_lock, &right_load, NULL);
-+
-+ ret = rapid_flush(pos);
-+ if (ret)
-+ break;
-+ }
-+
-+ assert("edward-1006", !convert_data(pos) || !item_convert_data(pos));
-+
-+ /* release whatever the last iteration left locked/loaded */
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+
-+ /* This function indicates via pos whether to stop or go to twig or continue on current
-+ * level. */
-+ return ret;
-+
-+}
-+
-+/* POS_ON_LEAF handler: run handle_pos_on_formatted() for the leaf level.
-+ * When it reports -E_NO_NEIGHBOR the state is switched to POS_TO_TWIG and 0
-+ * is returned; any other result is passed through unchanged. */
-+static int handle_pos_on_leaf(flush_pos_t * pos)
-+{
-+	int result;
-+
-+	assert("zam-845", pos->state == POS_ON_LEAF);
-+
-+	result = handle_pos_on_formatted(pos);
-+	if (result != -E_NO_NEIGHBOR)
-+		return result;
-+
-+	/* cannot get right neighbor, go process extents. */
-+	pos->state = POS_TO_TWIG;
-+	return 0;
-+}
-+
-+/* POS_ON_INTERNAL handler: process slum on level > 1 by delegating to
-+ * handle_pos_on_formatted() and returning its result. */
-+static int handle_pos_on_internal(flush_pos_t * pos)
-+{
-+	int result;
-+
-+	assert("zam-850", pos->state == POS_ON_INTERNAL);
-+
-+	result = handle_pos_on_formatted(pos);
-+	return result;
-+}
-+
-+/* Decide whether squalloc should stop before processing the extent at
-+ * pos->coord. Non-zero means stop (or an error from the child lookup).
-+ * As a side effect pos->pos_in_unit is set, and a pending pos->child is
-+ * released and cleared. */
-+static int squalloc_extent_should_stop(flush_pos_t * pos)
-+{
-+	int prepped;
-+
-+	assert("zam-869", item_is_extent(&pos->coord));
-+
-+	if (pos->child == NULL) {
-+		/* no explicit starting jnode: start at the beginning of the
-+		 * unit */
-+		pos->pos_in_unit = 0;
-+		if (extent_is_unallocated(&pos->coord))
-+			return 0;
-+
-+		return leftmost_child_of_unit_check_flushprepped(&pos->coord);
-+	}
-+
-+	/* pos->child is a jnode handle_pos_on_extent() should start with in
-+	 * stead of the first child of the first extent unit. */
-+	assert("vs-1383", jnode_is_unformatted(pos->child));
-+	prepped = jnode_check_flushprepped(pos->child);
-+
-+	/* offset of the child inside the extent unit */
-+	pos->pos_in_unit =
-+	    jnode_get_index(pos->child) -
-+	    extent_unit_index(&pos->coord);
-+	assert("vs-1470",
-+	       pos->pos_in_unit < extent_unit_width(&pos->coord));
-+	assert("nikita-3434",
-+	       ergo(extent_is_unallocated(&pos->coord),
-+		    pos->pos_in_unit == 0));
-+
-+	jput(pos->child);
-+	pos->child = NULL;
-+
-+	return prepped;
-+}
-+
-+/* Handle the case when regular reiser4 tree (znodes connected one to its
-+ * neighbors by sibling pointers) is interrupted on leaf level by one or more
-+ * unformatted nodes. By having a lock on twig level and use extent code
-+ * routines to process unformatted nodes we swim around an irregular part of
-+ * reiser4 tree.
-+ *
-+ * POS_ON_EPOINT handler. Returns the non-zero result of
-+ * squalloc_extent_should_stop() after stopping the position, otherwise 0 with
-+ * pos->state set to POS_END_OF_TWIG, POS_TO_LEAF or POS_INVALID. */
-+static int handle_pos_on_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+
-+ assert("zam-844", pos->state == POS_ON_EPOINT);
-+ assert("zam-843", item_is_extent(&pos->coord));
-+
-+ /* We decide should we continue slum processing with current extent
-+ unit: if leftmost child of current extent unit is flushprepped
-+ (i.e. clean or already processed by flush) we stop squalloc(). There
-+ is a fast check for unallocated extents which we assume contain all
-+ not flushprepped nodes. */
-+ /* FIXME: Here we implement simple check, we are only looking on the
-+ leftmost child. */
-+ ret = squalloc_extent_should_stop(pos);
-+ if (ret != 0) {
-+ pos_stop(pos);
-+ return ret;
-+ }
-+
-+ /* allocate extent units one by one while the position stays valid and
-+ on an extent item */
-+ while (pos_valid(pos) && coord_is_existing_unit(&pos->coord)
-+ && item_is_extent(&pos->coord)) {
-+ ret = reiser4_alloc_extent(pos);
-+ /* NOTE(review): a non-zero result only ends the loop; every
-+ path below returns 0, so the value is not propagated -
-+ confirm this is intended. */
-+ if (ret) {
-+ break;
-+ }
-+ coord_next_unit(&pos->coord);
-+ }
-+
-+ if (coord_is_after_rightmost(&pos->coord)) {
-+ pos->state = POS_END_OF_TWIG;
-+ return 0;
-+ }
-+ if (item_is_internal(&pos->coord)) {
-+ pos->state = POS_TO_LEAF;
-+ return 0;
-+ }
-+
-+ assert("zam-860", item_is_extent(&pos->coord));
-+
-+ /* "slum" is over */
-+ pos->state = POS_INVALID;
-+ return 0;
-+}
-+
-+/* When we are about to return the flush position from twig to leaf level we
-+ * can process the right twig node or move position to the leaf. This
-+ * processes the right twig if it is possible and jumps to leaf level if not.
-+ *
-+ * POS_END_OF_TWIG handler. The right twig lock and load count, and the
-+ * reference on @child if one was obtained, are dropped at the out label. */
-+static int handle_pos_end_of_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle right_lock;
-+ load_count right_load;
-+ coord_t at_right;
-+ jnode *child = NULL;
-+
-+ assert("zam-848", pos->state == POS_END_OF_TWIG);
-+ assert("zam-849", coord_is_after_rightmost(&pos->coord));
-+
-+ init_lh(&right_lock);
-+ init_load_count(&right_load);
-+
-+ /* We get a lock on the right twig node even it is not dirty because
-+ * slum continues or discontinues on leaf level not on next twig. This
-+ * lock on the right twig is needed for getting its leftmost child. */
-+ ret =
-+ reiser4_get_right_neighbor(&right_lock, pos->lock.node,
-+ ZNODE_WRITE_LOCK, GN_SAME_ATOM);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&right_load, right_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ /* right twig could be not dirty */
-+ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) {
-+ /* If right twig node is dirty we always attempt to squeeze it
-+ * content to the left... */
-+ became_dirty:
-+ ret =
-+ squeeze_right_twig_and_advance_coord(pos, right_lock.node);
-+ if (ret <= 0) {
-+ /* pos->coord is on internal item, go to leaf level, or
-+ * we have an error which will be caught in squalloc() */
-+ pos->state = POS_TO_LEAF;
-+ goto out;
-+ }
-+
-+ /* If right twig was squeezed completely we have to re-lock
-+ * right twig. now it is done through the top-level squalloc
-+ * routine. */
-+ if (node_is_empty(right_lock.node))
-+ goto out;
-+
-+ /* ... and prep it if it is not yet prepped */
-+ if (!znode_check_flushprepped(right_lock.node)) {
-+ /* As usual, process parent before ... */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos,
-+ pos->lock.
-+ node,
-+ right_lock.
-+ node);
-+ if (ret)
-+ goto out;
-+
-+ /* ... processing the child */
-+ ret =
-+ lock_parent_and_allocate_znode(right_lock.node,
-+ pos);
-+ if (ret)
-+ goto out;
-+ }
-+ } else {
-+ coord_init_first_unit(&at_right, right_lock.node);
-+
-+ /* check first child of next twig, should we continue there ? */
-+ /* NOTE(review): get_leftmost_child_of_unit() can fail with an
-+ error taken from IS_ERR(*child); make sure child is not
-+ left holding that error pointer, because the out path
-+ jput()s any non-NULL child. */
-+ ret = get_leftmost_child_of_unit(&at_right, &child);
-+ if (ret || child == NULL || jnode_check_flushprepped(child)) {
-+ pos_stop(pos);
-+ goto out;
-+ }
-+
-+ /* check clean twig for possible relocation */
-+ if (!znode_check_flushprepped(right_lock.node)) {
-+ ret =
-+ reverse_relocate_check_dirty_parent(child,
-+ &at_right, pos);
-+ if (ret)
-+ goto out;
-+ /* the check above may have dirtied the twig: handle it
-+ like a twig that was dirty from the start */
-+ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY))
-+ goto became_dirty;
-+ }
-+ }
-+
-+ assert("zam-875", znode_check_flushprepped(right_lock.node));
-+
-+ /* Update the preceder by a block number of just processed right twig
-+ * node. The code above could miss the preceder updating because
-+ * allocate_znode() could not be called for this node. */
-+ pos->preceder.blk = *znode_get_block(right_lock.node);
-+ check_preceder(pos->preceder.blk);
-+
-+ coord_init_first_unit(&at_right, right_lock.node);
-+ assert("zam-868", coord_is_existing_unit(&at_right));
-+
-+ /* continue on the right twig: with extents if its first item is an
-+ extent, otherwise by descending to the leaf level */
-+ pos->state = item_is_extent(&at_right) ? POS_ON_EPOINT : POS_TO_LEAF;
-+ move_flush_pos(pos, &right_lock, &right_load, &at_right);
-+
-+ out:
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+
-+ if (child)
-+ jput(child);
-+
-+ return ret;
-+}
-+
-+/* Move the pos->lock to leaf node pointed by pos->coord, check should we
-+ * continue there.
-+ *
-+ * POS_TO_LEAF handler. On success pos->state becomes POS_ON_LEAF, or
-+ * POS_INVALID when the child is already flushprepped or turns out to be
-+ * empty. When there is no child the position is stopped and 0 is returned.
-+ * The reference on the child is dropped before returning from the out
-+ * label. */
-+static int handle_pos_to_leaf(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle child_lock;
-+ load_count child_load;
-+ jnode *child;
-+
-+ assert("zam-846", pos->state == POS_TO_LEAF);
-+ assert("zam-847", item_is_internal(&pos->coord));
-+
-+ init_lh(&child_lock);
-+ init_load_count(&child_load);
-+
-+ /* nothing is locked or loaded yet, so the two early returns below do
-+ not go through the out label */
-+ ret = get_leftmost_child_of_unit(&pos->coord, &child);
-+ if (ret)
-+ return ret;
-+ if (child == NULL) {
-+ pos_stop(pos);
-+ return 0;
-+ }
-+
-+ if (jnode_check_flushprepped(child)) {
-+ pos->state = POS_INVALID;
-+ goto out;
-+ }
-+
-+ ret =
-+ longterm_lock_znode(&child_lock, JZNODE(child), ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&child_load, JZNODE(child));
-+ if (ret)
-+ goto out;
-+
-+ /* pos->coord is the pointer to the child inside the parent */
-+ ret = allocate_znode(JZNODE(child), &pos->coord, pos);
-+ if (ret)
-+ goto out;
-+
-+ /* move flush position to leaf level */
-+ pos->state = POS_ON_LEAF;
-+ move_flush_pos(pos, &child_lock, &child_load, NULL);
-+
-+ /* an empty leaf is deleted and the position is invalidated */
-+ if (node_is_empty(JZNODE(child))) {
-+ ret = delete_empty_node(JZNODE(child));
-+ pos->state = POS_INVALID;
-+ }
-+ out:
-+ done_load_count(&child_load);
-+ done_lh(&child_lock);
-+ jput(child);
-+
-+ return ret;
-+}
-+
-+/* Move pos, together with pos->lock, from the leaf level to the upper (twig)
-+ * level.
-+ *
-+ * POS_TO_TWIG handler. The parent is write-locked and loaded, and the item
-+ * following the pointer to the current leaf decides the next state:
-+ * POS_END_OF_TWIG past the last item, POS_ON_EPOINT on an extent, otherwise
-+ * the position is stopped. Returns 0 or the first non-zero helper result. */
-+static int handle_pos_to_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+
-+ lock_handle parent_lock;
-+ load_count parent_load;
-+ coord_t pcoord;
-+
-+ assert("zam-852", pos->state == POS_TO_TWIG);
-+
-+ init_lh(&parent_lock);
-+ init_load_count(&parent_load);
-+
-+ ret =
-+ reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&parent_load, parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret = find_child_ptr(parent_lock.node, pos->lock.node, &pcoord);
-+ if (ret)
-+ goto out;
-+
-+ /* step from the pointer to our leaf to the item right after it */
-+ assert("zam-870", item_is_internal(&pcoord));
-+ coord_next_item(&pcoord);
-+
-+ if (coord_is_after_rightmost(&pcoord))
-+ pos->state = POS_END_OF_TWIG;
-+ else if (item_is_extent(&pcoord))
-+ pos->state = POS_ON_EPOINT;
-+ else {
-+ /* Here we understand that getting -E_NO_NEIGHBOR in
-+ * handle_pos_on_leaf() was because of just a reaching edge of
-+ * slum */
-+ pos_stop(pos);
-+ goto out;
-+ }
-+
-+ move_flush_pos(pos, &parent_lock, &parent_load, &pcoord);
-+
-+ out:
-+ done_load_count(&parent_load);
-+ done_lh(&parent_lock);
-+
-+ return ret;
-+}
-+
-+typedef int (*pos_state_handle_t) (flush_pos_t *);
-+static pos_state_handle_t flush_pos_handlers[] = {
-+ /* process formatted nodes on leaf level, keep lock on a leaf node */
-+ [POS_ON_LEAF] = handle_pos_on_leaf,
-+ /* process unformatted nodes, keep lock on twig node, pos->coord points to extent currently
-+ * being processed */
-+ [POS_ON_EPOINT] = handle_pos_on_twig,
-+ /* move a lock from leaf node to its parent for further processing of unformatted nodes */
-+ [POS_TO_TWIG] = handle_pos_to_twig,
-+ /* move a lock from twig to leaf level when a processing of unformatted nodes finishes,
-+ * pos->coord points to the leaf node we jump to */
-+ [POS_TO_LEAF] = handle_pos_to_leaf,
-+ /* after processing last extent in the twig node, attempting to shift items from the twigs
-+ * right neighbor and process them while shifting */
-+ [POS_END_OF_TWIG] = handle_pos_end_of_twig,
-+ /* process formatted nodes on internal level, keep lock on an internal node */
-+ [POS_ON_INTERNAL] = handle_pos_on_internal
-+};
-+
-+/* Advance flush position horizontally, prepare for flushing ((re)allocate, squeeze,
-+ * encrypt) nodes and their ancestors in "parent-first" order */
-+static int squalloc(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+
-+ /* maybe needs to be made a case statement with handle_pos_on_leaf as first case, for
-+ * greater CPU efficiency? Measure and see.... -Hans */
-+ while (pos_valid(pos)) {
-+ ret = flush_pos_handlers[pos->state] (pos);
-+ if (ret < 0)
-+ break;
-+
-+ ret = rapid_flush(pos);
-+ if (ret)
-+ break;
-+ }
-+
-+ /* any positive value or -E_NO_NEIGHBOR are legal return codes for handle_pos*
-+ routines, -E_NO_NEIGHBOR means that slum edge was reached */
-+ if (ret > 0 || ret == -E_NO_NEIGHBOR)
-+ ret = 0;
-+
-+ return ret;
-+}
-+
-+static void update_ldkey(znode * node)
-+{
-+ reiser4_key ldkey;
-+
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ if (node_is_empty(node))
-+ return;
-+
-+ znode_set_ld_key(node, leftmost_key_in_node(node, &ldkey));
-+}
-+
-+/* this is to be called after calling of shift node's method to shift data from @right to
-+ @left. It sets left delimiting keys of @left and @right to keys of first items of @left
-+ and @right correspondingly and sets right delimiting key of @left to first key of @right */
-+static void update_znode_dkeys(znode * left, znode * right)
-+{
-+ assert_rw_write_locked(&(znode_get_tree(right)->dk_lock));
-+ assert("vs-1629", (znode_is_write_locked(left) &&
-+ znode_is_write_locked(right)));
-+
-+ /* we need to update left delimiting of left if it was empty before shift */
-+ update_ldkey(left);
-+ update_ldkey(right);
-+ if (node_is_empty(right))
-+ znode_set_rd_key(left, znode_get_rd_key(right));
-+ else
-+ znode_set_rd_key(left, znode_get_ld_key(right));
-+}
-+
-+/* try to shift everything from @right to @left. If everything was shifted -
-+ @right is removed from the tree. Result is the number of bytes shifted. */
-+static int
-+shift_everything_left(znode * right, znode * left, carry_level * todo)
-+{
-+ coord_t from;
-+ node_plugin *nplug;
-+ carry_plugin_info info;
-+
-+ coord_init_after_last_item(&from, right);
-+
-+ nplug = node_plugin_by_node(right);
-+ info.doing = NULL;
-+ info.todo = todo;
-+ return nplug->shift(&from, left, SHIFT_LEFT,
-+ 1 /* delete @right if it becomes empty */ ,
-+ 1
-+ /* move coord @from to node @left if everything will be shifted */
-+ ,
-+ &info);
-+}
-+
-+/* Shift as much as possible from @right to @left using the memcpy-optimized
-+ shift_everything_left. @left and @right are formatted neighboring nodes on
-+ leaf level. */
-+static int squeeze_right_non_twig(znode * left, znode * right)
-+{
-+ int ret;
-+ carry_pool *pool;
-+ carry_level *todo;
-+
-+ assert("nikita-2246", znode_get_level(left) == znode_get_level(right));
-+
-+ if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) ||
-+ !JF_ISSET(ZJNODE(right), JNODE_DIRTY))
-+ return SQUEEZE_TARGET_FULL;
-+
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ todo = (carry_level *) (pool + 1);
-+ init_carry_level(todo, pool);
-+
-+ ret = shift_everything_left(right, left, todo);
-+ if (ret > 0) {
-+ /* something was shifted */
-+ reiser4_tree *tree;
-+ __u64 grabbed;
-+
-+ znode_make_dirty(left);
-+ znode_make_dirty(right);
-+
-+ /* update delimiting keys of nodes which participated in
-+ shift. FIXME: it would be better to have this in shift
-+ node's operation. But it can not be done there. Nobody
-+ remembers why, though */
-+ tree = znode_get_tree(left);
-+ write_lock_dk(tree);
-+ update_znode_dkeys(left, right);
-+ write_unlock_dk(tree);
-+
-+ /* Carry is called to update delimiting key and, maybe, to remove empty
-+ node. */
-+ grabbed = get_current_context()->grabbed_blocks;
-+ ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
-+ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */
-+ ret = reiser4_carry(todo, NULL /* previous level */ );
-+ grabbed2free_mark(grabbed);
-+ } else {
-+ /* Shifting impossible, we return appropriate result code */
-+ ret =
-+ node_is_empty(right) ? SQUEEZE_SOURCE_EMPTY :
-+ SQUEEZE_TARGET_FULL;
-+ }
-+
-+ done_carry_pool(pool);
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+static int sibling_link_is_ok(const znode *left, const znode *right)
-+{
-+ int result;
-+
-+ read_lock_tree(znode_get_tree(left));
-+ result = (left->right == right && left == right->left);
-+ read_unlock_tree(znode_get_tree(left));
-+ return result;
-+}
-+#endif
-+
-+/* Shift first unit of first item if it is an internal one. Return
-+ SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return
-+ SUBTREE_MOVED. */
-+static int shift_one_internal_unit(znode * left, znode * right)
-+{
-+ int ret;
-+ carry_pool *pool;
-+ carry_level *todo;
-+ coord_t *coord;
-+ carry_plugin_info *info;
-+ int size, moved;
-+
-+ assert("nikita-2247", znode_get_level(left) == znode_get_level(right));
-+ assert("nikita-2435", znode_is_write_locked(left));
-+ assert("nikita-2436", znode_is_write_locked(right));
-+ assert("nikita-2434", sibling_link_is_ok(left, right));
-+
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
-+ sizeof(*coord) + sizeof(*info)
-+#if REISER4_DEBUG
-+ + sizeof(*coord) + 2 * sizeof(reiser4_key)
-+#endif
-+ );
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ todo = (carry_level *) (pool + 1);
-+ init_carry_level(todo, pool);
-+
-+ coord = (coord_t *) (todo + 3);
-+ coord_init_first_unit(coord, right);
-+ info = (carry_plugin_info *) (coord + 1);
-+
-+#if REISER4_DEBUG
-+ if (!node_is_empty(left)) {
-+ coord_t *last;
-+ reiser4_key *right_key;
-+ reiser4_key *left_key;
-+
-+ last = (coord_t *) (info + 1);
-+ right_key = (reiser4_key *) (last + 1);
-+ left_key = right_key + 1;
-+ coord_init_last_unit(last, left);
-+
-+ assert("nikita-2463",
-+ keyle(item_key_by_coord(last, left_key),
-+ item_key_by_coord(coord, right_key)));
-+ }
-+#endif
-+
-+ assert("jmacd-2007", item_is_internal(coord));
-+
-+ size = item_length_by_coord(coord);
-+ info->todo = todo;
-+ info->doing = NULL;
-+
-+ ret = node_plugin_by_node(left)->shift(coord, left, SHIFT_LEFT,
-+ 1
-+ /* delete @right if it becomes empty */
-+ ,
-+ 0
-+ /* do not move coord @coord to node @left */
-+ ,
-+ info);
-+
-+ /* If shift returns positive, then we shifted the item. */
-+ assert("vs-423", ret <= 0 || size == ret);
-+ moved = (ret > 0);
-+
-+ if (moved) {
-+ /* something was moved */
-+ reiser4_tree *tree;
-+ int grabbed;
-+
-+ znode_make_dirty(left);
-+ znode_make_dirty(right);
-+ tree = znode_get_tree(left);
-+ write_lock_dk(tree);
-+ update_znode_dkeys(left, right);
-+ write_unlock_dk(tree);
-+
-+ /* reserve space for delimiting keys after shifting */
-+ grabbed = get_current_context()->grabbed_blocks;
-+ ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
-+ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */
-+
-+ ret = reiser4_carry(todo, NULL /* previous level */ );
-+ grabbed2free_mark(grabbed);
-+ }
-+
-+ done_carry_pool(pool);
-+
-+ if (ret != 0) {
-+ /* Shift or carry operation failed. */
-+ assert("jmacd-7325", ret < 0);
-+ return ret;
-+ }
-+
-+ return moved ? SUBTREE_MOVED : SQUEEZE_TARGET_FULL;
-+}
-+
-+/* Make the final relocate/wander decision during forward parent-first squalloc for a
-+ znode. For unformatted nodes this is done in plugin/item/extent.c:extent_needs_allocation(). */
-+static int
-+allocate_znode_loaded(znode * node,
-+ const coord_t * parent_coord, flush_pos_t * pos)
-+{
-+ int ret;
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+ /* FIXME(D): We have the node write-locked and should have checked for !
-+ allocated() somewhere before reaching this point, but there can be a race, so
-+ this assertion is bogus. */
-+ assert("jmacd-7987", !jnode_check_flushprepped(ZJNODE(node)));
-+ assert("jmacd-7988", znode_is_write_locked(node));
-+ assert("jmacd-7989", coord_is_invalid(parent_coord)
-+ || znode_is_write_locked(parent_coord->node));
-+
-+ if (ZF_ISSET(node, JNODE_REPACK) || ZF_ISSET(node, JNODE_CREATED) ||
-+ znode_is_root(node) ||
-+ /* We have enough nodes to relocate no matter what. */
-+ (pos->leaf_relocate != 0 && znode_get_level(node) == LEAF_LEVEL)) {
-+ /* No need to decide with new nodes, they are treated the same as
-+ relocate. If the root node is dirty, relocate. */
-+ if (pos->preceder.blk == 0) {
-+ /* preceder is unknown and we have decided to relocate node --
-+ using of default value for search start is better than search
-+ from block #0. */
-+ get_blocknr_hint_default(&pos->preceder.blk);
-+ check_preceder(pos->preceder.blk);
-+ }
-+
-+ goto best_reloc;
-+
-+ } else if (pos->preceder.blk == 0) {
-+ /* If we don't know the preceder, leave it where it is. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+ /* Make a decision based on block distance. */
-+ reiser4_block_nr dist;
-+ reiser4_block_nr nblk = *znode_get_block(node);
-+
-+ assert("jmacd-6172", !reiser4_blocknr_is_fake(&nblk));
-+ assert("jmacd-6173", !reiser4_blocknr_is_fake(&pos->preceder.blk));
-+ assert("jmacd-6174", pos->preceder.blk != 0);
-+
-+ if (pos->preceder.blk == nblk - 1) {
-+ /* Ideal. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+
-+ dist =
-+ (nblk <
-+ pos->preceder.blk) ? (pos->preceder.blk -
-+ nblk) : (nblk -
-+ pos->preceder.blk);
-+
-+ /* See if we can find a closer block (forward direction only). */
-+ pos->preceder.max_dist =
-+ min((reiser4_block_nr) sbinfo->flush.
-+ relocate_distance, dist);
-+ pos->preceder.level = znode_get_level(node);
-+
-+ ret = allocate_znode_update(node, parent_coord, pos);
-+
-+ pos->preceder.max_dist = 0;
-+
-+ if (ret && (ret != -ENOSPC))
-+ return ret;
-+
-+ if (ret == 0) {
-+ /* Got a better allocation. */
-+ znode_make_reloc(node, pos->fq);
-+ } else if (dist < sbinfo->flush.relocate_distance) {
-+ /* The present allocation is good enough. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+ /* Otherwise, try to relocate to the best position. */
-+ best_reloc:
-+ ret =
-+ allocate_znode_update(node, parent_coord,
-+ pos);
-+ if (ret != 0)
-+ return ret;
-+
-+ /* set JNODE_RELOC bit _after_ node gets allocated */
-+ znode_make_reloc(node, pos->fq);
-+ }
-+ }
-+ }
-+
-+ /* This is the new preceder. */
-+ pos->preceder.blk = *znode_get_block(node);
-+ check_preceder(pos->preceder.blk);
-+ pos->alloc_cnt += 1;
-+
-+ assert("jmacd-4277", !reiser4_blocknr_is_fake(&pos->preceder.blk));
-+
-+ return 0;
-+}
-+
-+static int
-+allocate_znode(znode * node, const coord_t * parent_coord, flush_pos_t * pos)
-+{
-+ /*
-+ * perform znode allocation with znode pinned in memory to avoid races
-+ * with asynchronous emergency flush (which plays with
-+ * JNODE_FLUSH_RESERVED bit).
-+ */
-+ return WITH_DATA(node, allocate_znode_loaded(node, parent_coord, pos));
-+}
-+
-+/* A subroutine of allocate_znode, this is called first to see if there is a close
-+ position to relocate to. It may return ENOSPC if there is no close position. If there
-+ is no close position it may not relocate. This takes care of updating the parent node
-+ with the relocated block address. */
-+static int
-+allocate_znode_update(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos)
-+{
-+ int ret;
-+ reiser4_block_nr blk;
-+ lock_handle uber_lock;
-+ int flush_reserved_used = 0;
-+ int grabbed;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ init_lh(&uber_lock);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ grabbed = ctx->grabbed_blocks;
-+
-+ /* discard e-flush allocation */
-+ ret = zload(node);
-+ if (ret)
-+ return ret;
-+
-+ if (ZF_ISSET(node, JNODE_CREATED)) {
-+ assert("zam-816", reiser4_blocknr_is_fake(znode_get_block(node)));
-+ pos->preceder.block_stage = BLOCK_UNALLOCATED;
-+ } else {
-+ pos->preceder.block_stage = BLOCK_GRABBED;
-+
-+ /* The disk space for relocating the @node is already reserved in "flush reserved"
-+ * counter if @node is leaf, otherwise we grab space using BA_RESERVED (means grab
-+ * space from whole disk not from only 95%). */
-+ if (znode_get_level(node) == LEAF_LEVEL) {
-+ /*
-+ * earlier (during do_jnode_make_dirty()) we decided
-+ * that @node can possibly go into overwrite set and
-+ * reserved block for its wandering location.
-+ */
-+ txn_atom *atom = get_current_atom_locked();
-+ assert("nikita-3449",
-+ ZF_ISSET(node, JNODE_FLUSH_RESERVED));
-+ flush_reserved2grabbed(atom, (__u64) 1);
-+ spin_unlock_atom(atom);
-+ /*
-+ * we are trying to move node into relocate
-+ * set. Allocation of relocated position "uses"
-+ * reserved block.
-+ */
-+ ZF_CLR(node, JNODE_FLUSH_RESERVED);
-+ flush_reserved_used = 1;
-+ } else {
-+ ret = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
-+ if (ret != 0)
-+ goto exit;
-+ }
-+ }
-+
-+ /* We may do not use 5% of reserved disk space here and flush will not pack tightly. */
-+ ret = reiser4_alloc_block(&pos->preceder, &blk,
-+ BA_FORMATTED | BA_PERMANENT);
-+ if (ret)
-+ goto exit;
-+
-+ if (!ZF_ISSET(node, JNODE_CREATED) &&
-+ (ret =
-+ reiser4_dealloc_block(znode_get_block(node), 0,
-+ BA_DEFER | BA_FORMATTED)))
-+ goto exit;
-+
-+ if (likely(!znode_is_root(node))) {
-+ item_plugin *iplug;
-+
-+ iplug = item_plugin_by_coord(parent_coord);
-+ assert("nikita-2954", iplug->f.update != NULL);
-+ iplug->f.update(parent_coord, &blk);
-+
-+ znode_make_dirty(parent_coord->node);
-+
-+ } else {
-+ reiser4_tree *tree = znode_get_tree(node);
-+ znode *uber;
-+
-+ /* We take a longterm lock on the fake node in order to change
-+ the root block number. This may cause atom fusion. */
-+ ret = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
-+ &uber_lock);
-+ /* The fake node cannot be deleted, and we must have priority
-+ here, and may not be confused with ENOSPC. */
-+ assert("jmacd-74412",
-+ ret != -EINVAL && ret != -E_DEADLOCK && ret != -ENOSPC);
-+
-+ if (ret)
-+ goto exit;
-+
-+ uber = uber_lock.node;
-+
-+ write_lock_tree(tree);
-+ tree->root_block = blk;
-+ write_unlock_tree(tree);
-+
-+ znode_make_dirty(uber);
-+ }
-+
-+ ret = znode_rehash(node, &blk);
-+ exit:
-+ if (ret) {
-+ /* Get flush reserved block back if something fails, because
-+ * callers assume that on error block wasn't relocated and its
-+ * flush reserved block wasn't used. */
-+ if (flush_reserved_used) {
-+ /*
-+ * ok, we failed to move node into relocate
-+ * set. Restore status quo.
-+ */
-+ grabbed2flush_reserved((__u64) 1);
-+ ZF_SET(node, JNODE_FLUSH_RESERVED);
-+ }
-+ }
-+ zrelse(node);
-+ done_lh(&uber_lock);
-+ grabbed2free_mark(grabbed);
-+ return ret;
-+}
-+
-+/* JNODE INTERFACE */
-+
-+/* Lock a node (if formatted) and then get its parent locked, set the child's
-+ coordinate in the parent. If the child is the root node, the above_root
-+ znode is returned but the coord is not set. This function may cause atom
-+ fusion, but it is only used for read locks (at this point) and therefore
-+ fusion only occurs when the parent is already dirty. */
-+/* Hans adds this note: remember to ask how expensive this operation is vs. storing parent
-+ pointer in jnodes. */
-+static int
-+jnode_lock_parent_coord(jnode * node,
-+ coord_t * coord,
-+ lock_handle * parent_lh,
-+ load_count * parent_zh,
-+ znode_lock_mode parent_mode, int try)
-+{
-+ int ret;
-+
-+ assert("edward-53", jnode_is_unformatted(node) || jnode_is_znode(node));
-+ assert("edward-54", jnode_is_unformatted(node)
-+ || znode_is_any_locked(JZNODE(node)));
-+
-+ if (!jnode_is_znode(node)) {
-+ reiser4_key key;
-+ tree_level stop_level = TWIG_LEVEL;
-+ lookup_bias bias = FIND_EXACT;
-+
-+ assert("edward-168", !(jnode_get_type(node) == JNODE_BITMAP));
-+
-+ /* The case when node is not znode, but can have parent coord
-+ (unformatted node, node which represents cluster page,
-+ etc..). Generate a key for the appropriate entry, search
-+ in the tree using coord_by_key, which handles locking for
-+ us. */
-+
-+ /*
-+ * nothing is locked at this moment, so, nothing prevents
-+ * concurrent truncate from removing jnode from inode. To
-+ * prevent this spin-lock jnode. jnode can be truncated just
-+ * after call to the jnode_build_key(), but this is ok,
-+ * because coord_by_key() will just fail to find appropriate
-+ * extent.
-+ */
-+ spin_lock_jnode(node);
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ jnode_build_key(node, &key);
-+ ret = 0;
-+ } else
-+ ret = RETERR(-ENOENT);
-+ spin_unlock_jnode(node);
-+
-+ if (ret != 0)
-+ return ret;
-+
-+ if (jnode_is_cluster_page(node))
-+ stop_level = LEAF_LEVEL;
-+
-+ assert("jmacd-1812", coord != NULL);
-+
-+ ret = coord_by_key(jnode_get_tree(node), &key, coord, parent_lh,
-+ parent_mode, bias, stop_level, stop_level,
-+ CBK_UNIQUE, NULL /*ra_info */ );
-+ switch (ret) {
-+ case CBK_COORD_NOTFOUND:
-+ assert("edward-1038",
-+ ergo(jnode_is_cluster_page(node),
-+ JF_ISSET(node, JNODE_HEARD_BANSHEE)));
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE))
-+ warning("nikita-3177", "Parent not found");
-+ return ret;
-+ case CBK_COORD_FOUND:
-+ if (coord->between != AT_UNIT) {
-+ /* FIXME: comment needed */
-+ done_lh(parent_lh);
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ warning("nikita-3178",
-+ "Found but not happy: %i",
-+ coord->between);
-+ }
-+ return RETERR(-ENOENT);
-+ }
-+ ret = incr_load_count_znode(parent_zh, parent_lh->node);
-+ if (ret != 0)
-+ return ret;
-+ /* if (jnode_is_cluster_page(node)) {
-+ races with write() are possible
-+ check_child_cluster (parent_lh->node);
-+ }
-+ */
-+ break;
-+ default:
-+ return ret;
-+ }
-+
-+ } else {
-+ int flags;
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ /* Formatted node case: */
-+ assert("jmacd-2061", !znode_is_root(z));
-+
-+ flags = GN_ALLOW_NOT_CONNECTED;
-+ if (try)
-+ flags |= GN_TRY_LOCK;
-+
-+ ret =
-+ reiser4_get_parent_flags(parent_lh, z, parent_mode, flags);
-+ if (ret != 0)
-+ /* -E_REPEAT is ok here, it is handled by the caller. */
-+ return ret;
-+
-+ /* Make the child's position "hint" up-to-date. (Unless above
-+ root, which caller must check.) */
-+ if (coord != NULL) {
-+
-+ ret = incr_load_count_znode(parent_zh, parent_lh->node);
-+ if (ret != 0) {
-+ warning("jmacd-976812386",
-+ "incr_load_count_znode failed: %d",
-+ ret);
-+ return ret;
-+ }
-+
-+ ret = find_child_ptr(parent_lh->node, z, coord);
-+ if (ret != 0) {
-+ warning("jmacd-976812",
-+ "find_child_ptr failed: %d", ret);
-+ return ret;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* Get the (locked) next neighbor of a znode which is dirty and a member of the same atom.
-+ If there is no next neighbor or the neighbor is not in memory or if there is a
-+ neighbor but it is not dirty or not in the same atom, -E_NO_NEIGHBOR is returned.
-+ In some cases the slum may include nodes which are not dirty, if so @check_dirty should be 0 */
-+static int neighbor_in_slum(znode * node, /* starting point */
-+ lock_handle * lock, /* lock on starting point */
-+ sideof side, /* left or right direction we seek the next node in */
-+ znode_lock_mode mode, /* kind of lock we want */
-+ int check_dirty)
-+{ /* true if the neighbor should be dirty */
-+ int ret;
-+
-+ assert("jmacd-6334", znode_is_connected(node));
-+
-+ ret =
-+ reiser4_get_neighbor(lock, node, mode,
-+ GN_SAME_ATOM | (side ==
-+ LEFT_SIDE ? GN_GO_LEFT : 0));
-+
-+ if (ret) {
-+ /* May return -ENOENT or -E_NO_NEIGHBOR. */
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ if (ret == -ENOENT) {
-+ ret = RETERR(-E_NO_NEIGHBOR);
-+ }
-+
-+ return ret;
-+ }
-+ if (!check_dirty)
-+ return 0;
-+ /* Check dirty bit of locked znode, no races here */
-+ if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY))
-+ return 0;
-+
-+ done_lh(lock);
-+ return RETERR(-E_NO_NEIGHBOR);
-+}
-+
-+/* Return true if two znodes have the same parent. This is called with both nodes
-+ write-locked (for squeezing) so no tree lock is needed. */
-+static int znode_same_parents(znode * a, znode * b)
-+{
-+ int result;
-+
-+ assert("jmacd-7011", znode_is_write_locked(a));
-+ assert("jmacd-7012", znode_is_write_locked(b));
-+
-+ /* We lock the whole tree for this check.... I really don't like whole tree
-+ * locks... -Hans */
-+ read_lock_tree(znode_get_tree(a));
-+ result = (znode_parent(a) == znode_parent(b));
-+ read_unlock_tree(znode_get_tree(a));
-+ return result;
-+}
-+
-+/* FLUSH SCAN */
-+
-+/* Initialize the flush_scan data structure. */
-+static void scan_init(flush_scan * scan)
-+{
-+ memset(scan, 0, sizeof(*scan));
-+ init_lh(&scan->node_lock);
-+ init_lh(&scan->parent_lock);
-+ init_load_count(&scan->parent_load);
-+ init_load_count(&scan->node_load);
-+ coord_init_invalid(&scan->parent_coord, NULL);
-+}
-+
-+/* Release any resources held by the flush scan, e.g., release locks, free memory, etc. */
-+static void scan_done(flush_scan * scan)
-+{
-+ done_load_count(&scan->node_load);
-+ if (scan->node != NULL) {
-+ jput(scan->node);
-+ scan->node = NULL;
-+ }
-+ done_load_count(&scan->parent_load);
-+ done_lh(&scan->parent_lock);
-+ done_lh(&scan->node_lock);
-+}
-+
-+/* Returns true if flush scanning is finished. */
-+int reiser4_scan_finished(flush_scan * scan)
-+{
-+ return scan->stop || (scan->direction == RIGHT_SIDE &&
-+ scan->count >= scan->max_count);
-+}
-+
-+/* Return true if the scan should continue to the @tonode. True if the node meets the
-+ same_slum_check condition. If not, deref the "left" node and stop the scan. */
-+int reiser4_scan_goto(flush_scan * scan, jnode * tonode)
-+{
-+ int go = same_slum_check(scan->node, tonode, 1, 0);
-+
-+ if (!go) {
-+ scan->stop = 1;
-+ jput(tonode);
-+ }
-+
-+ return go;
-+}
-+
-+/* Set the current scan->node, refcount it, increment count by the @add_count (number to
-+ count, e.g., skipped unallocated nodes), deref previous current, and copy the current
-+ parent coordinate. */
-+int
-+scan_set_current(flush_scan * scan, jnode * node, unsigned add_count,
-+ const coord_t * parent)
-+{
-+ /* Release the old references, take the new reference. */
-+ done_load_count(&scan->node_load);
-+
-+ if (scan->node != NULL) {
-+ jput(scan->node);
-+ }
-+ scan->node = node;
-+ scan->count += add_count;
-+
-+ /* This next stmt is somewhat inefficient. The reiser4_scan_extent() code could
-+ delay this update step until it finishes and update the parent_coord only once.
-+ It did that before, but there was a bug and this was the easiest way to make it
-+ correct. */
-+ if (parent != NULL) {
-+ coord_dup(&scan->parent_coord, parent);
-+ }
-+
-+ /* Failure may happen at the incr_load_count call, but the caller can assume the reference
-+ is safely taken. */
-+ return incr_load_count_jnode(&scan->node_load, node);
-+}
-+
-+/* Return true if scanning in the leftward direction. */
-+int reiser4_scanning_left(flush_scan * scan)
-+{
-+ return scan->direction == LEFT_SIDE;
-+}
-+
-+/* Performs leftward scanning starting from either kind of node. Counts the starting
-+ node. The right-scan object is passed in for the left-scan in order to copy the parent
-+ of an unformatted starting position. This way we avoid searching for the unformatted
-+ node's parent when scanning in each direction. If we search for the parent once it is
-+ set in both scan objects. The limit parameter tells flush-scan when to stop.
-+
-+ Rapid scanning is used only during scan_left, where we are interested in finding the
-+ 'leftpoint' where we begin flushing. We are interested in stopping at the left child
-+ of a twig that does not have a dirty left neighbor. THIS IS A SPECIAL CASE. The
-+ problem is finding a way to flush only those nodes without unallocated children, and it
-+ is difficult to solve in the bottom-up flushing algorithm we are currently using. The
-+ problem can be solved by scanning left at every level as we go upward, but this would
-+ basically bring us back to using a top-down allocation strategy, which we already tried
-+ (see BK history from May 2002), and has a different set of problems. The top-down
-+ strategy makes avoiding unallocated children easier, but makes it difficult to
-+ propertly flush dirty children with clean parents that would otherwise stop the
-+ top-down flush, only later to dirty the parent once the children are flushed. So we
-+ solve the problem in the bottom-up algorithm with a special case for twigs and leaves
-+ only.
-+
-+ The first step in solving the problem is this rapid leftward scan. After we determine
-+ that there are at least enough nodes counted to qualify for FLUSH_RELOCATE_THRESHOLD we
-+ are no longer interested in the exact count, we are only interested in finding a the
-+ best place to start the flush. We could choose one of two possibilities:
-+
-+ 1. Stop at the leftmost child (of a twig) that does not have a dirty left neighbor.
-+ This requires checking one leaf per rapid-scan twig
-+
-+ 2. Stop at the leftmost child (of a twig) where there are no dirty children of the twig
-+ to the left. This requires checking possibly all of the in-memory children of each
-+ twig during the rapid scan.
-+
-+ For now we implement the first policy.
-+*/
-+static int
-+scan_left(flush_scan * scan, flush_scan * right, jnode * node, unsigned limit)
-+{
-+ int ret = 0;
-+
-+ scan->max_count = limit;
-+ scan->direction = LEFT_SIDE;
-+
-+ ret = scan_set_current(scan, jref(node), 1, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ ret = scan_common(scan, right);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ /* Before rapid scanning, we need a lock on scan->node so that we can get its
-+ parent, only if formatted. */
-+ if (jnode_is_znode(scan->node)) {
-+ ret = longterm_lock_znode(&scan->node_lock, JZNODE(scan->node),
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
-+ }
-+
-+ /* Rapid_scan would go here (with limit set to FLUSH_RELOCATE_THRESHOLD). */
-+ return ret;
-+}
-+
-+/* Performs rightward scanning... Does not count the starting node. The limit parameter
-+ is described in scan_left. If the starting node is unformatted then the
-+ parent_coord was already set during scan_left. The rapid_after parameter is not used
-+ during right-scanning.
-+
-+ scan_right is only called if the scan_left operation does not count at least
-+ FLUSH_RELOCATE_THRESHOLD nodes for flushing. Otherwise, the limit parameter is set to
-+ the difference between scan-left's count and FLUSH_RELOCATE_THRESHOLD, meaning
-+ scan-right counts as high as FLUSH_RELOCATE_THRESHOLD and then stops. */
-+static int scan_right(flush_scan * scan, jnode * node, unsigned limit)
-+{
-+ int ret;
-+
-+ scan->max_count = limit;
-+ scan->direction = RIGHT_SIDE;
-+
-+ ret = scan_set_current(scan, jref(node), 0, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ return scan_common(scan, NULL);
-+}
-+
-+/* Common code to perform left or right scanning. */
-+static int scan_common(flush_scan * scan, flush_scan * other)
-+{
-+ int ret;
-+
-+ assert("nikita-2376", scan->node != NULL);
-+ assert("edward-54", jnode_is_unformatted(scan->node)
-+ || jnode_is_znode(scan->node));
-+
-+ /* Special case for starting at an unformatted node. Optimization: we only want
-+ to search for the parent (which requires a tree traversal) once. Obviously, we
-+ shouldn't have to call it once for the left scan and once for the right scan.
-+ For this reason, if we search for the parent during scan-left we then duplicate
-+ the coord/lock/load into the scan-right object. */
-+ if (jnode_is_unformatted(scan->node)) {
-+ ret = scan_unformatted(scan, other);
-+ if (ret != 0)
-+ return ret;
-+ }
-+ /* This loop expects to start at a formatted position and performs chaining of
-+ formatted regions */
-+ while (!reiser4_scan_finished(scan)) {
-+
-+ ret = scan_formatted(scan);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int scan_unformatted(flush_scan * scan, flush_scan * other)
-+{
-+ int ret = 0;
-+ int try = 0;
-+
-+ if (!coord_is_invalid(&scan->parent_coord))
-+ goto scan;
-+
-+ /* set parent coord from */
-+ if (!jnode_is_unformatted(scan->node)) {
-+ /* formatted position */
-+
-+ lock_handle lock;
-+ assert("edward-301", jnode_is_znode(scan->node));
-+ init_lh(&lock);
-+
-+ /*
-+ * when flush starts from unformatted node, first thing it
-+ * does is tree traversal to find formatted parent of starting
-+ * node. This parent is then kept lock across scans to the
-+ * left and to the right. This means that during scan to the
-+ * left we cannot take left-ward lock, because this is
-+ * dead-lock prone. So, if we are scanning to the left and
-+ * there is already lock held by this thread,
-+ * jnode_lock_parent_coord() should use try-lock.
-+ */
-+ try = reiser4_scanning_left(scan)
-+ && !lock_stack_isclean(get_current_lock_stack());
-+ /* Need the node locked to get the parent lock, We have to
-+ take write lock since there is at least one call path
-+ where this znode is already write-locked by us. */
-+ ret =
-+ longterm_lock_znode(&lock, JZNODE(scan->node),
-+ ZNODE_WRITE_LOCK,
-+ reiser4_scanning_left(scan) ?
-+ ZNODE_LOCK_LOPRI :
-+ ZNODE_LOCK_HIPRI);
-+ if (ret != 0)
-+ /* EINVAL or E_DEADLOCK here mean... try again! At this point we've
-+ scanned too far and can't back out, just start over. */
-+ return ret;
-+
-+ ret = jnode_lock_parent_coord(scan->node,
-+ &scan->parent_coord,
-+ &scan->parent_lock,
-+ &scan->parent_load,
-+ ZNODE_WRITE_LOCK, try);
-+
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ done_lh(&lock);
-+ if (ret == -E_REPEAT) {
-+ scan->stop = 1;
-+ return 0;
-+ }
-+ if (ret)
-+ return ret;
-+
-+ } else {
-+ /* unformatted position */
-+
-+ ret =
-+ jnode_lock_parent_coord(scan->node, &scan->parent_coord,
-+ &scan->parent_lock,
-+ &scan->parent_load,
-+ ZNODE_WRITE_LOCK, try);
-+
-+ if (IS_CBKERR(ret))
-+ return ret;
-+
-+ if (ret == CBK_COORD_NOTFOUND)
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ return ret;
-+
-+ /* parent was found */
-+ assert("jmacd-8661", other != NULL);
-+ /* Duplicate the reference into the other flush_scan. */
-+ coord_dup(&other->parent_coord, &scan->parent_coord);
-+ copy_lh(&other->parent_lock, &scan->parent_lock);
-+ copy_load_count(&other->parent_load, &scan->parent_load);
-+ }
-+ scan:
-+ return scan_by_coord(scan);
-+}
-+
-+/* Performs left- or rightward scanning starting from a formatted node. Follow left
-+ pointers under tree lock as long as:
-+
-+ - node->left/right is non-NULL
-+ - node->left/right is connected, dirty
-+ - node->left/right belongs to the same atom
-+ - scan has not reached maximum count
-+*/
-+static int scan_formatted(flush_scan * scan)
-+{
-+ int ret;
-+ znode *neighbor = NULL;
-+
-+ assert("jmacd-1401", !reiser4_scan_finished(scan));
-+
-+ do {
-+ znode *node = JZNODE(scan->node);
-+
-+ /* Node should be connected, but if not stop the scan. */
-+ if (!znode_is_connected(node)) {
-+ scan->stop = 1;
-+ break;
-+ }
-+
-+ /* Lock the tree, check-for and reference the next sibling. */
-+ read_lock_tree(znode_get_tree(node));
-+
-+ /* It may be that a node is inserted or removed between a node and its
-+ left sibling while the tree lock is released, but the flush-scan count
-+ does not need to be precise. Thus, we release the tree lock as soon as
-+ we get the neighboring node. */
-+ neighbor =
-+ reiser4_scanning_left(scan) ? node->left : node->right;
-+ if (neighbor != NULL) {
-+ zref(neighbor);
-+ }
-+
-+ read_unlock_tree(znode_get_tree(node));
-+
-+ /* If neighbor is NULL at the leaf level, need to check for an unformatted
-+ sibling using the parent--break in any case. */
-+ if (neighbor == NULL) {
-+ break;
-+ }
-+
-+ /* Check the condition for going left, break if it is not met. This also
-+ releases (jputs) the neighbor if false. */
-+ if (!reiser4_scan_goto(scan, ZJNODE(neighbor))) {
-+ break;
-+ }
-+
-+ /* Advance the flush_scan state to the left, repeat. */
-+ ret = scan_set_current(scan, ZJNODE(neighbor), 1, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ } while (!reiser4_scan_finished(scan));
-+
-+ /* If neighbor is NULL then we reached the end of a formatted region, or else the
-+ sibling is not in memory; now check for an extent to the left (as long as we
-+ are at LEAF_LEVEL). */
-+ if (neighbor != NULL || jnode_get_level(scan->node) != LEAF_LEVEL
-+ || reiser4_scan_finished(scan)) {
-+ scan->stop = 1;
-+ return 0;
-+ }
-+ /* Otherwise, calls scan_by_coord for the right(left)most item of the
-+ left(right) neighbor on the parent level, then possibly continue. */
-+
-+ coord_init_invalid(&scan->parent_coord, NULL);
-+ return scan_unformatted(scan, NULL);
-+}
-+
-+/* NOTE-EDWARD:
-+ This scans adjacent items of the same type and calls scan flush plugin for each one.
-+ Performs left(right)ward scanning starting from a (possibly) unformatted node. If we start
-+ from unformatted node, then we continue only if the next neighbor is also unformatted.
-+ When called from scan_formatted, we skip first iteration (to make sure that
-+ right(left)most item of the left(right) neighbor on the parent level is of the same
-+ type and set appropriate coord). */
-+static int scan_by_coord(flush_scan * scan)
-+{
-+ int ret = 0;
-+ int scan_this_coord;
-+ lock_handle next_lock;
-+ load_count next_load;
-+ coord_t next_coord;
-+ jnode *child;
-+ item_plugin *iplug;
-+
-+ init_lh(&next_lock);
-+ init_load_count(&next_load);
-+ scan_this_coord = (jnode_is_unformatted(scan->node) ? 1 : 0);
-+
-+ /* set initial item id */
-+ iplug = item_plugin_by_coord(&scan->parent_coord);
-+
-+ for (; !reiser4_scan_finished(scan); scan_this_coord = 1) {
-+ if (scan_this_coord) {
-+ /* Here we expect that the unit is scannable. It may not be so due
-+ * to a race with extent->tail conversion. */
-+ if (iplug->f.scan == NULL) {
-+ scan->stop = 1;
-+ ret = -E_REPEAT;
-+ /* skip the check at the end. */
-+ goto race;
-+ }
-+
-+ ret = iplug->f.scan(scan);
-+ if (ret != 0)
-+ goto exit;
-+
-+ if (reiser4_scan_finished(scan)) {
-+ checkchild(scan);
-+ break;
-+ }
-+ } else {
-+ /* the same race against truncate as above is possible
-+ * here, it seems */
-+
-+ /* NOTE-JMACD: In this case, apply the same end-of-node logic but don't scan
-+ the first coordinate. */
-+ assert("jmacd-1231",
-+ item_is_internal(&scan->parent_coord));
-+ }
-+
-+ if (iplug->f.utmost_child == NULL
-+ || znode_get_level(scan->parent_coord.node) != TWIG_LEVEL) {
-+ /* stop this coord and continue on parent level */
-+ ret =
-+ scan_set_current(scan,
-+ ZJNODE(zref
-+ (scan->parent_coord.node)),
-+ 1, NULL);
-+ if (ret != 0)
-+ goto exit;
-+ break;
-+ }
-+
-+ /* Either way, the invariant is that scan->parent_coord is set to the
-+ parent of scan->node. Now get the next unit. */
-+ coord_dup(&next_coord, &scan->parent_coord);
-+ coord_sideof_unit(&next_coord, scan->direction);
-+
-+ /* If off-the-end of the twig, try the next twig. */
-+ if (coord_is_after_sideof_unit(&next_coord, scan->direction)) {
-+ /* We take the write lock because we may start flushing from this
-+ * coordinate. */
-+ ret =
-+ neighbor_in_slum(next_coord.node, &next_lock,
-+ scan->direction, ZNODE_WRITE_LOCK,
-+ 1 /* check dirty */ );
-+ if (ret == -E_NO_NEIGHBOR) {
-+ scan->stop = 1;
-+ ret = 0;
-+ break;
-+ }
-+
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ ret = incr_load_count_znode(&next_load, next_lock.node);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ coord_init_sideof_unit(&next_coord, next_lock.node,
-+ sideof_reverse(scan->direction));
-+ }
-+
-+ iplug = item_plugin_by_coord(&next_coord);
-+
-+ /* Get the next child. */
-+ ret =
-+ iplug->f.utmost_child(&next_coord,
-+ sideof_reverse(scan->direction),
-+ &child);
-+ if (ret != 0)
-+ goto exit;
-+ /* If the next child is not in memory, or, item_utmost_child
-+ failed (due to race with unlink, most probably), stop
-+ here. */
-+ if (child == NULL || IS_ERR(child)) {
-+ scan->stop = 1;
-+ checkchild(scan);
-+ break;
-+ }
-+
-+ assert("nikita-2374", jnode_is_unformatted(child)
-+ || jnode_is_znode(child));
-+
-+ /* See if it is dirty, part of the same atom. */
-+ if (!reiser4_scan_goto(scan, child)) {
-+ checkchild(scan);
-+ break;
-+ }
-+
-+ /* If so, make this child current. */
-+ ret = scan_set_current(scan, child, 1, &next_coord);
-+ if (ret != 0)
-+ goto exit;
-+
-+ /* Now continue. If formatted we release the parent lock and return, then
-+ proceed. */
-+ if (jnode_is_znode(child))
-+ break;
-+
-+ /* Otherwise, repeat the above loop with next_coord. */
-+ if (next_load.node != NULL) {
-+ done_lh(&scan->parent_lock);
-+ move_lh(&scan->parent_lock, &next_lock);
-+ move_load_count(&scan->parent_load, &next_load);
-+ }
-+ }
-+
-+ assert("jmacd-6233",
-+ reiser4_scan_finished(scan) || jnode_is_znode(scan->node));
-+ exit:
-+ checkchild(scan);
-+ race: /* skip the above check */
-+ if (jnode_is_znode(scan->node)) {
-+ done_lh(&scan->parent_lock);
-+ done_load_count(&scan->parent_load);
-+ }
-+
-+ done_load_count(&next_load);
-+ done_lh(&next_lock);
-+ return ret;
-+}
-+
-+/* FLUSH POS HELPERS */
-+
-+/* Initialize the fields of a flush_position. */
-+static void pos_init(flush_pos_t * pos)
-+{
-+ memset(pos, 0, sizeof *pos);
-+
-+ pos->state = POS_INVALID;
-+ coord_init_invalid(&pos->coord, NULL);
-+ init_lh(&pos->lock);
-+ init_load_count(&pos->load);
-+
-+ reiser4_blocknr_hint_init(&pos->preceder);
-+}
-+
-+/* The flush loop inside squalloc periodically checks pos_valid to
-+ determine when "enough flushing" has been performed. This will return true until one
-+ of the following conditions is met:
-+
-+ 1. the number of flush-queued nodes has reached the kernel-supplied "int *nr_to_flush"
-+ parameter, meaning we have flushed as many blocks as the kernel requested. When
-+ flushing to commit, this parameter is NULL.
-+
-+ 2. pos_stop() is called because squalloc discovers that the "next" node in the
-+ flush order is either non-existent, not dirty, or not in the same atom.
-+*/
-+
-+static int pos_valid(flush_pos_t * pos)
-+{
-+ return pos->state != POS_INVALID;
-+}
-+
-+/* Release any resources of a flush_position. Called when jnode_flush finishes. */
-+static void pos_done(flush_pos_t * pos)
-+{
-+ pos_stop(pos);
-+ reiser4_blocknr_hint_done(&pos->preceder);
-+ if (convert_data(pos))
-+ free_convert_data(pos);
-+}
-+
-+/* Reset the point and parent. Called during flush subroutines to terminate the
-+ squalloc loop. */
-+static int pos_stop(flush_pos_t * pos)
-+{
-+ pos->state = POS_INVALID;
-+ done_lh(&pos->lock);
-+ done_load_count(&pos->load);
-+ coord_init_invalid(&pos->coord, NULL);
-+
-+ if (pos->child) {
-+ jput(pos->child);
-+ pos->child = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Return the flush_position's block allocator hint. */
-+reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos)
-+{
-+ return &pos->preceder;
-+}
-+
-+flush_queue_t * reiser4_pos_fq(flush_pos_t * pos)
-+{
-+ return pos->fq;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 90
-+ LocalWords: preceder
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/flush.h linux-2.6.20/fs/reiser4/flush.h
---- linux-2.6.20.orig/fs/reiser4/flush.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/flush.h 2007-05-06 14:50:43.718981974 +0400
-@@ -0,0 +1,274 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* DECLARATIONS: */
-+
-+#if !defined(__REISER4_FLUSH_H__)
-+#define __REISER4_FLUSH_H__
-+
-+#include "plugin/cluster.h"
-+
-+/* The flush_scan data structure maintains the state of an in-progress flush-scan on a
-+ single level of the tree. A flush-scan is used for counting the number of adjacent
-+ nodes to flush, which is used to determine whether we should relocate, and it is also
-+ used to find a starting point for flush. A flush-scan object can scan in both right
-+ and left directions via the scan_left() and scan_right() interfaces. The
-+ right- and left-variations are similar but perform different functions. When scanning
-+ left we (optionally perform rapid scanning and then) longterm-lock the endpoint node.
-+ When scanning right we are simply counting the number of adjacent, dirty nodes. */
-+struct flush_scan {
-+
-+ /* The current number of nodes scanned on this level. */
-+ unsigned count;
-+
-+ /* There may be a maximum number of nodes for a scan on any single level. When
-+ going leftward, max_count is determined by FLUSH_SCAN_MAXNODES (see reiser4.h) */
-+ unsigned max_count;
-+
-+ /* Direction: Set to one of the sideof enumeration: { LEFT_SIDE, RIGHT_SIDE }. */
-+ sideof direction;
-+
-+ /* Initially @stop is set to false then set true once some condition stops the
-+ search (e.g., we found a clean node before reaching max_count or we found a
-+ node belonging to another atom). */
-+ int stop;
-+
-+ /* The current scan position. If @node is non-NULL then its reference count has
-+ been incremented to reflect this reference. */
-+ jnode *node;
-+
-+ /* A handle for zload/zrelse of current scan position node. */
-+ load_count node_load;
-+
-+ /* During left-scan, if the final position (a.k.a. endpoint node) is formatted the
-+ node is locked using this lock handle. The endpoint needs to be locked for
-+ transfer to the flush_position object after scanning finishes. */
-+ lock_handle node_lock;
-+
-+ /* When the position is unformatted, its parent, coordinate, and parent
-+ zload/zrelse handle. */
-+ lock_handle parent_lock;
-+ coord_t parent_coord;
-+ load_count parent_load;
-+
-+ /* The block allocator preceder hint. Sometimes flush_scan determines what the
-+ preceder is and if so it sets it here, after which it is copied into the
-+ flush_position. Otherwise, the preceder is computed later. */
-+ reiser4_block_nr preceder_blk;
-+};
-+
-+typedef struct convert_item_info {
-+ dc_item_stat d_cur; /* disk cluster state of the current item */
-+ dc_item_stat d_next; /* disk cluster state of the next slum item */
-+ struct inode *inode;
-+ flow_t flow;
-+} convert_item_info_t;
-+
-+typedef struct convert_info {
-+ int count; /* for squalloc terminating */
-+ reiser4_cluster_t clust; /* transform cluster */
-+ item_plugin *iplug; /* current item plugin */
-+ convert_item_info_t *itm; /* current item info */
-+} convert_info_t;
-+
-+typedef enum flush_position_state {
-+ POS_INVALID, /* Invalid or stopped pos, do not continue slum
-+ * processing */
-+ POS_ON_LEAF, /* pos points to already prepped, locked formatted node at
-+ * leaf level */
-+ POS_ON_EPOINT, /* pos keeps a lock on twig level, "coord" field is used
-+ * to traverse unformatted nodes */
-+ POS_TO_LEAF, /* pos is being moved to leaf level */
-+ POS_TO_TWIG, /* pos is being moved to twig level */
-+ POS_END_OF_TWIG, /* special case of the twig-level state POS_ON_EPOINT,
-+ * when coord is after rightmost unit of the current twig */
-+ POS_ON_INTERNAL /* same as POS_ON_LEAF, but points to internal node */
-+} flushpos_state_t;
-+
-+/* An encapsulation of the current flush point and all the parameters that are passed
-+ through the entire squeeze-and-allocate stage of the flush routine. A single
-+ flush_position object is constructed after left- and right-scanning finishes. */
-+struct flush_position {
-+ flushpos_state_t state;
-+
-+ coord_t coord; /* coord to traverse unformatted nodes */
-+ lock_handle lock; /* current lock we hold */
-+ load_count load; /* load status for current locked formatted node */
-+
-+ jnode *child; /* for passing a reference to unformatted child
-+ * across pos state changes */
-+
-+ reiser4_blocknr_hint preceder; /* The flush 'hint' state. */
-+ int leaf_relocate; /* True if enough leaf-level nodes were
-+ * found to suggest a relocate policy. */
-+ int alloc_cnt; /* The number of nodes allocated during squeeze and allocate. */
-+ int prep_or_free_cnt; /* The number of nodes prepared for write (allocate) or squeezed and freed. */
-+ flush_queue_t *fq; /* flush queue of this pos, as returned by reiser4_pos_fq() */
-+ long *nr_written; /* number of nodes submitted to disk */
-+ int flags; /* a copy of jnode_flush flags argument */
-+
-+ znode *prev_twig; /* previous parent pointer value, used to catch
-+ * processing of new twig node */
-+ convert_info_t *sq; /* convert info */
-+
-+ unsigned long pos_in_unit; /* for extents only. Position
-+ within an extent unit of first
-+ jnode of slum */
-+ long nr_to_write; /* number of unformatted nodes to handle on flush */
-+};
-+
-+static inline int item_convert_count(flush_pos_t * pos)
-+{
-+ return pos->sq->count;
-+}
-+static inline void inc_item_convert_count(flush_pos_t * pos)
-+{
-+ pos->sq->count++;
-+}
-+static inline void set_item_convert_count(flush_pos_t * pos, int count)
-+{
-+ pos->sq->count = count;
-+}
-+static inline item_plugin *item_convert_plug(flush_pos_t * pos)
-+{
-+ return pos->sq->iplug;
-+}
-+
-+static inline convert_info_t *convert_data(flush_pos_t * pos)
-+{
-+ return pos->sq;
-+}
-+
-+static inline convert_item_info_t *item_convert_data(flush_pos_t * pos)
-+{
-+ assert("edward-955", convert_data(pos));
-+ return pos->sq->itm;
-+}
-+
-+static inline tfm_cluster_t *tfm_cluster_sq(flush_pos_t * pos)
-+{
-+ return &pos->sq->clust.tc;
-+}
-+
-+static inline tfm_stream_t *tfm_stream_sq(flush_pos_t * pos, tfm_stream_id id)
-+{
-+ assert("edward-854", pos->sq != NULL);
-+ return tfm_stream(tfm_cluster_sq(pos), id);
-+}
-+
-+static inline int chaining_data_present(flush_pos_t * pos)
-+{
-+ return convert_data(pos) && item_convert_data(pos);
-+}
-+
-+/* Tells whether the item convert data has to be handed over to the right
-+   slum neighbor, i.e. whether the next item of the disk cluster lives in the
-+   next node: 1 for DC_CHAINED_ITEM, 0 for DC_AFTER_CLUSTER; any other d_next
-+   value is reported through impossible() and, if that returns, yields 0. */
-+static inline int should_chain_next_node(flush_pos_t * pos)
-+{
-+	dc_item_stat next_state;
-+
-+	assert("edward-1007", chaining_data_present(pos));
-+
-+	next_state = item_convert_data(pos)->d_next;
-+	if (next_state == DC_CHAINED_ITEM)
-+		return 1;
-+
-+	if (next_state != DC_AFTER_CLUSTER) {
-+		impossible("edward-1009", "bad state of next slum item");
-+	}
-+
-+	return 0;
-+}
-+
-+/* update item state in a disk cluster to assign conversion mode */
-+static inline void
-+move_chaining_data(flush_pos_t * pos, int this_node /* where is next item */ )
-+{
-+
-+ assert("edward-1010", chaining_data_present(pos));
-+
-+ if (this_node == 0) {
-+ /* next item is on the right neighbor */
-+ assert("edward-1011",
-+ item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
-+ item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
-+ assert("edward-1012",
-+ item_convert_data(pos)->d_next == DC_CHAINED_ITEM);
-+
-+ item_convert_data(pos)->d_cur = DC_CHAINED_ITEM;
-+ item_convert_data(pos)->d_next = DC_INVALID_STATE;
-+ } else {
-+ /* next item is on the same node */
-+ assert("edward-1013",
-+ item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
-+ item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
-+ assert("edward-1227",
-+ item_convert_data(pos)->d_next == DC_AFTER_CLUSTER ||
-+ item_convert_data(pos)->d_next == DC_INVALID_STATE);
-+
-+ item_convert_data(pos)->d_cur = DC_AFTER_CLUSTER;
-+ item_convert_data(pos)->d_next = DC_INVALID_STATE;
-+ }
-+}
-+
-+static inline int should_convert_node(flush_pos_t * pos, znode * node)
-+{
-+ return znode_convertible(node);
-+}
-+
-+/* Non-zero when @pos carries convert info with item info attached; @node is unused. */
-+static inline int should_convert_next_node(flush_pos_t * pos, znode * node)
-+{
-+	return chaining_data_present(pos);
-+}
-+
-+#define SQUALLOC_THRESHOLD 256
-+
-+static inline int should_terminate_squalloc(flush_pos_t * pos)
-+{
-+ return convert_data(pos) &&
-+ !item_convert_data(pos) &&
-+ item_convert_count(pos) >= SQUALLOC_THRESHOLD;
-+}
-+
-+void free_convert_data(flush_pos_t * pos);
-+/* used in extent.c */
-+int scan_set_current(flush_scan * scan, jnode * node, unsigned add_size,
-+ const coord_t * parent);
-+int reiser4_scan_finished(flush_scan * scan);
-+int reiser4_scanning_left(flush_scan * scan);
-+int reiser4_scan_goto(flush_scan * scan, jnode * tonode);
-+txn_atom *atom_locked_by_fq(flush_queue_t * fq);
-+int reiser4_alloc_extent(flush_pos_t *flush_pos);
-+squeeze_result squalloc_extent(znode *left, const coord_t *, flush_pos_t *,
-+ reiser4_key *stop_key);
-+extern int reiser4_init_fqs(void);
-+extern void reiser4_done_fqs(void);
-+
-+#if REISER4_DEBUG
-+/* Debug-only declarations; the check_* macros expand to noop otherwise. */
-+extern void reiser4_check_fq(const txn_atom *atom);
-+extern atomic_t flush_cnt;
-+/* No trailing ';' in the macro body: the caller supplies it, as for noop. */
-+#define check_preceder(blk) \
-+assert("nikita-2588", (blk) < reiser4_block_count(reiser4_get_current_sb()))
-+extern void check_pos(flush_pos_t * pos);
-+#else
-+#define check_preceder(b) noop
-+#define check_pos(pos) noop
-+#endif
-+
-+/* __REISER4_FLUSH_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 90
-+ LocalWords: preceder
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/flush_queue.c linux-2.6.20/fs/reiser4/flush_queue.c
---- linux-2.6.20.orig/fs/reiser4/flush_queue.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/flush_queue.c 2007-05-06 14:50:43.718981974 +0400
-@@ -0,0 +1,680 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "vfs_ops.h"
-+#include "writeout.h"
-+#include "flush.h"
-+
-+#include <linux/bio.h>
-+#include <linux/mm.h>
-+#include <linux/pagemap.h>
-+#include <linux/blkdev.h>
-+#include <linux/writeback.h>
-+
-+/* A flush queue object is an accumulator for keeping jnodes prepared
-+ by the jnode_flush() function for writing to disk. Those "queued" jnodes are
-+ kept on the flush queue until memory pressure or atom commit asks
-+ flush queues to write some or all of their jnodes. */
-+
-+/*
-+ LOCKING:
-+
-+ fq->guard spin lock protects fq->atom pointer and nothing else. fq->prepped
-+ list protected by atom spin lock. fq->prepped list uses the following
-+ locking:
-+
-+ two ways to protect fq->prepped list for read-only list traversal:
-+
-+ 1. atom is spin-locked.
-+ 2. fq is IN_USE, atom->nr_running_queues increased.
-+
-+ and one for list modification:
-+
-+ 1. atom is spin-locked and one condition is true: fq is IN_USE or
-+ atom->nr_running_queues == 0.
-+
-+ The deadlock-safe order for flush queues and atoms is: first lock atom, then
-+ lock flush queue, then lock jnode.
-+*/
-+
-+#define fq_in_use(fq) ((fq)->state & FQ_IN_USE)
-+#define fq_ready(fq) (!fq_in_use(fq))
-+
-+#define mark_fq_in_use(fq) do { (fq)->state |= FQ_IN_USE; } while (0)
-+#define mark_fq_ready(fq) do { (fq)->state &= ~FQ_IN_USE; } while (0)
-+
-+/* get lock on atom from locked flush queue object */
-+static txn_atom *atom_locked_by_fq_nolock(flush_queue_t * fq)
-+{
-+ /* This code is similar to jnode_get_atom(), look at it for the
-+ * explanation. */
-+ txn_atom *atom;
-+
-+ assert_spin_locked(&(fq->guard));
-+
-+ while (1) {
-+ atom = fq->atom;
-+ if (atom == NULL)
-+ break;
-+
-+ if (spin_trylock_atom(atom))
-+ break;
-+
-+ atomic_inc(&atom->refcount);
-+ spin_unlock(&(fq->guard));
-+ spin_lock_atom(atom);
-+ spin_lock(&(fq->guard));
-+
-+ if (fq->atom == atom) {
-+ atomic_dec(&atom->refcount);
-+ break;
-+ }
-+
-+ spin_unlock(&(fq->guard));
-+ atom_dec_and_unlock(atom);
-+ spin_lock(&(fq->guard));
-+ }
-+
-+ return atom;
-+}
-+
-+txn_atom *atom_locked_by_fq(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+
-+ spin_lock(&(fq->guard));
-+ atom = atom_locked_by_fq_nolock(fq);
-+ spin_unlock(&(fq->guard));
-+ return atom;
-+}
-+
-+static void init_fq(flush_queue_t * fq)
-+{
-+ memset(fq, 0, sizeof *fq);
-+
-+ atomic_set(&fq->nr_submitted, 0);
-+
-+ INIT_LIST_HEAD(ATOM_FQ_LIST(fq));
-+
-+ init_waitqueue_head(&fq->wait);
-+ spin_lock_init(&fq->guard);
-+}
-+
-+/* slab for flush queues */
-+static struct kmem_cache *fq_slab;
-+
-+/**
-+ * reiser4_init_fqs - create flush queue cache
-+ *
-+ * Initializes slab cache of flush queues. It is part of reiser4 module
-+ * initialization.
-+ */
-+int reiser4_init_fqs(void)
-+{
-+ fq_slab = kmem_cache_create("fq",
-+ sizeof(flush_queue_t),
-+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ if (fq_slab == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_fqs - delete flush queue cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_fqs(void)
-+{
-+ destroy_reiser4_cache(&fq_slab);
-+}
-+
-+/* Allocate a flush queue object from fq_slab with @gfp and initialize it;
-+   returns NULL when the allocation fails. */
-+static flush_queue_t *create_fq(gfp_t gfp)
-+{
-+	flush_queue_t *queue = kmem_cache_alloc(fq_slab, gfp);
-+
-+	if (queue != NULL)
-+		init_fq(queue);
-+
-+	return queue;
-+}
-+
-+/* adjust atom's and flush queue's counters of queued nodes */
-+static void count_enqueued_node(flush_queue_t * fq)
-+{
-+ ON_DEBUG(fq->atom->num_queued++);
-+}
-+
-+static void count_dequeued_node(flush_queue_t * fq)
-+{
-+ assert("zam-993", fq->atom->num_queued > 0);
-+ ON_DEBUG(fq->atom->num_queued--);
-+}
-+
-+/* attach flush queue object to the atom */
-+static void attach_fq(txn_atom *atom, flush_queue_t *fq)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ list_add(&fq->alink, &atom->flush_queues);
-+ fq->atom = atom;
-+ ON_DEBUG(atom->nr_flush_queues++);
-+}
-+
-+static void detach_fq(flush_queue_t * fq)
-+{
-+ assert_spin_locked(&(fq->atom->alock));
-+
-+ spin_lock(&(fq->guard));
-+ list_del_init(&fq->alink);
-+ assert("vs-1456", fq->atom->nr_flush_queues > 0);
-+ ON_DEBUG(fq->atom->nr_flush_queues--);
-+ fq->atom = NULL;
-+ spin_unlock(&(fq->guard));
-+}
-+
-+/* destroy flush queue object */
-+static void done_fq(flush_queue_t * fq)
-+{
-+ assert("zam-763", list_empty_careful(ATOM_FQ_LIST(fq)));
-+ assert("zam-766", atomic_read(&fq->nr_submitted) == 0);
-+
-+ kmem_cache_free(fq_slab, fq);
-+}
-+
-+/* Set JNODE_FLUSH_QUEUED on @node and bump the (debug-only) queued-node counter of @fq's atom. */
-+static void mark_jnode_queued(flush_queue_t * fq, jnode * node)
-+{
-+ JF_SET(node, JNODE_FLUSH_QUEUED);
-+ count_enqueued_node(fq);
-+}
-+
-+/* Putting jnode into the flush queue. Both atom and jnode should be
-+ spin-locked. */
-+void queue_jnode(flush_queue_t * fq, jnode * node)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-713", node->atom != NULL);
-+ assert_spin_locked(&(node->atom->alock));
-+ assert("zam-716", fq->atom != NULL);
-+ assert("zam-717", fq->atom == node->atom);
-+ assert("zam-907", fq_in_use(fq));
-+
-+ assert("zam-714", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-826", JF_ISSET(node, JNODE_RELOC));
-+ assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("vs-1481", NODE_LIST(node) != FQ_LIST);
-+
-+ mark_jnode_queued(fq, node);
-+ list_move_tail(&node->capture_link, ATOM_FQ_LIST(fq));
-+
-+ ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
-+ FQ_LIST, 1));
-+}
-+
-+/* repeatable process for waiting io completion on a flush queue object */
-+static int wait_io(flush_queue_t * fq, int *nr_io_errors)
-+{
-+ assert("zam-738", fq->atom != NULL);
-+ assert_spin_locked(&(fq->atom->alock));
-+ assert("zam-736", fq_in_use(fq));
-+ assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq)));
-+
-+ if (atomic_read(&fq->nr_submitted) != 0) {
-+ struct super_block *super;
-+
-+ spin_unlock_atom(fq->atom);
-+
-+ assert("nikita-3013", reiser4_schedulable());
-+
-+ super = reiser4_get_current_sb();
-+
-+ /* FIXME: this is instead of blk_run_queues() */
-+ blk_run_address_space(reiser4_get_super_fake(super)->i_mapping);
-+
-+ if (!(super->s_flags & MS_RDONLY))
-+ wait_event(fq->wait, atomic_read(&fq->nr_submitted) == 0);
-+
-+ /* Ask the caller to re-acquire the locks and call this
-+ function again. Note: this technique is commonly used in
-+ the txnmgr code. */
-+ return -E_REPEAT;
-+ }
-+
-+ *nr_io_errors += atomic_read(&fq->nr_errors);
-+ return 0;
-+}
-+
-+/* wait on I/O completion, then detach and free the flush queue and send an atom event */
-+static int finish_fq(flush_queue_t * fq, int *nr_io_errors)
-+{
-+ int ret;
-+ txn_atom *atom = fq->atom;
-+
-+ assert("zam-801", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("zam-762", fq_in_use(fq));
-+
-+ ret = wait_io(fq, nr_io_errors);
-+ if (ret)
-+ return ret;
-+
-+ detach_fq(fq);
-+ done_fq(fq);
-+
-+ reiser4_atom_send_event(atom);
-+
-+ return 0;
-+}
-+
-+/* wait for all i/o for given atom to be completed; actually does one iteration
-+ of that and returns -E_REPEAT if more iterations are needed */
-+static int finish_all_fq(txn_atom * atom, int *nr_io_errors)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(atom->alock));
-+
-+ if (list_empty_careful(&atom->flush_queues))
-+ return 0;
-+
-+ list_for_each_entry(fq, &atom->flush_queues, alink) {
-+ if (fq_ready(fq)) {
-+ int ret;
-+
-+ mark_fq_in_use(fq);
-+ assert("vs-1247", fq->owner == NULL);
-+ ON_DEBUG(fq->owner = current);
-+ ret = finish_fq(fq, nr_io_errors);
-+
-+ if (*nr_io_errors)
-+ reiser4_handle_error();
-+
-+ if (ret) {
-+ reiser4_fq_put(fq);
-+ return ret;
-+ }
-+
-+ spin_unlock_atom(atom);
-+
-+ return -E_REPEAT;
-+ }
-+ }
-+
-+ /* All flush queues are in use; atom remains locked */
-+ return -EBUSY;
-+}
-+
-+/* wait all i/o for current atom */
-+int current_atom_finish_all_fq(void)
-+{
-+ txn_atom *atom;
-+ int nr_io_errors = 0;
-+ int ret = 0;
-+
-+ do {
-+ while (1) {
-+ atom = get_current_atom_locked();
-+ ret = finish_all_fq(atom, &nr_io_errors);
-+ if (ret != -EBUSY)
-+ break;
-+ reiser4_atom_wait_event(atom);
-+ }
-+ } while (ret == -E_REPEAT);
-+
-+ /* we do not need locked atom after this function finishes, SUCCESS or
-+ -EBUSY are two return codes when atom remains locked after
-+ finish_all_fq */
-+ if (!ret)
-+ spin_unlock_atom(atom);
-+
-+ assert_spin_not_locked(&(atom->alock));
-+
-+ if (ret)
-+ return ret;
-+
-+ if (nr_io_errors)
-+ return RETERR(-EIO);
-+
-+ return 0;
-+}
-+
-+/* Point every jnode linked on @list through capture_link at @atom, each under its own jnode spin lock. */
-+static void scan_fq_and_update_atom_ref(struct list_head *list, txn_atom *atom)
-+{
-+	struct list_head *link;
-+
-+	list_for_each(link, list) {
-+		jnode *node = list_entry(link, jnode, capture_link);
-+		spin_lock_jnode(node);
-+		node->atom = atom;
-+		spin_unlock_jnode(node);
-+	}
-+}
-+
-+/* support for atom fusion operation */
-+void reiser4_fuse_fq(txn_atom *to, txn_atom *from)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(to->alock));
-+ assert_spin_locked(&(from->alock));
-+
-+ list_for_each_entry(fq, &from->flush_queues, alink) {
-+ scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to);
-+ spin_lock(&(fq->guard));
-+ fq->atom = to;
-+ spin_unlock(&(fq->guard));
-+ }
-+
-+ list_splice_init(&from->flush_queues, to->flush_queues.prev);
-+
-+#if REISER4_DEBUG
-+ to->num_queued += from->num_queued;
-+ to->nr_flush_queues += from->nr_flush_queues;
-+ from->nr_flush_queues = 0;
-+#endif
-+}
-+
-+#if REISER4_DEBUG
-+int atom_fq_parts_are_clean(txn_atom * atom)
-+{
-+ assert("zam-915", atom != NULL);
-+ return list_empty_careful(&atom->flush_queues);
-+}
-+#endif
-+/* Bio i/o completion routine for reiser4 write operations. */
-+static int
-+end_io_handler(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
-+ int err)
-+{
-+ int i;
-+ int nr_errors = 0;
-+ flush_queue_t *fq;
-+
-+ assert("zam-958", bio->bi_rw & WRITE);
-+
-+ /* i/o op. is not fully completed */
-+ if (bio->bi_size != 0)
-+ return 1;
-+
-+ if (err == -EOPNOTSUPP)
-+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-+
-+ /* we expect that bio->private is set to NULL or fq object which is used
-+ * for synchronization and error counting. */
-+ fq = bio->bi_private;
-+ /* Check all elements of io_vec for correct write completion. */
-+ for (i = 0; i < bio->bi_vcnt; i += 1) {
-+ struct page *pg = bio->bi_io_vec[i].bv_page;
-+
-+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+ SetPageError(pg);
-+ nr_errors++;
-+ }
-+
-+ {
-+ /* jnode WRITEBACK ("write is in progress bit") is
-+ * atomically cleared here. */
-+ jnode *node;
-+
-+ assert("zam-736", pg != NULL);
-+ assert("zam-736", PagePrivate(pg));
-+ node = jprivate(pg);
-+
-+ JF_CLR(node, JNODE_WRITEBACK);
-+ }
-+
-+ end_page_writeback(pg);
-+ page_cache_release(pg);
-+ }
-+
-+ if (fq) {
-+ /* count i/o error in fq object */
-+ atomic_add(nr_errors, &fq->nr_errors);
-+
-+ /* If all write requests registered in this "fq" are done we up
-+ * the waiter. */
-+ if (atomic_sub_and_test(bio->bi_vcnt, &fq->nr_submitted))
-+ wake_up(&fq->wait);
-+ }
-+
-+ bio_put(bio);
-+ return 0;
-+}
-+
-+/* Route completion of @bio to end_io_handler with @fq as its private data and,
-+   when @fq is not NULL, add the bio's bi_vcnt to @fq->nr_submitted. */
-+void add_fq_to_bio(flush_queue_t * fq, struct bio *bio)
-+{
-+	bio->bi_end_io = end_io_handler;
-+	bio->bi_private = fq;
-+
-+	if (fq != NULL)
-+		atomic_add(bio->bi_vcnt, &fq->nr_submitted);
-+}
-+
-+/* Move all queued nodes out from @fq->prepped list. */
-+static void release_prepped_list(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+
-+ assert("zam-904", fq_in_use(fq));
-+ atom = atom_locked_by_fq(fq);
-+
-+ while (!list_empty(ATOM_FQ_LIST(fq))) {
-+ jnode *cur;
-+
-+ cur = list_entry(ATOM_FQ_LIST(fq)->next, jnode, capture_link);
-+ list_del_init(&cur->capture_link);
-+
-+ count_dequeued_node(fq);
-+ spin_lock_jnode(cur);
-+ assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR));
-+ assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC));
-+ assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED));
-+ JF_CLR(cur, JNODE_FLUSH_QUEUED);
-+
-+ if (JF_ISSET(cur, JNODE_DIRTY)) {
-+ list_add_tail(&cur->capture_link,
-+ ATOM_DIRTY_LIST(atom, jnode_get_level(cur)));
-+ ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
-+ DIRTY_LIST, 1));
-+ } else {
-+ list_add_tail(&cur->capture_link, ATOM_CLEAN_LIST(atom));
-+ ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
-+ CLEAN_LIST, 1));
-+ }
-+
-+ spin_unlock_jnode(cur);
-+ }
-+
-+ if (--atom->nr_running_queues == 0)
-+ reiser4_atom_send_event(atom);
-+
-+ spin_unlock_atom(atom);
-+}
-+
-+/* Submit write requests for nodes on the already filled flush queue @fq.
-+
-+ @fq: flush queue object which contains jnodes we can (and will) write.
-+ @return: number of submitted blocks (>=0) if success, otherwise -- an error
-+ code (<0). */
-+int reiser4_write_fq(flush_queue_t * fq, long *nr_submitted, int flags)
-+{
-+ int ret;
-+ txn_atom *atom;
-+
-+ while (1) {
-+ atom = atom_locked_by_fq(fq);
-+ assert("zam-924", atom);
-+ /* do not write fq in parallel. */
-+ if (atom->nr_running_queues == 0
-+ || !(flags & WRITEOUT_SINGLE_STREAM))
-+ break;
-+ reiser4_atom_wait_event(atom);
-+ }
-+
-+ atom->nr_running_queues++;
-+ spin_unlock_atom(atom);
-+
-+ ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags);
-+ release_prepped_list(fq);
-+
-+ return ret;
-+}
-+
-+/* Getting flush queue object for exclusive use by one thread. May require
-+ several iterations which is indicated by -E_REPEAT return code.
-+
-+ This function does not contain code for obtaining an atom lock because an
-+ atom lock is obtained by different ways in different parts of reiser4,
-+ usually it is current atom, but we need a possibility for getting fq for the
-+ atom of given jnode. */
-+static int fq_by_atom_gfp(txn_atom *atom, flush_queue_t **new_fq, gfp_t gfp)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(atom->alock));
-+
-+ fq = list_entry(atom->flush_queues.next, flush_queue_t, alink);
-+ while (&atom->flush_queues != &fq->alink) {
-+ spin_lock(&(fq->guard));
-+
-+ if (fq_ready(fq)) {
-+ mark_fq_in_use(fq);
-+ assert("vs-1246", fq->owner == NULL);
-+ ON_DEBUG(fq->owner = current);
-+ spin_unlock(&(fq->guard));
-+
-+ if (*new_fq)
-+ done_fq(*new_fq);
-+
-+ *new_fq = fq;
-+
-+ return 0;
-+ }
-+
-+ spin_unlock(&(fq->guard));
-+
-+ fq = list_entry(fq->alink.next, flush_queue_t, alink);
-+ }
-+
-+ /* Use previously allocated fq object */
-+ if (*new_fq) {
-+ mark_fq_in_use(*new_fq);
-+ assert("vs-1248", (*new_fq)->owner == 0);
-+ ON_DEBUG((*new_fq)->owner = current);
-+ attach_fq(atom, *new_fq);
-+
-+ return 0;
-+ }
-+
-+ spin_unlock_atom(atom);
-+
-+ *new_fq = create_fq(gfp);
-+
-+ if (*new_fq == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ return RETERR(-E_REPEAT);
-+}
-+
-+int reiser4_fq_by_atom(txn_atom * atom, flush_queue_t ** new_fq)
-+{
-+ return fq_by_atom_gfp(atom, new_fq, reiser4_ctx_gfp_mask_get());
-+}
-+
-+/* A wrapper around reiser4_fq_by_atom for getting a flush queue
-+ object for current atom, if success fq->atom remains locked. */
-+flush_queue_t *get_fq_for_current_atom(void)
-+{
-+ flush_queue_t *fq = NULL;
-+ txn_atom *atom;
-+ int ret;
-+
-+ do {
-+ atom = get_current_atom_locked();
-+ ret = reiser4_fq_by_atom(atom, &fq);
-+ } while (ret == -E_REPEAT);
-+
-+ if (ret)
-+ return ERR_PTR(ret);
-+ return fq;
-+}
-+
-+/* Releasing flush queue object after exclusive use */
-+void reiser4_fq_put_nolock(flush_queue_t *fq)
-+{
-+ assert("zam-747", fq->atom != NULL);
-+ assert("zam-902", list_empty_careful(ATOM_FQ_LIST(fq)));
-+ mark_fq_ready(fq);
-+ assert("vs-1245", fq->owner == current);
-+ ON_DEBUG(fq->owner = NULL);
-+}
-+
-+void reiser4_fq_put(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+
-+ spin_lock(&(fq->guard));
-+ atom = atom_locked_by_fq_nolock(fq);
-+
-+ assert("zam-746", atom != NULL);
-+
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(atom);
-+
-+ spin_unlock(&(fq->guard));
-+ spin_unlock_atom(atom);
-+}
-+
-+/* A part of atom object initialization related to the embedded flush queue
-+ list head */
-+
-+void init_atom_fq_parts(txn_atom *atom)
-+{
-+ INIT_LIST_HEAD(&atom->flush_queues);
-+}
-+
-+#if REISER4_DEBUG
-+
-+void reiser4_check_fq(const txn_atom *atom)
-+{
-+ /* check number of nodes on all atom's flush queues */
-+ flush_queue_t *fq;
-+ int count;
-+ struct list_head *pos;
-+
-+ count = 0;
-+ list_for_each_entry(fq, &atom->flush_queues, alink) {
-+ spin_lock(&(fq->guard));
-+ /* calculate number of jnodes on fq' list of prepped jnodes */
-+ list_for_each(pos, ATOM_FQ_LIST(fq))
-+ count++;
-+ spin_unlock(&(fq->guard));
-+ }
-+ if (count != atom->fq)
-+ warning("", "fq counter %d, real %d\n", atom->fq, count);
-+
-+}
-+
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/forward.h linux-2.6.20/fs/reiser4/forward.h
---- linux-2.6.20.orig/fs/reiser4/forward.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/forward.h 2007-05-06 14:50:43.718981974 +0400
-@@ -0,0 +1,256 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Forward declarations. Thank you Kernighan. */
-+
-+#if !defined( __REISER4_FORWARD_H__ )
-+#define __REISER4_FORWARD_H__
-+
-+#include <asm/errno.h>
-+#include <linux/types.h>
-+
-+typedef struct zlock zlock;
-+typedef struct lock_stack lock_stack;
-+typedef struct lock_handle lock_handle;
-+typedef struct znode znode;
-+typedef struct flow flow_t;
-+typedef struct coord coord_t;
-+typedef struct tree_access_pointer tap_t;
-+typedef struct item_coord item_coord;
-+typedef struct shift_params shift_params;
-+typedef struct reiser4_object_create_data reiser4_object_create_data;
-+typedef union reiser4_plugin reiser4_plugin;
-+typedef __u16 reiser4_plugin_id;
-+typedef __u64 reiser4_plugin_groups;
-+typedef struct item_plugin item_plugin;
-+typedef struct jnode_plugin jnode_plugin;
-+typedef struct reiser4_item_data reiser4_item_data;
-+typedef union reiser4_key reiser4_key;
-+typedef struct reiser4_tree reiser4_tree;
-+typedef struct carry_cut_data carry_cut_data;
-+typedef struct carry_kill_data carry_kill_data;
-+typedef struct carry_tree_op carry_tree_op;
-+typedef struct carry_tree_node carry_tree_node;
-+typedef struct carry_plugin_info carry_plugin_info;
-+typedef struct reiser4_journal reiser4_journal;
-+typedef struct txn_atom txn_atom;
-+typedef struct txn_handle txn_handle;
-+typedef struct txn_mgr txn_mgr;
-+typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc;
-+typedef struct reiser4_context reiser4_context;
-+typedef struct carry_level carry_level;
-+typedef struct blocknr_set_entry blocknr_set_entry;
-+/* super_block->s_fs_info points to this */
-+typedef struct reiser4_super_info_data reiser4_super_info_data;
-+/* next two objects are fields of reiser4_super_info_data */
-+typedef struct reiser4_oid_allocator reiser4_oid_allocator;
-+typedef struct reiser4_space_allocator reiser4_space_allocator;
-+
-+typedef struct flush_scan flush_scan;
-+typedef struct flush_position flush_pos_t;
-+
-+typedef unsigned short pos_in_node_t;
-+#define MAX_POS_IN_NODE 65535
-+
-+typedef struct jnode jnode;
-+typedef struct reiser4_blocknr_hint reiser4_blocknr_hint;
-+
-+typedef struct uf_coord uf_coord_t;
-+typedef struct hint hint_t;
-+
-+typedef struct ktxnmgrd_context ktxnmgrd_context;
-+
-+typedef struct reiser4_xattr_plugin reiser4_xattr_plugin;
-+
-+struct inode;
-+struct page;
-+struct file;
-+struct dentry;
-+struct super_block;
-+
-+/* return values of coord_by_key(). cbk == coord_by_key */
-+typedef enum {
-+ CBK_COORD_FOUND = 0,
-+ CBK_COORD_NOTFOUND = -ENOENT,
-+} lookup_result;
-+
-+/* results of lookup with directory file */
-+typedef enum {
-+ FILE_NAME_FOUND = 0,
-+ FILE_NAME_NOTFOUND = -ENOENT,
-+ FILE_IO_ERROR = -EIO, /* FIXME: it seems silly to have special OOM, IO_ERROR return codes for each search. */
-+ FILE_OOM = -ENOMEM /* FIXME: it seems silly to have special OOM, IO_ERROR return codes for each search. */
-+} file_lookup_result;
-+
-+/* behaviors of lookup. If coord we are looking for is actually in a tree,
-+ both coincide. */
-+typedef enum {
-+ /* search exactly for the coord with key given */
-+ FIND_EXACT,
-+ /* search for coord with the maximal key not greater than one
-+ given */
-+ FIND_MAX_NOT_MORE_THAN /*LEFT_SLANT_BIAS */
-+} lookup_bias;
-+
-+typedef enum {
-+ /* number of leaf level of the tree
-+ The fake root has (tree_level=0). */
-+ LEAF_LEVEL = 1,
-+
-+ /* number of level one above leaf level of the tree.
-+
-+ It is supposed that internal tree used by reiser4 to store file
-+ system data and meta data will have height 2 initially (when
-+ created by mkfs).
-+ */
-+ TWIG_LEVEL = 2,
-+} tree_level;
-+
-+/* The "real" maximum ztree height is the 0-origin size of any per-level
-+ array, since the zero'th level is not used. */
-+#define REAL_MAX_ZTREE_HEIGHT (REISER4_MAX_ZTREE_HEIGHT-LEAF_LEVEL)
-+
-+/* enumeration of possible mutual position of item and coord. This enum is
-+ return type of ->is_in_item() item plugin method which see. */
-+typedef enum {
-+ /* coord is on the left of an item */
-+ IP_ON_THE_LEFT,
-+ /* coord is inside item */
-+ IP_INSIDE,
-+ /* coord is inside item, but to the right of the rightmost unit of
-+ this item */
-+ IP_RIGHT_EDGE,
-+ /* coord is on the right of an item */
-+ IP_ON_THE_RIGHT
-+} interposition;
-+
-+/* type of lock to acquire on znode before returning it to caller */
-+typedef enum {
-+ ZNODE_NO_LOCK = 0,
-+ ZNODE_READ_LOCK = 1,
-+ ZNODE_WRITE_LOCK = 2,
-+} znode_lock_mode;
-+
-+/* type of lock request */
-+typedef enum {
-+ ZNODE_LOCK_LOPRI = 0,
-+ ZNODE_LOCK_HIPRI = (1 << 0),
-+
-+ /* By setting the ZNODE_LOCK_NONBLOCK flag in a lock request the call to longterm_lock_znode will not sleep
-+ waiting for the lock to become available. If the lock is unavailable, reiser4_znode_lock will immediately
-+ return the value -E_REPEAT. */
-+ ZNODE_LOCK_NONBLOCK = (1 << 1),
-+ /* An option for longterm_lock_znode which prevents atom fusion */
-+ ZNODE_LOCK_DONT_FUSE = (1 << 2)
-+} znode_lock_request;
-+
-+typedef enum { READ_OP = 0, WRITE_OP = 1 } rw_op;
-+
-+/* used to specify direction of shift. These must be -1 and 1 */
-+typedef enum {
-+ SHIFT_LEFT = 1,
-+ SHIFT_RIGHT = -1
-+} shift_direction;
-+
-+typedef enum {
-+ LEFT_SIDE,
-+ RIGHT_SIDE
-+} sideof;
-+
-+#define round_up( value, order ) \
-+ ( ( typeof( value ) )( ( ( long ) ( value ) + ( order ) - 1U ) & \
-+ ~( ( order ) - 1 ) ) )
-+
-+/* values returned by squalloc_right_neighbor and its auxiliary functions */
-+typedef enum {
-+ /* unit of internal item is moved */
-+ SUBTREE_MOVED = 0,
-+ /* nothing else can be squeezed into left neighbor */
-+ SQUEEZE_TARGET_FULL = 1,
-+ /* all content of node is squeezed into its left neighbor */
-+ SQUEEZE_SOURCE_EMPTY = 2,
-+ /* one more item is copied (this is only returned by
-+ allocate_and_copy_extent to squalloc_twig)) */
-+ SQUEEZE_CONTINUE = 3
-+} squeeze_result;
-+
-+/* Do not change items ids. If you do - there will be format change */
-+typedef enum {
-+ STATIC_STAT_DATA_ID = 0x0,
-+ SIMPLE_DIR_ENTRY_ID = 0x1,
-+ COMPOUND_DIR_ID = 0x2,
-+ NODE_POINTER_ID = 0x3,
-+ EXTENT_POINTER_ID = 0x5,
-+ FORMATTING_ID = 0x6,
-+ CTAIL_ID = 0x7,
-+ BLACK_BOX_ID = 0x8,
-+ LAST_ITEM_ID = 0x9
-+} item_id;
-+
-+/* Flags passed to jnode_flush() to allow it to distinguish default settings based on
-+ whether commit() was called or VM memory pressure was applied. */
-+typedef enum {
-+ /* submit flush queue to disk at jnode_flush completion */
-+ JNODE_FLUSH_WRITE_BLOCKS = 1,
-+
-+ /* flush is called for commit */
-+ JNODE_FLUSH_COMMIT = 2,
-+ /* not implemented */
-+ JNODE_FLUSH_MEMORY_FORMATTED = 4,
-+
-+ /* not implemented */
-+ JNODE_FLUSH_MEMORY_UNFORMATTED = 8,
-+} jnode_flush_flags;
-+
-+/* Flags to insert/paste carry operations. Currently they only used in
-+ flushing code, but in future, they can be used to optimize for repetitive
-+ accesses. */
-+typedef enum {
-+ /* carry is not allowed to shift data to the left when trying to find
-+ free space */
-+ COPI_DONT_SHIFT_LEFT = (1 << 0),
-+ /* carry is not allowed to shift data to the right when trying to find
-+ free space */
-+ COPI_DONT_SHIFT_RIGHT = (1 << 1),
-+ /* carry is not allowed to allocate new node(s) when trying to find
-+ free space */
-+ COPI_DONT_ALLOCATE = (1 << 2),
-+ /* try to load left neighbor if its not in a cache */
-+ COPI_LOAD_LEFT = (1 << 3),
-+ /* try to load right neighbor if its not in a cache */
-+ COPI_LOAD_RIGHT = (1 << 4),
-+ /* shift insertion point to the left neighbor */
-+ COPI_GO_LEFT = (1 << 5),
-+ /* shift insertion point to the right neighbor */
-+ COPI_GO_RIGHT = (1 << 6),
-+ /* try to step back into original node if insertion into new node
-+ fails after shifting data there. */
-+ COPI_STEP_BACK = (1 << 7)
-+} cop_insert_flag;
-+
-+typedef enum {
-+ SAFE_UNLINK, /* safe-link for unlink */
-+ SAFE_TRUNCATE /* safe-link for truncate */
-+} reiser4_safe_link_t;
-+
-+/* this is to show on which list of atom jnode is */
-+typedef enum {
-+ NOT_CAPTURED,
-+ DIRTY_LIST,
-+ CLEAN_LIST,
-+ FQ_LIST,
-+ WB_LIST,
-+ OVRWR_LIST
-+} atom_list;
-+
-+/* __REISER4_FORWARD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/fsdata.c linux-2.6.20/fs/reiser4/fsdata.c
---- linux-2.6.20.orig/fs/reiser4/fsdata.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/fsdata.c 2007-05-06 14:50:43.722983224 +0400
-@@ -0,0 +1,803 @@
-+/* Copyright 2001, 2002, 2003, 2004, 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "fsdata.h"
-+#include "inode.h"
-+
-+/* cache of dir_cursors */
-+static struct kmem_cache *d_cursor_cache;
-+static struct shrinker *d_cursor_shrinker;
-+
-+/* list of unused cursors */
-+static LIST_HEAD(cursor_cache);
-+
-+/* number of cursors in list of unused cursors */
-+static unsigned long d_cursor_unused = 0;
-+
-+/* spinlock protecting manipulations with dir_cursor's hash table and lists */
-+DEFINE_SPINLOCK(d_lock);
-+
-+static reiser4_file_fsdata *create_fsdata(struct file *file);
-+static int file_is_stateless(struct file *file);
-+static void free_fsdata(reiser4_file_fsdata *fsdata);
-+static void kill_cursor(dir_cursor *);
-+
-+/**
-+ * d_cursor_shrink - shrink callback for cache of dir_cursor-s
-+ * @nr: number of objects to free; when 0, nothing is freed
-+ * @mask: GFP mask (not looked at by this callback)
-+ *
-+ * Shrinks d_cursor_cache. Scans list of unused cursors from its head, killing
-+ * at most @nr of them under d_lock. Returns number of still freeable cursors.
-+ */
-+static int d_cursor_shrink(int nr, gfp_t mask)
-+{
-+	/* nr == 0 skips the scan: only the freeable count is reported */
-+	if (nr != 0) {
-+		dir_cursor *scan;
-+
-+		/* d_lock protects the cursor lists */
-+		spin_lock(&d_lock);
-+		/* kill cursors from the head of the unused list, at most nr */
-+		while (!list_empty(&cursor_cache)) {
-+			scan = list_entry(cursor_cache.next, dir_cursor, alist);
-+			assert("nikita-3567", scan->ref == 0);
-+			/* unlinks scan everywhere and drops d_cursor_unused */
-+			kill_cursor(scan);
-+			if (--nr == 0)
-+				break;
-+		}
-+		spin_unlock(&d_lock);
-+	}
-+	return d_cursor_unused;
-+}
-+
-+/**
-+ * reiser4_init_d_cursor - create d_cursor cache
-+ *
-+ * Initializes slab cache of d_cursors. It is part of reiser4 module
-+ * initialization.
-+ */
-+int reiser4_init_d_cursor(void)
-+{
-+ d_cursor_cache = kmem_cache_create("d_cursor", sizeof(dir_cursor), 0,
-+ SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ if (d_cursor_cache == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /*
-+ * actually, d_cursors are "priceless", because there is no way to
-+ * recover information stored in them. On the other hand, we don't
-+ * want to consume all kernel memory by them. As a compromise, just
-+ * assign higher "seeks" value to d_cursor cache, so that it will be
-+ * shrunk only if system is really tight on memory.
-+ */
-+ d_cursor_shrinker = set_shrinker(DEFAULT_SEEKS << 3,
-+ d_cursor_shrink);
-+ if (d_cursor_shrinker == NULL) {
-+ destroy_reiser4_cache(&d_cursor_cache);
-+ d_cursor_cache = NULL;
-+ return RETERR(-ENOMEM);
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_d_cursor - delete d_cursor cache and d_cursor shrinker
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_d_cursor(void)
-+{
-+ BUG_ON(d_cursor_shrinker == NULL);
-+ remove_shrinker(d_cursor_shrinker);
-+ d_cursor_shrinker = NULL;
-+
-+ destroy_reiser4_cache(&d_cursor_cache);
-+}
-+
-+#define D_CURSOR_TABLE_SIZE (256)
-+
-+static inline unsigned long
-+d_cursor_hash(d_cursor_hash_table *table, const d_cursor_key *key)
-+{
-+ assert("nikita-3555", IS_POW(D_CURSOR_TABLE_SIZE));
-+ return (key->oid + key->cid) & (D_CURSOR_TABLE_SIZE - 1);
-+}
-+
-+static inline int d_cursor_eq(const d_cursor_key *k1, const d_cursor_key *k2)
-+{
-+ return k1->cid == k2->cid && k1->oid == k2->oid;
-+}
-+
-+/*
-+ * define functions to manipulate reiser4 super block's hash table of
-+ * dir_cursors
-+ */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(d_cursor,
-+ dir_cursor,
-+ d_cursor_key, key, hash, d_cursor_hash, d_cursor_eq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/**
-+ * reiser4_init_super_d_info - initialize per-super-block d_cursor resources
-+ * @super: super block to initialize
-+ *
-+ * Initializes per-super-block d_cursor's hash table and radix tree. It is part
-+ * of mount.
-+ */
-+int reiser4_init_super_d_info(struct super_block *super)
-+{
-+ d_cursor_info *p;
-+
-+ p = &get_super_private(super)->d_info;
-+
-+ INIT_RADIX_TREE(&p->tree, reiser4_ctx_gfp_mask_get());
-+ return d_cursor_hash_init(&p->table, D_CURSOR_TABLE_SIZE);
-+}
-+
-+/**
-+ * reiser4_done_super_d_info - release per-super-block d_cursor resources
-+ * @super: super block being umounted
-+ *
-+ * It is called on umount. Kills all directory cursors attached to super block.
-+ */
-+void reiser4_done_super_d_info(struct super_block *super)
-+{
-+ d_cursor_info *d_info;
-+ dir_cursor *cursor, *next;
-+
-+ d_info = &get_super_private(super)->d_info;
-+ for_all_in_htable(&d_info->table, d_cursor, cursor, next)
-+ kill_cursor(cursor);
-+
-+ BUG_ON(d_info->tree.rnode != NULL);
-+ d_cursor_hash_done(&d_info->table);
-+}
-+
-+/**
-+ * kill_cursor - free dir_cursor and reiser4_file_fsdata attached to it
-+ * @cursor: cursor to free
-+ *
-+ * Removes reiser4_file_fsdata attached to @cursor from readdir list of
-+ * reiser4_inode, frees that reiser4_file_fsdata. Removes @cursor from
-+ * indices, hash table, list of unused cursors and frees it.
-+ */
-+static void kill_cursor(dir_cursor *cursor)
-+{
-+ unsigned long index;
-+
-+ assert("nikita-3566", cursor->ref == 0);
-+ assert("nikita-3572", cursor->fsdata != NULL);
-+
-+ index = (unsigned long)cursor->key.oid;
-+ list_del_init(&cursor->fsdata->dir.linkage);
-+ free_fsdata(cursor->fsdata);
-+ cursor->fsdata = NULL;
-+
-+ if (list_empty_careful(&cursor->list))
-+ /* this is last cursor for a file. Kill radix-tree entry */
-+ radix_tree_delete(&cursor->info->tree, index);
-+ else {
-+ void **slot;
-+
-+ /*
-+ * there are other cursors for the same oid.
-+ */
-+
-+ /*
-+ * if radix tree point to the cursor being removed, re-target
-+ * radix tree slot to the next cursor in the (non-empty as was
-+ * checked above) element of the circular list of all cursors
-+ * for this oid.
-+ */
-+ slot = radix_tree_lookup_slot(&cursor->info->tree, index);
-+ assert("nikita-3571", *slot != NULL);
-+ if (*slot == cursor)
-+ *slot = list_entry(cursor->list.next, dir_cursor, list);
-+ /* remove cursor from circular list */
-+ list_del_init(&cursor->list);
-+ }
-+ /* remove cursor from the list of unused cursors */
-+ list_del_init(&cursor->alist);
-+ /* remove cursor from the hash table */
-+ d_cursor_hash_remove(&cursor->info->table, cursor);
-+ /* and free it */
-+ kmem_cache_free(d_cursor_cache, cursor);
-+ --d_cursor_unused;
-+}
-+
-+/* possible actions that can be performed on all cursors for the given file */
-+enum cursor_action {
-+ /*
-+ * load all detached state: this is called when stat-data is loaded
-+ * from the disk to recover information about all pending readdirs
-+ */
-+ CURSOR_LOAD,
-+ /*
-+ * detach all state from inode, leaving it in the cache. This is called
-+ * when inode is removed from the memory by memory pressure
-+ */
-+ CURSOR_DISPOSE,
-+ /*
-+ * detach cursors from the inode, and free them. This is called when
-+ * inode is destroyed
-+ */
-+ CURSOR_KILL
-+};
-+
-+/*
-+ * return d_cursor data for the file system @inode is in.
-+ */
-+static inline d_cursor_info *d_info(struct inode *inode)
-+{
-+ return &get_super_private(inode->i_sb)->d_info;
-+}
-+
-+/*
-+ * lookup d_cursor in the per-super-block radix tree.
-+ */
-+static inline dir_cursor *lookup(d_cursor_info * info, unsigned long index)
-+{
-+ return (dir_cursor *) radix_tree_lookup(&info->tree, index);
-+}
-+
-+/*
-+ * attach @cursor to the radix tree. There may be multiple cursors for the
-+ * same oid, they are chained into circular list.
-+ */
-+static void bind_cursor(dir_cursor * cursor, unsigned long index)
-+{
-+ dir_cursor *head;
-+
-+ head = lookup(cursor->info, index);
-+ if (head == NULL) {
-+ /* this is the first cursor for this index */
-+ INIT_LIST_HEAD(&cursor->list);
-+ radix_tree_insert(&cursor->info->tree, index, cursor);
-+ } else {
-+ /* some cursor already exists. Chain ours */
-+ list_add(&cursor->list, &head->list);
-+ }
-+}
-+
-+/*
-+ * detach fsdata (if detachable) from file descriptor, and put cursor on the
-+ * "unused" list. Called when file descriptor is no longer in active use.
-+ */
-+static void clean_fsdata(struct file *file)
-+{
-+ dir_cursor *cursor;
-+ reiser4_file_fsdata *fsdata;
-+
-+ assert("nikita-3570", file_is_stateless(file));
-+
-+ fsdata = (reiser4_file_fsdata *) file->private_data;
-+ if (fsdata != NULL) {
-+ cursor = fsdata->cursor;
-+ if (cursor != NULL) {
-+ spin_lock(&d_lock);
-+ --cursor->ref;
-+ if (cursor->ref == 0) {
-+ list_add_tail(&cursor->alist, &cursor_cache);
-+ ++d_cursor_unused;
-+ }
-+ spin_unlock(&d_lock);
-+ file->private_data = NULL;
-+ }
-+ }
-+}
-+
-+/*
-+ * global counter used to generate "client ids". These ids are encoded into
-+ * high bits of fpos.
-+ */
-+static __u32 cid_counter = 0;
-+#define CID_SHIFT (20)
-+#define CID_MASK (0xfffffull)
-+
-+static void free_file_fsdata_nolock(struct file *);
-+
-+/**
-+ * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table
-+ * @cursor: freshly allocated cursor; it is zeroed and initialized here
-+ * @file: file whose ->private_data is replaced by the new fsdata
-+ * @inode: inode that supplies oid and per-super-block d_cursor_info
-+ *
-+ * Allocates reiser4_file_fsdata, attaches it to @cursor, inserts cursor to
-+ * reiser4 super block's hash table and radix tree, and encodes client id into
-+ * @file->f_pos. Returns 0 on success; on error nothing is linked, the new
-+ * fsdata is released and @cursor is left to the caller.
-+ */
-+static int insert_cursor(dir_cursor *cursor, struct file *file,
-+			 struct inode *inode)
-+{
-+	int result;
-+	reiser4_file_fsdata *fsdata;
-+
-+	memset(cursor, 0, sizeof *cursor);
-+
-+	/* first call to readdir, or rewind: either way create new cursor */
-+	fsdata = create_fsdata(NULL);
-+	if (fsdata != NULL) {
-+		result = radix_tree_preload(reiser4_ctx_gfp_mask_get());
-+		if (result == 0) {
-+			d_cursor_info *info;
-+			oid_t oid;
-+
-+			info = d_info(inode);
-+			oid = get_inode_oid(inode);
-+			/* cid occupies higher 12 bits of f->f_pos. Don't allow it
-+			 * to become negative: this confuses nfsd_readdir() */
-+			cursor->key.cid = (++cid_counter) & 0x7ff;
-+			cursor->key.oid = oid;
-+			cursor->fsdata = fsdata;
-+			cursor->info = info;
-+			cursor->ref = 1;
-+
-+			spin_lock_inode(inode);
-+			/* install fsdata as @file's private_data, dropping old */
-+#if REISER4_DEBUG
-+			if (file->private_data)
-+				warning("", "file has fsdata already");
-+#endif
-+			clean_fsdata(file);
-+			free_file_fsdata_nolock(file);
-+			file->private_data = fsdata;
-+			fsdata->cursor = cursor;
-+			spin_unlock_inode(inode);
-+			spin_lock(&d_lock);
-+			/* insert cursor into hash table */
-+			d_cursor_hash_insert(&info->table, cursor);
-+			/* and chain it into radix-tree */
-+			bind_cursor(cursor, (unsigned long)oid);
-+			spin_unlock(&d_lock);
-+			radix_tree_preload_end();
-+			file->f_pos = ((__u64) cursor->key.cid) << CID_SHIFT;
-+		} else {
-+			/* fsdata is not attached to anything yet: don't leak it */
-+			free_fsdata(fsdata);
-+		}
-+	} else
-+		result = RETERR(-ENOMEM);
-+	return result;
-+}
-+
-+/**
-+ * process_cursors - do action on each cursor attached to inode
-+ * @inode: inode whose cursors are processed; must not be NULL
-+ * @act: action to do
-+ *
-+ * Finds all cursors of @inode in reiser4's super block radix tree of cursors
-+ * and performs action specified by @act on each of cursors.
-+ */
-+static void process_cursors(struct inode *inode, enum cursor_action act)
-+{
-+	oid_t oid;
-+	dir_cursor *start;
-+	struct list_head *head;
-+	reiser4_context *ctx;
-+	d_cursor_info *info;
-+
-+	assert("nikita-3558", inode != NULL);
-+
-+	/* this can be called by
-+	 *
-+	 * kswapd->...->prune_icache->..reiser4_destroy_inode
-+	 *
-+	 * without reiser4_context
-+	 */
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx)) {
-+		warning("vs-23", "failed to init context");
-+		return;
-+	}
-+
-+	info = d_info(inode);
-+	oid = get_inode_oid(inode);
-+	spin_lock_inode(inode);
-+	head = get_readdir_list(inode);
-+	spin_lock(&d_lock);
-+	/* find any cursor for this oid: reference to it is hanging off radix
-+	 * tree */
-+	start = lookup(info, (unsigned long)oid);
-+	if (start != NULL) {
-+		dir_cursor *scan;
-+		reiser4_file_fsdata *fsdata;
-+
-+		/* process circular list of cursors for this oid */
-+		scan = start;
-+		do {
-+			dir_cursor *next;
-+
-+			next = list_entry(scan->list.next, dir_cursor, list);
-+			fsdata = scan->fsdata;
-+			assert("nikita-3557", fsdata != NULL);
-+			if (scan->key.oid == oid) {
-+				switch (act) {
-+				case CURSOR_DISPOSE:
-+					list_del_init(&fsdata->dir.linkage);
-+					break;
-+				case CURSOR_LOAD:
-+					list_add(&fsdata->dir.linkage, head);
-+					break;
-+				case CURSOR_KILL:
-+					kill_cursor(scan);
-+					break;
-+				}
-+			}
-+			if (scan == next)
-+				/* last cursor was just killed */
-+				break;
-+			scan = next;
-+		} while (scan != start);
-+	}
-+	spin_unlock(&d_lock);
-+	/* check that we killed 'em all */
-+	assert("nikita-3568",
-+	       ergo(act == CURSOR_KILL,
-+		    list_empty_careful(get_readdir_list(inode))));
-+	assert("nikita-3569",
-+	       ergo(act == CURSOR_KILL, lookup(info, oid) == NULL));
-+	spin_unlock_inode(inode);
-+	reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_dispose_cursors - removes cursors from inode's list
-+ * @inode: inode to dispose cursors of
-+ *
-+ * For each of cursors corresponding to @inode - removes reiser4_file_fsdata
-+ * attached to cursor from inode's readdir list. This is called when inode is
-+ * removed from the memory by memory pressure.
-+ */
-+void reiser4_dispose_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_DISPOSE);
-+}
-+
-+/**
-+ * reiser4_load_cursors - attach cursors to inode
-+ * @inode: inode to load cursors to
-+ *
-+ * For each of cursors corresponding to @inode - attaches reiser4_file_fsdata
-+ * attached to cursor to inode's readdir list. This is done when inode is
-+ * loaded into memory.
-+ */
-+void reiser4_load_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_LOAD);
-+}
-+
-+/**
-+ * reiser4_kill_cursors - kill all inode cursors
-+ * @inode: inode to kill cursors of
-+ *
-+ * Frees all cursors for this inode. This is called when inode is destroyed.
-+ */
-+void reiser4_kill_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_KILL);
-+}
-+
-+/**
-+ * file_is_stateless - check whether file descriptor is a stateless one
-+ * @file: file to check
-+ *
-+ * True if @file was created by the NFS server on demand for one operation, so
-+ * "detached state" may exist for its inode; false also on fsdata alloc failure.
-+ */
-+static int file_is_stateless(struct file *file)
-+{
-+	reiser4_dentry_fsdata *fsd = reiser4_get_dentry_fsdata(file->f_dentry);
-+	return !IS_ERR(fsd) && fsd->stateless;
-+}
-+
-+/**
-+ * reiser4_get_dir_fpos -
-+ * @dir:
-+ *
-+ * Calculates ->fpos from user-supplied cookie. Normally it is dir->f_pos, but
-+ * in the case of stateless directory operation (readdir-over-nfs), client id
-+ * was encoded in the high bits of cookie and should be masked off.
-+ */
-+loff_t reiser4_get_dir_fpos(struct file *dir)
-+{
-+ if (file_is_stateless(dir))
-+ return dir->f_pos & CID_MASK;
-+ else
-+ return dir->f_pos;
-+}
-+
-+/**
-+ * reiser4_attach_fsdata - try to attach fsdata
-+ * @file: file being read; nothing is done unless it is stateless
-+ * @inode: inode whose oid keys the cursor
-+ *
-+ * Finds or creates cursor for readdir-over-nfs. Returns 0 or error code.
-+ */
-+int reiser4_attach_fsdata(struct file *file, struct inode *inode)
-+{
-+	loff_t pos;
-+	int result;
-+	dir_cursor *cursor;
-+
-+	/* we are serialized by inode->i_mutex */
-+	if (!file_is_stateless(file))
-+		return 0;
-+
-+	pos = file->f_pos;
-+	result = 0;
-+	if (pos == 0) {
-+		/*
-+		 * first call to readdir (or rewind to the beginning of
-+		 * directory)
-+		 */
-+		cursor = kmem_cache_alloc(d_cursor_cache,
-+					  reiser4_ctx_gfp_mask_get());
-+		if (cursor == NULL)
-+			return RETERR(-ENOMEM);
-+		result = insert_cursor(cursor, file, inode);
-+		if (result != 0)
-+			/* cursor was not linked anywhere: don't leak it */
-+			kmem_cache_free(d_cursor_cache, cursor);
-+	} else {
-+		/* try to find existing cursor */
-+		d_cursor_key key;
-+
-+		key.cid = pos >> CID_SHIFT;
-+		key.oid = get_inode_oid(inode);
-+		spin_lock(&d_lock);
-+		cursor = d_cursor_hash_find(&d_info(inode)->table, &key);
-+		if (cursor != NULL) {
-+			/* cursor was found */
-+			if (cursor->ref == 0) {
-+				/* move it from unused list */
-+				list_del_init(&cursor->alist);
-+				--d_cursor_unused;
-+			}
-+			++cursor->ref;
-+		}
-+		spin_unlock(&d_lock);
-+		if (cursor != NULL) {
-+			spin_lock_inode(inode);
-+			assert("nikita-3556", cursor->fsdata->back == NULL);
-+			clean_fsdata(file);
-+			free_file_fsdata_nolock(file);
-+			file->private_data = cursor->fsdata;
-+			spin_unlock_inode(inode);
-+		}
-+	}
-+	return result;
-+}
-+
-+/**
-+ * reiser4_detach_fsdata - detach fsdata from a stateless file
-+ * @file:
-+ *
-+ * detach fsdata, if necessary
-+ */
-+void reiser4_detach_fsdata(struct file *file)
-+{
-+ struct inode *inode;
-+
-+ if (!file_is_stateless(file))
-+ return;
-+
-+ inode = file->f_dentry->d_inode;
-+ spin_lock_inode(inode);
-+ clean_fsdata(file);
-+ spin_unlock_inode(inode);
-+}
-+
-+/* slab for reiser4_dentry_fsdata */
-+static struct kmem_cache *dentry_fsdata_cache;
-+
-+/**
-+ * reiser4_init_dentry_fsdata - create cache of dentry_fsdata
-+ *
-+ * Initializes slab cache of structures attached to dentry->d_fsdata. It is
-+ * part of reiser4 module initialization.
-+ */
-+int reiser4_init_dentry_fsdata(void)
-+{
-+ dentry_fsdata_cache = kmem_cache_create("dentry_fsdata",
-+ sizeof(reiser4_dentry_fsdata),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL,
-+ NULL);
-+ if (dentry_fsdata_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_dentry_fsdata - delete cache of dentry_fsdata
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_dentry_fsdata(void)
-+{
-+ destroy_reiser4_cache(&dentry_fsdata_cache);
-+}
-+
-+/**
-+ * reiser4_get_dentry_fsdata - get fs-specific dentry data
-+ * @dentry: queried dentry
-+ *
-+ * Allocates if necessary and returns per-dentry data that we attach to each
-+ * dentry.
-+ */
-+reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *dentry)
-+{
-+ assert("nikita-1365", dentry != NULL);
-+
-+ if (dentry->d_fsdata == NULL) {
-+ dentry->d_fsdata = kmem_cache_alloc(dentry_fsdata_cache,
-+ reiser4_ctx_gfp_mask_get());
-+ if (dentry->d_fsdata == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ memset(dentry->d_fsdata, 0, sizeof(reiser4_dentry_fsdata));
-+ }
-+ return dentry->d_fsdata;
-+}
-+
-+/**
-+ * reiser4_free_dentry_fsdata - detach and free dentry_fsdata
-+ * @dentry: dentry to free fsdata of
-+ *
-+ * Detaches and frees fs-specific dentry data
-+ */
-+void reiser4_free_dentry_fsdata(struct dentry *dentry)
-+{
-+ if (dentry->d_fsdata != NULL) {
-+ kmem_cache_free(dentry_fsdata_cache, dentry->d_fsdata);
-+ dentry->d_fsdata = NULL;
-+ }
-+}
-+
-+/* slab for reiser4_file_fsdata */
-+static struct kmem_cache *file_fsdata_cache;
-+
-+/**
-+ * reiser4_init_file_fsdata - create cache of reiser4_file_fsdata
-+ *
-+ * Initializes slab cache of structures attached to file->private_data. It is
-+ * part of reiser4 module initialization.
-+ */
-+int reiser4_init_file_fsdata(void)
-+{
-+ file_fsdata_cache = kmem_cache_create("file_fsdata",
-+ sizeof(reiser4_file_fsdata),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL, NULL);
-+ if (file_fsdata_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_file_fsdata - delete cache of reiser4_file_fsdata
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_file_fsdata(void)
-+{
-+ destroy_reiser4_cache(&file_fsdata_cache);
-+}
-+
-+/**
-+ * create_fsdata - allocate and initialize reiser4_file_fsdata
-+ * @file: what to create file_fsdata for, may be NULL
-+ *
-+ * Allocates and initializes reiser4_file_fsdata structure.
-+ */
-+static reiser4_file_fsdata *create_fsdata(struct file *file)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ fsdata = kmem_cache_alloc(file_fsdata_cache,
-+ reiser4_ctx_gfp_mask_get());
-+ if (fsdata != NULL) {
-+ memset(fsdata, 0, sizeof *fsdata);
-+ fsdata->ra1.max_window_size = VM_MAX_READAHEAD * 1024;
-+ fsdata->back = file;
-+ INIT_LIST_HEAD(&fsdata->dir.linkage);
-+ }
-+ return fsdata;
-+}
-+
-+/**
-+ * free_fsdata - free reiser4_file_fsdata
-+ * @fsdata: object to free
-+ *
-+ * Dual to create_fsdata(). Free reiser4_file_fsdata.
-+ */
-+static void free_fsdata(reiser4_file_fsdata *fsdata)
-+{
-+ BUG_ON(fsdata == NULL);
-+ kmem_cache_free(file_fsdata_cache, fsdata);
-+}
-+
-+/**
-+ * reiser4_get_file_fsdata - get fs-specific file data
-+ * @file: queried file
-+ *
-+ * Returns fs-specific data of @file. If it is NULL, allocates it and attaches
-+ * to @file.
-+ */
-+reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *file)
-+{
-+ assert("nikita-1603", file != NULL);
-+
-+ if (file->private_data == NULL) {
-+ reiser4_file_fsdata *fsdata;
-+ struct inode *inode;
-+
-+ fsdata = create_fsdata(file);
-+ if (fsdata == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ inode = file->f_dentry->d_inode;
-+ spin_lock_inode(inode);
-+ if (file->private_data == NULL) {
-+ file->private_data = fsdata;
-+ fsdata = NULL;
-+ }
-+ spin_unlock_inode(inode);
-+ if (fsdata != NULL)
-+ /* other thread initialized ->fsdata */
-+ kmem_cache_free(file_fsdata_cache, fsdata);
-+ }
-+ assert("nikita-2665", file->private_data != NULL);
-+ return file->private_data;
-+}
-+
-+/**
-+ * free_file_fsdata_nolock - detach and free reiser4_file_fsdata
-+ * @file:
-+ *
-+ * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from
-+ * readdir list, frees if it is not linked to d_cursor object.
-+ */
-+static void free_file_fsdata_nolock(struct file *file)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ assert("", spin_inode_is_locked(file->f_dentry->d_inode));
-+ fsdata = file->private_data;
-+ if (fsdata != NULL) {
-+ list_del_init(&fsdata->dir.linkage);
-+ if (fsdata->cursor == NULL)
-+ free_fsdata(fsdata);
-+ }
-+ file->private_data = NULL;
-+}
-+
-+/**
-+ * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata
-+ * @file:
-+ *
-+ * Spinlocks inode and calls free_file_fsdata_nolock to do the work.
-+ */
-+void reiser4_free_file_fsdata(struct file *file)
-+{
-+ spin_lock_inode(file->f_dentry->d_inode);
-+ free_file_fsdata_nolock(file);
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/fsdata.h linux-2.6.20/fs/reiser4/fsdata.h
---- linux-2.6.20.orig/fs/reiser4/fsdata.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/fsdata.h 2007-05-06 14:50:43.722983224 +0400
-@@ -0,0 +1,207 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#if !defined( __REISER4_FSDATA_H__ )
-+#define __REISER4_FSDATA_H__
-+
-+#include "debug.h"
-+#include "kassign.h"
-+#include "seal.h"
-+#include "type_safe_hash.h"
-+#include "plugin/file/file.h"
-+#include "readahead.h"
-+
-+/*
-+ * comment about reiser4_dentry_fsdata
-+ *
-+ *
-+ */
-+
-+/*
-+ * locking: fields of per file descriptor readdir_pos and ->f_pos are
-+ * protected by ->i_mutex on inode. Under this lock following invariant
-+ * holds:
-+ *
-+ * file descriptor is "looking" at the entry_no-th directory entry from
-+ * the beginning of directory. This entry has key dir_entry_key and is
-+ * pos-th entry with duplicate-key sequence.
-+ *
-+ */
-+
-+/* logical position within directory */
-+typedef struct {
-+ /* key of directory entry (actually, part of a key sufficient to
-+ identify directory entry) */
-+ de_id dir_entry_key;
-+ /* ordinal number of directory entry among all entries with the same
-+ key. (Starting from 0.) */
-+ unsigned pos;
-+} dir_pos;
-+
-+typedef struct {
-+ /* f_pos corresponding to this readdir position */
-+ __u64 fpos;
-+ /* logical position within directory */
-+ dir_pos position;
-+ /* logical number of directory entry within
-+ directory */
-+ __u64 entry_no;
-+} readdir_pos;
-+
-+/*
-+ * this is used to speed up lookups for directory entry: on initial call to
-+ * ->lookup() seal and coord of directory entry (if found, that is) are stored
-+ * in struct dentry and reused later to avoid tree traversals.
-+ */
-+typedef struct de_location {
-+ /* seal covering directory entry */
-+ seal_t entry_seal;
-+ /* coord of directory entry */
-+ coord_t entry_coord;
-+ /* ordinal number of directory entry among all entries with the same
-+ key. (Starting from 0.) */
-+ int pos;
-+} de_location;
-+
-+/**
-+ * reiser4_dentry_fsdata - reiser4-specific data attached to dentries
-+ *
-+ * This is allocated dynamically and released in d_op->d_release()
-+ *
-+ * Currently it only contains cached location (hint) of directory entry, but
-+ * it is expected that other information will be accumulated here.
-+ */
-+typedef struct reiser4_dentry_fsdata {
-+ /*
-+ * here will go fields filled by ->lookup() to speedup next
-+ * create/unlink, like blocknr of znode with stat-data, or key of
-+ * stat-data.
-+ */
-+ de_location dec;
-+ int stateless; /* created through reiser4_decode_fh, needs special
-+ * treatment in readdir. */
-+} reiser4_dentry_fsdata;
-+
-+extern int reiser4_init_dentry_fsdata(void);
-+extern void reiser4_done_dentry_fsdata(void);
-+extern reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *);
-+extern void reiser4_free_dentry_fsdata(struct dentry *dentry);
-+
-+/**
-+ * reiser4_file_fsdata - reiser4-specific data attached to file->private_data
-+ *
-+ * This is allocated dynamically and released in inode->i_fop->release
-+ */
-+typedef struct reiser4_file_fsdata {
-+ /*
-+ * pointer back to the struct file which this reiser4_file_fsdata is
-+ * part of
-+ */
-+ struct file *back;
-+ /* detached cursor for stateless readdir. */
-+ struct dir_cursor *cursor;
-+ /*
-+ * We need both directory and regular file parts here, because there
-+ * are file system objects that are files and directories.
-+ */
-+ struct {
-+ /*
-+ * position in directory. It is updated each time directory is
-+ * modified
-+ */
-+ readdir_pos readdir;
-+ /* head of this list is reiser4_inode->lists.readdir_list */
-+ struct list_head linkage;
-+ } dir;
-+ /* hints to speed up operations with regular files: read and write. */
-+ struct {
-+ hint_t hint;
-+ } reg;
-+ struct reiser4_file_ra_state ra1;
-+
-+} reiser4_file_fsdata;
-+
-+extern int reiser4_init_file_fsdata(void);
-+extern void reiser4_done_file_fsdata(void);
-+extern reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *);
-+extern void reiser4_free_file_fsdata(struct file *);
-+
-+/*
-+ * d_cursor is reiser4_file_fsdata not attached to struct file. d_cursors are
-+ * used to address problem reiser4 has with readdir accesses via NFS. See
-+ * plugin/file_ops_readdir.c for more details.
-+ */
-+typedef struct {
-+ __u16 cid;
-+ __u64 oid;
-+} d_cursor_key;
-+
-+/*
-+ * define structures d_cursor_hash_table d_cursor_hash_link which are used to
-+ * maintain hash table of dir_cursor-s in reiser4's super block
-+ */
-+typedef struct dir_cursor dir_cursor;
-+TYPE_SAFE_HASH_DECLARE(d_cursor, dir_cursor);
-+
-+typedef struct d_cursor_info d_cursor_info;
-+
-+struct dir_cursor {
-+ int ref;
-+ reiser4_file_fsdata *fsdata;
-+
-+ /* link to reiser4 super block hash table of cursors */
-+ d_cursor_hash_link hash;
-+
-+ /*
-+ * this is to link cursors to reiser4 super block's radix tree of
-+ * cursors if there are more than one cursor of the same objectid
-+ */
-+ struct list_head list;
-+ d_cursor_key key;
-+ d_cursor_info *info;
-+ /* list of unused cursors */
-+ struct list_head alist;
-+};
-+
-+extern int reiser4_init_d_cursor(void);
-+extern void reiser4_done_d_cursor(void);
-+
-+extern int reiser4_init_super_d_info(struct super_block *);
-+extern void reiser4_done_super_d_info(struct super_block *);
-+
-+extern loff_t reiser4_get_dir_fpos(struct file *);
-+extern int reiser4_attach_fsdata(struct file *, struct inode *);
-+extern void reiser4_detach_fsdata(struct file *);
-+
-+/* these are needed for "stateless" readdir. See plugin/file_ops_readdir.c for
-+ more details */
-+void reiser4_dispose_cursors(struct inode *inode);
-+void reiser4_load_cursors(struct inode *inode);
-+void reiser4_kill_cursors(struct inode *inode);
-+void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
-+ int offset, int adj);
-+
-+/*
-+ * this structure is embedded to reise4_super_info_data. It maintains d_cursors
-+ * (detached readdir state). See plugin/file_ops_readdir.c for more details.
-+ */
-+struct d_cursor_info {
-+ d_cursor_hash_table table;
-+ struct radix_tree_root tree;
-+};
-+
-+/* spinlock protecting readdir cursors */
-+extern spinlock_t d_lock;
-+
-+/* __REISER4_FSDATA_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/init_super.c linux-2.6.20/fs/reiser4/init_super.c
---- linux-2.6.20.orig/fs/reiser4/init_super.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/init_super.c 2007-05-06 14:50:43.722983224 +0400
-@@ -0,0 +1,750 @@
-+/* Copyright by Hans Reiser, 2003 */
-+
-+#include "super.h"
-+#include "inode.h"
-+#include "plugin/plugin_set.h"
-+
-+#include <linux/swap.h>
-+
-+/**
-+ * init_fs_info - allocate reiser4 specific super block
-+ * @super: super block of filesystem
-+ *
-+ * Allocates and initialize reiser4_super_info_data, attaches it to
-+ * super->s_fs_info, initializes structures maintaining d_cursor-s.
-+ */
-+int reiser4_init_fs_info(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = kmalloc(sizeof(reiser4_super_info_data),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!sbinfo)
-+ return RETERR(-ENOMEM);
-+
-+ super->s_fs_info = sbinfo;
-+ super->s_op = NULL;
-+ memset(sbinfo, 0, sizeof(*sbinfo));
-+
-+ ON_DEBUG(INIT_LIST_HEAD(&sbinfo->all_jnodes));
-+ ON_DEBUG(spin_lock_init(&sbinfo->all_guard));
-+
-+ mutex_init(&sbinfo->delete_mutex);
-+ spin_lock_init(&(sbinfo->guard));
-+
-+ /* initialize per-super-block d_cursor resources */
-+ reiser4_init_super_d_info(super);
-+
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_fs_info - free reiser4 specific super block
-+ * @super: super block of filesystem
-+ *
-+ * Performs some sanity checks, releases structures maintaining d_cursor-s,
-+ * frees reiser4_super_info_data.
-+ */
-+void reiser4_done_fs_info(struct super_block *super)
-+{
-+ assert("zam-990", super->s_fs_info != NULL);
-+
-+ /* release per-super-block d_cursor resources */
-+ reiser4_done_super_d_info(super);
-+
-+ /* make sure that there are not jnodes already */
-+ assert("", list_empty(&get_super_private(super)->all_jnodes));
-+ assert("", get_current_context()->trans->atom == NULL);
-+ reiser4_check_block_counters(super);
-+ kfree(super->s_fs_info);
-+ super->s_fs_info = NULL;
-+}
-+
-+/* type of option parseable by parse_option() */
-+typedef enum {
-+ /* value of option is arbitrary string */
-+ OPT_STRING,
-+
-+ /*
-+ * option specifies bit in a bitmask. When option is set - bit in
-+ * sbinfo->fs_flags is set. Examples are bsdgroups, 32bittimes, mtflush,
-+ * dont_load_bitmap, atomic_write.
-+ */
-+ OPT_BIT,
-+
-+ /*
-+ * value of option should conform to sprintf() format. Examples are
-+ * tmgr.atom_max_size=N, tmgr.atom_max_age=N
-+ */
-+ OPT_FORMAT,
-+
-+ /*
-+ * option can take one of predefined values. Example is onerror=panic or
-+ * onerror=remount-ro
-+ */
-+ OPT_ONEOF,
-+} opt_type_t;
-+
-+typedef struct opt_bitmask_bit {
-+ const char *bit_name;
-+ int bit_nr;
-+} opt_bitmask_bit;
-+
-+/* description of option parseable by parse_option() */
-+typedef struct opt_desc {
-+ /* option name.
-+
-+ parsed portion of string has a form "name=value".
-+ */
-+ const char *name;
-+ /* type of option */
-+ opt_type_t type;
-+ union {
-+ /* where to store value of string option (type == OPT_STRING) */
-+ char **string;
-+ /* description of bits for bit option (type == OPT_BIT) */
-+ struct {
-+ int nr;
-+ void *addr;
-+ } bit;
-+ /* description of format and targets for format option (type
-+ == OPT_FORMAT) */
-+ struct {
-+ const char *format;
-+ int nr_args;
-+ void *arg1;
-+ void *arg2;
-+ void *arg3;
-+ void *arg4;
-+ } f;
-+ struct {
-+ int *result;
-+ const char *list[10];
-+ } oneof;
-+ struct {
-+ void *addr;
-+ int nr_bits;
-+ opt_bitmask_bit *bits;
-+ } bitmask;
-+ } u;
-+} opt_desc_t;
-+
-+/**
-+ * parse_option - parse one option
-+ * @opt_strin: starting point of parsing
-+ * @opt: option description
-+ *
-+ * foo=bar,
-+ * ^ ^ ^
-+ * | | +-- replaced to '\0'
-+ * | +-- val_start
-+ * +-- opt_string
-+ * Figures out option type and handles option correspondingly.
-+ */
-+static int parse_option(char *opt_string, opt_desc_t *opt)
-+{
-+ char *val_start;
-+ int result;
-+ const char *err_msg;
-+
-+ /* NOTE-NIKITA think about using lib/cmdline.c functions here. */
-+
-+ val_start = strchr(opt_string, '=');
-+ if (val_start != NULL) {
-+ *val_start = '\0';
-+ ++val_start;
-+ }
-+
-+ err_msg = NULL;
-+ result = 0;
-+ switch (opt->type) {
-+ case OPT_STRING:
-+ if (val_start == NULL) {
-+ err_msg = "String arg missing";
-+ result = RETERR(-EINVAL);
-+ } else
-+ *opt->u.string = val_start;
-+ break;
-+ case OPT_BIT:
-+ if (val_start != NULL)
-+ err_msg = "Value ignored";
-+ else
-+ set_bit(opt->u.bit.nr, opt->u.bit.addr);
-+ break;
-+ case OPT_FORMAT:
-+ if (val_start == NULL) {
-+ err_msg = "Formatted arg missing";
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ if (sscanf(val_start, opt->u.f.format,
-+ opt->u.f.arg1, opt->u.f.arg2, opt->u.f.arg3,
-+ opt->u.f.arg4) != opt->u.f.nr_args) {
-+ err_msg = "Wrong conversion";
-+ result = RETERR(-EINVAL);
-+ }
-+ break;
-+ case OPT_ONEOF:
-+ {
-+ int i = 0;
-+
-+ if (val_start == NULL) {
-+ err_msg = "Value is missing";
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ err_msg = "Wrong option value";
-+ result = RETERR(-EINVAL);
-+ while (opt->u.oneof.list[i]) {
-+ if (!strcmp(opt->u.oneof.list[i], val_start)) {
-+ result = 0;
-+ err_msg = NULL;
-+ *opt->u.oneof.result = i;
-+ break;
-+ }
-+ i++;
-+ }
-+ break;
-+ }
-+ default:
-+ wrong_return_value("nikita-2100", "opt -> type");
-+ break;
-+ }
-+ if (err_msg != NULL) {
-+ warning("nikita-2496", "%s when parsing option \"%s%s%s\"",
-+ err_msg, opt->name, val_start ? "=" : "",
-+ val_start ? : "");
-+ }
-+ return result;
-+}
-+
-+/**
-+ * parse_options - parse reiser4 mount options
-+ * @opt_string: starting point
-+ * @opts: array of option description
-+ * @nr_opts: number of elements in @opts
-+ *
-+ * Parses comma separated list of reiser4 mount options.
-+ */
-+static int parse_options(char *opt_string, opt_desc_t *opts, int nr_opts)
-+{
-+ int result;
-+
-+ result = 0;
-+ while ((result == 0) && opt_string && *opt_string) {
-+ int j;
-+ char *next;
-+
-+ next = strchr(opt_string, ',');
-+ if (next != NULL) {
-+ *next = '\0';
-+ ++next;
-+ }
-+ for (j = 0; j < nr_opts; ++j) {
-+ if (!strncmp(opt_string, opts[j].name,
-+ strlen(opts[j].name))) {
-+ result = parse_option(opt_string, &opts[j]);
-+ break;
-+ }
-+ }
-+ if (j == nr_opts) {
-+ warning("nikita-2307", "Unrecognized option: \"%s\"",
-+ opt_string);
-+ /* traditionally, -EINVAL is returned on wrong mount
-+ option */
-+ result = RETERR(-EINVAL);
-+ }
-+ opt_string = next;
-+ }
-+ return result;
-+}
-+
-+#define NUM_OPT( label, fmt, addr ) \
-+ { \
-+ .name = ( label ), \
-+ .type = OPT_FORMAT, \
-+ .u = { \
-+ .f = { \
-+ .format = ( fmt ), \
-+ .nr_args = 1, \
-+ .arg1 = ( addr ), \
-+ .arg2 = NULL, \
-+ .arg3 = NULL, \
-+ .arg4 = NULL \
-+ } \
-+ } \
-+ }
-+
-+#define SB_FIELD_OPT( field, fmt ) NUM_OPT( #field, fmt, &sbinfo -> field )
-+
-+#define BIT_OPT(label, bitnr) \
-+ { \
-+ .name = label, \
-+ .type = OPT_BIT, \
-+ .u = { \
-+ .bit = { \
-+ .nr = bitnr, \
-+ .addr = &sbinfo->fs_flags \
-+ } \
-+ } \
-+ }
-+
-+#define MAX_NR_OPTIONS (30)
-+
-+/**
-+ * reiser4_init_super_data - initialize reiser4 private super block
-+ * @super: super block to initialize
-+ * @opt_string: list of reiser4 mount options
-+ *
-+ * Sets various reiser4 parameters to default values. Parses mount options and
-+ * overwrites default settings.
-+ */
-+int reiser4_init_super_data(struct super_block *super, char *opt_string)
-+{
-+ int result;
-+ opt_desc_t *opts, *p;
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+ /* initialize super, export, dentry operations */
-+ sbinfo->ops.super = reiser4_super_operations;
-+ sbinfo->ops.export = reiser4_export_operations;
-+ sbinfo->ops.dentry = reiser4_dentry_operations;
-+ super->s_op = &sbinfo->ops.super;
-+ super->s_export_op = &sbinfo->ops.export;
-+
-+ /* initialize transaction manager parameters to default values */
-+ sbinfo->tmgr.atom_max_size = totalram_pages / 4;
-+ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ;
-+ sbinfo->tmgr.atom_min_size = 256;
-+ sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS;
-+
-+ /* initialize cbk cache parameter */
-+ sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS;
-+
-+ /* initialize flush parameters */
-+ sbinfo->flush.relocate_threshold = FLUSH_RELOCATE_THRESHOLD;
-+ sbinfo->flush.relocate_distance = FLUSH_RELOCATE_DISTANCE;
-+ sbinfo->flush.written_threshold = FLUSH_WRITTEN_THRESHOLD;
-+ sbinfo->flush.scan_maxnodes = FLUSH_SCAN_MAXNODES;
-+
-+ sbinfo->optimal_io_size = REISER4_OPTIMAL_IO_SIZE;
-+
-+ /* preliminary tree initializations */
-+ sbinfo->tree.super = super;
-+ sbinfo->tree.carry.new_node_flags = REISER4_NEW_NODE_FLAGS;
-+ sbinfo->tree.carry.new_extent_flags = REISER4_NEW_EXTENT_FLAGS;
-+ sbinfo->tree.carry.paste_flags = REISER4_PASTE_FLAGS;
-+ sbinfo->tree.carry.insert_flags = REISER4_INSERT_FLAGS;
-+ rwlock_init(&(sbinfo->tree.tree_lock));
-+ spin_lock_init(&(sbinfo->tree.epoch_lock));
-+
-+ /* initialize default readahead params */
-+ sbinfo->ra_params.max = num_physpages / 4;
-+ sbinfo->ra_params.flags = 0;
-+
-+ /* allocate memory for structure describing reiser4 mount options */
-+ opts = kmalloc(sizeof(opt_desc_t) * MAX_NR_OPTIONS,
-+ reiser4_ctx_gfp_mask_get());
-+ if (opts == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /* initialize structure describing reiser4 mount options */
-+ p = opts;
-+
-+#if REISER4_DEBUG
-+# define OPT_ARRAY_CHECK if ((p) > (opts) + MAX_NR_OPTIONS) { \
-+ warning ("zam-1046", "opt array is overloaded"); break; \
-+ }
-+#else
-+# define OPT_ARRAY_CHECK noop
-+#endif
-+
-+#define PUSH_OPT(...) \
-+do { \
-+ opt_desc_t o = __VA_ARGS__; \
-+ OPT_ARRAY_CHECK; \
-+ *p ++ = o; \
-+} while (0)
-+
-+#define PUSH_SB_FIELD_OPT(field, format) PUSH_OPT(SB_FIELD_OPT(field, format))
-+#define PUSH_BIT_OPT(name, bit) PUSH_OPT(BIT_OPT(name, bit))
-+
-+ /*
-+ * tmgr.atom_max_size=N
-+ * Atoms containing more than N blocks will be forced to commit. N is
-+ * decimal.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_size, "%u");
-+ /*
-+ * tmgr.atom_max_age=N
-+ * Atoms older than N seconds will be forced to commit. N is decimal.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u");
-+ /*
-+ * tmgr.atom_min_size=N
-+ * In committing an atom to free dirty pages, force the atom less than
-+ * N in size to fuse with another one.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u");
-+ /*
-+ * tmgr.atom_max_flushers=N
-+ * limit of concurrent flushers for one atom. 0 means no limit.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_flushers, "%u");
-+ /*
-+ * tree.cbk_cache_slots=N
-+ * Number of slots in the cbk cache.
-+ */
-+ PUSH_SB_FIELD_OPT(tree.cbk_cache.nr_slots, "%u");
-+ /*
-+ * If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty
-+ * leaf-level blocks it will force them to be relocated.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.relocate_threshold, "%u");
-+ /*
-+ * If flush finds can find a block allocation closer than at most
-+ * FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that
-+ * position.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.relocate_distance, "%u");
-+ /*
-+ * If we have written this much or more blocks before encountering busy
-+ * jnode in flush list - abort flushing hoping that next time we get
-+ * called this jnode will be clean already, and we will save some
-+ * seeks.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.written_threshold, "%u");
-+ /* The maximum number of nodes to scan left on a level during flush. */
-+ PUSH_SB_FIELD_OPT(flush.scan_maxnodes, "%u");
-+ /* preferred IO size */
-+ PUSH_SB_FIELD_OPT(optimal_io_size, "%u");
-+ /* carry flags used for insertion of new nodes */
-+ PUSH_SB_FIELD_OPT(tree.carry.new_node_flags, "%u");
-+ /* carry flags used for insertion of new extents */
-+ PUSH_SB_FIELD_OPT(tree.carry.new_extent_flags, "%u");
-+ /* carry flags used for paste operations */
-+ PUSH_SB_FIELD_OPT(tree.carry.paste_flags, "%u");
-+ /* carry flags used for insert operations */
-+ PUSH_SB_FIELD_OPT(tree.carry.insert_flags, "%u");
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ /*
-+ * Alternative master superblock location in case if it's original
-+ * location is not writeable/accessable. This is offset in BYTES.
-+ */
-+ PUSH_SB_FIELD_OPT(altsuper, "%lu");
-+#endif
-+
-+ /* turn on BSD-style gid assignment */
-+ PUSH_BIT_OPT("bsdgroups", REISER4_BSD_GID);
-+ /* turn on 32 bit times */
-+ PUSH_BIT_OPT("32bittimes", REISER4_32_BIT_TIMES);
-+ /*
-+ * Don't load all bitmap blocks at mount time, it is useful for
-+ * machines with tiny RAM and large disks.
-+ */
-+ PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP);
-+ /* disable transaction commits during write() */
-+ PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE);
-+ /* disable use of write barriers in the reiser4 log writer. */
-+ PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER);
-+
-+ PUSH_OPT(
-+ {
-+ /*
-+ * tree traversal readahead parameters:
-+ * -o readahead:MAXNUM:FLAGS
-+ * MAXNUM - max number fo nodes to request readahead for: -1UL
-+ * will set it to max_sane_readahead()
-+ * FLAGS - combination of bits: RA_ADJCENT_ONLY, RA_ALL_LEVELS,
-+ * CONTINUE_ON_PRESENT
-+ */
-+ .name = "readahead",
-+ .type = OPT_FORMAT,
-+ .u = {
-+ .f = {
-+ .format = "%u:%u",
-+ .nr_args = 2,
-+ .arg1 = &sbinfo->ra_params.max,
-+ .arg2 = &sbinfo->ra_params.flags,
-+ .arg3 = NULL,
-+ .arg4 = NULL
-+ }
-+ }
-+ }
-+ );
-+
-+ /* What to do in case of fs error */
-+ PUSH_OPT(
-+ {
-+ .name = "onerror",
-+ .type = OPT_ONEOF,
-+ .u = {
-+ .oneof = {
-+ .result = &sbinfo->onerror,
-+ .list = {
-+ "panic", "remount-ro", NULL
-+ },
-+ }
-+ }
-+ }
-+ );
-+
-+ /* modify default settings to values set by mount options */
-+ result = parse_options(opt_string, opts, p - opts);
-+ kfree(opts);
-+ if (result != 0)
-+ return result;
-+
-+ /* correct settings to sanity values */
-+ sbinfo->tmgr.atom_max_age *= HZ;
-+ if (sbinfo->tmgr.atom_max_age <= 0)
-+ /* overflow */
-+ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE;
-+
-+ /* round optimal io size up to 512 bytes */
-+ sbinfo->optimal_io_size >>= VFS_BLKSIZE_BITS;
-+ sbinfo->optimal_io_size <<= VFS_BLKSIZE_BITS;
-+ if (sbinfo->optimal_io_size == 0) {
-+ warning("nikita-2497", "optimal_io_size is too small");
-+ return RETERR(-EINVAL);
-+ }
-+ return result;
-+}
-+
-+/**
-+ * reiser4_init_read_super - read reiser4 master super block
-+ * @super: super block to fill
-+ * @silent: if 0 - print warnings
-+ *
-+ * Reads reiser4 master super block either from predefined location or from
-+ * location specified by altsuper mount option, initializes disk format plugin.
-+ */
-+int reiser4_init_read_super(struct super_block *super, int silent)
-+{
-+ struct buffer_head *super_bh;
-+ struct reiser4_master_sb *master_sb;
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ unsigned long blocksize;
-+
-+ read_super_block:
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ if (sbinfo->altsuper)
-+ /*
-+ * read reiser4 master super block at position specified by
-+ * mount option
-+ */
-+ super_bh = sb_bread(super,
-+ (sector_t)(sbinfo->altsuper / super->s_blocksize));
-+ else
-+#endif
-+ /* read reiser4 master super block at 16-th 4096 block */
-+ super_bh = sb_bread(super,
-+ (sector_t)(REISER4_MAGIC_OFFSET / super->s_blocksize));
-+ if (!super_bh)
-+ return RETERR(-EIO);
-+
-+ master_sb = (struct reiser4_master_sb *)super_bh->b_data;
-+ /* check reiser4 magic string */
-+ if (!strncmp(master_sb->magic, REISER4_SUPER_MAGIC_STRING,
-+ sizeof(REISER4_SUPER_MAGIC_STRING))) {
-+ /* reiser4 master super block contains filesystem blocksize */
-+ blocksize = le16_to_cpu(get_unaligned(&master_sb->blocksize));
-+
-+ if (blocksize != PAGE_CACHE_SIZE) {
-+ /*
-+ * currenly reiser4's blocksize must be equal to
-+ * pagesize
-+ */
-+ if (!silent)
-+ warning("nikita-2609",
-+ "%s: wrong block size %ld\n", super->s_id,
-+ blocksize);
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+ }
-+ if (blocksize != super->s_blocksize) {
-+ /*
-+ * filesystem uses different blocksize. Reread master
-+ * super block with correct blocksize
-+ */
-+ brelse(super_bh);
-+ if (!sb_set_blocksize(super, (int)blocksize))
-+ return RETERR(-EINVAL);
-+ goto read_super_block;
-+ }
-+
-+ sbinfo->df_plug =
-+ disk_format_plugin_by_id(
-+ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
-+ if (sbinfo->df_plug == NULL) {
-+ if (!silent)
-+ warning("nikita-26091",
-+ "%s: unknown disk format plugin %d\n",
-+ super->s_id,
-+ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+ }
-+ sbinfo->diskmap_block = le64_to_cpu(get_unaligned(&master_sb->diskmap));
-+ brelse(super_bh);
-+ return 0;
-+ }
-+
-+ /* there is no reiser4 on the device */
-+ if (!silent)
-+ warning("nikita-2608",
-+ "%s: wrong master super block magic", super->s_id);
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+}
-+
-+static struct {
-+ reiser4_plugin_type type;
-+ reiser4_plugin_id id;
-+} default_plugins[PSET_LAST] = {
-+ [PSET_FILE] = {
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID
-+ },
-+ [PSET_DIR] = {
-+ .type = REISER4_DIR_PLUGIN_TYPE,
-+ .id = HASHED_DIR_PLUGIN_ID
-+ },
-+ [PSET_HASH] = {
-+ .type = REISER4_HASH_PLUGIN_TYPE,
-+ .id = R5_HASH_ID
-+ },
-+ [PSET_FIBRATION] = {
-+ .type = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_DOT_O
-+ },
-+ [PSET_PERM] = {
-+ .type = REISER4_PERM_PLUGIN_TYPE,
-+ .id = NULL_PERM_ID
-+ },
-+ [PSET_FORMATTING] = {
-+ .type = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = SMALL_FILE_FORMATTING_ID
-+ },
-+ [PSET_SD] = {
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = STATIC_STAT_DATA_ID
-+ },
-+ [PSET_DIR_ITEM] = {
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = COMPOUND_DIR_ID
-+ },
-+ [PSET_CIPHER] = {
-+ .type = REISER4_CIPHER_PLUGIN_TYPE,
-+ .id = NONE_CIPHER_ID
-+ },
-+ [PSET_DIGEST] = {
-+ .type = REISER4_DIGEST_PLUGIN_TYPE,
-+ .id = SHA256_32_DIGEST_ID
-+ },
-+ [PSET_COMPRESSION] = {
-+ .type = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = LZO1_COMPRESSION_ID
-+ },
-+ [PSET_COMPRESSION_MODE] = {
-+ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = CONVX_COMPRESSION_MODE_ID
-+ },
-+ [PSET_CLUSTER] = {
-+ .type = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .id = CLUSTER_64K_ID
-+ },
-+ [PSET_CREATE] = {
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID
-+ }
-+};
-+
-+/* access to default plugin table */
-+reiser4_plugin *get_default_plugin(pset_member memb)
-+{
-+ return plugin_by_id(default_plugins[memb].type,
-+ default_plugins[memb].id);
-+}
-+
-+/**
-+ * reiser4_init_root_inode - obtain inode of root directory
-+ * @super: super block of filesystem
-+ *
-+ * Obtains inode of root directory (reading it from disk), initializes plugin
-+ * set it was not initialized.
-+ */
-+int reiser4_init_root_inode(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ struct inode *inode;
-+ int result = 0;
-+
-+ inode = reiser4_iget(super, sbinfo->df_plug->root_dir_key(super), 0);
-+ if (IS_ERR(inode))
-+ return RETERR(PTR_ERR(inode));
-+
-+ super->s_root = d_alloc_root(inode);
-+ if (!super->s_root) {
-+ iput(inode);
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ super->s_root->d_op = &sbinfo->ops.dentry;
-+
-+ if (!is_inode_loaded(inode)) {
-+ pset_member memb;
-+ plugin_set *pset;
-+
-+ pset = reiser4_inode_data(inode)->pset;
-+ for (memb = 0; memb < PSET_LAST; ++memb) {
-+
-+ if (aset_get(pset, memb) != NULL)
-+ continue;
-+
-+ result = grab_plugin_pset(inode, NULL, memb);
-+ if (result != 0)
-+ break;
-+
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+ }
-+
-+ if (result == 0) {
-+ if (REISER4_DEBUG) {
-+ for (memb = 0; memb < PSET_LAST; ++memb)
-+ assert("nikita-3500",
-+ aset_get(pset, memb) != NULL);
-+ }
-+ } else
-+ warning("nikita-3448", "Cannot set plugins of root: %i",
-+ result);
-+ reiser4_iget_complete(inode);
-+
-+ /* As the default pset kept in the root dir may has been changed
-+ (length is unknown), call update_sd. */
-+ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
-+ result = reiser4_grab_space(
-+ inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+
-+ if (result == 0)
-+ result = reiser4_update_sd(inode);
-+
-+ all_grabbed2free();
-+ }
-+ }
-+
-+ super->s_maxbytes = MAX_LFS_FILESIZE;
-+ return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/inode.c linux-2.6.20/fs/reiser4/inode.c
---- linux-2.6.20.orig/fs/reiser4/inode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/inode.c 2007-05-06 14:50:43.726984474 +0400
-@@ -0,0 +1,709 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Inode specific operations. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "kassign.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "dscale.h"
-+#include "plugin/item/item.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/plugin.h"
-+#include "plugin/object.h"
-+#include "znode.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+
-+/* return reiser4 internal tree which inode belongs to */
-+/* Audited by: green(2002.06.17) */
-+reiser4_tree *reiser4_tree_by_inode(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-256", inode != NULL);
-+ assert("nikita-257", inode->i_sb != NULL);
-+ return reiser4_get_tree(inode->i_sb);
-+}
-+
-+/* return reiser4-specific inode flags */
-+static inline unsigned long *inode_flags(const struct inode *const inode)
-+{
-+ assert("nikita-2842", inode != NULL);
-+ return &reiser4_inode_data(inode)->flags;
-+}
-+
-+/* set reiser4-specific flag @f in @inode */
-+void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2248", inode != NULL);
-+ set_bit((int)f, inode_flags(inode));
-+}
-+
-+/* clear reiser4-specific flag @f in @inode */
-+void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2250", inode != NULL);
-+ clear_bit((int)f, inode_flags(inode));
-+}
-+
-+/* true if reiser4-specific flag @f is set in @inode */
-+int reiser4_inode_get_flag(const struct inode *inode,
-+ reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2251", inode != NULL);
-+ return test_bit((int)f, inode_flags(inode));
-+}
-+
-+/* convert oid to inode number */
-+ino_t oid_to_ino(oid_t oid)
-+{
-+ return (ino_t) oid;
-+}
-+
-+/* convert oid to user visible inode number */
-+ino_t oid_to_uino(oid_t oid)
-+{
-+ /* reiser4 object is uniquely identified by oid which is 64 bit
-+ quantity. Kernel in-memory inode is indexed (in the hash table) by
-+ 32 bit i_ino field, but this is not a problem, because there is a
-+ way to further distinguish inodes with identical inode numbers
-+ (find_actor supplied to iget()).
-+
-+ But user space expects unique 32 bit inode number. Obviously this
-+ is impossible. Work-around is to somehow hash oid into user visible
-+ inode number.
-+ */
-+ oid_t max_ino = (ino_t) ~ 0;
-+
-+ if (REISER4_INO_IS_OID || (oid <= max_ino))
-+ return oid;
-+ else
-+ /* this is remotely similar to algorithm used to find next pid
-+ to use for process: after wrap-around start from some
-+ offset rather than from 0. Idea is that there are some long
-+ living objects with which we don't want to collide.
-+ */
-+ return REISER4_UINO_SHIFT + ((oid - max_ino) & (max_ino >> 1));
-+}
-+
-+/* check that "inode" is on reiser4 file-system */
-+int is_reiser4_inode(const struct inode *inode /* inode queried */ )
-+{
-+ return inode != NULL && is_reiser4_super(inode->i_sb);
-+}
-+
-+/* Maximal length of a name that can be stored in directory @inode.
-+
-+ This is used in check during file creation and lookup. */
-+int reiser4_max_filename_len(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-287", is_reiser4_inode(inode));
-+ assert("nikita-1710", inode_dir_item_plugin(inode));
-+ if (inode_dir_item_plugin(inode)->s.dir.max_name_len)
-+ return inode_dir_item_plugin(inode)->s.dir.max_name_len(inode);
-+ else
-+ return 255;
-+}
-+
-+#if REISER4_USE_COLLISION_LIMIT
-+/* Maximal number of hash collisions for this directory. */
-+int max_hash_collisions(const struct inode *dir /* inode queried */ )
-+{
-+ assert("nikita-1711", dir != NULL);
-+ return reiser4_inode_data(dir)->plugin.max_collisions;
-+}
-+#endif /* REISER4_USE_COLLISION_LIMIT */
-+
-+/* Install file, inode, and address_space operation on @inode, depending on
-+ its mode. */
-+int setup_inode_ops(struct inode *inode /* inode to intialize */ ,
-+ reiser4_object_create_data * data /* parameters to create
-+ * object */ )
-+{
-+ reiser4_super_info_data *sinfo;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ fplug = inode_file_plugin(inode);
-+ dplug = inode_dir_plugin(inode);
-+
-+ sinfo = get_super_private(inode->i_sb);
-+
-+ switch (inode->i_mode & S_IFMT) {
-+ case S_IFSOCK:
-+ case S_IFBLK:
-+ case S_IFCHR:
-+ case S_IFIFO:
-+ {
-+ dev_t rdev; /* to keep gcc happy */
-+
-+ assert("vs-46", fplug != NULL);
-+ /* ugly hack with rdev */
-+ if (data == NULL) {
-+ rdev = inode->i_rdev;
-+ inode->i_rdev = 0;
-+ } else
-+ rdev = data->rdev;
-+ inode->i_blocks = 0;
-+ assert("vs-42", fplug->h.id == SPECIAL_FILE_PLUGIN_ID);
-+ inode->i_op = &file_plugins[fplug->h.id].inode_ops;
-+ /* initialize inode->i_fop and inode->i_rdev for block and char
-+ devices */
-+ init_special_inode(inode, inode->i_mode, rdev);
-+ /* all address space operations are null */
-+ inode->i_mapping->a_ops =
-+ &file_plugins[fplug->h.id].as_ops;
-+ break;
-+ }
-+ case S_IFLNK:
-+ assert("vs-46", fplug != NULL);
-+ assert("vs-42", fplug->h.id == SYMLINK_FILE_PLUGIN_ID);
-+ inode->i_op = &file_plugins[fplug->h.id].inode_ops;
-+ inode->i_fop = NULL;
-+ /* all address space operations are null */
-+ inode->i_mapping->a_ops = &file_plugins[fplug->h.id].as_ops;
-+ break;
-+ case S_IFDIR:
-+ assert("vs-46", dplug != NULL);
-+ assert("vs-43", (dplug->h.id == HASHED_DIR_PLUGIN_ID ||
-+ dplug->h.id == SEEKABLE_HASHED_DIR_PLUGIN_ID));
-+ inode->i_op = &dir_plugins[dplug->h.id].inode_ops;
-+ inode->i_fop = &dir_plugins[dplug->h.id].file_ops;
-+ inode->i_mapping->a_ops = &dir_plugins[dplug->h.id].as_ops;
-+ break;
-+ case S_IFREG:
-+ assert("vs-46", fplug != NULL);
-+ assert("vs-43", (fplug->h.id == UNIX_FILE_PLUGIN_ID ||
-+ fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+ inode->i_op = &file_plugins[fplug->h.id].inode_ops;
-+ inode->i_fop = &file_plugins[fplug->h.id].file_ops;
-+ inode->i_mapping->a_ops = &file_plugins[fplug->h.id].as_ops;
-+ break;
-+ default:
-+ warning("nikita-291", "wrong file mode: %o for %llu",
-+ inode->i_mode,
-+ (unsigned long long)get_inode_oid(inode));
-+ reiser4_make_bad_inode(inode);
-+ return RETERR(-EINVAL);
-+ }
-+ return 0;
-+}
-+
-+/* Initialize inode from disk data. Called with inode locked.
-+ Return inode locked. */
-+static int init_inode(struct inode *inode /* inode to intialise */ ,
-+ coord_t * coord /* coord of stat data */ )
-+{
-+ int result;
-+ item_plugin *iplug;
-+ void *body;
-+ int length;
-+ reiser4_inode *state;
-+
-+ assert("nikita-292", coord != NULL);
-+ assert("nikita-293", inode != NULL);
-+
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (result)
-+ return result;
-+ iplug = item_plugin_by_coord(coord);
-+ body = item_body_by_coord(coord);
-+ length = item_length_by_coord(coord);
-+
-+ assert("nikita-295", iplug != NULL);
-+ assert("nikita-296", body != NULL);
-+ assert("nikita-297", length > 0);
-+
-+ /* inode is under I_LOCK now */
-+
-+ state = reiser4_inode_data(inode);
-+ /* call stat-data plugin method to load sd content into inode */
-+ result = iplug->s.sd.init_inode(inode, body, length);
-+ set_plugin(&state->pset, PSET_SD, item_plugin_to_plugin(iplug));
-+ if (result == 0) {
-+ result = setup_inode_ops(inode, NULL);
-+ if (result == 0 && inode->i_sb->s_root &&
-+ inode->i_sb->s_root->d_inode)
-+ result = finish_pset(inode);
-+ }
-+ zrelse(coord->node);
-+ return result;
-+}
-+
-+/* read `inode' from the disk. This is what was previously in
-+ reiserfs_read_inode2().
-+
-+ Must be called with inode locked. Return inode still locked.
-+*/
-+static int read_inode(struct inode *inode /* inode to read from disk */ ,
-+ const reiser4_key * key /* key of stat data */ ,
-+ int silent)
-+{
-+ int result;
-+ lock_handle lh;
-+ reiser4_inode *info;
-+ coord_t coord;
-+
-+ assert("nikita-298", inode != NULL);
-+ assert("nikita-1945", !is_inode_loaded(inode));
-+
-+ info = reiser4_inode_data(inode);
-+ assert("nikita-300", info->locality_id != 0);
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ /* locate stat-data in a tree and return znode locked */
-+ result = lookup_sd(inode, ZNODE_READ_LOCK, &coord, &lh, key, silent);
-+ assert("nikita-301", !is_inode_loaded(inode));
-+ if (result == 0) {
-+ /* use stat-data plugin to load sd into inode. */
-+ result = init_inode(inode, &coord);
-+ if (result == 0) {
-+ /* initialize stat-data seal */
-+ spin_lock_inode(inode);
-+ reiser4_seal_init(&info->sd_seal, &coord, key);
-+ info->sd_coord = coord;
-+ spin_unlock_inode(inode);
-+
-+ /* call file plugin's method to initialize plugin
-+ * specific part of inode */
-+ if (inode_file_plugin(inode)->init_inode_data)
-+ inode_file_plugin(inode)->init_inode_data(inode,
-+ NULL,
-+ 0);
-+ /* load detached directory cursors for stateless
-+ * directory readers (NFS). */
-+ reiser4_load_cursors(inode);
-+
-+ /* Check the opened inode for consistency. */
-+ result =
-+ get_super_private(inode->i_sb)->df_plug->
-+ check_open(inode);
-+ }
-+ }
-+ /* lookup_sd() doesn't release coord because we want znode
-+ stay read-locked while stat-data fields are accessed in
-+ init_inode() */
-+ done_lh(&lh);
-+
-+ if (result != 0)
-+ reiser4_make_bad_inode(inode);
-+ return result;
-+}
-+
-+/* initialise new reiser4 inode being inserted into hash table. */
-+static int init_locked_inode(struct inode *inode /* new inode */ ,
-+ void *opaque /* key of stat data passed to the
-+ * iget5_locked as cookie */ )
-+{
-+ reiser4_key *key;
-+
-+ assert("nikita-1995", inode != NULL);
-+ assert("nikita-1996", opaque != NULL);
-+ key = opaque;
-+ set_inode_oid(inode, get_key_objectid(key));
-+ reiser4_inode_data(inode)->locality_id = get_key_locality(key);
-+ return 0;
-+}
-+
-+/* reiser4_inode_find_actor() - "find actor" supplied by reiser4 to iget5_locked().
-+
-+ This function is called by iget5_locked() to distinguish reiser4 inodes
-+ having the same inode numbers. Such inodes can only exist due to some error
-+ condition. One of them should be bad. Inodes with identical inode numbers
-+ (objectids) are distinguished by their packing locality.
-+
-+*/
-+static int reiser4_inode_find_actor(struct inode *inode /* inode from hash table to
-+ * check */ ,
-+ void *opaque /* "cookie" passed to
-+ * iget5_locked(). This is stat data
-+ * key */ )
-+{
-+ reiser4_key *key;
-+
-+ key = opaque;
-+ return
-+ /* oid is unique, so first term is enough, actually. */
-+ get_inode_oid(inode) == get_key_objectid(key) &&
-+ /*
-+ * also, locality should be checked, but locality is stored in
-+ * the reiser4-specific part of the inode, and actor can be
-+ * called against arbitrary inode that happened to be in this
-+ * hash chain. Hence we first have to check that this is
-+ * reiser4 inode at least. is_reiser4_inode() is probably too
-+ * early to call, as inode may have ->i_op not yet
-+ * initialised.
-+ */
-+ is_reiser4_super(inode->i_sb) &&
-+ /*
-+ * usually objectid is unique, but pseudo files use counter to
-+ * generate objectid. All pseudo files are placed into special
-+ * (otherwise unused) locality.
-+ */
-+ reiser4_inode_data(inode)->locality_id == get_key_locality(key);
-+}
-+
-+/* hook for kmem_cache_create */
-+void loading_init_once(reiser4_inode * info)
-+{
-+ mutex_init(&info->loading);
-+}
-+
-+/* for reiser4_alloc_inode */
-+void loading_alloc(reiser4_inode * info)
-+{
-+ assert("vs-1717", !mutex_is_locked(&info->loading));
-+}
-+
-+/* for reiser4_destroy */
-+void loading_destroy(reiser4_inode * info)
-+{
-+ assert("vs-1717a", !mutex_is_locked(&info->loading));
-+}
-+
-+static void loading_begin(reiser4_inode * info)
-+{
-+ mutex_lock(&info->loading);
-+}
-+
-+static void loading_end(reiser4_inode * info)
-+{
-+ mutex_unlock(&info->loading);
-+}
-+
-+/**
-+ * reiser4_iget - obtain inode via iget5_locked, read from disk if necessary
-+ * @super: super block of filesystem
-+ * @key: key of inode's stat-data
-+ * @silent:
-+ *
-+ * This is our helper function a la iget(). This is be called by
-+ * lookup_common() and reiser4_read_super(). Return inode locked or error
-+ * encountered.
-+ */
-+struct inode *reiser4_iget(struct super_block *super, const reiser4_key *key,
-+ int silent)
-+{
-+ struct inode *inode;
-+ int result;
-+ reiser4_inode *info;
-+
-+ assert("nikita-302", super != NULL);
-+ assert("nikita-303", key != NULL);
-+
-+ result = 0;
-+
-+ /* call iget(). Our ->read_inode() is dummy, so this will either
-+ find inode in cache or return uninitialised inode */
-+ inode = iget5_locked(super,
-+ (unsigned long)get_key_objectid(key),
-+ reiser4_inode_find_actor,
-+ init_locked_inode, (reiser4_key *) key);
-+ if (inode == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ if (is_bad_inode(inode)) {
-+ warning("nikita-304", "Bad inode found");
-+ reiser4_print_key("key", key);
-+ iput(inode);
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ info = reiser4_inode_data(inode);
-+
-+ /* Reiser4 inode state bit REISER4_LOADED is used to distinguish fully
-+ loaded and initialized inode from just allocated inode. If
-+ REISER4_LOADED bit is not set, reiser4_iget() completes loading under
-+ info->loading. The place in reiser4 which uses not initialized inode
-+ is the reiser4 repacker, see repacker-related functions in
-+ plugin/item/extent.c */
-+ if (!is_inode_loaded(inode)) {
-+ loading_begin(info);
-+ if (!is_inode_loaded(inode)) {
-+ /* locking: iget5_locked returns locked inode */
-+ assert("nikita-1941", !is_inode_loaded(inode));
-+ assert("nikita-1949",
-+ reiser4_inode_find_actor(inode,
-+ (reiser4_key *) key));
-+ /* now, inode has objectid as ->i_ino and locality in
-+ reiser4-specific part. This is enough for
-+ read_inode() to read stat data from the disk */
-+ result = read_inode(inode, key, silent);
-+ } else
-+ loading_end(info);
-+ }
-+
-+ if (inode->i_state & I_NEW)
-+ unlock_new_inode(inode);
-+
-+ if (is_bad_inode(inode)) {
-+ assert("vs-1717", result != 0);
-+ loading_end(info);
-+ iput(inode);
-+ inode = ERR_PTR(result);
-+ } else if (REISER4_DEBUG) {
-+ reiser4_key found_key;
-+
-+ assert("vs-1717", result == 0);
-+ build_sd_key(inode, &found_key);
-+ if (!keyeq(&found_key, key)) {
-+ warning("nikita-305", "Wrong key in sd");
-+ reiser4_print_key("sought for", key);
-+ reiser4_print_key("found", &found_key);
-+ }
-+ if (inode->i_nlink == 0) {
-+ warning("nikita-3559", "Unlinked inode found: %llu\n",
-+ (unsigned long long)get_inode_oid(inode));
-+ }
-+ }
-+ return inode;
-+}
-+
-+/* reiser4_iget() may return not fully initialized inode, this function should
-+ * be called after one completes reiser4 inode initializing. */
-+void reiser4_iget_complete(struct inode *inode)
-+{
-+ assert("zam-988", is_reiser4_inode(inode));
-+
-+ if (!is_inode_loaded(inode)) {
-+ reiser4_inode_set_flag(inode, REISER4_LOADED);
-+ loading_end(reiser4_inode_data(inode));
-+ }
-+}
-+
-+void reiser4_make_bad_inode(struct inode *inode)
-+{
-+ assert("nikita-1934", inode != NULL);
-+
-+ /* clear LOADED bit */
-+ reiser4_inode_clr_flag(inode, REISER4_LOADED);
-+ make_bad_inode(inode);
-+ return;
-+}
-+
-+file_plugin *inode_file_plugin(const struct inode * inode)
-+{
-+ assert("nikita-1997", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->file;
-+}
-+
-+dir_plugin *inode_dir_plugin(const struct inode * inode)
-+{
-+ assert("nikita-1998", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->dir;
-+}
-+
-+formatting_plugin *inode_formatting_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2000", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->formatting;
-+}
-+
-+hash_plugin *inode_hash_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2001", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->hash;
-+}
-+
-+fibration_plugin *inode_fibration_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2001", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->fibration;
-+}
-+
-+cipher_plugin *inode_cipher_plugin(const struct inode * inode)
-+{
-+ assert("edward-36", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->cipher;
-+}
-+
-+compression_plugin *inode_compression_plugin(const struct inode * inode)
-+{
-+ assert("edward-37", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->compression;
-+}
-+
-+compression_mode_plugin *inode_compression_mode_plugin(const struct inode *
-+ inode)
-+{
-+ assert("edward-1330", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->compression_mode;
-+}
-+
-+cluster_plugin *inode_cluster_plugin(const struct inode * inode)
-+{
-+ assert("edward-1328", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->cluster;
-+}
-+
-+file_plugin *inode_create_plugin(const struct inode * inode)
-+{
-+ assert("edward-1329", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->create;
-+}
-+
-+digest_plugin *inode_digest_plugin(const struct inode * inode)
-+{
-+ assert("edward-86", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->digest;
-+}
-+
-+item_plugin *inode_sd_plugin(const struct inode * inode)
-+{
-+ assert("vs-534", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->sd;
-+}
-+
-+item_plugin *inode_dir_item_plugin(const struct inode * inode)
-+{
-+ assert("vs-534", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->dir_item;
-+}
-+
-+file_plugin *child_create_plugin(const struct inode * inode)
-+{
-+ assert("edward-1329", inode != NULL);
-+ return reiser4_inode_data(inode)->hset->create;
-+}
-+
-+void inode_set_extension(struct inode *inode, sd_ext_bits ext)
-+{
-+ reiser4_inode *state;
-+
-+ assert("nikita-2716", inode != NULL);
-+ assert("nikita-2717", ext < LAST_SD_EXTENSION);
-+ assert("nikita-3491", spin_inode_is_locked(inode));
-+
-+ state = reiser4_inode_data(inode);
-+ state->extmask |= 1 << ext;
-+ /* force re-calculation of stat-data length on next call to
-+ update_sd(). */
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+}
-+
-+void inode_clr_extension(struct inode *inode, sd_ext_bits ext)
-+{
-+ reiser4_inode *state;
-+
-+ assert("vpf-1926", inode != NULL);
-+ assert("vpf-1927", ext < LAST_SD_EXTENSION);
-+ assert("vpf-1928", spin_inode_is_locked(inode));
-+
-+ state = reiser4_inode_data(inode);
-+ state->extmask &= ~(1 << ext);
-+ /* force re-calculation of stat-data length on next call to
-+ update_sd(). */
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+}
-+
-+void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new)
-+{
-+ assert("edward-1287", inode != NULL);
-+ if (!dscale_fit(old, new))
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+ return;
-+}
-+
-+void inode_check_scale(struct inode *inode, __u64 old, __u64 new)
-+{
-+ assert("nikita-2875", inode != NULL);
-+ spin_lock_inode(inode);
-+ inode_check_scale_nolock(inode, old, new);
-+ spin_unlock_inode(inode);
-+}
-+
-+/*
-+ * initialize ->ordering field of inode. This field defines how file stat-data
-+ * and body is ordered within a tree with respect to other objects within the
-+ * same parent directory.
-+ */
-+void
-+init_inode_ordering(struct inode *inode,
-+ reiser4_object_create_data * crd, int create)
-+{
-+ reiser4_key key;
-+
-+ if (create) {
-+ struct inode *parent;
-+
-+ parent = crd->parent;
-+ assert("nikita-3224", inode_dir_plugin(parent) != NULL);
-+ inode_dir_plugin(parent)->build_entry_key(parent,
-+ &crd->dentry->d_name,
-+ &key);
-+ } else {
-+ coord_t *coord;
-+
-+ coord = &reiser4_inode_data(inode)->sd_coord;
-+ coord_clear_iplug(coord);
-+ /* safe to use ->sd_coord, because node is under long term
-+ * lock */
-+ WITH_DATA(coord->node, item_key_by_coord(coord, &key));
-+ }
-+
-+ set_inode_ordering(inode, get_key_ordering(&key));
-+}
-+
-+znode *inode_get_vroot(struct inode *inode)
-+{
-+ reiser4_block_nr blk;
-+ znode *result;
-+
-+ spin_lock_inode(inode);
-+ blk = reiser4_inode_data(inode)->vroot;
-+ spin_unlock_inode(inode);
-+ if (!disk_addr_eq(&UBER_TREE_ADDR, &blk))
-+ result = zlook(reiser4_tree_by_inode(inode), &blk);
-+ else
-+ result = NULL;
-+ return result;
-+}
-+
-+void inode_set_vroot(struct inode *inode, znode *vroot)
-+{
-+ spin_lock_inode(inode);
-+ reiser4_inode_data(inode)->vroot = *znode_get_block(vroot);
-+ spin_unlock_inode(inode);
-+}
-+
-+#if REISER4_DEBUG
-+
-+void reiser4_inode_invariant(const struct inode *inode)
-+{
-+ assert("nikita-3077", spin_inode_is_locked(inode));
-+}
-+
-+int inode_has_no_jnodes(reiser4_inode * r4_inode)
-+{
-+ return jnode_tree_by_reiser4_inode(r4_inode)->rnode == NULL &&
-+ r4_inode->nr_jnodes == 0;
-+}
-+
-+#endif
-+
-+/* true if directory is empty (only contains dot and dotdot) */
-+/* FIXME: shouldn't it be dir plugin method? */
-+int is_dir_empty(const struct inode *dir)
-+{
-+ assert("nikita-1976", dir != NULL);
-+
-+ /* rely on our method to maintain directory i_size being equal to the
-+ number of entries. */
-+ return dir->i_size <= 2 ? 0 : RETERR(-ENOTEMPTY);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/inode.h linux-2.6.20/fs/reiser4/inode.h
---- linux-2.6.20.orig/fs/reiser4/inode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/inode.h 2007-05-06 14:50:43.726984474 +0400
-@@ -0,0 +1,438 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Inode functions. */
-+
-+#if !defined( __REISER4_INODE_H__ )
-+#define __REISER4_INODE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "seal.h"
-+#include "plugin/plugin.h"
-+#include "plugin/file/cryptcompress.h"
-+#include "plugin/file/file.h"
-+#include "plugin/dir/dir.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/security/perm.h"
-+#include "vfs_ops.h"
-+#include "jnode.h"
-+#include "fsdata.h"
-+
-+#include <linux/types.h> /* for __u?? , ino_t */
-+#include <linux/fs.h> /* for struct super_block, struct
-+ * rw_semaphore, etc */
-+#include <linux/spinlock.h>
-+#include <asm/types.h>
-+
-+/* reiser4-specific inode flags. They are "transient" and are not
-+ supposed to be stored on disk. Used to trace "state" of
-+ inode
-+*/
-+typedef enum {
-+ /* this is light-weight inode, inheriting some state from its
-+ parent */
-+ REISER4_LIGHT_WEIGHT = 0,
-+ /* stat data wasn't yet created */
-+ REISER4_NO_SD = 1,
-+ /* internal immutable flag. Currently is only used
-+ to avoid race condition during file creation.
-+ See comment in create_object(). */
-+ REISER4_IMMUTABLE = 2,
-+ /* inode was read from storage */
-+ REISER4_LOADED = 3,
-+ /* this bit is set for symlinks. inode->i_private points to target
-+ name of symlink. */
-+ REISER4_GENERIC_PTR_USED = 4,
-+ /* set if size of stat-data item for this inode is known. If this is
-+ * set we can avoid recalculating size of stat-data on each update. */
-+ REISER4_SDLEN_KNOWN = 5,
-+ /* reiser4_inode->crypt points to the crypto stat */
-+ REISER4_CRYPTO_STAT_LOADED = 6,
-+ /* cryptcompress_inode_data points to the secret key */
-+ REISER4_SECRET_KEY_INSTALLED = 7,
-+ /* File (possibly) has pages corresponding to the tail items, that
-+ * were created by ->readpage. It is set by mmap_unix_file() and
-+ * sendfile_unix_file(). This bit is inspected by write_unix_file and
-+ * kill-hook of tail items. It is never cleared once set. This bit is
-+ * modified and inspected under i_mutex. */
-+ REISER4_HAS_MMAP = 8,
-+ REISER4_PART_MIXED = 9,
-+ REISER4_PART_IN_CONV = 10,
-+ /* This flag indicates that file plugin conversion is in progress */
-+ REISER4_FILE_CONV_IN_PROGRESS = 11
-+} reiser4_file_plugin_flags;
-+
-+/* state associated with each inode.
-+ reiser4 inode.
-+
-+ NOTE-NIKITA In 2.5 kernels it is not necessary that all file-system inodes
-+ be of the same size. File-system allocates inodes by itself through
-+ s_op->allocate_inode() method. So, it is possible to adjust size of inode
-+ at the time of its creation.
-+
-+ Invariants involving parts of this data-type:
-+
-+ [inode->eflushed]
-+
-+*/
-+
-+typedef struct reiser4_inode reiser4_inode;
-+/* return pointer to reiser4-specific part of inode */
-+static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
-+ /* inode queried */ );
-+
-+#if BITS_PER_LONG == 64
-+
-+#define REISER4_INO_IS_OID (1)
-+typedef struct {;
-+} oid_hi_t;
-+
-+/* BITS_PER_LONG == 64 */
-+#else
-+
-+#define REISER4_INO_IS_OID (0)
-+typedef __u32 oid_hi_t;
-+
-+/* BITS_PER_LONG == 64 */
-+#endif
-+
-+struct reiser4_inode {
-+ /* spin lock protecting fields of this structure. */
-+ spinlock_t guard;
-+ /* main plugin set that control the file
-+ (see comments in plugin/plugin_set.c) */
-+ plugin_set *pset;
-+ /* plugin set for inheritance
-+ (see comments in plugin/plugin_set.c) */
-+ plugin_set *hset;
-+ /* high 32 bits of object id */
-+ oid_hi_t oid_hi;
-+ /* seal for stat-data */
-+ seal_t sd_seal;
-+ /* locality id for this file */
-+ oid_t locality_id;
-+#if REISER4_LARGE_KEY
-+ __u64 ordering;
-+#endif
-+ /* coord of stat-data in sealed node */
-+ coord_t sd_coord;
-+ /* bit-mask of stat-data extentions used by this file */
-+ __u64 extmask;
-+ /* bitmask of non-default plugins for this inode */
-+ __u16 plugin_mask;
-+ /* bitmask of set heir plugins for this inode. */
-+ __u16 heir_mask;
-+ union {
-+ struct list_head readdir_list;
-+ struct list_head not_used;
-+ } lists;
-+ /* per-inode flags. Filled by values of reiser4_file_plugin_flags */
-+ unsigned long flags;
-+ union {
-+ /* fields specific to unix_file plugin */
-+ unix_file_info_t unix_file_info;
-+ /* fields specific to cryptcompress plugin */
-+ cryptcompress_info_t cryptcompress_info;
-+ } file_plugin_data;
-+
-+ /* this semaphore is to serialize readers and writers of @pset->file
-+ * when file plugin conversion is enabled
-+ */
-+ struct rw_semaphore conv_sem;
-+
-+ /* tree of jnodes. Phantom jnodes (ones not attched to any atom) are
-+ tagged in that tree by EFLUSH_TAG_ANONYMOUS */
-+ struct radix_tree_root jnodes_tree;
-+#if REISER4_DEBUG
-+ /* number of unformatted node jnodes of this file in jnode hash table */
-+ unsigned long nr_jnodes;
-+#endif
-+
-+ /* block number of virtual root for this object. See comment above
-+ * fs/reiser4/search.c:handle_vroot() */
-+ reiser4_block_nr vroot;
-+ struct mutex loading;
-+};
-+
-+void loading_init_once(reiser4_inode *);
-+void loading_alloc(reiser4_inode *);
-+void loading_destroy(reiser4_inode *);
-+
-+typedef struct reiser4_inode_object {
-+ /* private part */
-+ reiser4_inode p;
-+ /* generic fields not specific to reiser4, but used by VFS */
-+ struct inode vfs_inode;
-+} reiser4_inode_object;
-+
-+/* return pointer to the reiser4 specific portion of @inode */
-+static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
-+ /* inode queried */ )
-+{
-+ assert("nikita-254", inode != NULL);
-+ return &container_of(inode, reiser4_inode_object, vfs_inode)->p;
-+}
-+
-+static inline struct inode *inode_by_reiser4_inode(const reiser4_inode *
-+ r4_inode /* inode queried */
-+ )
-+{
-+ return &container_of(r4_inode, reiser4_inode_object, p)->vfs_inode;
-+}
-+
-+/*
-+ * reiser4 inodes are identified by 64bit object-id (oid_t), but in struct
-+ * inode ->i_ino field is of type ino_t (long) that can be either 32 or 64
-+ * bits.
-+ *
-+ * If ->i_ino is 32 bits we store remaining 32 bits in reiser4 specific part
-+ * of inode, otherwise whole oid is stored in i_ino.
-+ *
-+ * Wrappers below ([sg]et_inode_oid()) are used to hide this difference.
-+ */
-+
-+#define OID_HI_SHIFT (sizeof(ino_t) * 8)
-+
-+#if REISER4_INO_IS_OID
-+
-+static inline oid_t get_inode_oid(const struct inode *inode)
-+{
-+ return inode->i_ino;
-+}
-+
-+static inline void set_inode_oid(struct inode *inode, oid_t oid)
-+{
-+ inode->i_ino = oid;
-+}
-+
-+/* REISER4_INO_IS_OID */
-+#else
-+
-+static inline oid_t get_inode_oid(const struct inode *inode)
-+{
-+ return
-+ ((__u64) reiser4_inode_data(inode)->oid_hi << OID_HI_SHIFT) |
-+ inode->i_ino;
-+}
-+
-+static inline void set_inode_oid(struct inode *inode, oid_t oid)
-+{
-+ assert("nikita-2519", inode != NULL);
-+ inode->i_ino = (ino_t) (oid);
-+ reiser4_inode_data(inode)->oid_hi = (oid) >> OID_HI_SHIFT;
-+ assert("nikita-2521", get_inode_oid(inode) == (oid));
-+}
-+
-+/* REISER4_INO_IS_OID */
-+#endif
-+
-+static inline oid_t get_inode_locality(const struct inode *inode)
-+{
-+ return reiser4_inode_data(inode)->locality_id;
-+}
-+
-+#if REISER4_LARGE_KEY
-+static inline __u64 get_inode_ordering(const struct inode *inode)
-+{
-+ return reiser4_inode_data(inode)->ordering;
-+}
-+
-+static inline void set_inode_ordering(const struct inode *inode, __u64 ordering)
-+{
-+ reiser4_inode_data(inode)->ordering = ordering;
-+}
-+
-+#else
-+
-+#define get_inode_ordering(inode) (0)
-+#define set_inode_ordering(inode, val) noop
-+
-+#endif
-+
-+/* return inode in which @uf_info is embedded */
-+static inline struct inode *unix_file_info_to_inode(const unix_file_info_t *
-+ uf_info)
-+{
-+ return &container_of(uf_info, reiser4_inode_object,
-+ p.file_plugin_data.unix_file_info)->vfs_inode;
-+}
-+
-+extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const));
-+extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const));
-+
-+extern reiser4_tree *reiser4_tree_by_inode(const struct inode *inode);
-+
-+#if REISER4_DEBUG
-+extern void reiser4_inode_invariant(const struct inode *inode);
-+extern int inode_has_no_jnodes(reiser4_inode *);
-+#else
-+#define reiser4_inode_invariant(inode) noop
-+#endif
-+
-+static inline int spin_inode_is_locked(const struct inode *inode)
-+{
-+ assert_spin_locked(&reiser4_inode_data(inode)->guard);
-+ return 1;
-+}
-+
-+/**
-+ * spin_lock_inode - lock reiser4_inode' embedded spinlock
-+ * @inode: inode to lock
-+ *
-+ * In debug mode it checks that lower priority locks are not held and
-+ * increments reiser4_context's lock counters on which lock ordering checking
-+ * is based.
-+ */
-+static inline void spin_lock_inode(struct inode *inode)
-+{
-+ assert("", LOCK_CNT_NIL(spin_locked));
-+ /* check lock ordering */
-+ assert_spin_not_locked(&d_lock);
-+
-+ spin_lock(&reiser4_inode_data(inode)->guard);
-+
-+ LOCK_CNT_INC(spin_locked_inode);
-+ LOCK_CNT_INC(spin_locked);
-+
-+ reiser4_inode_invariant(inode);
-+}
-+
-+/**
-+ * spin_unlock_inode - unlock reiser4_inode' embedded spinlock
-+ * @inode: inode to unlock
-+ *
-+ * In debug mode it checks that spinlock is held and decrements
-+ * reiser4_context's lock counters on which lock ordering checking is based.
-+ */
-+static inline void spin_unlock_inode(struct inode *inode)
-+{
-+ assert_spin_locked(&reiser4_inode_data(inode)->guard);
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ reiser4_inode_invariant(inode);
-+
-+ LOCK_CNT_DEC(spin_locked_inode);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&reiser4_inode_data(inode)->guard);
-+}
-+
-+extern znode *inode_get_vroot(struct inode *inode);
-+extern void inode_set_vroot(struct inode *inode, znode * vroot);
-+
-+extern int reiser4_max_filename_len(const struct inode *inode);
-+extern int max_hash_collisions(const struct inode *dir);
-+extern void reiser4_unlock_inode(struct inode *inode);
-+extern int is_reiser4_inode(const struct inode *inode);
-+extern int setup_inode_ops(struct inode *inode, reiser4_object_create_data *);
-+extern struct inode *reiser4_iget(struct super_block *super,
-+ const reiser4_key * key, int silent);
-+extern void reiser4_iget_complete(struct inode *inode);
-+extern void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f);
-+extern void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f);
-+extern int reiser4_inode_get_flag(const struct inode *inode,
-+ reiser4_file_plugin_flags f);
-+
-+/* has inode been initialized? */
-+static inline int
-+is_inode_loaded(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-1120", inode != NULL);
-+ return reiser4_inode_get_flag(inode, REISER4_LOADED);
-+}
-+
-+extern file_plugin *inode_file_plugin(const struct inode *inode);
-+extern dir_plugin *inode_dir_plugin(const struct inode *inode);
-+extern formatting_plugin *inode_formatting_plugin(const struct inode *inode);
-+extern hash_plugin *inode_hash_plugin(const struct inode *inode);
-+extern fibration_plugin *inode_fibration_plugin(const struct inode *inode);
-+extern cipher_plugin *inode_cipher_plugin(const struct inode *inode);
-+extern digest_plugin *inode_digest_plugin(const struct inode *inode);
-+extern compression_plugin *inode_compression_plugin(const struct inode *inode);
-+extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode
-+ *inode);
-+extern cluster_plugin *inode_cluster_plugin(const struct inode *inode);
-+extern file_plugin *inode_create_plugin(const struct inode *inode);
-+extern item_plugin *inode_sd_plugin(const struct inode *inode);
-+extern item_plugin *inode_dir_item_plugin(const struct inode *inode);
-+extern file_plugin *child_create_plugin(const struct inode *inode);
-+
-+extern void reiser4_make_bad_inode(struct inode *inode);
-+
-+extern void inode_set_extension(struct inode *inode, sd_ext_bits ext);
-+extern void inode_clr_extension(struct inode *inode, sd_ext_bits ext);
-+extern void inode_check_scale(struct inode *inode, __u64 old, __u64 new);
-+extern void inode_check_scale_nolock(struct inode * inode, __u64 old, __u64 new);
-+
-+/*
-+ * update field @field in inode @i to contain value @value.
-+ */
-+#define INODE_SET_FIELD(i, field, value) \
-+({ \
-+ struct inode *__i; \
-+ typeof(value) __v; \
-+ \
-+ __i = (i); \
-+ __v = (value); \
-+ inode_check_scale(__i, __i->field, __v); \
-+ __i->field = __v; \
-+})
-+
-+#define INODE_INC_FIELD(i, field) \
-+({ \
-+ struct inode *__i; \
-+ \
-+ __i = (i); \
-+ inode_check_scale(__i, __i->field, __i->field + 1); \
-+ ++ __i->field; \
-+})
-+
-+#define INODE_DEC_FIELD(i, field) \
-+({ \
-+ struct inode *__i; \
-+ \
-+ __i = (i); \
-+ inode_check_scale(__i, __i->field, __i->field - 1); \
-+ -- __i->field; \
-+})
-+
-+/* See comment before reiser4_readdir_common() for description. */
-+static inline struct list_head *get_readdir_list(const struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->lists.readdir_list;
-+}
-+
-+extern void init_inode_ordering(struct inode *inode,
-+ reiser4_object_create_data * crd, int create);
-+
-+static inline struct radix_tree_root *jnode_tree_by_inode(struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->jnodes_tree;
-+}
-+
-+static inline struct radix_tree_root *jnode_tree_by_reiser4_inode(reiser4_inode
-+ * r4_inode)
-+{
-+ return &r4_inode->jnodes_tree;
-+}
-+
-+#if REISER4_DEBUG
-+extern void print_inode(const char *prefix, const struct inode *i);
-+#endif
-+
-+int is_dir_empty(const struct inode *);
-+
-+/* __REISER4_INODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/ioctl.h linux-2.6.20/fs/reiser4/ioctl.h
---- linux-2.6.20.orig/fs/reiser4/ioctl.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/ioctl.h 2007-05-06 14:50:43.726984474 +0400
-@@ -0,0 +1,41 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#if !defined( __REISER4_IOCTL_H__ )
-+#define __REISER4_IOCTL_H__
-+
-+#include <linux/fs.h>
-+
-+/*
-+ * ioctl(2) command used to "unpack" reiser4 file, that is, convert it into
-+ * extents and fix in this state. This is used by applications that rely on
-+ *
-+ * . files being block aligned, and
-+ *
-+ * . files never migrating on disk
-+ *
-+ * for example, boot loaders (LILO) need this.
-+ *
-+ * This ioctl should be used as
-+ *
-+ * result = ioctl(fd, REISER4_IOC_UNPACK);
-+ *
-+ * File behind fd descriptor will be converted to the extents (if necessary),
-+ * and its stat-data will be updated so that it will never be converted back
-+ * into tails again.
-+ */
-+#define REISER4_IOC_UNPACK _IOW(0xCD,1,long)
-+
-+/* __REISER4_IOCTL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/jnode.c linux-2.6.20/fs/reiser4/jnode.c
---- linux-2.6.20.orig/fs/reiser4/jnode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/jnode.c 2007-05-06 14:50:43.730985723 +0400
-@@ -0,0 +1,1925 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Jnode manipulation functions. */
-+/* Jnode is entity used to track blocks with data and meta-data in reiser4.
-+
-+ In particular, jnodes are used to track transactional information
-+ associated with each block. Each znode contains jnode as ->zjnode field.
-+
-+ Jnode stands for either Josh or Journal node.
-+*/
-+
-+/*
-+ * Taxonomy.
-+ *
-+ * Jnode represents block containing data or meta-data. There are jnodes
-+ * for:
-+ *
-+ * unformatted blocks (jnodes proper). There are plans, however to
-+ * have a handle per extent unit rather than per each unformatted
-+ * block, because there are so many of them.
-+ *
-+ * For bitmaps. Each bitmap is actually represented by two jnodes--one
-+ * for working and another for "commit" data, together forming bnode.
-+ *
-+ * For io-heads. These are used by log writer.
-+ *
-+ * For formatted nodes (znode). See comment at the top of znode.c for
-+ * details specific to the formatted nodes (znodes).
-+ *
-+ * Node data.
-+ *
-+ * Jnode provides access to the data of node it represents. Data are
-+ * stored in a page. Page is kept in a page cache. This means, that jnodes
-+ * are highly interconnected with page cache and VM internals.
-+ *
-+ * jnode has a pointer to page (->pg) containing its data. Pointer to data
-+ * themselves is cached in ->data field to avoid frequent calls to
-+ * page_address().
-+ *
-+ * jnode and page are attached to each other by jnode_attach_page(). This
-+ * function places pointer to jnode in set_page_private(), sets PG_private
-+ * flag and increments page counter.
-+ *
-+ * Opposite operation is performed by page_clear_jnode().
-+ *
-+ * jnode->pg is protected by jnode spin lock, and page->private is
-+ * protected by page lock. See comment at the top of page_cache.c for
-+ * more.
-+ *
-+ * page can be detached from jnode for two reasons:
-+ *
-+ * . jnode is removed from a tree (file is truncated, or formatted
-+ * node is removed by balancing).
-+ *
-+ * . during memory pressure, VM calls ->releasepage() method
-+ * (reiser4_releasepage()) to evict page from memory.
-+ *
-+ * (there, of course, is also umount, but this is special case we are not
-+ * concerned with here).
-+ *
-+ * To protect jnode page from eviction, one calls jload() function that
-+ * "pins" page in memory (loading it if necessary), increments
-+ * jnode->d_count, and kmap()s page. Page is unpinned through call to
-+ * jrelse().
-+ *
-+ * Jnode life cycle.
-+ *
-+ * jnode is created, placed in hash table, and, optionally, in per-inode
-+ * radix tree. Page can be attached to jnode, pinned, released, etc.
-+ *
-+ * When jnode is captured into atom its reference counter is
-+ * increased. While being part of an atom, jnode can be "early
-+ * flushed". This means that as part of flush procedure, jnode is placed
-+ * into "relocate set", and its page is submitted to the disk. After io
-+ * completes, page can be detached, then loaded again, re-dirtied, etc.
-+ *
-+ * Thread acquired reference to jnode by calling jref() and releases it by
-+ * jput(). When last reference is removed, jnode is still retained in
-+ * memory (cached) if it has page attached, _unless_ it is scheduled for
-+ * destruction (has JNODE_HEARD_BANSHEE bit set).
-+ *
-+ * Tree read-write lock was used as "existential" lock for jnodes. That is,
-+ * jnode->x_count could be changed from 0 to 1 only under tree write lock,
-+ * that is, tree lock protected unreferenced jnodes stored in the hash
-+ * table, from recycling.
-+ *
-+ * This resulted in high contention on tree lock, because jref()/jput() is
-+ * frequent operation. To ameliorate this problem, RCU is used: when jput()
-+ * is just about to release last reference on jnode it sets JNODE_RIP bit
-+ * on it, and then proceed with jnode destruction (removing jnode from hash
-+ * table, cbk_cache, detaching page, etc.). All places that change jnode
-+ * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
-+ * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
-+ * jnode_rip_check() function), and pretend that nothing was found in hash
-+ * table if bit is set.
-+ *
-+ * jput defers actual return of jnode into slab cache to some later time
-+ * (by call_rcu()), this guarantees that other threads can safely continue
-+ * working with JNODE_RIP-ped jnode.
-+ *
-+ */
-+
-+#include "reiser4.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "jnode.h"
-+#include "plugin/plugin_header.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+/*#include "jnode.h"*/
-+#include "znode.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+
-+#include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
-+#include <linux/types.h>
-+#include <linux/slab.h>
-+#include <linux/pagemap.h>
-+#include <linux/swap.h>
-+#include <linux/fs.h> /* for struct address_space */
-+#include <linux/writeback.h> /* for inode_lock */
-+
-+static struct kmem_cache *_jnode_slab = NULL;
-+
-+static void jnode_set_type(jnode * node, jnode_type type);
-+static int jdelete(jnode * node);
-+static int jnode_try_drop(jnode * node);
-+
-+#if REISER4_DEBUG
-+static int jnode_invariant(const jnode * node, int tlocked, int jlocked);
-+#endif
-+
-+/* non-zero if JNODE_PARSED is set on @node, i.e., valid page is attached */
-+static inline int jnode_is_parsed(jnode * node)
-+{
-+	int parsed;
-+
-+	parsed = JF_ISSET(node, JNODE_PARSED);
-+	return parsed;
-+}
-+
-+/* hash table support */
-+
-+/*
-+ * Used by hash-table macros: two jnode keys are equal when both their index
-+ * and their objectid match. Returns 1 for equal keys and 0 otherwise.
-+ */
-+static inline int jnode_key_eq(const jnode_key_t * k1, const jnode_key_t * k2)
-+{
-+	assert("nikita-2350", k1 != NULL);
-+	assert("nikita-2351", k2 != NULL);
-+
-+	if (k1->index != k2->index)
-+		return 0;
-+	return k1->objectid == k2->objectid;
-+}
-+
-+/* Hash jnode by its key (object id plus index). Used by hash-table macros.
-+ * The number of buckets is asserted to be a power of two, so masking with
-+ * (_buckets - 1) yields a bucket number below _buckets. */
-+static inline __u32
-+jnode_key_hashfn(j_hash_table * table, const jnode_key_t * key)
-+{
-+ assert("nikita-2352", key != NULL);
-+ assert("nikita-3346", IS_POW(table->_buckets));
-+
-+ /* yes, this is a remarkably simple (if not stupid) hash function. */
-+ return (key->objectid + key->index) & (table->_buckets - 1);
-+}
-+
-+/* The hash table definition. KMALLOC and KFREE are defined only around the
-+ * TYPE_SAFE_HASH_DEFINE() instantiation and undefined right after it:
-+ * reiser4_vmalloc() and vfree() are the allocator pair supplied for the
-+ * jnode hash table. */
-+#define KMALLOC(size) reiser4_vmalloc(size)
-+#define KFREE(ptr, size) vfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(j, jnode, jnode_key_t, key.j, link.j, jnode_key_hashfn,
-+ jnode_key_eq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/* set up the jnode hash table of @tree with 16384 buckets; returns whatever
-+ * j_hash_init() returns */
-+int jnodes_tree_init(reiser4_tree * tree /* tree to initialise jnodes for */ )
-+{
-+	int ret;
-+
-+	assert("nikita-2359", tree != NULL);
-+
-+	ret = j_hash_init(&tree->jhash_table, 16384);
-+	return ret;
-+}
-+
-+/* destroy jnode hash table of @tree. This is called during umount. Always
-+ * returns 0. */
-+int jnodes_tree_done(reiser4_tree * tree /* tree to destroy jnodes for */ )
-+{
-+	j_hash_table *htable;
-+	jnode *cur;
-+	jnode *tmp;
-+
-+	assert("nikita-2360", tree != NULL);
-+
-+	htable = &tree->jhash_table;
-+	/* table was never allocated: nothing to scan or to release */
-+	if (!htable->_table)
-+		return 0;
-+
-+	/* drop every jnode that is still hashed; none may be referenced */
-+	for_all_in_htable(htable, j, cur, tmp) {
-+		assert("nikita-2361", !atomic_read(&cur->x_count));
-+		jdrop(cur);
-+	}
-+	j_hash_done(htable);
-+
-+	return 0;
-+}
-+
-+/**
-+ * init_jnodes - create jnode cache
-+ *
-+ * Creates the slab cache jnodes are allocated from. It is part of reiser4
-+ * module initialization. Returns 0 on success and -ENOMEM if the cache could
-+ * not be created.
-+ */
-+int init_jnodes(void)
-+{
-+	assert("umka-168", _jnode_slab == NULL);
-+
-+	_jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0,
-+					SLAB_HWCACHE_ALIGN |
-+					SLAB_RECLAIM_ACCOUNT, NULL, NULL);
-+
-+	return (_jnode_slab != NULL) ? 0 : RETERR(-ENOMEM);
-+}
-+
-+/**
-+ * done_jnodes - delete jnode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown. Hands the
-+ * jnode slab cache created by init_jnodes() to destroy_reiser4_cache().
-+ */
-+void done_jnodes(void)
-+{
-+ destroy_reiser4_cache(&_jnode_slab);
-+}
-+
-+/* Initialize a jnode.
-+ *
-+ * @node is zeroed, given type @type, bound to @tree and left with zero d_count
-+ * and x_count, no atom, and on the NOT_CAPTURED list. In debugging builds the
-+ * jnode is also linked into the per-super-block ->all_jnodes list. */
-+void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type)
-+{
-+ assert("umka-175", node != NULL);
-+
-+ /* jnode_set_type() below asserts that ->state is zero, so the memset has
-+ * to come first */
-+ memset(node, 0, sizeof(jnode));
-+ ON_DEBUG(node->magic = JMAGIC);
-+ jnode_set_type(node, type);
-+ atomic_set(&node->d_count, 0);
-+ atomic_set(&node->x_count, 0);
-+ spin_lock_init(&node->guard);
-+ spin_lock_init(&node->load);
-+ node->atom = NULL;
-+ node->tree = tree;
-+ INIT_LIST_HEAD(&node->capture_link);
-+
-+ ASSIGN_NODE_LIST(node, NOT_CAPTURED);
-+
-+ INIT_RCU_HEAD(&node->rcu);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_super_info_data *sbinfo;
-+
-+ /* register jnode in the list of all jnodes of this super block;
-+ * jnode_done() removes it again */
-+ sbinfo = get_super_private(tree->super);
-+ spin_lock_irq(&sbinfo->all_guard);
-+ list_add(&node->jnodes, &sbinfo->all_jnodes);
-+ spin_unlock_irq(&sbinfo->all_guard);
-+ }
-+#endif
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Remove jnode from ->all_jnodes list.
-+ *
-+ * Debugging-only counterpart of the list_add() done in jnode_init(). The list
-+ * is protected by ->all_guard of the super block of @tree.
-+ */
-+static void jnode_done(jnode * node, reiser4_tree * tree)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(tree->super);
-+
-+ spin_lock_irq(&sbinfo->all_guard);
-+ assert("nikita-2422", !list_empty(&node->jnodes));
-+ /* list_del_init() leaves ->jnodes empty, which jfree() asserts */
-+ list_del_init(&node->jnodes);
-+ spin_unlock_irq(&sbinfo->all_guard);
-+}
-+#endif
-+
-+/* return the jnode already attached to @pg. The page has to be locked and to
-+ * have a jnode in its private field. */
-+jnode *jnode_by_page(struct page *pg)
-+{
-+	jnode *node;
-+
-+	assert("nikita-2066", pg != NULL);
-+	assert("nikita-2400", PageLocked(pg));
-+	assert("nikita-2068", PagePrivate(pg));
-+
-+	node = jprivate(pg);
-+	assert("nikita-2067", node != NULL);
-+	return node;
-+}
-+
-+/* exported functions to allocate/free jnode objects outside this file.
-+ * jalloc() takes a jnode from the slab cache using the gfp mask of the current
-+ * reiser4 context and returns whatever kmem_cache_alloc() returned. */
-+jnode *jalloc(void)
-+{
-+	return kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get());
-+}
-+
-+/* return jnode back to the slab allocator.
-+ *
-+ * The assertions below spell out what is expected of @node here: it is not
-+ * captured, it is no longer on the ->jnodes list, and it has no page
-+ * attached. */
-+inline void jfree(jnode * node)
-+{
-+ assert("zam-449", node != NULL);
-+
-+ assert("nikita-2663", (list_empty_careful(&node->capture_link) &&
-+ NODE_LIST(node) == NOT_CAPTURED));
-+ assert("nikita-3222", list_empty(&node->jnodes));
-+ assert("nikita-3221", jnode_page(node) == NULL);
-+
-+ /* not yet phash_jnode_destroy(node); */
-+
-+ kmem_cache_free(_jnode_slab, node);
-+}
-+
-+/*
-+ * This function is supplied as RCU callback. It actually frees jnode when
-+ * last reference to it is gone.
-+ *
-+ * @head is the ->rcu member embedded into the jnode that was passed to
-+ * call_rcu() by jnode_free().
-+ */
-+static void jnode_free_actor(struct rcu_head *head)
-+{
-+ jnode *node;
-+ jnode_type jtype;
-+
-+ node = container_of(head, jnode, rcu);
-+ jtype = jnode_get_type(node);
-+
-+ /* debugging builds: take jnode off the ->all_jnodes list first */
-+ ON_DEBUG(jnode_done(node, jnode_get_tree(node)));
-+
-+ switch (jtype) {
-+ case JNODE_IO_HEAD:
-+ case JNODE_BITMAP:
-+ case JNODE_UNFORMATTED_BLOCK:
-+ jfree(node);
-+ break;
-+ case JNODE_FORMATTED_BLOCK:
-+ /* formatted jnode is embedded into znode: free the whole znode */
-+ zfree(JZNODE(node));
-+ break;
-+ case JNODE_INODE:
-+ /* jnode_free() never posts JNODE_INODE jnodes to RCU, so getting here
-+ * is reported as an error like any unknown type */
-+ default:
-+ wrong_return_value("nikita-3197", "Wrong jnode type");
-+ }
-+}
-+
-+/*
-+ * Free a jnode. For every type but JNODE_INODE a callback is posted that
-+ * frees @node later through RCU; a JNODE_INODE jnode is only removed from its
-+ * list.
-+ */
-+static inline void jnode_free(jnode * node, jnode_type jtype)
-+{
-+	if (jtype == JNODE_INODE) {
-+		jnode_list_remove(node);
-+		return;
-+	}
-+
-+	/*assert("nikita-3219", list_empty(&node->rcu.list)); */
-+	call_rcu(&node->rcu, jnode_free_actor);
-+}
-+
-+/* allocate new unformatted jnode in the current tree. Its key is left in the
-+ * "not hashed" state: no mapping, index of (unsigned long)-1 and objectid of
-+ * 0. Returns NULL when allocation fails. */
-+static jnode *jnew_unformatted(void)
-+{
-+	jnode *node;
-+
-+	node = jalloc();
-+	if (node != NULL) {
-+		jnode_init(node, current_tree, JNODE_UNFORMATTED_BLOCK);
-+		node->key.j.mapping = NULL;
-+		node->key.j.index = (unsigned long)-1;
-+		node->key.j.objectid = 0;
-+	}
-+	return node;
-+}
-+
-+/* look for jnode with given object id and index within hash table of @tree.
-+ *
-+ * A found jnode is returned with an additional reference taken by jref().
-+ * The result of the lookup is passed through jnode_rip_check(), whose return
-+ * value is what the caller gets; NULL is returned when nothing was found in
-+ * the hash table. */
-+jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index)
-+{
-+ jnode_key_t jkey;
-+ jnode *node;
-+
-+ assert("nikita-2353", tree != NULL);
-+
-+ jkey.objectid = objectid;
-+ jkey.index = index;
-+
-+ /*
-+ * hash table is _not_ protected by any lock during lookups. All we
-+ * have to do is to disable preemption to keep RCU happy.
-+ */
-+
-+ rcu_read_lock();
-+ node = j_hash_find(&tree->jhash_table, &jkey);
-+ if (node != NULL) {
-+ /* protect @node from recycling */
-+ jref(node);
-+ assert("nikita-2955", jnode_invariant(node, 0, 0));
-+ /* the reference is taken before the JNODE_RIP check is made */
-+ node = jnode_rip_check(tree, node);
-+ }
-+ rcu_read_unlock();
-+ return node;
-+}
-+
-+/* look @index up in the radix tree of jnodes of the inode owning @mapping.
-+ * Per inode radix tree of jnodes is protected by tree's read write spin lock,
-+ * which is not taken here. */
-+static jnode *jfind_nolock(struct address_space *mapping, unsigned long index)
-+{
-+	struct radix_tree_root *jnodes;
-+
-+	assert("vs-1694", mapping->host != NULL);
-+
-+	jnodes = jnode_tree_by_inode(mapping->host);
-+	return radix_tree_lookup(jnodes, index);
-+}
-+
-+/* locked version of jfind_nolock(): the lookup is done under the read side of
-+ * the tree lock, and a found jnode is returned with a reference taken by
-+ * jref(). Returns NULL if there is no jnode for @index. */
-+jnode *jfind(struct address_space * mapping, unsigned long index)
-+{
-+	reiser4_tree *tree;
-+	jnode *found;
-+
-+	assert("vs-1694", mapping->host != NULL);
-+	tree = reiser4_tree_by_inode(mapping->host);
-+
-+	read_lock_tree(tree);
-+	found = jfind_nolock(mapping, index);
-+	if (found != NULL)
-+		jref(found);
-+	read_unlock_tree(tree);
-+
-+	return found;
-+}
-+
-+/* insert @node into the radix tree of jnodes of the inode it belongs to (the
-+ * host of node->key.j.mapping), keyed by node->key.j.index. Tree lock has to
-+ * be write-locked by the caller. */
-+static void inode_attach_jnode(jnode * node)
-+{
-+ struct inode *inode;
-+ reiser4_inode *info;
-+ struct radix_tree_root *rtree;
-+
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+ assert("zam-1043", node->key.j.mapping != NULL);
-+ inode = node->key.j.mapping->host;
-+ info = reiser4_inode_data(inode);
-+ rtree = jnode_tree_by_reiser4_inode(info);
-+ if (rtree->rnode == NULL) {
-+ /* prevent inode from being pruned when it has jnodes attached
-+ to it */
-+ /* this is the first jnode of the inode: ->nrpages is bumped once
-+ * here and dropped again in inode_detach_jnode() when the radix
-+ * tree becomes empty */
-+ write_lock_irq(&inode->i_data.tree_lock);
-+ inode->i_data.nrpages++;
-+ write_unlock_irq(&inode->i_data.tree_lock);
-+ }
-+ assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0));
-+ check_me("zam-1045",
-+ !radix_tree_insert(rtree, node->key.j.index, node));
-+ ON_DEBUG(info->nr_jnodes++);
-+}
-+
-+/* dual to inode_attach_jnode(): remove @node from the radix tree of jnodes of
-+ * its inode. Tree lock has to be write-locked by the caller. */
-+static void inode_detach_jnode(jnode * node)
-+{
-+ struct inode *inode;
-+ reiser4_inode *info;
-+ struct radix_tree_root *rtree;
-+
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+ assert("zam-1044", node->key.j.mapping != NULL);
-+ inode = node->key.j.mapping->host;
-+ info = reiser4_inode_data(inode);
-+ rtree = jnode_tree_by_reiser4_inode(info);
-+
-+ assert("zam-1051", info->nr_jnodes != 0);
-+ assert("zam-1052", rtree->rnode != NULL);
-+ ON_DEBUG(info->nr_jnodes--);
-+
-+ /* delete jnode from inode's radix tree of jnodes */
-+ check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index));
-+ if (rtree->rnode == NULL) {
-+ /* inode can be pruned now */
-+ /* last jnode is gone: undo the ->nrpages increment made by
-+ * inode_attach_jnode() */
-+ write_lock_irq(&inode->i_data.tree_lock);
-+ inode->i_data.nrpages--;
-+ write_unlock_irq(&inode->i_data.tree_lock);
-+ }
-+}
-+
-+/* put jnode into hash table (where they can be found by flush who does not know
-+ mapping) and to inode's tree of jnodes (where they can be found (hopefully
-+ faster) in places where mapping is known). Currently it is used by
-+ fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
-+ created.
-+
-+ @node has to be an unformatted jnode whose key is still in the state set up
-+ by jnew_unformatted() (no mapping, objectid 0, index -1). The key is filled
-+ in from @mapping and @index here. Tree lock has to be write-locked by the
-+ caller. */
-+static void
-+hash_unformatted_jnode(jnode * node, struct address_space *mapping,
-+ unsigned long index)
-+{
-+ j_hash_table *jtable;
-+
-+ assert("vs-1446", jnode_is_unformatted(node));
-+ assert("vs-1442", node->key.j.mapping == 0);
-+ assert("vs-1443", node->key.j.objectid == 0);
-+ assert("vs-1444", node->key.j.index == (unsigned long)-1);
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+
-+ /* key has to be complete before jnode becomes visible in the hash table */
-+ node->key.j.mapping = mapping;
-+ node->key.j.objectid = get_inode_oid(mapping->host);
-+ node->key.j.index = index;
-+
-+ jtable = &jnode_get_tree(node)->jhash_table;
-+
-+ /* race with some other thread inserting jnode into the hash table is
-+ * impossible, because we keep the page lock. */
-+ /*
-+ * following assertion no longer holds because of RCU: it is possible
-+ * jnode is in the hash table, but with JNODE_RIP bit set.
-+ */
-+ /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
-+ j_hash_insert_rcu(jtable, node);
-+ inode_attach_jnode(node);
-+}
-+
-+/* remove @node from the jnode hash table and from the radix tree of jnodes of
-+ * its inode, then put its key back into the "not hashed" state. The tree lock
-+ * is not taken here. */
-+static void unhash_unformatted_node_nolock(jnode * node)
-+{
-+	j_hash_table *jtable;
-+
-+	assert("vs-1683", node->key.j.mapping != NULL);
-+	assert("vs-1684",
-+	       node->key.j.objectid ==
-+	       get_inode_oid(node->key.j.mapping->host));
-+
-+	/* remove jnode from hash-table */
-+	jtable = &node->tree->jhash_table;
-+	j_hash_remove_rcu(jtable, node);
-+
-+	/* the key is still needed by inode_detach_jnode(), reset it afterwards */
-+	inode_detach_jnode(node);
-+	node->key.j.objectid = 0;
-+	node->key.j.index = (unsigned long)-1;
-+	node->key.j.mapping = NULL;
-+}
-+
-+/* remove jnode from hash table and from inode's tree of jnodes, under the
-+   write side of the tree lock. This is used in reiser4_invalidatepage and in
-+   kill_hook_extent -> truncate_inode_jnodes -> reiser4_uncapture_jnode */
-+void unhash_unformatted_jnode(jnode * node)
-+{
-+	reiser4_tree *tree;
-+
-+	assert("vs-1445", jnode_is_unformatted(node));
-+
-+	tree = node->tree;
-+	write_lock_tree(tree);
-+	unhash_unformatted_node_nolock(node);
-+	write_unlock_tree(tree);
-+}
-+
-+/*
-+ * search the radix tree of jnodes of the inode owning @mapping for a jnode
-+ * with index @index. If not found, insert a newly allocated jnode into the
-+ * hash table of @tree and into that radix tree.
-+ *
-+ * @tree: tree whose lock protects the lookup and the insertion
-+ * @mapping: address space the jnode belongs to
-+ * @oid: not used by this function; the object id is taken from @mapping by
-+ * hash_unformatted_jnode()
-+ * @index: index of the jnode within @mapping
-+ *
-+ * Returns the jnode with a reference acquired through jref(), or an ERR_PTR()
-+ * value: -ENOMEM if the jnode could not be allocated, or the error returned
-+ * by radix_tree_preload().
-+ */
-+static jnode *find_get_jnode(reiser4_tree * tree,
-+			     struct address_space *mapping,
-+			     oid_t oid, unsigned long index)
-+{
-+	jnode *result;
-+	jnode *shadow;
-+	int preload;
-+
-+	/* allocate optimistically, before any lock is taken */
-+	result = jnew_unformatted();
-+
-+	if (unlikely(result == NULL))
-+		return ERR_PTR(RETERR(-ENOMEM));
-+
-+	preload = radix_tree_preload(reiser4_ctx_gfp_mask_get());
-+	if (preload != 0) {
-+		/*
-+		 * do not leak the jnode allocated above: release it the same
-+		 * way the unused jnode is released when a shadow is found
-+		 */
-+		jnode_free(result, JNODE_UNFORMATTED_BLOCK);
-+		return ERR_PTR(preload);
-+	}
-+
-+	write_lock_tree(tree);
-+	shadow = jfind_nolock(mapping, index);
-+	if (likely(shadow == NULL)) {
-+		/* add new jnode to hash table and inode's radix tree of jnodes */
-+		jref(result);
-+		hash_unformatted_jnode(result, mapping, index);
-+	} else {
-+		/* jnode is found in inode's radix tree of jnodes */
-+		jref(shadow);
-+		jnode_free(result, JNODE_UNFORMATTED_BLOCK);
-+		assert("vs-1498", shadow->key.j.mapping == mapping);
-+		result = shadow;
-+	}
-+	write_unlock_tree(tree);
-+
-+	assert("nikita-2955",
-+	       ergo(result != NULL, jnode_invariant(result, 0, 0)));
-+	radix_tree_preload_end();
-+	return result;
-+}
-+
-+/* jget() (a la zget() but for unformatted nodes). Returns (and possibly
-+   creates) jnode corresponding to page @pg. jnode is attached to page and
-+   inserted into jnode hash-table.
-+
-+   @tree is overwritten with reiser4_tree_by_page(@pg) before it is used.
-+   @pg has to be locked. Returns the jnode with a reference acquired, or the
-+   ERR_PTR() value returned by find_get_jnode(). */
-+static jnode *do_jget(reiser4_tree * tree, struct page *pg)
-+{
-+	/*
-+	 * There are two ways to create jnode: starting with pre-existing page
-+	 * and without page.
-+	 *
-+	 * When page already exists, jnode is created
-+	 * (jnode_of_page()->do_jget()) under page lock. This is done in
-+	 * ->writepage(), or when capturing anonymous page dirtied through
-+	 * mmap.
-+	 *
-+	 * Jnode without page is created by index_extent_jnode().
-+	 *
-+	 */
-+
-+	jnode *result;
-+
-+	/* @pg is checked before anything is read through it */
-+	assert("umka-176", pg != NULL);
-+	assert("nikita-2394", PageLocked(pg));
-+
-+	/* fast path: jnode is already attached to the page */
-+	result = jprivate(pg);
-+	if (likely(result != NULL))
-+		return jref(result);
-+
-+	tree = reiser4_tree_by_page(pg);
-+
-+	/* check hash-table first */
-+	result = jfind(pg->mapping, pg->index);
-+	if (unlikely(result != NULL)) {
-+		spin_lock_jnode(result);
-+		jnode_attach_page(result, pg);
-+		spin_unlock_jnode(result);
-+		result->key.j.mapping = pg->mapping;
-+		return result;
-+	}
-+
-+	/* since page is locked, jnode should be allocated with GFP_NOFS flag */
-+	reiser4_ctx_gfp_mask_force(GFP_NOFS);
-+	/* object id is only needed on this, the slowest, path */
-+	result = find_get_jnode(tree, pg->mapping,
-+				get_inode_oid(pg->mapping->host), pg->index);
-+	if (unlikely(IS_ERR(result)))
-+		return result;
-+	/* attach jnode to page */
-+	spin_lock_jnode(result);
-+	jnode_attach_page(result, pg);
-+	spin_unlock_jnode(result);
-+	return result;
-+}
-+
-+/*
-+ * return jnode for @pg, creating it if necessary.
-+ *
-+ * @pg has to be locked. The result is whatever do_jget() returns, which may be
-+ * an ERR_PTR() value. In debugging builds consistency between the page and
-+ * the returned jnode is verified.
-+ */
-+jnode *jnode_of_page(struct page * pg)
-+{
-+ jnode *result;
-+
-+ assert("umka-176", pg != NULL);
-+ assert("nikita-2394", PageLocked(pg));
-+
-+ result = do_jget(reiser4_tree_by_page(pg), pg);
-+
-+ /* checks below are only made for a successful result */
-+ if (REISER4_DEBUG && !IS_ERR(result)) {
-+ assert("nikita-3210", result == jprivate(pg));
-+ assert("nikita-2046", jnode_page(jprivate(pg)) == pg);
-+ if (jnode_is_unformatted(jprivate(pg))) {
-+ assert("nikita-2364",
-+ jprivate(pg)->key.j.index == pg->index);
-+ assert("nikita-2367",
-+ jprivate(pg)->key.j.mapping == pg->mapping);
-+ assert("nikita-2365",
-+ jprivate(pg)->key.j.objectid ==
-+ get_inode_oid(pg->mapping->host));
-+ assert("vs-1200",
-+ jprivate(pg)->key.j.objectid ==
-+ pg->mapping->host->i_ino);
-+ assert("nikita-2356",
-+ jnode_is_unformatted(jnode_by_page(pg)));
-+ }
-+ assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0));
-+ }
-+ return result;
-+}
-+
-+/* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
-+ * page.
-+ *
-+ * Caller has to hold both the page lock of @pg and the spin lock of @node.
-+ * Neither @pg nor @node may be attached to anything yet. An additional
-+ * reference to @pg is taken; page_clear_jnode() releases it. */
-+void jnode_attach_page(jnode * node, struct page *pg)
-+{
-+ assert("nikita-2060", node != NULL);
-+ assert("nikita-2061", pg != NULL);
-+
-+ assert("nikita-2050", jprivate(pg) == 0ul);
-+ assert("nikita-2393", !PagePrivate(pg));
-+ assert("vs-1741", node->pg == NULL);
-+
-+ assert("nikita-2396", PageLocked(pg));
-+ assert_spin_locked(&(node->guard));
-+
-+ page_cache_get(pg);
-+ set_page_private(pg, (unsigned long)node);
-+ node->pg = pg;
-+ SetPagePrivate(pg);
-+}
-+
-+/* Dual to jnode_attach_page: break a binding between page and jnode.
-+ *
-+ * Caller has to hold both the page lock of @page and the spin lock of @node,
-+ * and @page must not be under writeback. JNODE_PARSED is cleared on @node and
-+ * the page reference taken by jnode_attach_page() is released. */
-+void page_clear_jnode(struct page *page, jnode * node)
-+{
-+ assert("nikita-2424", page != NULL);
-+ assert("nikita-2425", PageLocked(page));
-+ assert("nikita-2426", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+ assert("nikita-2428", PagePrivate(page));
-+
-+ assert("nikita-3551", !PageWriteback(page));
-+
-+ JF_CLR(node, JNODE_PARSED);
-+ set_page_private(page, 0ul);
-+ ClearPagePrivate(page);
-+ node->pg = NULL;
-+ page_cache_release(page);
-+}
-+
-+#if 0
-+/* it is only used in one place to handle error */
-+/* NOTE: compiled out by the surrounding "#if 0". Detaches the jnode from @page
-+ * under the page lock, but only if the page still belongs to @mapping at
-+ * @index and has private data. */
-+void
-+page_detach_jnode(struct page *page, struct address_space *mapping,
-+ unsigned long index)
-+{
-+ assert("nikita-2395", page != NULL);
-+
-+ lock_page(page);
-+ if ((page->mapping == mapping) && (page->index == index)
-+ && PagePrivate(page)) {
-+ jnode *node;
-+
-+ node = jprivate(page);
-+ spin_lock_jnode(node);
-+ page_clear_jnode(page, node);
-+ spin_unlock_jnode(node);
-+ }
-+ unlock_page(page);
-+}
-+#endif /* 0 */
-+
-+/* return @node page locked.
-+
-+ Locking ordering requires that one first takes page lock and afterwards
-+ spin lock on node attached to this page. Sometimes it is necessary to go in
-+ the opposite direction. This is done through standard trylock-and-release
-+ loop.
-+
-+ Both exits from the loop leave the spin lock of @node held, so this
-+ function returns with the jnode spin-locked: either with the locked page of
-+ @node, or with NULL if @node has no page.
-+*/
-+static struct page *jnode_lock_page(jnode * node)
-+{
-+ struct page *page;
-+
-+ assert("nikita-2052", node != NULL);
-+ assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode));
-+
-+ while (1) {
-+
-+ spin_lock_jnode(node);
-+ page = jnode_page(node);
-+ if (page == NULL) {
-+ /* no page attached: return NULL with jnode still locked */
-+ break;
-+ }
-+
-+ /* no need to page_cache_get( page ) here, because page cannot
-+ be evicted from memory without detaching it from jnode and
-+ this requires spin lock on jnode that we already hold.
-+ */
-+ if (!TestSetPageLocked(page)) {
-+ /* We won a lock on jnode page, proceed. */
-+ break;
-+ }
-+
-+ /* Page is locked by someone else. */
-+ /* pin the page so that it can be waited on after the jnode spin lock
-+ is dropped */
-+ page_cache_get(page);
-+ spin_unlock_jnode(node);
-+ wait_on_page_locked(page);
-+ /* it is possible that page was detached from jnode and
-+ returned to the free pool, or re-assigned while we were
-+ waiting on locked bit. This will be rechecked on the next
-+ loop iteration.
-+ */
-+ page_cache_release(page);
-+
-+ /* try again */
-+ }
-+ return page;
-+}
-+
-+/*
-+ * if JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
-+ * validness of jnode content. The bit is set once ->parse() succeeds. All of
-+ * this is done under the jnode spin lock. Returns 0 if the jnode is (or
-+ * already was) parsed, and the result of ->parse() otherwise.
-+ */
-+static inline int jparse(jnode * node)
-+{
-+	int result = 0;
-+
-+	assert("nikita-2466", node != NULL);
-+
-+	spin_lock_jnode(node);
-+	if (likely(!jnode_is_parsed(node))) {
-+		result = jnode_ops(node)->parse(node);
-+		if (likely(result == 0))
-+			JF_SET(node, JNODE_PARSED);
-+	}
-+	spin_unlock_jnode(node);
-+
-+	return result;
-+}
-+
-+/* Lock a page attached to jnode, create and attach page to jnode if it had no
-+ * one.
-+ *
-+ * Returns the locked page of @node, or ERR_PTR(-ENOMEM) if
-+ * find_or_create_page() returned no page. @gfp_flags is only used when the
-+ * page has to be looked up or created. */
-+static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags)
-+{
-+ struct page *page;
-+
-+ spin_lock_jnode(node);
-+ page = jnode_page(node);
-+
-+ if (page == NULL) {
-+ spin_unlock_jnode(node);
-+ page = find_or_create_page(jnode_get_mapping(node),
-+ jnode_get_index(node), gfp_flags);
-+ if (page == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ } else {
-+ /* fast path: page lock was free, nothing else to do */
-+ if (!TestSetPageLocked(page)) {
-+ spin_unlock_jnode(node);
-+ return page;
-+ }
-+ /* pin the page before the jnode spin lock is dropped, then sleep on
-+ * the page lock */
-+ page_cache_get(page);
-+ spin_unlock_jnode(node);
-+ lock_page(page);
-+ assert("nikita-3134", page->mapping == jnode_get_mapping(node));
-+ }
-+
-+ /* page is locked here; jnode spin lock was dropped in between, so
-+ * re-check whether a page is attached */
-+ spin_lock_jnode(node);
-+ if (!jnode_page(node))
-+ jnode_attach_page(node, page);
-+ spin_unlock_jnode(node);
-+
-+ /* drop the page reference obtained above in either branch */
-+ page_cache_release(page);
-+ assert("zam-894", jnode_page(node) == page);
-+ return page;
-+}
-+
-+/* Start read operation for jnode's page if page is not up-to-date. @page has
-+ * to be locked. An up-to-date page is simply unlocked and 0 is returned;
-+ * otherwise the result of reiser4_page_io() is returned. */
-+static int jnode_start_read(jnode * node, struct page *page)
-+{
-+	assert("zam-893", PageLocked(page));
-+
-+	if (!PageUptodate(page))
-+		return reiser4_page_io(page, node, READ,
-+				       reiser4_ctx_gfp_mask_get());
-+
-+	unlock_page(page);
-+	return 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging check used by jload_gfp(): for a znode that is locked, compare the
-+ * number of items cached in the znode with the one stored in the node header
-+ * found in @page. Without REISER4_DEBUG this is a no-op. */
-+static void check_jload(jnode * node, struct page *page)
-+{
-+ if (jnode_is_znode(node)) {
-+ node40_header *nh;
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ if (znode_is_any_locked(z)) {
-+ nh = (node40_header *) kmap(page);
-+ /* this only works for node40-only file systems. For
-+ * debugging. */
-+ assert("nikita-3253",
-+ z->nr_items == le16_to_cpu(get_unaligned(&nh->nr_items)));
-+ kunmap(page);
-+ }
-+ assert("nikita-3565", znode_invariant(z));
-+ }
-+}
-+#else
-+#define check_jload(node, page) noop
-+#endif
-+
-+/* prefetch jnode to speed up next call to jload. Call this when you are going
-+ * to call jload() shortly. This will bring appropriate portion of jnode into
-+ * CPU cache. The address prefetched is that of the ->x_count field. */
-+void jload_prefetch(jnode * node)
-+{
-+ prefetchw(&node->x_count);
-+}
-+
-+/* load jnode's data into memory.
-+ *
-+ * Takes an x-reference and a d-reference on @node. If the jnode is not parsed
-+ * yet, its page is obtained, read if it is not up-to-date, and the jnode is
-+ * parsed. Returns 0 on success. On failure a negative error code is returned
-+ * and both references are dropped again through jrelse_tail(). */
-+int jload_gfp(jnode * node /* node to load */ ,
-+ gfp_t gfp_flags /* allocation flags */ ,
-+ int do_kmap /* true if page should be kmapped */ )
-+{
-+ struct page *page;
-+ int result = 0;
-+ int parsed;
-+
-+ assert("nikita-3010", reiser4_schedulable());
-+
-+ prefetchw(&node->pg);
-+
-+ /* taking d-reference implies taking x-reference. */
-+ jref(node);
-+
-+ /*
-+ * acquiring d-reference to @jnode and check for JNODE_PARSED bit
-+ * should be atomic, otherwise there is a race against
-+ * reiser4_releasepage().
-+ */
-+ spin_lock(&(node->load));
-+ add_d_ref(node);
-+ parsed = jnode_is_parsed(node);
-+ spin_unlock(&(node->load));
-+
-+ if (unlikely(!parsed)) {
-+ page = jnode_get_page_locked(node, gfp_flags);
-+ if (unlikely(IS_ERR(page))) {
-+ result = PTR_ERR(page);
-+ goto failed;
-+ }
-+
-+ /* either unlocks an up-to-date page or starts io on it */
-+ result = jnode_start_read(node, page);
-+ if (unlikely(result != 0))
-+ goto failed;
-+
-+ wait_on_page_locked(page);
-+ if (unlikely(!PageUptodate(page))) {
-+ result = RETERR(-EIO);
-+ goto failed;
-+ }
-+
-+ if (do_kmap)
-+ node->data = kmap(page);
-+
-+ result = jparse(node);
-+ if (unlikely(result != 0)) {
-+ /* undo the kmap() made above: the failure path below uses
-+ * jrelse_tail(), which does not unmap the page */
-+ if (do_kmap)
-+ kunmap(page);
-+ goto failed;
-+ }
-+ check_jload(node, page);
-+ } else {
-+ page = jnode_page(node);
-+ check_jload(node, page);
-+ if (do_kmap)
-+ node->data = kmap(page);
-+ }
-+
-+ if (!is_writeout_mode())
-+ /* We do not mark pages active if jload is called as a part of
-+ * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
-+ * and write_logs() add no value to cached data, there is no
-+ * sense to mark pages as active when they go to disk, it just
-+ * confuses vm scanning routines because clean page could be
-+ * moved out from inactive list as a result of this
-+ * mark_page_accessed() call. */
-+ mark_page_accessed(page);
-+
-+ return 0;
-+
-+ failed:
-+ jrelse_tail(node);
-+ return result;
-+
-+}
-+
-+/* start asynchronous reading for given jnode's page. Returns the error of
-+ * jnode_get_page_locked() if the page could not be obtained, and the result
-+ * of jnode_start_read() otherwise. */
-+int jstartio(jnode * node)
-+{
-+	struct page *pg;
-+
-+	pg = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get());
-+	if (!IS_ERR(pg))
-+		return jnode_start_read(node, pg);
-+
-+	return PTR_ERR(pg);
-+}
-+
-+/* Initialize a node by calling appropriate plugin instead of reading
-+ * node from disk as in jload().
-+ *
-+ * Takes an x-reference and a d-reference on @node, marks its page up-to-date,
-+ * kmaps it and calls the ->init() method of the jnode plugin unless the jnode
-+ * is already parsed. Returns 0 on success. On failure a negative error code
-+ * is returned and both references are dropped again. */
-+int jinit_new(jnode * node, gfp_t gfp_flags)
-+{
-+	struct page *page;
-+	int result;
-+
-+	jref(node);
-+	add_d_ref(node);
-+
-+	page = jnode_get_page_locked(node, gfp_flags);
-+	if (IS_ERR(page)) {
-+		result = PTR_ERR(page);
-+		goto failed;
-+	}
-+
-+	SetPageUptodate(page);
-+	unlock_page(page);
-+
-+	node->data = kmap(page);
-+
-+	if (!jnode_is_parsed(node)) {
-+		jnode_plugin *jplug = jnode_ops(node);
-+		spin_lock_jnode(node);
-+		result = jplug->init(node);
-+		spin_unlock_jnode(node);
-+		if (result) {
-+			kunmap(page);
-+			goto failed;
-+		}
-+		JF_SET(node, JNODE_PARSED);
-+	}
-+
-+	return 0;
-+
-+      failed:
-+	/*
-+	 * page is not kmapped at this point: either kmap() was never reached,
-+	 * or the page was just unmapped above. Hence only references are
-+	 * dropped here (as in jload_gfp()); jrelse() would kunmap() the page
-+	 * once more.
-+	 */
-+	jrelse_tail(node);
-+	return result;
-+}
-+
-+/* release a reference to jnode acquired by jload(), decrement ->d_count.
-+ * Unlike jrelse() this does not kunmap() the page of @node. */
-+void jrelse_tail(jnode * node /* jnode to release references to */ )
-+{
-+	assert("nikita-489", atomic_read(&node->d_count) > 0);
-+	atomic_dec(&node->d_count);
-+	/*
-+	 * adjust the lock counter before jput(): jput() may release the last
-+	 * reference to @node, after which @node must not be looked at.
-+	 */
-+	if (jnode_is_unformatted(node) || jnode_is_znode(node))
-+		LOCK_CNT_DEC(d_refs);
-+	/* release reference acquired in jload_gfp() or jinit_new() */
-+	jput(node);
-+}
-+
-+/* drop reference to node data. When last reference is dropped, data are
-+ unloaded.
-+
-+ If @node has a page, the page is kunmap()-ed; then the references are
-+ released through jrelse_tail(). Must not be called with the spin lock of
-+ @node held. */
-+void jrelse(jnode * node /* jnode to release references to */ )
-+{
-+ struct page *page;
-+
-+ assert("nikita-487", node != NULL);
-+ assert_spin_not_locked(&(node->guard));
-+
-+ page = jnode_page(node);
-+ if (likely(page != NULL)) {
-+ /*
-+ * it is safe not to lock jnode here, because at this point
-+ * @node->d_count is greater than zero (if jrelse() is used
-+ * correctly, that is). JNODE_PARSED may be not set yet, if,
-+ * for example, we got here as a result of error handling path
-+ * in jload(). Anyway, page cannot be detached by
-+ * reiser4_releasepage(). truncate will invalidate page
-+ * regardless, but this should not be a problem.
-+ */
-+ kunmap(page);
-+ }
-+ jrelse_tail(node);
-+}
-+
-+/* called from jput() to wait for io completion: if @node has a page, wait
-+ * until writeback of that page is over. The page is pinned while waiting,
-+ * because the jnode spin lock is not held during the wait. */
-+static void jnode_finish_io(jnode * node)
-+{
-+	struct page *pg;
-+
-+	assert("nikita-2922", node != NULL);
-+
-+	spin_lock_jnode(node);
-+	pg = jnode_page(node);
-+	if (pg == NULL) {
-+		spin_unlock_jnode(node);
-+		return;
-+	}
-+	page_cache_get(pg);
-+	spin_unlock_jnode(node);
-+
-+	wait_on_page_writeback(pg);
-+	page_cache_release(pg);
-+}
-+
-+/*
-+ * This is called by jput() when last reference to jnode is released. This is
-+ * separate function, because we want fast path of jput() to be inline and,
-+ * therefore, small.
-+ *
-+ * rcu_read_unlock() is called on every path through this function, which
-+ * presumably pairs with an rcu_read_lock() taken by the caller (jput() is not
-+ * visible here -- confirm).
-+ */
-+void jput_final(jnode * node)
-+{
-+ int r_i_p;
-+
-+ /* A fast check for keeping node in cache. We always keep node in cache
-+ * if its page is present and node was not marked for deletion */
-+ if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ rcu_read_unlock();
-+ return;
-+ }
-+ assert("edward-1432", node->page_count == 0);
-+
-+ r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP);
-+ /*
-+ * if r_i_p is true, we were first to set JNODE_RIP on this node. In
-+ * this case it is safe to access node after unlock.
-+ */
-+ rcu_read_unlock();
-+ if (r_i_p) {
-+ /* wait for writeback of the page, if any, before destroying node */
-+ jnode_finish_io(node);
-+ if (JF_ISSET(node, JNODE_HEARD_BANSHEE))
-+ /* node is removed from the tree. */
-+ jdelete(node);
-+ else
-+ jnode_try_drop(node);
-+ }
-+ /* if !r_i_p some other thread is already killing it */
-+}
-+
-+/*
-+ * Wait for completion of io on the jnode's page. For @rw == READ this waits
-+ * until the page is unlocked, for @rw == WRITE until page writeback is over.
-+ * Returns 0, or -EIO if the page has its error flag set afterwards.
-+ */
-+int jwait_io(jnode * node, int rw)
-+{
-+	struct page *page;
-+
-+	assert("zam-447", node != NULL);
-+	assert("zam-448", jnode_page(node) != NULL);
-+
-+	page = jnode_page(node);
-+
-+	if (rw != READ) {
-+		assert("nikita-2227", rw == WRITE);
-+		wait_on_page_writeback(page);
-+	} else
-+		wait_on_page_locked(page);
-+
-+	if (PageError(page))
-+		return RETERR(-EIO);
-+
-+	return 0;
-+}
-+
-+/*
-+ * jnode types and plugins.
-+ *
-+ * jnode by itself is a "base type". There are several different jnode
-+ * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
-+ * has to do different things based on jnode type. In the standard reiser4 way
-+ * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
-+ *
-+ * Functions below deal with jnode types and define methods of jnode plugin.
-+ *
-+ */
-+
-+/*
-+ * Set jnode type. This is done during jnode initialization.
-+ *
-+ * Each type is represented by a small bit pattern that is stored in ->state
-+ * starting at bit JNODE_TYPE_1. ->state must still be zero when this is
-+ * called (see assertion "nikita-3386").
-+ */
-+static void jnode_set_type(jnode * node, jnode_type type)
-+{
-+	/* type -> bit pattern; never modified, so keep it in read-only data */
-+	static const unsigned long type_to_mask[] = {
-+		[JNODE_UNFORMATTED_BLOCK] = 1,
-+		[JNODE_FORMATTED_BLOCK] = 0,
-+		[JNODE_BITMAP] = 2,
-+		[JNODE_IO_HEAD] = 6,
-+		[JNODE_INODE] = 4
-+	};
-+
-+	assert("zam-647", type < LAST_JNODE_TYPE);
-+	assert("nikita-2815", !jnode_is_loaded(node));
-+	assert("nikita-3386", node->state == 0);
-+
-+	node->state |= (type_to_mask[type] << JNODE_TYPE_1);
-+}
-+
-+/* ->init() method of jnode plugin for jnodes that don't require plugin
-+ * specific initialization. Does nothing and always returns 0. */
-+static int init_noinit(jnode * node UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* ->parse() method of jnode plugin for jnodes that don't require plugin
-+ * specific parsing. Does nothing and always returns 0. */
-+static int parse_noparse(jnode * node UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/*
-+ * ->mapping() method for unformatted jnode: returns the address space stored
-+ * in the jnode key (->key.j.mapping). The mapping is asserted to be non-NULL
-+ * and to belong to a reiser4 inode whose oid equals ->key.j.objectid.
-+ */
-+struct address_space *mapping_jnode(const jnode * node)
-+{
-+ struct address_space *map;
-+
-+ assert("nikita-2713", node != NULL);
-+
-+ /* mapping is stored in jnode */
-+
-+ map = node->key.j.mapping;
-+ assert("nikita-2714", map != NULL);
-+ assert("nikita-2897", is_reiser4_inode(map->host));
-+ assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid);
-+ return map;
-+}
-+
-+/* ->index() method for unformatted jnodes: returns ->key.j.index */
-+unsigned long index_jnode(const jnode * node)
-+{
-+ /* index is stored in jnode */
-+ return node->key.j.index;
-+}
-+
-+/*
-+ * ->remove() method for unformatted jnodes. Unhashes the jnode, but only if
-+ * it has a mapping recorded in its key. @tree is not used here.
-+ */
-+static inline void remove_jnode(jnode * node, reiser4_tree * tree)
-+{
-+ /* remove jnode from hash table and radix tree */
-+ if (node->key.j.mapping)
-+ unhash_unformatted_node_nolock(node);
-+}
-+
-+/*
-+ * ->mapping() method for znodes: returns ->i_mapping of the fake inode of
-+ * the super block that the node's tree belongs to.
-+ */
-+static struct address_space *mapping_znode(const jnode * node)
-+{
-+ /* all znodes belong to fake inode */
-+ return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping;
-+}
-+
-+/*
-+ * ->index() method for znodes. The index is derived from the address of the
-+ * node itself: its offset from PAGE_OFFSET, scaled down by
-+ * znode_shift_order bits.
-+ */
-+static unsigned long index_znode(const jnode * node)
-+{
-+	assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode));
-+
-+	/* index of znode is just its address (shifted) */
-+	return ((unsigned long)node - PAGE_OFFSET) >> znode_shift_order;
-+}
-+
-+/*
-+ * ->mapping() method for bitmap jnode: returns ->i_mapping of the ->bitmap
-+ * inode kept in the super block private data of the node's tree.
-+ */
-+static struct address_space *mapping_bitmap(const jnode * node)
-+{
-+ /* all bitmap blocks belong to special bitmap inode */
-+ return get_super_private(jnode_get_tree(node)->super)->bitmap->
-+ i_mapping;
-+}
-+
-+/*
-+ * ->index() method for jnodes that are indexed by address: the index is the
-+ * address of the jnode expressed as an offset from PAGE_OFFSET.
-+ */
-+static unsigned long index_is_address(const jnode * node)
-+{
-+	return (unsigned long)node - PAGE_OFFSET;
-+}
-+
-+/*
-+ * Resolve race with jput: returns @node, or NULL (after dropping the caller's
-+ * x-reference) when @node is found to be marked JNODE_RIP.
-+ */
-+jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node)
-+{
-+ /*
-+ * This is used as part of RCU-based jnode handling.
-+ *
-+ * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
-+ * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
-+ * not protected during this, so a concurrent thread may execute
-+ * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
-+ * freed in jput_final(). To avoid such races, jput_final() sets
-+ * JNODE_RIP on jnode (under tree lock). All places that work with
-+ * unreferenced jnodes call this function. It checks for JNODE_RIP bit
-+ * (first without taking tree lock), and if this bit is set, releases
-+ * the reference acquired by the current thread and returns NULL.
-+ *
-+ * As a result, if jnode is being concurrently freed, NULL is returned
-+ * and caller should pretend that jnode wasn't found in the first
-+ * place.
-+ *
-+ * Otherwise it's safe to release "rcu-read-lock" and continue with
-+ * jnode.
-+ */
-+ if (unlikely(JF_ISSET(node, JNODE_RIP))) {
-+ read_lock_tree(tree);
-+ /* re-check under the tree lock: the bit may have been cleared
-+ * in the meantime, in which case @node is returned as is */
-+ if (JF_ISSET(node, JNODE_RIP)) {
-+ dec_x_ref(node);
-+ node = NULL;
-+ }
-+ read_unlock_tree(tree);
-+ }
-+ return node;
-+}
-+
-+/*
-+ * Build in @key the key of the unformatted @node and return @key.
-+ *
-+ * The byte offset is the node's page index shifted by PAGE_CACHE_SHIFT. If
-+ * the node records a parent item plugin that provides ->f.key_by_offset(),
-+ * that method builds the key; otherwise ->key_by_inode() of the inode's file
-+ * plugin is used.
-+ */
-+reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key)
-+{
-+	struct inode *inode;
-+	item_plugin *iplug = NULL;
-+	loff_t off;
-+
-+	assert("nikita-3092", node != NULL);
-+	assert("nikita-3093", key != NULL);
-+	assert("nikita-3094", jnode_is_unformatted(node));
-+
-+	off = ((loff_t) index_jnode(node)) << PAGE_CACHE_SHIFT;
-+	inode = mapping_jnode(node)->host;
-+
-+	if (node->parent_item_id != 0)
-+		iplug = item_plugin_by_id(node->parent_item_id);
-+
-+	if (iplug == NULL || !iplug->f.key_by_offset) {
-+		/* no usable item plugin: fall back to the file plugin */
-+		file_plugin *fplug;
-+
-+		fplug = inode_file_plugin(inode);
-+		assert("zam-1007", fplug != NULL);
-+		assert("zam-1008", fplug->key_by_inode != NULL);
-+
-+		fplug->key_by_inode(inode, off, key);
-+	} else
-+		iplug->f.key_by_offset(inode, off, key);
-+
-+	return key;
-+}
-+
-+/* ->parse() method for formatted nodes: delegates to zparse() on the znode
-+ * that @node is converted to by JZNODE() and returns its result */
-+static int parse_znode(jnode * node)
-+{
-+ return zparse(JZNODE(node));
-+}
-+
-+/*
-+ * ->delete() method for formatted nodes. Must be called with the tree lock
-+ * write-locked, on a node that is marked JNODE_HEARD_BANSHEE and whose
-+ * ->c_count is zero (all three are asserted).
-+ */
-+static void delete_znode(jnode * node, reiser4_tree * tree)
-+{
-+ znode *z;
-+
-+ assert_rw_write_locked(&(tree->tree_lock));
-+ assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ z = JZNODE(node);
-+ assert("vs-899", z->c_count == 0);
-+
-+ /* delete znode from sibling list. */
-+ sibling_list_remove(z);
-+
-+ znode_remove(z, tree);
-+}
-+
-+/*
-+ * ->remove() method for formatted nodes. Called with the tree lock
-+ * write-locked. Returns 0 when the znode was detached and removed, -EBUSY
-+ * when its ->c_count is not zero.
-+ */
-+static int remove_znode(jnode * node, reiser4_tree * tree)
-+{
-+	znode *z;
-+
-+	assert_rw_write_locked(&(tree->tree_lock));
-+	z = JZNODE(node);
-+
-+	if (z->c_count != 0)
-+		return RETERR(-EBUSY);
-+
-+	/* detach znode from sibling list. */
-+	sibling_list_drop(z);
-+	/* the tree lock is already held, so znode_remove() is called
-+	   directly (rather than znode_lock_remove()). */
-+	znode_remove(z, tree);
-+	return 0;
-+}
-+
-+/* ->init() method for formatted nodes: returns the result of the ->init()
-+ * method of the znode's node plugin (->nplug) */
-+static int init_znode(jnode * node)
-+{
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ /* call node plugin to do actual initialization */
-+ return z->nplug->init(z);
-+}
-+
-+/*
-+ * ->clone() method for formatted nodes. Allocates and initializes a new znode
-+ * that carries the block number and the level of @node. Returns the jnode of
-+ * the new znode, or ERR_PTR(-ENOMEM) if allocation failed.
-+ */
-+static jnode *clone_formatted(jnode * node)
-+{
-+	znode *copy;
-+
-+	assert("vs-1430", jnode_is_znode(node));
-+
-+	copy = zalloc(reiser4_ctx_gfp_mask_get());
-+	if (copy == NULL)
-+		return ERR_PTR(RETERR(-ENOMEM));
-+
-+	zinit(copy, NULL, current_tree);
-+	jnode_set_block(ZJNODE(copy), jnode_get_block(node));
-+	/* note: ZJNODE(copy)->key.z is left uninitialized */
-+	copy->level = JZNODE(node)->level;
-+
-+	return ZJNODE(copy);
-+}
-+
-+/*
-+ * ->clone() method for unformatted nodes. Allocates a new unformatted jnode
-+ * with the block number of @node. Returns the new jnode, or
-+ * ERR_PTR(-ENOMEM) if allocation failed.
-+ */
-+static jnode *clone_unformatted(jnode * node)
-+{
-+	jnode *copy;
-+
-+	assert("vs-1431", jnode_is_unformatted(node));
-+
-+	copy = jalloc();
-+	if (copy == NULL)
-+		return ERR_PTR(RETERR(-ENOMEM));
-+
-+	jnode_init(copy, current_tree, JNODE_UNFORMATTED_BLOCK);
-+	jnode_set_block(copy, jnode_get_block(node));
-+
-+	return copy;
-+}
-+
-+/*
-+ * Setup jnode plugin methods for various jnode types.
-+ *
-+ * The table is indexed by jnode_type. Entries without a ->clone method
-+ * (bitmap, io head, inode) have it set to NULL; the inode entry has no
-+ * methods at all.
-+ */
-+jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = {
-+ /* unformatted nodes: mapping and index are taken from the jnode key */
-+ [JNODE_UNFORMATTED_BLOCK] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_UNFORMATTED_BLOCK,
-+ .pops = NULL,
-+ .label = "unformatted",
-+ .desc = "unformatted node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_jnode,
-+ .index = index_jnode,
-+ .clone = clone_unformatted
-+ },
-+ /* formatted nodes (znodes): init and parse go through znode code */
-+ [JNODE_FORMATTED_BLOCK] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_FORMATTED_BLOCK,
-+ .pops = NULL,
-+ .label = "formatted",
-+ .desc = "formatted tree node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_znode,
-+ .parse = parse_znode,
-+ .mapping = mapping_znode,
-+ .index = index_znode,
-+ .clone = clone_formatted
-+ },
-+ /* bitmap nodes: live in the bitmap inode mapping, indexed by address */
-+ [JNODE_BITMAP] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_BITMAP,
-+ .pops = NULL,
-+ .label = "bitmap",
-+ .desc = "bitmap node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_bitmap,
-+ .index = index_is_address,
-+ .clone = NULL
-+ },
-+ /* io heads: use the same mapping and index methods as bitmap nodes */
-+ [JNODE_IO_HEAD] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_IO_HEAD,
-+ .pops = NULL,
-+ .label = "io head",
-+ .desc = "io head",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_bitmap,
-+ .index = index_is_address,
-+ .clone = NULL
-+ },
-+ /* jnode built into inode: no plugin methods are provided */
-+ [JNODE_INODE] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_INODE,
-+ .pops = NULL,
-+ .label = "inode",
-+ .desc = "inode's builtin jnode",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = NULL,
-+ .parse = NULL,
-+ .mapping = NULL,
-+ .index = NULL,
-+ .clone = NULL
-+ }
-+};
-+
-+/*
-+ * jnode destruction.
-+ *
-+ * Thread may use a jnode after it acquired a reference to it. References are
-+ * counted in ->x_count field. Reference protects jnode from being
-+ * recycled. This is different from protecting jnode data (that are stored in
-+ * jnode page) from being evicted from memory. Data are protected by jload()
-+ * and released by jrelse().
-+ *
-+ * If thread already possesses a reference to the jnode it can acquire another
-+ * one through jref(). Initial reference is obtained (usually) by locating
-+ * jnode in some indexing structure that depends on jnode type: formatted
-+ * nodes are kept in global hash table, where they are indexed by block
-+ * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
-+ * table, which is indexed by oid and offset within file, and in per-inode
-+ * radix tree.
-+ *
-+ * Reference to jnode is released by jput(). If last reference is released,
-+ * jput_final() is called. This function determines whether jnode has to be
-+ * deleted (this happens when corresponding node is removed from the file
-+ * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
-+ * should be just "removed" (deleted from memory).
-+ *
-+ * Jnode destruction is signally delicate dance because of locking and RCU.
-+ */
-+
-+/*
-+ * Returns true (1) if jnode cannot be removed right now: either somebody
-+ * holds a reference to it (->x_count > 0), or it is a formatted node that
-+ * still has children in memory (->c_count > 0). Returns 0 otherwise.
-+ *
-+ * This check is called under tree lock. The callers in this file go on to
-+ * remove or delete the jnode only when this returns 0.
-+ */
-+static inline int jnode_is_busy(const jnode * node, jnode_type jtype)
-+{
-+	return atomic_read(&node->x_count) > 0 ||
-+	    (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0);
-+}
-+
-+/*
-+ * This is called as part of removing jnode. Based on jnode type, call the
-+ * corresponding function that removes jnode from its indices. Bitmap,
-+ * io-head and inode jnodes need no work here. The return value of
-+ * remove_znode() is not checked.
-+ */
-+static inline void
-+jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree)
-+{
-+	switch (jtype) {
-+	case JNODE_FORMATTED_BLOCK:
-+		remove_znode(node, tree);
-+		break;
-+	case JNODE_UNFORMATTED_BLOCK:
-+		remove_jnode(node, tree);
-+		break;
-+	case JNODE_BITMAP:
-+	case JNODE_IO_HEAD:
-+	case JNODE_INODE:
-+		/* nothing to do for these types */
-+		break;
-+	default:
-+		wrong_return_value("nikita-3196", "Wrong jnode type");
-+	}
-+}
-+
-+/*
-+ * This is called as part of deleting jnode. Based on jnode type, call the
-+ * corresponding function that removes jnode from its indices.
-+ *
-+ * This differs from jnode_remove() for formatted nodes, where
-+ * delete_znode() is used instead of remove_znode(), and for inode jnodes,
-+ * which are reported as a wrong type here.
-+ */
-+static inline void
-+jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG)
-+{
-+	switch (jtype) {
-+	case JNODE_FORMATTED_BLOCK:
-+		delete_znode(node, tree);
-+		break;
-+	case JNODE_UNFORMATTED_BLOCK:
-+		remove_jnode(node, tree);
-+		break;
-+	case JNODE_BITMAP:
-+	case JNODE_IO_HEAD:
-+		/* nothing to do for these types */
-+		break;
-+	case JNODE_INODE:
-+	default:
-+		wrong_return_value("nikita-3195", "Wrong jnode type");
-+	}
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debugging only: unlink @node from the list of all jnodes, under the
-+ * ->all_guard spin lock of the super block private data.
-+ */
-+void jnode_list_remove(jnode * node)
-+{
-+	reiser4_super_info_data *sbi;
-+
-+	sbi = get_super_private(jnode_get_tree(node)->super);
-+
-+	spin_lock_irq(&sbi->all_guard);
-+	assert("nikita-2422", !list_empty(&node->jnodes));
-+	list_del_init(&node->jnodes);
-+	spin_unlock_irq(&sbi->all_guard);
-+}
-+#endif
-+
-+/*
-+ * this is called by jput_final() to remove jnode when last reference to it is
-+ * released.
-+ *
-+ * Expects JNODE_RIP to be set on entry (asserted). Returns 0 if the jnode
-+ * was removed and freed, -EBUSY if it still has a page, or the non-zero
-+ * result of jnode_is_busy() if the busy check failed. On both failure paths
-+ * JNODE_RIP is cleared again.
-+ */
-+static int jnode_try_drop(jnode * node)
-+{
-+ int result;
-+ reiser4_tree *tree;
-+ jnode_type jtype;
-+
-+ assert("nikita-2491", node != NULL);
-+ assert("nikita-2583", JF_ISSET(node, JNODE_RIP));
-+
-+ tree = jnode_get_tree(node);
-+ jtype = jnode_get_type(node);
-+
-+ /* lock order: jnode spin lock first, then the tree lock */
-+ spin_lock_jnode(node);
-+ write_lock_tree(tree);
-+ /*
-+ * if jnode has a page---leave it alone. Memory pressure will
-+ * eventually kill page and jnode.
-+ */
-+ if (jnode_page(node) != NULL) {
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ JF_CLR(node, JNODE_RIP);
-+ return RETERR(-EBUSY);
-+ }
-+
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (result == 0) {
-+ assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("jmacd-511/b", atomic_read(&node->d_count) == 0);
-+
-+ spin_unlock_jnode(node);
-+ /* no page and no references---despatch him. */
-+ jnode_remove(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype);
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ JF_CLR(node, JNODE_RIP);
-+ }
-+ return result;
-+}
-+
-+/*
-+ * jdelete() -- Delete jnode from the tree and file system
-+ *
-+ * Expects JNODE_RIP to be set on entry (asserted). Returns 0 if the jnode
-+ * was deleted and freed, or the non-zero result of jnode_is_busy() if a
-+ * concurrent thread acquired a reference; in the latter case JNODE_RIP is
-+ * cleared and the jnode is left alone.
-+ */
-+static int jdelete(jnode * node /* jnode to finish with */ )
-+{
-+ struct page *page;
-+ int result;
-+ reiser4_tree *tree;
-+ jnode_type jtype;
-+
-+ assert("nikita-467", node != NULL);
-+ assert("nikita-2531", JF_ISSET(node, JNODE_RIP));
-+
-+ jtype = jnode_get_type(node);
-+
-+ /* returns with the jnode spin lock held (see the assertion below);
-+ * @page may be NULL, and is unlocked explicitly on the busy path */
-+ page = jnode_lock_page(node);
-+ assert_spin_locked(&(node->guard));
-+
-+ tree = jnode_get_tree(node);
-+
-+ write_lock_tree(tree);
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (likely(!result)) {
-+ assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("jmacd-511", atomic_read(&node->d_count) == 0);
-+
-+ /* detach page */
-+ if (page != NULL) {
-+ /*
-+ * FIXME this is racy against jnode_extent_write().
-+ */
-+ page_clear_jnode(page, node);
-+ }
-+ spin_unlock_jnode(node);
-+ /* goodbye */
-+ jnode_delete(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype);
-+ /* @node is no longer valid pointer */
-+ if (page != NULL)
-+ reiser4_drop_page(page);
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ JF_CLR(node, JNODE_RIP);
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ if (page != NULL)
-+ unlock_page(page);
-+ }
-+ return result;
-+}
-+
-+/* drop jnode on the floor.
-+
-+ Must be called without the tree lock held and on a jnode that is not
-+ marked JNODE_HEARD_BANSHEE (both asserted).
-+
-+ Return value:
-+
-+ non-zero: failed to drop jnode, because there are still references to it
-+ (this is the result of jnode_is_busy(), not a -EBUSY error code)
-+
-+ 0: successfully dropped jnode
-+
-+*/
-+static int jdrop_in_tree(jnode * node, reiser4_tree * tree)
-+{
-+ struct page *page;
-+ jnode_type jtype;
-+ int result;
-+
-+ assert("zam-602", node != NULL);
-+ assert_rw_not_read_locked(&(tree->tree_lock));
-+ assert_rw_not_write_locked(&(tree->tree_lock));
-+ assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ jtype = jnode_get_type(node);
-+
-+ /* returns with the jnode spin lock held (see the assertion below);
-+ * @page may be NULL */
-+ page = jnode_lock_page(node);
-+ assert_spin_locked(&(node->guard));
-+
-+ write_lock_tree(tree);
-+
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (!result) {
-+ assert("nikita-2488", page == jnode_page(node));
-+ assert("nikita-2533", atomic_read(&node->d_count) == 0);
-+ if (page != NULL) {
-+ assert("nikita-2126", !PageDirty(page));
-+ assert("nikita-2127", PageUptodate(page));
-+ assert("nikita-2181", PageLocked(page));
-+ page_clear_jnode(page, node);
-+ }
-+ spin_unlock_jnode(node);
-+ jnode_remove(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype);
-+ if (page != NULL) {
-+ reiser4_drop_page(page);
-+ }
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ JF_CLR(node, JNODE_RIP);
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ if (page != NULL)
-+ unlock_page(page);
-+ }
-+ return result;
-+}
-+
-+/* This function frees jnode "if possible". In particular, [dcx]_count has to
-+ be 0 (where applicable). The result of jdrop_in_tree() is discarded, so
-+ the caller is not told whether the jnode was actually dropped. */
-+void jdrop(jnode * node)
-+{
-+ jdrop_in_tree(node, jnode_get_tree(node));
-+}
-+
-+/* IO head jnode implementation; The io heads are simple j-nodes with limited
-+ functionality (these j-nodes are not in any hash table) just for reading
-+ from and writing to disk. */
-+
-+/*
-+ * Allocate an io-head jnode for @block and take a reference on it.
-+ * Returns the new jnode, or NULL if the allocation failed.
-+ */
-+jnode *reiser4_alloc_io_head(const reiser4_block_nr * block)
-+{
-+	jnode *jal = jalloc();
-+
-+	/* a failed allocation must not be passed on to jref() */
-+	if (jal == NULL)
-+		return NULL;
-+
-+	jnode_init(jal, current_tree, JNODE_IO_HEAD);
-+	jnode_set_block(jal, block);
-+	jref(jal);
-+
-+	return jal;
-+}
-+
-+/*
-+ * Release an io-head jnode: drop a reference with jput() and then try to
-+ * free the jnode with jdrop(). @node must be of type JNODE_IO_HEAD
-+ * (asserted).
-+ * NOTE(review): @node is still used after jput(); presumably the io head is
-+ * expected to survive jput() here -- confirm.
-+ */
-+void reiser4_drop_io_head(jnode * node)
-+{
-+ assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD);
-+
-+ jput(node);
-+ jdrop(node);
-+}
-+
-+/* protect jnode data from reiser4_releasepage() by taking an additional
-+ reference on the jnode's page; the jnode must have a page (asserted) */
-+void pin_jnode_data(jnode * node)
-+{
-+ assert("zam-671", jnode_page(node) != NULL);
-+ page_cache_get(jnode_page(node));
-+}
-+
-+/* make jnode data free-able again: drop the page reference taken by
-+ pin_jnode_data(); the jnode must have a page (asserted) */
-+void unpin_jnode_data(jnode * node)
-+{
-+ assert("zam-672", jnode_page(node) != NULL);
-+ page_cache_release(jnode_page(node));
-+}
-+
-+/* return the address space of @node, as reported by the ->mapping() method
-+ of the jnode operations obtained through jnode_ops() */
-+struct address_space *jnode_get_mapping(const jnode * node)
-+{
-+ assert("nikita-3162", node != NULL);
-+ return jnode_ops(node)->mapping(node);
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * debugging aid: jnode invariant
-+ *
-+ * Returns non-zero if all checks hold. Every check stores its own textual
-+ * form in *@msg before it is evaluated, so when 0 is returned *@msg
-+ * describes the check that failed.
-+ */
-+int jnode_invariant_f(const jnode * node, char const **msg)
-+{
-+/* check of implication "ant => con"; records its text in *msg first */
-+#define _ergo(ant, con) \
-+ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
-+/* check of plain condition; records its text in *msg first */
-+#define _check(exp) ((*msg) = #exp, (exp))
-+
-+ return _check(node != NULL) &&
-+ /* [jnode-queued] */
-+ /* only relocated node can be queued, except that when znode
-+ * is being deleted, its JNODE_RELOC bit is cleared */
-+ _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED),
-+ JF_ISSET(node, JNODE_RELOC) ||
-+ JF_ISSET(node, JNODE_HEARD_BANSHEE)) &&
-+ _check(node->jnodes.prev != NULL) &&
-+ _check(node->jnodes.next != NULL) &&
-+ /* [jnode-dirty] invariant */
-+ /* dirty jnode is part of atom */
-+ _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) &&
-+ /* [jnode-oid] invariant */
-+ /* for unformatted node ->objectid and ->mapping fields are
-+ * consistent */
-+ _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL,
-+ node->key.j.objectid ==
-+ get_inode_oid(node->key.j.mapping->host)) &&
-+ /* [jnode-atom-valid] invariant */
-+ /* node atom has valid state */
-+ _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) &&
-+ /* [jnode-page-binding] invariant */
-+ /* if node points to page, it points back to node */
-+ _ergo(node->pg != NULL, jprivate(node->pg) == node) &&
-+ /* [jnode-refs] invariant */
-+ /* only referenced jnode can be loaded */
-+ _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count));
-+
-+}
-+
-+/*
-+ * debugging aid: human readable name of jnode type @type. Values outside of
-+ * the jnode_type enumeration are formatted into a static buffer, so the
-+ * result for such values is overwritten by the next such call.
-+ */
-+static const char *jnode_type_name(jnode_type type)
-+{
-+	static const char *const names[] = {
-+		[JNODE_UNFORMATTED_BLOCK] = "unformatted",
-+		[JNODE_FORMATTED_BLOCK] = "formatted",
-+		[JNODE_BITMAP] = "bitmap",
-+		[JNODE_IO_HEAD] = "io head",
-+		[JNODE_INODE] = "inode",
-+		[LAST_JNODE_TYPE] = "last"
-+	};
-+	static char unknown[30];
-+
-+	/* the unsigned comparison also rejects negative values */
-+	if ((unsigned)type <= LAST_JNODE_TYPE)
-+		return names[type];
-+
-+	sprintf(unknown, "unknown %i", type);
-+	return unknown;
-+}
-+
-+/*
-+ * Expands to the name of @flag followed by "|" if @flag is set in @node, and
-+ * to "" otherwise. The "+6" skips the first six characters of the
-+ * stringified flag, i.e. the "JNODE_" prefix of the flags passed by
-+ * info_jnode().
-+ */
-+#define jnode_state_name( node, flag ) \
-+ ( JF_ISSET( ( node ), ( flag ) ) ? ((#flag "|")+6) : "" )
-+
-+/*
-+ * debugging aid: output human readable information about @node
-+ *
-+ * Prints "<prefix>: null" for a NULL node. Otherwise prints state flags,
-+ * level, block, reference counters, page, atom and type of the node; for
-+ * unformatted nodes object id and index are appended. In the non-NULL case
-+ * the output is not terminated by a newline.
-+ */
-+static void info_jnode(const char *prefix /* prefix to print */ ,
-+ const jnode * node /* node to print */ )
-+{
-+ assert("umka-068", prefix != NULL);
-+
-+ if (node == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+
-+ /* one %s per jnode_state_name() argument below; the "lock" fields are
-+ * always printed as 0:0 */
-+ printk
-+ ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
-+ " block: %s, d_count: %d, x_count: %d, "
-+ "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node,
-+ node->state,
-+ jnode_state_name(node, JNODE_PARSED),
-+ jnode_state_name(node, JNODE_HEARD_BANSHEE),
-+ jnode_state_name(node, JNODE_LEFT_CONNECTED),
-+ jnode_state_name(node, JNODE_RIGHT_CONNECTED),
-+ jnode_state_name(node, JNODE_ORPHAN),
-+ jnode_state_name(node, JNODE_CREATED),
-+ jnode_state_name(node, JNODE_RELOC),
-+ jnode_state_name(node, JNODE_OVRWR),
-+ jnode_state_name(node, JNODE_DIRTY),
-+ jnode_state_name(node, JNODE_IS_DYING),
-+ jnode_state_name(node, JNODE_RIP),
-+ jnode_state_name(node, JNODE_MISSED_IN_CAPTURE),
-+ jnode_state_name(node, JNODE_WRITEBACK),
-+ jnode_state_name(node, JNODE_NEW),
-+ jnode_state_name(node, JNODE_DKSET),
-+ jnode_state_name(node, JNODE_REPACK),
-+ jnode_state_name(node, JNODE_CLUSTER_PAGE),
-+ jnode_get_level(node), sprint_address(jnode_get_block(node)),
-+ atomic_read(&node->d_count), atomic_read(&node->x_count),
-+ jnode_page(node), node->atom, 0, 0,
-+ jnode_type_name(jnode_get_type(node)));
-+ if (jnode_is_unformatted(node)) {
-+ printk("inode: %llu, index: %lu, ",
-+ node->key.j.objectid, node->key.j.index);
-+ }
-+}
-+
-+/*
-+ * debugging aid: check jnode invariant and emit a warning if it doesn't hold
-+ *
-+ * @tlocked: non-zero if the caller already holds the tree lock
-+ * @jlocked: non-zero if the caller already holds the jnode spin lock
-+ *
-+ * Locks that the caller does not hold are taken for the duration of the
-+ * check. The jnode spin lock is only taken when the tree lock is not held
-+ * by the caller either, because spin_lock_jnode() must not be called with
-+ * the tree lock held.
-+ *
-+ * Returns non-zero if the invariant holds, 0 otherwise.
-+ */
-+static int jnode_invariant(const jnode * node, int tlocked, int jlocked)
-+{
-+	char const *failed_msg;
-+	int result;
-+	reiser4_tree *tree;
-+
-+	/* check @node before it is dereferenced by jnode_get_tree() */
-+	assert("umka-063312", node != NULL);
-+
-+	tree = jnode_get_tree(node);
-+	assert("umka-064321", tree != NULL);
-+
-+	if (!jlocked && !tlocked)
-+		spin_lock_jnode((jnode *) node);
-+	if (!tlocked)
-+		read_lock_tree(tree);
-+	result = jnode_invariant_f(node, &failed_msg);
-+	if (!result) {
-+		info_jnode("corrupted node", node);
-+		warning("jmacd-555", "Condition %s failed", failed_msg);
-+	}
-+	if (!tlocked)
-+		read_unlock_tree(tree);
-+	if (!jlocked && !tlocked)
-+		spin_unlock_jnode((jnode *) node);
-+	return result;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/jnode.h linux-2.6.20/fs/reiser4/jnode.h
---- linux-2.6.20.orig/fs/reiser4/jnode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/jnode.h 2007-05-06 14:50:43.734986973 +0400
-@@ -0,0 +1,705 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of jnode. See jnode.c for details. */
-+
-+#ifndef __JNODE_H__
-+#define __JNODE_H__
-+
-+#include "forward.h"
-+#include "type_safe_hash.h"
-+#include "txnmgr.h"
-+#include "key.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "page_cache.h"
-+#include "context.h"
-+
-+#include "plugin/plugin.h"
-+
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <asm/bitops.h>
-+#include <linux/list.h>
-+#include <linux/rcupdate.h>
-+
-+/* declare hash table of jnodes (jnodes proper, that is, unformatted
-+ nodes) */
-+TYPE_SAFE_HASH_DECLARE(j, jnode);
-+
-+/* declare hash table of znodes */
-+TYPE_SAFE_HASH_DECLARE(z, znode);
-+
-+/* key of an unformatted jnode, stored in jnode->key.j */
-+typedef struct {
-+ /* object id; expected to equal the oid of mapping->host */
-+ __u64 objectid;
-+ /* index of the node, as returned by index_jnode() */
-+ unsigned long index;
-+ /* address space the node belongs to, as returned by mapping_jnode() */
-+ struct address_space *mapping;
-+} jnode_key_t;
-+
-+/*
-+ Jnode is the "base class" of other nodes in reiser4. It also happens to
-+ be exactly the node we use for unformatted tree nodes.
-+
-+ Jnode provides following basic functionality:
-+
-+ . reference counting and indexing.
-+
-+ . integration with page cache. Jnode has ->pg reference to which page can
-+ be attached.
-+
-+ . interface to transaction manager. It is jnode that is kept in transaction
-+ manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
-+ means, there should be special type of jnode for inode.)
-+
-+ Locking:
-+
-+ Spin lock: the following fields are protected by the per-jnode spin lock:
-+
-+ ->state
-+ ->atom
-+ ->capture_link
-+
-+ Following fields are protected by the global tree lock:
-+
-+ ->link
-+ ->key.z (content of ->key.z is only changed in znode_rehash())
-+ ->key.j
-+
-+ Atomic counters
-+
-+ ->x_count
-+ ->d_count
-+
-+ ->pg, and ->data are protected by spin lock for unused jnode and are
-+ immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
-+ is false).
-+
-+ ->tree is immutable after creation
-+
-+ Unclear
-+
-+ ->blocknr: should be under jnode spin-lock, but current interface is based
-+ on passing of block address.
-+
-+ If you ever need to spin lock two nodes at once, do this in "natural"
-+ memory order: lock znode with lower address first. (See lock_two_nodes().)
-+
-+ Invariants involving this data-type:
-+
-+ [jnode-dirty]
-+ [jnode-refs]
-+ [jnode-oid]
-+ [jnode-queued]
-+ [jnode-atom-valid]
-+ [jnode-page-binding]
-+*/
-+
-+/*
-+ * NOTE(review): the numeric offsets in the field comments below look like
-+ * they were computed for 4-byte pointers and a non-debug build -- confirm
-+ * before relying on them.
-+ */
-+struct jnode {
-+#if REISER4_DEBUG
-+#define JMAGIC 0x52654973 /* "ReIs" */
-+ /* checked against JMAGIC by the JF_*() state accessors */
-+ int magic;
-+#endif
-+ /* FIRST CACHE LINE (16 bytes): data used by jload */
-+
-+ /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
-+ /* 0 */ unsigned long state;
-+
-+ /* lock, protecting jnode's fields. */
-+ /* 4 */ spinlock_t load;
-+
-+ /* counter of references to jnode itself. Increased on jref().
-+ Decreased on jput().
-+ */
-+ /* 8 */ atomic_t x_count;
-+
-+ /* counter of references to jnode's data. Pin data page(s) in
-+ memory while this is greater than 0. Increased on jload().
-+ Decreased on jrelse().
-+ */
-+ /* 12 */ atomic_t d_count;
-+
-+ /* SECOND CACHE LINE: data used by hash table lookups */
-+
-+ /* 16 */ union {
-+ /* znodes are hashed by block number */
-+ reiser4_block_nr z;
-+ /* unformatted nodes are hashed by mapping plus offset */
-+ jnode_key_t j;
-+ } key;
-+
-+ /* THIRD CACHE LINE */
-+
-+ /* 32 */ union {
-+ /* pointers to maintain hash-table */
-+ z_hash_link z;
-+ j_hash_link j;
-+ } link;
-+
-+ /* pointer to jnode page. */
-+ /* 36 */ struct page *pg;
-+ /* pointer to node itself. This is page_address(node->pg) when page is
-+ attached to the jnode
-+ */
-+ /* 40 */ void *data;
-+
-+ /* 44 */ reiser4_tree *tree;
-+
-+ /* FOURTH CACHE LINE: atom related fields */
-+
-+ /* per-jnode spin lock; taken by spin_lock_jnode() and released by
-+ spin_unlock_jnode() */
-+ /* 48 */ spinlock_t guard;
-+
-+ /* atom the block is in, if any */
-+ /* 52 */ txn_atom *atom;
-+
-+ /* capture list */
-+ /* 56 */ struct list_head capture_link;
-+
-+ /* FIFTH CACHE LINE */
-+
-+ /* 64 */ struct rcu_head rcu;
-+ /* crosses cache line */
-+
-+ /* SIXTH CACHE LINE */
-+
-+ /* the real blocknr (where io is going to/from) */
-+ /* 80 */ reiser4_block_nr blocknr;
-+ /* Parent item type, unformatted and CRC need it for offset => key conversion. */
-+ /* NOTE: this parent_item_id looks like jnode type. */
-+ /* 88 */ reiser4_plugin_id parent_item_id;
-+ /* 92 */
-+#if REISER4_DEBUG
-+ /* number of pages referenced by the jnode (meaningful while capturing of
-+ page clusters) */
-+ int page_count;
-+ /* list of all jnodes for debugging purposes. */
-+ struct list_head jnodes;
-+ /* how many times this jnode was written in one transaction */
-+ int written;
-+ /* this indicates which atom's list the jnode is on */
-+ atom_list list;
-+#endif
-+} __attribute__ ((aligned(16)));
-+
-+/*
-+ * jnode types. Enumeration of existing jnode types.
-+ *
-+ * The values are used as array indices (e.g. for the jnode_plugins[] table,
-+ * which has LAST_JNODE_TYPE entries), so LAST_JNODE_TYPE has to stay the
-+ * last enumerator.
-+ */
-+typedef enum {
-+ JNODE_UNFORMATTED_BLOCK, /* unformatted block */
-+ JNODE_FORMATTED_BLOCK, /* formatted block, znode */
-+ JNODE_BITMAP, /* bitmap */
-+ JNODE_IO_HEAD, /* jnode representing a block in the
-+ * wandering log */
-+ JNODE_INODE, /* jnode embedded into inode */
-+ LAST_JNODE_TYPE
-+} jnode_type;
-+
-+/* jnode states: each enumerator is a bit number within jnode->state, to be
-+ used with JF_SET(), JF_CLR(), JF_ISSET() and JF_TEST_AND_SET() */
-+typedef enum {
-+ /* jnode's page is loaded and data checked */
-+ JNODE_PARSED = 0,
-+ /* node was deleted, not all locks on it were released. This
-+ node is empty and is going to be removed from the tree
-+ shortly. */
-+ JNODE_HEARD_BANSHEE = 1,
-+ /* left sibling pointer is valid */
-+ JNODE_LEFT_CONNECTED = 2,
-+ /* right sibling pointer is valid */
-+ JNODE_RIGHT_CONNECTED = 3,
-+
-+ /* znode was just created and doesn't yet have a pointer from
-+ its parent */
-+ JNODE_ORPHAN = 4,
-+
-+ /* this node was created by its transaction and has not been assigned
-+ a block address. */
-+ JNODE_CREATED = 5,
-+
-+ /* this node is currently relocated */
-+ JNODE_RELOC = 6,
-+ /* this node is currently wandered */
-+ JNODE_OVRWR = 7,
-+
-+ /* this znode has been modified */
-+ JNODE_DIRTY = 8,
-+
-+ /* znode lock is being invalidated */
-+ JNODE_IS_DYING = 9,
-+
-+ /* THIS PLACE IS INTENTIONALLY LEFT BLANK (bits 10 and 11 are not
-+ assigned) */
-+
-+ /* jnode is queued for flushing. */
-+ JNODE_FLUSH_QUEUED = 12,
-+
-+ /* In the following bits jnode type is encoded. */
-+ JNODE_TYPE_1 = 13,
-+ JNODE_TYPE_2 = 14,
-+ JNODE_TYPE_3 = 15,
-+
-+ /* jnode is being destroyed */
-+ JNODE_RIP = 16,
-+
-+ /* znode was not captured during locking (it might so be because
-+ ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
-+ JNODE_MISSED_IN_CAPTURE = 17,
-+
-+ /* write is in progress */
-+ JNODE_WRITEBACK = 18,
-+
-+ /* FIXME: now it is used by crypto-compress plugin only */
-+ JNODE_NEW = 19,
-+
-+ /* delimiting keys are already set for this znode. */
-+ JNODE_DKSET = 20,
-+
-+ /* when this bit is set page and jnode can not be disconnected */
-+ JNODE_WRITE_PREPARED = 21,
-+
-+ JNODE_CLUSTER_PAGE = 22,
-+ /* Jnode is marked for repacking, that means the reiser4 flush and the
-+ * block allocator should process this node special way */
-+ JNODE_REPACK = 23,
-+ /* node should be converted by flush in squalloc phase */
-+ JNODE_CONVERTIBLE = 24,
-+ /*
-+ * When jnode is dirtied for the first time in given transaction,
-+ * do_jnode_make_dirty() checks whether this jnode can possibly become
-+ * a member of the overwrite set. If so, this bit is set, and one block
-+ * is reserved in the ->flush_reserved space of atom.
-+ *
-+ * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
-+ *
-+ * (1) flush decides that we want this block to go into relocate
-+ * set after all.
-+ *
-+ * (2) wandering log is allocated (by log writer)
-+ *
-+ * (3) extent is allocated
-+ *
-+ */
-+ JNODE_FLUSH_RESERVED = 29
-+} reiser4_jnode_state;
-+
-+/* Macros for accessing the jnode state. */
-+
-+static inline void JF_CLR(jnode * j, int f)
-+{
-+ assert("unknown-1", j->magic == JMAGIC);
-+ clear_bit(f, &j->state);
-+}
-+static inline int JF_ISSET(const jnode * j, int f)
-+{
-+ assert("unknown-2", j->magic == JMAGIC);
-+ return test_bit(f, &((jnode *) j)->state);
-+}
-+static inline void JF_SET(jnode * j, int f)
-+{
-+ assert("unknown-3", j->magic == JMAGIC);
-+ set_bit(f, &j->state);
-+}
-+
-+static inline int JF_TEST_AND_SET(jnode * j, int f)
-+{
-+ assert("unknown-4", j->magic == JMAGIC);
-+ return test_and_set_bit(f, &j->state);
-+}
-+
-+static inline void spin_lock_jnode(jnode *node)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_LT(spin_locked_jnode, 2)));
-+
-+ spin_lock(&(node->guard));
-+
-+ LOCK_CNT_INC(spin_locked_jnode);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_unlock_jnode(jnode *node)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_jnode);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(node->guard));
-+}
-+
-+static inline int jnode_is_in_deleteset(const jnode * node)
-+{
-+ return JF_ISSET(node, JNODE_RELOC);
-+}
-+
-+extern int init_jnodes(void);
-+extern void done_jnodes(void);
-+
-+/* Jnode routines */
-+extern jnode *jalloc(void);
-+extern void jfree(jnode * node) NONNULL;
-+extern jnode *jclone(jnode *);
-+extern jnode *jlookup(reiser4_tree * tree,
-+ oid_t objectid, unsigned long ind) NONNULL;
-+extern jnode *jfind(struct address_space *, unsigned long index) NONNULL;
-+extern jnode *jnode_by_page(struct page *pg) NONNULL;
-+extern jnode *jnode_of_page(struct page *pg) NONNULL;
-+void jnode_attach_page(jnode * node, struct page *pg);
-+
-+void unhash_unformatted_jnode(jnode *);
-+extern jnode *page_next_jnode(jnode * node) NONNULL;
-+extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL;
-+extern void jnode_make_dirty(jnode * node) NONNULL;
-+extern void jnode_make_clean(jnode * node) NONNULL;
-+extern void jnode_make_wander_nolock(jnode * node) NONNULL;
-+extern void jnode_make_wander(jnode *) NONNULL;
-+extern void znode_make_reloc(znode *, flush_queue_t *) NONNULL;
-+extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL;
-+extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL;
-+
-+/**
-+ * jnode_get_block
-+ * @node: jnode to query
-+ *
-+ */
-+static inline const reiser4_block_nr *jnode_get_block(const jnode *node)
-+{
-+ assert("nikita-528", node != NULL);
-+
-+ return &node->blocknr;
-+}
-+
-+/**
-+ * jnode_set_block
-+ * @node: jnode to update
-+ * @blocknr: new block nr
-+ */
-+static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr)
-+{
-+ assert("nikita-2020", node != NULL);
-+ assert("umka-055", blocknr != NULL);
-+ node->blocknr = *blocknr;
-+}
-+
-+
-+/* block number for IO. Usually this is the same as jnode_get_block(), unless
-+ * jnode was emergency flushed---then block number chosen by eflush is
-+ * used. */
-+static inline const reiser4_block_nr *jnode_get_io_block(jnode * node)
-+{
-+ assert("nikita-2768", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+
-+ return jnode_get_block(node);
-+}
-+
-+/* Jnode flush interface. */
-+extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos);
-+extern flush_queue_t *reiser4_pos_fq(flush_pos_t * pos);
-+
-+/* FIXME-VS: these are used in plugin/item/extent.c */
-+
-+/* does extent_get_block have to be called */
-+#define jnode_mapped(node) JF_ISSET (node, JNODE_MAPPED)
-+#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED)
-+
-+/* the node should be converted during flush squalloc phase */
-+#define jnode_convertible(node) JF_ISSET (node, JNODE_CONVERTIBLE)
-+#define jnode_set_convertible(node) JF_SET (node, JNODE_CONVERTIBLE)
-+
-+/* Macros to convert from jnode to znode, znode to jnode. These are macros
-+ because C doesn't allow overloading of const prototypes. */
-+#define ZJNODE(x) (& (x) -> zjnode)
-+#define JZNODE(x) \
-+({ \
-+ typeof (x) __tmp_x; \
-+ \
-+ __tmp_x = (x); \
-+ assert ("jmacd-1300", jnode_is_znode (__tmp_x)); \
-+ (znode*) __tmp_x; \
-+})
-+
-+extern int jnodes_tree_init(reiser4_tree * tree);
-+extern int jnodes_tree_done(reiser4_tree * tree);
-+
-+#if REISER4_DEBUG
-+
-+extern int znode_is_any_locked(const znode * node);
-+extern void jnode_list_remove(jnode * node);
-+
-+#else
-+
-+#define jnode_list_remove(node) noop
-+
-+#endif
-+
-+int znode_is_root(const znode * node) NONNULL;
-+
-+/* bump reference counter on @node */
-+static inline void add_x_ref(jnode * node /* node to increase x_count of */ )
-+{
-+ assert("nikita-1911", node != NULL);
-+
-+ atomic_inc(&node->x_count);
-+ LOCK_CNT_INC(x_refs);
-+}
-+
-+static inline void dec_x_ref(jnode * node)
-+{
-+ assert("nikita-3215", node != NULL);
-+ assert("nikita-3216", atomic_read(&node->x_count) > 0);
-+
-+ atomic_dec(&node->x_count);
-+ assert("nikita-3217", LOCK_CNT_GTZ(x_refs));
-+ LOCK_CNT_DEC(x_refs);
-+}
-+
-+/* jref() - increase counter of references to jnode/znode (x_count) */
-+static inline jnode *jref(jnode * node)
-+{
-+ assert("jmacd-508", (node != NULL) && !IS_ERR(node));
-+ add_x_ref(node);
-+ return node;
-+}
-+
-+/* get the page of jnode */
-+static inline struct page *jnode_page(const jnode * node)
-+{
-+ return node->pg;
-+}
-+
-+/* return pointer to jnode data */
-+static inline char *jdata(const jnode * node)
-+{
-+ assert("nikita-1415", node != NULL);
-+ assert("nikita-3198", jnode_page(node) != NULL);
-+ return node->data;
-+}
-+
-+static inline int jnode_is_loaded(const jnode * node)
-+{
-+ assert("zam-506", node != NULL);
-+ return atomic_read(&node->d_count) > 0;
-+}
-+
-+extern void page_clear_jnode(struct page *page, jnode * node) NONNULL;
-+
-+static inline void jnode_set_reloc(jnode * node)
-+{
-+ assert("nikita-2431", node != NULL);
-+ assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR));
-+ JF_SET(node, JNODE_RELOC);
-+}
-+
-+/* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
-+
-+extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL;
-+
-+static inline int jload(jnode *node)
-+{
-+ return jload_gfp(node, reiser4_ctx_gfp_mask_get(), 1);
-+}
-+
-+extern int jinit_new(jnode *, gfp_t) NONNULL;
-+extern int jstartio(jnode *) NONNULL;
-+
-+extern void jdrop(jnode *) NONNULL;
-+extern int jwait_io(jnode *, int rw) NONNULL;
-+
-+void jload_prefetch(jnode *);
-+
-+extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL;
-+extern void reiser4_drop_io_head(jnode * node) NONNULL;
-+
-+static inline reiser4_tree *jnode_get_tree(const jnode * node)
-+{
-+ assert("nikita-2691", node != NULL);
-+ return node->tree;
-+}
-+
-+extern void pin_jnode_data(jnode *);
-+extern void unpin_jnode_data(jnode *);
-+
-+static inline jnode_type jnode_get_type(const jnode * node)
-+{
-+ static const unsigned long state_mask =
-+ (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3);
-+
-+ static jnode_type mask_to_type[] = {
-+ /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
-+
-+ /* 000 */
-+ [0] = JNODE_FORMATTED_BLOCK,
-+ /* 001 */
-+ [1] = JNODE_UNFORMATTED_BLOCK,
-+ /* 010 */
-+ [2] = JNODE_BITMAP,
-+ /* 011 */
-+ [3] = LAST_JNODE_TYPE, /*invalid */
-+ /* 100 */
-+ [4] = JNODE_INODE,
-+ /* 101 */
-+ [5] = LAST_JNODE_TYPE,
-+ /* 110 */
-+ [6] = JNODE_IO_HEAD,
-+ /* 111 */
-+ [7] = LAST_JNODE_TYPE, /* invalid */
-+ };
-+
-+ return mask_to_type[(node->state & state_mask) >> JNODE_TYPE_1];
-+}
-+
-+/* returns true if node is a znode */
-+static inline int jnode_is_znode(const jnode * node)
-+{
-+ return jnode_get_type(node) == JNODE_FORMATTED_BLOCK;
-+}
-+
-+static inline int jnode_is_flushprepped(jnode * node)
-+{
-+ assert("jmacd-78212", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+ return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) ||
-+ JF_ISSET(node, JNODE_OVRWR);
-+}
-+
-+/* Return true if @node has already been processed by the squeeze and allocate
-+ process. This implies the block address has been finalized for the
-+ duration of this atom (or it is clean and will remain in place). If this
-+ returns true you may use the block number as a hint. */
-+static inline int jnode_check_flushprepped(jnode * node)
-+{
-+ int result;
-+
-+ /* It must be clean or relocated or wandered. New allocations are set to relocate. */
-+ spin_lock_jnode(node);
-+ result = jnode_is_flushprepped(node);
-+ spin_unlock_jnode(node);
-+ return result;
-+}
-+
-+/* returns true if node is unformatted */
-+static inline int jnode_is_unformatted(const jnode * node)
-+{
-+ assert("jmacd-0123", node != NULL);
-+ return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
-+}
-+
-+/* returns true if node represents a cluster cache page */
-+static inline int jnode_is_cluster_page(const jnode * node)
-+{
-+ assert("edward-50", node != NULL);
-+ return (JF_ISSET(node, JNODE_CLUSTER_PAGE));
-+}
-+
-+/* returns true is node is builtin inode's jnode */
-+static inline int jnode_is_inode(const jnode * node)
-+{
-+ assert("vs-1240", node != NULL);
-+ return jnode_get_type(node) == JNODE_INODE;
-+}
-+
-+static inline jnode_plugin *jnode_ops_of(const jnode_type type)
-+{
-+ assert("nikita-2367", type < LAST_JNODE_TYPE);
-+ return jnode_plugin_by_id((reiser4_plugin_id) type);
-+}
-+
-+static inline jnode_plugin *jnode_ops(const jnode * node)
-+{
-+ assert("nikita-2366", node != NULL);
-+
-+ return jnode_ops_of(jnode_get_type(node));
-+}
-+
-+/* Get the index of a block. */
-+static inline unsigned long jnode_get_index(jnode * node)
-+{
-+ return jnode_ops(node)->index(node);
-+}
-+
-+/* return true if "node" is the root */
-+static inline int jnode_is_root(const jnode * node)
-+{
-+ return jnode_is_znode(node) && znode_is_root(JZNODE(node));
-+}
-+
-+extern struct address_space *mapping_jnode(const jnode * node);
-+extern unsigned long index_jnode(const jnode * node);
-+
-+static inline void jput(jnode * node);
-+extern void jput_final(jnode * node);
-+
-+/* bump data counter on @node */
-+static inline void add_d_ref(jnode * node /* node to increase d_count of */ )
-+{
-+ assert("nikita-1962", node != NULL);
-+
-+ atomic_inc(&node->d_count);
-+ if (jnode_is_unformatted(node) || jnode_is_znode(node))
-+ LOCK_CNT_INC(d_refs);
-+}
-+
-+/* jput() - decrement x_count reference counter on znode.
-+
-+ Count may drop to 0, jnode stays in cache until memory pressure causes the
-+ eviction of its page. The c_count variable also ensures that children are
-+ pressured out of memory before the parent. The jnode remains hashed as
-+ long as the VM allows its page to stay in memory.
-+*/
-+static inline void jput(jnode * node)
-+{
-+ assert("jmacd-509", node != NULL);
-+ assert("jmacd-510", atomic_read(&node->x_count) > 0);
-+ assert("zam-926", reiser4_schedulable());
-+ LOCK_CNT_DEC(x_refs);
-+
-+ rcu_read_lock();
-+ /*
-+ * we don't need any kind of lock here--jput_final() uses RCU.
-+ */
-+ if (unlikely(atomic_dec_and_test(&node->x_count))) {
-+ jput_final(node);
-+ } else
-+ rcu_read_unlock();
-+ assert("nikita-3473", reiser4_schedulable());
-+}
-+
-+extern void jrelse(jnode * node);
-+extern void jrelse_tail(jnode * node);
-+
-+extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node);
-+
-+/* resolve race with jput */
-+static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node)
-+{
-+ if (unlikely(JF_ISSET(node, JNODE_RIP)))
-+ node = jnode_rip_sync(tree, node);
-+ return node;
-+}
-+
-+extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key);
-+
-+#if REISER4_DEBUG
-+extern int jnode_invariant_f(const jnode *node, char const **msg);
-+#endif
-+
-+extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE];
-+
-+/* __JNODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/kassign.c linux-2.6.20/fs/reiser4/kassign.c
---- linux-2.6.20.orig/fs/reiser4/kassign.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/kassign.c 2007-05-06 14:50:43.734986973 +0400
-@@ -0,0 +1,661 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Key assignment policy implementation */
-+
-+/*
-+ * In reiser4 every piece of file system data and meta-data has a key. Keys
-+ * are used to store information in and retrieve it from reiser4 internal
-+ * tree. In addition to this, keys define _ordering_ of all file system
-+ * information: things having close keys are placed into the same or
-+ * neighboring (in the tree order) nodes of the tree. As our block allocator
-+ * tries to respect tree order (see flush.c), keys also define order in which
-+ * things are laid out on the disk, and hence, affect performance directly.
-+ *
-+ * Obviously, assignment of keys to data and meta-data should be consistent
-+ * across whole file system. Algorithm that calculates a key for a given piece
-+ * of data or meta-data is referred to as "key assignment".
-+ *
-+ * Key assignment is too expensive to be implemented as a plugin (that is,
-+ * with an ability to support different key assignment schemas in the same
-+ * compiled kernel image). As a compromise, all key-assignment functions and
-+ * data-structures are collected in this single file, so that modifications to
-+ * key assignment algorithm can be localized. Additional changes may be
-+ * required in key.[ch].
-+ *
-+ * Current default reiser4 key assignment algorithm is dubbed "Plan A". As one
-+ * may guess, there is "Plan B" too.
-+ *
-+ */
-+
-+/*
-+ * Additional complication with key assignment implementation is a requirement
-+ * to support different key length.
-+ */
-+
-+/*
-+ * KEY ASSIGNMENT: PLAN A, LONG KEYS.
-+ *
-+ * DIRECTORY ITEMS
-+ *
-+ * | 60 | 4 | 7 |1| 56 | 64 | 64 |
-+ * +--------------+---+---+-+-------------+------------------+-----------------+
-+ * | dirid | 0 | F |H| prefix-1 | prefix-2 | prefix-3/hash |
-+ * +--------------+---+---+-+-------------+------------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * dirid objectid of directory this item is for
-+ *
-+ * F fibration, see fs/reiser4/plugin/fibration.[ch]
-+ *
-+ * H 1 if last 8 bytes of the key contain hash,
-+ * 0 if last 8 bytes of the key contain prefix-3
-+ *
-+ * prefix-1 first 7 characters of file name.
-+ * Padded by zeroes if name is not long enough.
-+ *
-+ * prefix-2 next 8 characters of the file name.
-+ *
-+ * prefix-3 next 8 characters of the file name.
-+ *
-+ * hash hash of the rest of file name (i.e., portion of file
-+ * name not included into prefix-1 and prefix-2).
-+ *
-+ * File names shorter than 23 (== 7 + 8 + 8) characters are completely encoded
-+ * in the key. Such file names are called "short". They are distinguished by H
-+ * bit set 0 in the key.
-+ *
-+ * Other file names are "long". For long name, H bit is 1, and first 15 (== 7
-+ * + 8) characters are encoded in prefix-1 and prefix-2 portions of the
-+ * key. Last 8 bytes of the key are occupied by hash of the remaining
-+ * characters of the name.
-+ *
-+ * This key assignment reaches following important goals:
-+ *
-+ * (1) directory entries are sorted in approximately lexicographical
-+ * order.
-+ *
-+ * (2) collisions (when multiple directory items have the same key), while
-+ * principally unavoidable in a tree with fixed length keys, are rare.
-+ *
-+ * STAT DATA
-+ *
-+ * | 60 | 4 | 64 | 4 | 60 | 64 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | locality id | 1 | ordering | 0 | objectid | 0 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * ordering copy of second 8-byte portion of the key of directory
-+ * entry for the first name of this object. Ordering has a form
-+ * {
-+ * fibration :7;
-+ * h :1;
-+ * prefix1 :56;
-+ * }
-+ * see description of key for directory entry above.
-+ *
-+ * objectid object id for this object
-+ *
-+ * This key assignment policy is designed to keep stat-data in the same order
-+ * as corresponding directory items, thus speeding up readdir/stat types of
-+ * workload.
-+ *
-+ * FILE BODY
-+ *
-+ * | 60 | 4 | 64 | 4 | 60 | 64 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | locality id | 4 | ordering | 0 | objectid | offset |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * ordering the same as in the key of stat-data for this object
-+ *
-+ * objectid object id for this object
-+ *
-+ * offset logical offset from the beginning of this file.
-+ * Measured in bytes.
-+ *
-+ *
-+ * KEY ASSIGNMENT: PLAN A, SHORT KEYS.
-+ *
-+ * DIRECTORY ITEMS
-+ *
-+ * | 60 | 4 | 7 |1| 56 | 64 |
-+ * +--------------+---+---+-+-------------+-----------------+
-+ * | dirid | 0 | F |H| prefix-1 | prefix-2/hash |
-+ * +--------------+---+---+-+-------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * dirid objectid of directory this item is for
-+ *
-+ * F fibration, see fs/reiser4/plugin/fibration.[ch]
-+ *
-+ * H 1 if last 8 bytes of the key contain hash,
-+ * 0 if last 8 bytes of the key contain prefix-2
-+ *
-+ * prefix-1 first 7 characters of file name.
-+ * Padded by zeroes if name is not long enough.
-+ *
-+ * prefix-2 next 8 characters of the file name.
-+ *
-+ * hash hash of the rest of file name (i.e., portion of file
-+ * name not included into prefix-1).
-+ *
-+ * File names shorter than 15 (== 7 + 8) characters are completely encoded in
-+ * the key. Such file names are called "short". They are distinguished by H
-+ * bit set in the key.
-+ *
-+ * Other file names are "long". For long name, H bit is 0, and first 7
-+ * characters are encoded in prefix-1 portion of the key. Last 8 bytes of the
-+ * key are occupied by hash of the remaining characters of the name.
-+ *
-+ * STAT DATA
-+ *
-+ * | 60 | 4 | 4 | 60 | 64 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | locality id | 1 | 0 | objectid | 0 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * objectid object id for this object
-+ *
-+ * FILE BODY
-+ *
-+ * | 60 | 4 | 4 | 60 | 64 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | locality id | 4 | 0 | objectid | offset |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * objectid object id for this object
-+ *
-+ * offset logical offset from the beginning of this file.
-+ * Measured in bytes.
-+ *
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "key.h"
-+#include "kassign.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "dscale.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block, etc */
-+
-+/* bitmask for H bit (see comment at the beginning of this file */
-+static const __u64 longname_mark = 0x0100000000000000ull;
-+/* bitmask for F and H portions of the key. */
-+static const __u64 fibration_mask = 0xff00000000000000ull;
-+
-+/* return true if name is not completely encoded in @key */
-+int is_longname_key(const reiser4_key * key)
-+{
-+ __u64 highpart;
-+
-+ assert("nikita-2863", key != NULL);
-+ if (get_key_type(key) != KEY_FILE_NAME_MINOR)
-+ reiser4_print_key("oops", key);
-+ assert("nikita-2864", get_key_type(key) == KEY_FILE_NAME_MINOR);
-+
-+ if (REISER4_LARGE_KEY)
-+ highpart = get_key_ordering(key);
-+ else
-+ highpart = get_key_objectid(key);
-+
-+ return (highpart & longname_mark) ? 1 : 0;
-+}
-+
-+/* return true if @name is too long to be completely encoded in the key */
-+int is_longname(const char *name UNUSED_ARG, int len)
-+{
-+ if (REISER4_LARGE_KEY)
-+ return len > 23;
-+ else
-+ return len > 15;
-+}
-+
-+/* code ascii string into __u64.
-+
-+ Put characters of @name into result (@str) one after another starting
-+ from @start_idx-th highest (arithmetically) byte. This produces
-+ endian-safe encoding. memcpy(2) will not do.
-+
-+*/
-+static __u64 pack_string(const char *name /* string to encode */ ,
-+ int start_idx /* highest byte in result from
-+ * which to start encoding */ )
-+{
-+ unsigned i;
-+ __u64 str;
-+
-+ str = 0;
-+ for (i = 0; (i < sizeof str - start_idx) && name[i]; ++i) {
-+ str <<= 8;
-+ str |= (unsigned char)name[i];
-+ }
-+ str <<= (sizeof str - i - start_idx) << 3;
-+ return str;
-+}
-+
-+/* opposite to pack_string(). Takes value produced by pack_string(), restores
-+ * string encoded in it and stores result in @buf */
-+char * reiser4_unpack_string(__u64 value, char *buf)
-+{
-+ do {
-+ *buf = value >> (64 - 8);
-+ if (*buf)
-+ ++buf;
-+ value <<= 8;
-+ } while (value != 0);
-+ *buf = 0;
-+ return buf;
-+}
-+
-+/* obtain name encoded in @key and store it in @buf */
-+char *extract_name_from_key(const reiser4_key * key, char *buf)
-+{
-+ char *c;
-+
-+ assert("nikita-2868", !is_longname_key(key));
-+
-+ c = buf;
-+ if (REISER4_LARGE_KEY) {
-+ c = reiser4_unpack_string(get_key_ordering(key) &
-+ ~fibration_mask, c);
-+ c = reiser4_unpack_string(get_key_fulloid(key), c);
-+ } else
-+ c = reiser4_unpack_string(get_key_fulloid(key) &
-+ ~fibration_mask, c);
-+ reiser4_unpack_string(get_key_offset(key), c);
-+ return buf;
-+}
-+
-+/**
-+ * complete_entry_key - calculate entry key by name
-+ * @dir: directory where entry is (or will be) in
-+ * @name: name to calculate key of
-+ * @len: lenth of name
-+ * @result: place to store result in
-+ *
-+ * Sets fields of entry key @result which depend on file name.
-+ * When REISER4_LARGE_KEY is defined three fields of @result are set: ordering,
-+ * objectid and offset. Otherwise, objectid and offset are set.
-+ */
-+void complete_entry_key(const struct inode *dir, const char *name,
-+ int len, reiser4_key *result)
-+{
-+#if REISER4_LARGE_KEY
-+ __u64 ordering;
-+ __u64 objectid;
-+ __u64 offset;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1142", result != NULL);
-+ assert("nikita-2867", strlen(name) == len);
-+
-+ /*
-+ * key allocation algorithm for directory entries in case of large
-+ * keys:
-+ *
-+ * If name is not longer than 7 + 8 + 8 = 23 characters, put first 7
-+ * characters into ordering field of key, next 8 charactes (if any)
-+ * into objectid field of key and next 8 ones (of any) into offset
-+ * field of key
-+ *
-+ * If file name is longer than 23 characters, put first 7 characters
-+ * into key's ordering, next 8 to objectid and hash of remaining
-+ * characters into offset field.
-+ *
-+ * To distinguish above cases, in latter set up unused high bit in
-+ * ordering field.
-+ */
-+
-+ /* [0-6] characters to ordering */
-+ ordering = pack_string(name, 1);
-+ if (len > 7) {
-+ /* [7-14] characters to objectid */
-+ objectid = pack_string(name + 7, 0);
-+ if (len > 15) {
-+ if (len <= 23) {
-+ /* [15-23] characters to offset */
-+ offset = pack_string(name + 15, 0);
-+ } else {
-+ /* note in a key the fact that offset contains hash. */
-+ ordering |= longname_mark;
-+
-+ /* offset is the hash of the file name's tail. */
-+ offset = inode_hash_plugin(dir)->hash(name + 15,
-+ len - 15);
-+ }
-+ } else {
-+ offset = 0ull;
-+ }
-+ } else {
-+ objectid = 0ull;
-+ offset = 0ull;
-+ }
-+
-+ assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
-+ ordering |= inode_fibration_plugin(dir)->fibre(dir, name, len);
-+
-+ set_key_ordering(result, ordering);
-+ set_key_fulloid(result, objectid);
-+ set_key_offset(result, offset);
-+ return;
-+
-+#else
-+ __u64 objectid;
-+ __u64 offset;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1142", result != NULL);
-+ assert("nikita-2867", strlen(name) == len);
-+
-+ /*
-+ * key allocation algorithm for directory entries in case of not large
-+ * keys:
-+ *
-+ * If name is not longer than 7 + 8 = 15 characters, put first 7
-+ * characters into objectid field of key, next 8 charactes (if any)
-+ * into offset field of key
-+ *
-+ * If file name is longer than 15 characters, put first 7 characters
-+ * into key's objectid, and hash of remaining characters into offset
-+ * field.
-+ *
-+ * To distinguish above cases, in latter set up unused high bit in
-+ * objectid field.
-+ */
-+
-+ /* [0-6] characters to objectid */
-+ objectid = pack_string(name, 1);
-+ if (len > 7) {
-+ if (len <= 15) {
-+ /* [7-14] characters to offset */
-+ offset = pack_string(name + 7, 0);
-+ } else {
-+ /* note in a key the fact that offset contains hash. */
-+ objectid |= longname_mark;
-+
-+ /* offset is the hash of the file name. */
-+ offset = inode_hash_plugin(dir)->hash(name + 7,
-+ len - 7);
-+ }
-+ } else
-+ offset = 0ull;
-+
-+ assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
-+ objectid |= inode_fibration_plugin(dir)->fibre(dir, name, len);
-+
-+ set_key_fulloid(result, objectid);
-+ set_key_offset(result, offset);
-+ return;
-+#endif /* ! REISER4_LARGE_KEY */
-+}
-+
-+/* true, if @key is the key of "." */
-+int is_dot_key(const reiser4_key * key /* key to check */ )
-+{
-+ assert("nikita-1717", key != NULL);
-+ assert("nikita-1718", get_key_type(key) == KEY_FILE_NAME_MINOR);
-+ return
-+ (get_key_ordering(key) == 0ull) &&
-+ (get_key_objectid(key) == 0ull) && (get_key_offset(key) == 0ull);
-+}
-+
-+/* build key for stat-data.
-+
-+ return key of stat-data of this object. This should became sd plugin
-+ method in the future. For now, let it be here.
-+
-+*/
-+reiser4_key *build_sd_key(const struct inode * target /* inode of an object */ ,
-+ reiser4_key * result /* resulting key of @target
-+ stat-data */ )
-+{
-+ assert("nikita-261", result != NULL);
-+
-+ reiser4_key_init(result);
-+ set_key_locality(result, reiser4_inode_data(target)->locality_id);
-+ set_key_ordering(result, get_inode_ordering(target));
-+ set_key_objectid(result, get_inode_oid(target));
-+ set_key_type(result, KEY_SD_MINOR);
-+ set_key_offset(result, (__u64) 0);
-+ return result;
-+}
-+
-+/* encode part of key into &obj_key_id
-+
-+ This encodes into @id part of @key sufficient to restore @key later,
-+ given that latter is key of object (key of stat-data).
-+
-+ See &obj_key_id
-+*/
-+int build_obj_key_id(const reiser4_key * key /* key to encode */ ,
-+ obj_key_id * id /* id where key is encoded in */ )
-+{
-+ assert("nikita-1151", key != NULL);
-+ assert("nikita-1152", id != NULL);
-+
-+ memcpy(id, key, sizeof *id);
-+ return 0;
-+}
-+
-+/* encode reference to @obj in @id.
-+
-+ This is like build_obj_key_id() above, but takes inode as parameter. */
-+int build_inode_key_id(const struct inode *obj /* object to build key of */ ,
-+ obj_key_id * id /* result */ )
-+{
-+ reiser4_key sdkey;
-+
-+ assert("nikita-1166", obj != NULL);
-+ assert("nikita-1167", id != NULL);
-+
-+ build_sd_key(obj, &sdkey);
-+ build_obj_key_id(&sdkey, id);
-+ return 0;
-+}
-+
-+/* decode @id back into @key
-+
-+ Restore key of object stat-data from @id. This is dual to
-+ build_obj_key_id() above.
-+*/
-+int extract_key_from_id(const obj_key_id * id /* object key id to extract key
-+ * from */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-1153", id != NULL);
-+ assert("nikita-1154", key != NULL);
-+
-+ reiser4_key_init(key);
-+ memcpy(key, id, sizeof *id);
-+ return 0;
-+}
-+
-+/* extract objectid of directory from key of directory entry within said
-+ directory.
-+ */
-+oid_t extract_dir_id_from_key(const reiser4_key * de_key /* key of
-+ * directory
-+ * entry */ )
-+{
-+ assert("nikita-1314", de_key != NULL);
-+ return get_key_locality(de_key);
-+}
-+
-+/* encode into @id key of directory entry.
-+
-+ Encode into @id information sufficient to later distinguish directory
-+ entries within the same directory. This is not whole key, because all
-+ directory entries within directory item share locality which is equal
-+ to objectid of their directory.
-+
-+*/
-+int build_de_id(const struct inode *dir /* inode of directory */ ,
-+ const struct qstr *name /* name to be given to @obj by
-+ * directory entry being
-+ * constructed */ ,
-+ de_id * id /* short key of directory entry */ )
-+{
-+ reiser4_key key;
-+
-+ assert("nikita-1290", dir != NULL);
-+ assert("nikita-1292", id != NULL);
-+
-+ /* NOTE-NIKITA this is suboptimal. */
-+ inode_dir_plugin(dir)->build_entry_key(dir, name, &key);
-+ return build_de_id_by_key(&key, id);
-+}
-+
-+/* encode into @id key of directory entry.
-+
-+ Encode into @id information sufficient to later distinguish directory
-+ entries within the same directory. This is not whole key, because all
-+ directory entries within directory item share locality which is equal
-+ to objectid of their directory.
-+
-+*/
-+int build_de_id_by_key(const reiser4_key * entry_key /* full key of directory
-+ * entry */ ,
-+ de_id * id /* short key of directory entry */ )
-+{
-+ memcpy(id, ((__u64 *) entry_key) + 1, sizeof *id);
-+ return 0;
-+}
-+
-+/* restore from @id key of directory entry.
-+
-+ Function dual to build_de_id(): given @id and locality, build full
-+ key of directory entry within directory item.
-+
-+*/
-+int extract_key_from_de_id(const oid_t locality /* locality of directory
-+ * entry */ ,
-+ const de_id * id /* directory entry id */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ /* no need to initialise key here: all fields are overwritten */
-+ memcpy(((__u64 *) key) + 1, id, sizeof *id);
-+ set_key_locality(key, locality);
-+ set_key_type(key, KEY_FILE_NAME_MINOR);
-+ return 0;
-+}
-+
-+/* compare two &de_id's */
-+cmp_t de_id_cmp(const de_id * id1 /* first &de_id to compare */ ,
-+ const de_id * id2 /* second &de_id to compare */ )
-+{
-+ /* NOTE-NIKITA ugly implementation */
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ extract_key_from_de_id((oid_t) 0, id1, &k1);
-+ extract_key_from_de_id((oid_t) 0, id2, &k2);
-+ return keycmp(&k1, &k2);
-+}
-+
-+/* compare &de_id with key */
-+cmp_t de_id_key_cmp(const de_id * id /* directory entry id to compare */ ,
-+ const reiser4_key * key /* key to compare */ )
-+{
-+ cmp_t result;
-+ reiser4_key *k1;
-+
-+ k1 = (reiser4_key *) (((unsigned long)id) - sizeof key->el[0]);
-+ result = KEY_DIFF_EL(k1, key, 1);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, key, 2);
-+ if (REISER4_LARGE_KEY && result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, key, 3);
-+ }
-+ }
-+ return result;
-+}
-+
-+/*
-+ * return number of bytes necessary to encode @inode identity.
-+ */
-+int inode_onwire_size(const struct inode *inode)
-+{
-+ int result;
-+
-+ result = dscale_bytes(get_inode_oid(inode));
-+ result += dscale_bytes(get_inode_locality(inode));
-+
-+ /*
-+ * ordering is large (it usually has highest bits set), so it makes
-+ * little sense to dscale it.
-+ */
-+ if (REISER4_LARGE_KEY)
-+ result += sizeof(get_inode_ordering(inode));
-+ return result;
-+}
-+
-+/*
-+ * encode @inode identity at @start
-+ */
-+char *build_inode_onwire(const struct inode *inode, char *start)
-+{
-+ start += dscale_write(start, get_inode_locality(inode));
-+ start += dscale_write(start, get_inode_oid(inode));
-+
-+ if (REISER4_LARGE_KEY) {
-+ put_unaligned(cpu_to_le64(get_inode_ordering(inode)), (__le64 *)start);
-+ start += sizeof(get_inode_ordering(inode));
-+ }
-+ return start;
-+}
-+
-+/*
-+ * extract key that was previously encoded by build_inode_onwire() at @addr
-+ */
-+char *extract_obj_key_id_from_onwire(char *addr, obj_key_id * key_id)
-+{
-+ __u64 val;
-+
-+ addr += dscale_read(addr, &val);
-+ val = (val << KEY_LOCALITY_SHIFT) | KEY_SD_MINOR;
-+ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->locality);
-+ addr += dscale_read(addr, &val);
-+ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->objectid);
-+#if REISER4_LARGE_KEY
-+ memcpy(&key_id->ordering, addr, sizeof key_id->ordering);
-+ addr += sizeof key_id->ordering;
-+#endif
-+ return addr;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/kassign.h linux-2.6.20/fs/reiser4/kassign.h
---- linux-2.6.20.orig/fs/reiser4/kassign.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/kassign.h 2007-05-06 14:50:43.734986973 +0400
-@@ -0,0 +1,110 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Key assignment policy interface. See kassign.c for details. */
-+
-+#if !defined( __KASSIGN_H__ )
-+#define __KASSIGN_H__
-+
-+#include "forward.h"
-+#include "key.h"
-+#include "dformat.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block, etc */
-+#include <linux/dcache.h> /* for struct qstr */
-+
-+/* key assignment functions */
-+
-+/* Information from which key of file stat-data can be uniquely
-+ restored. This depends on key assignment policy for
-+ stat-data. Currently it's enough to store object id and locality id
-+ (60+60==120) bits, because minor packing locality and offset of
-+ stat-data key are always known constants: KEY_SD_MINOR and 0
-+ respectively. For simplicity 4 bits are wasted in each id, and just
-+ two 64 bit integers are stored.
-+
-+ This field has to be byte-aligned, because we don't want to waste
-+ space in directory entries. There is another side of the coin, of
-+ course: we waste CPU and bus bandwidth instead, by copying data back
-+ and forth.
-+
-+ Next optimization: &obj_key_id is mainly used to address stat data from
-+ directory entries. Under the assumption that the majority of files have
-+ only one name (one hard link) from *the* parent directory, it seems
-+ reasonable to store only the objectid of stat data and take its locality
-+ from the key of the directory item.
-+
-+ This requires some flag to be added to the &obj_key_id to distinguish
-+ between these two cases. Remaining bits in flag byte are then asking to be
-+ used to store file type.
-+
-+ This optimization requires changes in directory item handling code.
-+
-+*/
-+typedef struct obj_key_id {
-+ d8 locality[sizeof(__u64)];
-+ ON_LARGE_KEY(d8 ordering[sizeof(__u64)];
-+ )
-+ d8 objectid[sizeof(__u64)];
-+}
-+obj_key_id;
-+
-+/* Information sufficient to uniquely identify directory entry within
-+ compressed directory item.
-+
-+ For alignment issues see &obj_key_id above.
-+*/
-+typedef struct de_id {
-+ ON_LARGE_KEY(d8 ordering[sizeof(__u64)];)
-+ d8 objectid[sizeof(__u64)];
-+ d8 offset[sizeof(__u64)];
-+}
-+de_id;
-+
-+extern int inode_onwire_size(const struct inode *obj);
-+extern char *build_inode_onwire(const struct inode *obj, char *area);
-+extern char *extract_obj_key_id_from_onwire(char *area, obj_key_id * key_id);
-+
-+extern int build_inode_key_id(const struct inode *obj, obj_key_id * id);
-+extern int extract_key_from_id(const obj_key_id * id, reiser4_key * key);
-+extern int build_obj_key_id(const reiser4_key * key, obj_key_id * id);
-+extern oid_t extract_dir_id_from_key(const reiser4_key * de_key);
-+extern int build_de_id(const struct inode *dir, const struct qstr *name,
-+ de_id * id);
-+extern int build_de_id_by_key(const reiser4_key * entry_key, de_id * id);
-+extern int extract_key_from_de_id(const oid_t locality, const de_id * id,
-+ reiser4_key * key);
-+extern cmp_t de_id_cmp(const de_id * id1, const de_id * id2);
-+extern cmp_t de_id_key_cmp(const de_id * id, const reiser4_key * key);
-+
-+extern int build_readdir_key_common(struct file *dir, reiser4_key * result);
-+extern void build_entry_key_common(const struct inode *dir,
-+ const struct qstr *name,
-+ reiser4_key * result);
-+extern void build_entry_key_stable_entry(const struct inode *dir,
-+ const struct qstr *name,
-+ reiser4_key * result);
-+extern int is_dot_key(const reiser4_key * key);
-+extern reiser4_key *build_sd_key(const struct inode *target,
-+ reiser4_key * result);
-+
-+extern int is_longname_key(const reiser4_key * key);
-+extern int is_longname(const char *name, int len);
-+extern char *extract_name_from_key(const reiser4_key * key, char *buf);
-+extern char *reiser4_unpack_string(__u64 value, char *buf);
-+extern void complete_entry_key(const struct inode *dir, const char *name,
-+ int len, reiser4_key *result);
-+
-+/* __KASSIGN_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/Kconfig linux-2.6.20/fs/reiser4/Kconfig
---- linux-2.6.20.orig/fs/reiser4/Kconfig 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/Kconfig 2007-05-06 14:50:43.734986973 +0400
-@@ -0,0 +1,32 @@
-+config REISER4_FS
-+ tristate "Reiser4 (EXPERIMENTAL)"
-+ depends on EXPERIMENTAL
-+ select ZLIB_INFLATE
-+ select ZLIB_DEFLATE
-+ select CRYPTO
-+ help
-+ Reiser4 is a filesystem that performs all filesystem operations
-+ as atomic transactions, which means that it either performs a
-+ write, or it does not, and in the event of a crash it does not
-+ partially perform it or corrupt it.
-+
-+ It stores files in dancing trees, which are like balanced trees but
-+ faster. It packs small files together so that they share blocks
-+ without wasting space. This means you can use it to store really
-+ small files. It also means that it saves you disk space. It avoids
-+ hassling you with anachronisms like having a maximum number of
-+ inodes, and wasting space if you use less than that number.
-+
-+ Reiser4 is a distinct filesystem type from reiserfs (V3).
-+ It's therefore not possible to use reiserfs file systems
-+ with reiser4.
-+
-+ To learn more about reiser4, go to http://www.namesys.com
-+
-+config REISER4_DEBUG
-+ bool "Enable reiser4 debug mode"
-+ depends on REISER4_FS
-+ help
-+ Don't use this unless you are debugging reiser4.
-+
-+ If unsure, say N.
-diff -urN linux-2.6.20.orig/fs/reiser4/key.c linux-2.6.20/fs/reiser4/key.c
---- linux-2.6.20.orig/fs/reiser4/key.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/key.c 2007-05-06 14:50:43.734986973 +0400
-@@ -0,0 +1,137 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Key manipulations. */
-+
-+#include "debug.h"
-+#include "key.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/* Minimal possible key: all components are zero. It is presumed that this is
-+ independent of key scheme. */
-+static const reiser4_key MINIMAL_KEY = {
-+ .el = {
-+ 0ull,
-+ ON_LARGE_KEY(0ull,)
-+ 0ull,
-+ 0ull
-+ }
-+};
-+
-+/* Maximal possible key: all components are ~0. It is presumed that this is
-+ independent of key scheme. */
-+static const reiser4_key MAXIMAL_KEY = {
-+ .el = {
-+ __constant_cpu_to_le64(~0ull),
-+ ON_LARGE_KEY(__constant_cpu_to_le64(~0ull),)
-+ __constant_cpu_to_le64(~0ull),
-+ __constant_cpu_to_le64(~0ull)
-+ }
-+};
-+
-+/* Initialize key. */
-+void reiser4_key_init(reiser4_key * key /* key to init */ )
-+{
-+ assert("nikita-1169", key != NULL);
-+ memset(key, 0, sizeof *key);
-+}
-+
-+/* minimal possible key in the tree. Return pointer to the static storage. */
-+const reiser4_key *reiser4_min_key(void)
-+{
-+ return &MINIMAL_KEY;
-+}
-+
-+/* maximum possible key in the tree. Return pointer to the static storage. */
-+const reiser4_key *reiser4_max_key(void)
-+{
-+ return &MAXIMAL_KEY;
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging aid: print symbolic name of key type */
-+static const char *type_name(unsigned int key_type /* key type */ )
-+{
-+ switch (key_type) {
-+ case KEY_FILE_NAME_MINOR:
-+ return "file name";
-+ case KEY_SD_MINOR:
-+ return "stat data";
-+ case KEY_ATTR_NAME_MINOR:
-+ return "attr name";
-+ case KEY_ATTR_BODY_MINOR:
-+ return "attr body";
-+ case KEY_BODY_MINOR:
-+ return "file body";
-+ default:
-+ return "unknown";
-+ }
-+}
-+
-+/* debugging aid: print human readable information about key */
-+void reiser4_print_key(const char *prefix /* prefix to print */ ,
-+ const reiser4_key * key /* key to print */ )
-+{
-+ /* turn bold on */
-+ /* printf ("\033[1m"); */
-+ if (key == NULL)
-+ printk("%s: null key\n", prefix);
-+ else {
-+ if (REISER4_LARGE_KEY)
-+ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx:%Lx)", prefix,
-+ get_key_locality(key),
-+ get_key_type(key),
-+ get_key_ordering(key),
-+ get_key_band(key),
-+ get_key_objectid(key), get_key_offset(key));
-+ else
-+ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx)", prefix,
-+ get_key_locality(key),
-+ get_key_type(key),
-+ get_key_band(key),
-+ get_key_objectid(key), get_key_offset(key));
-+ /*
-+ * if this is a key of directory entry, try to decode part of
-+ * a name stored in the key, and output it.
-+ */
-+ if (get_key_type(key) == KEY_FILE_NAME_MINOR) {
-+ char buf[DE_NAME_BUF_LEN];
-+ char *c;
-+
-+ c = buf;
-+ c = reiser4_unpack_string(get_key_ordering(key), c);
-+ reiser4_unpack_string(get_key_fulloid(key), c);
-+ printk("[%s", buf);
-+ if (is_longname_key(key))
-+ /*
-+ * only part of the name is stored in the key.
-+ */
-+ printk("...]\n");
-+ else {
-+ /*
-+ * whole name is stored in the key.
-+ */
-+ reiser4_unpack_string(get_key_offset(key), buf);
-+ printk("%s]\n", buf);
-+ }
-+ } else {
-+ printk("[%s]\n", type_name(get_key_type(key)));
-+ }
-+ }
-+ /* turn bold off */
-+ /* printf ("\033[m\017"); */
-+}
-+
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/key.h linux-2.6.20/fs/reiser4/key.h
---- linux-2.6.20.orig/fs/reiser4/key.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/key.h 2007-05-06 14:50:43.738988223 +0400
-@@ -0,0 +1,384 @@
-+/* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declarations of key-related data-structures and operations on keys. */
-+
-+#if !defined( __REISER4_KEY_H__ )
-+#define __REISER4_KEY_H__
-+
-+#include "dformat.h"
-+#include "forward.h"
-+#include "debug.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/* Operations on keys in reiser4 tree */
-+
-+/* No access to any of these fields shall be done except via a
-+ wrapping macro/function, and that wrapping macro/function shall
-+ convert to/from little endian order. Keys are compared in cpu byte order. */
-+
-+/* A storage layer implementation difference between a regular unix file body and its attributes is in the typedef below
-+ which causes all of the attributes of a file to be near in key to all of the other attributes for all of the files
-+ within that directory, and not near to the file itself. It is interesting to consider whether this is the wrong
-+ approach, and whether there should be no difference at all. For current usage patterns this choice is probably the
-+ right one. */
-+
-+/* possible values for minor packing locality (4 bits required) */
-+typedef enum {
-+ /* file name */
-+ KEY_FILE_NAME_MINOR = 0,
-+ /* stat-data */
-+ KEY_SD_MINOR = 1,
-+ /* file attribute name */
-+ KEY_ATTR_NAME_MINOR = 2,
-+ /* file attribute value */
-+ KEY_ATTR_BODY_MINOR = 3,
-+ /* file body (tail or extent) */
-+ KEY_BODY_MINOR = 4,
-+} key_minor_locality;
-+
-+/* everything stored in the tree has a unique key, which means that the tree is (logically) fully ordered by key.
-+ Physical order is determined by dynamic heuristics that attempt to reflect key order when allocating available space,
-+ and by the repacker. It is stylistically better to put aggregation information into the key. Thus, if you want to
-+ segregate extents from tails, it is better to give them distinct minor packing localities rather than changing
-+ block_alloc.c to check the node type when deciding where to allocate the node.
-+
-+ The need to randomly displace new directories and large files disturbs this symmetry unfortunately. However, it
-+ should be noted that this is a need that is not clearly established given the existence of a repacker. Also, in our
-+ current implementation tails have a different minor packing locality from extents, and no files have both extents and
-+ tails, so maybe symmetry can be had without performance cost after all. Symmetry is what we ship for now....
-+*/
-+
-+/* Arbitrary major packing localities can be assigned to objects using
-+ the reiser4(filenameA/..packing<=some_number) system call.
-+
-+ In reiser4, the creat() syscall creates a directory
-+
-+ whose default flow (that which is referred to if the directory is
-+ read as a file) is the traditional unix file body.
-+
-+ whose directory plugin is the 'filedir'
-+
-+ whose major packing locality is that of the parent of the object created.
-+
-+ The static_stat item is a particular commonly used directory
-+ compression (the one for normal unix files).
-+
-+ The filedir plugin checks to see if the static_stat item exists.
-+ There is a unique key for static_stat. If yes, then it uses the
-+ static_stat item for all of the values that it contains. The
-+ static_stat item contains a flag for each stat it contains which
-+ indicates whether one should look outside the static_stat item for its
-+ contents.
-+*/
-+
-+/* offset of fields in reiser4_key. Value of each element of this enum
-+ is index within key (thought as array of __u64's) where this field
-+ is. */
-+typedef enum {
-+ /* major "locale", aka dirid. Sits in 1st element */
-+ KEY_LOCALITY_INDEX = 0,
-+ /* minor "locale", aka item type. Sits in 1st element */
-+ KEY_TYPE_INDEX = 0,
-+ ON_LARGE_KEY(KEY_ORDERING_INDEX,)
-+ /* "object band". Sits in 2nd element */
-+ KEY_BAND_INDEX,
-+ /* objectid. Sits in 2nd element */
-+ KEY_OBJECTID_INDEX = KEY_BAND_INDEX,
-+ /* full objectid. Sits in 2nd element */
-+ KEY_FULLOID_INDEX = KEY_BAND_INDEX,
-+ /* Offset. Sits in 3rd element */
-+ KEY_OFFSET_INDEX,
-+ /* Name hash. Sits in 3rd element */
-+ KEY_HASH_INDEX = KEY_OFFSET_INDEX,
-+ KEY_CACHELINE_END = KEY_OFFSET_INDEX,
-+ KEY_LAST_INDEX
-+} reiser4_key_field_index;
-+
-+/* key in reiser4 internal "balanced" tree. It is just an array of three
-+ (four with REISER4_LARGE_KEY) 64bit integers in disk byte order
-+ (little-endian by default). This array is indexed by
-+ reiser4_key_field_index. Each __u64 within this array is called
-+ "element". Logical key components encoded within elements are called "fields".
-+
-+ We declare this as union with second component dummy to suppress
-+ inconvenient array<->pointer casts implied in C. */
-+union reiser4_key {
-+ __le64 el[KEY_LAST_INDEX];
-+ int pad;
-+};
-+
-+/* bitmasks showing where within reiser4_key particular key is stored. */
-+/* major locality occupies higher 60 bits of the first element */
-+#define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
-+
-+/* minor locality occupies lower 4 bits of the first element */
-+#define KEY_TYPE_MASK 0xfull
-+
-+/* controversial band occupies higher 4 bits of the 2nd element */
-+#define KEY_BAND_MASK 0xf000000000000000ull
-+
-+/* objectid occupies lower 60 bits of the 2nd element */
-+#define KEY_OBJECTID_MASK 0x0fffffffffffffffull
-+
-+/* full 64bit objectid*/
-+#define KEY_FULLOID_MASK 0xffffffffffffffffull
-+
-+/* offset is just the 3rd element itself */
-+#define KEY_OFFSET_MASK 0xffffffffffffffffull
-+
-+/* ordering is whole second element */
-+#define KEY_ORDERING_MASK 0xffffffffffffffffull
-+
-+/* by how many bits a field value is shifted left inside its key element (getters shift back right) */
-+typedef enum {
-+ KEY_LOCALITY_SHIFT = 4,
-+ KEY_TYPE_SHIFT = 0,
-+ KEY_BAND_SHIFT = 60,
-+ KEY_OBJECTID_SHIFT = 0,
-+ KEY_FULLOID_SHIFT = 0,
-+ KEY_OFFSET_SHIFT = 0,
-+ KEY_ORDERING_SHIFT = 0,
-+} reiser4_key_field_shift;
-+
-+static inline __u64
-+get_key_el(const reiser4_key * key, reiser4_key_field_index off)
-+{
-+ assert("nikita-753", key != NULL);
-+ assert("nikita-754", off < KEY_LAST_INDEX);
-+ return le64_to_cpu(get_unaligned(&key->el[off]));
-+}
-+
-+static inline void
-+set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value)
-+{
-+ assert("nikita-755", key != NULL);
-+ assert("nikita-756", off < KEY_LAST_INDEX);
-+ put_unaligned(cpu_to_le64(value), &key->el[off]);
-+}
-+
-+/* macro to define getter get_key_L() and setter set_key_L() of type T for the field described by KEY_U_INDEX, KEY_U_MASK and KEY_U_SHIFT */
-+#define DEFINE_KEY_FIELD( L, U, T ) \
-+static inline T get_key_ ## L ( const reiser4_key *key ) \
-+{ \
-+ assert( "nikita-750", key != NULL ); \
-+ return ( T ) ( get_key_el( key, KEY_ ## U ## _INDEX ) & \
-+ KEY_ ## U ## _MASK ) >> KEY_ ## U ## _SHIFT; \
-+} \
-+ \
-+static inline void set_key_ ## L ( reiser4_key *key, T loc ) \
-+{ \
-+ __u64 el; \
-+ \
-+ assert( "nikita-752", key != NULL ); \
-+ \
-+ el = get_key_el( key, KEY_ ## U ## _INDEX ); \
-+ /* clear field bits in the key */ \
-+ el &= ~KEY_ ## U ## _MASK; \
-+ /* actually it should be \
-+ \
-+ el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
-+ \
-+ but we trust user to never pass values that wouldn't fit \
-+ into field. Clearing extra bits is one operation, but this \
-+ function is time-critical. \
-+ But check this in assertion. */ \
-+ assert( "nikita-759", ( ( loc << KEY_ ## U ## _SHIFT ) & \
-+ ~KEY_ ## U ## _MASK ) == 0 ); \
-+ el |= ( loc << KEY_ ## U ## _SHIFT ); \
-+ set_key_el( key, KEY_ ## U ## _INDEX, el ); \
-+}
-+
-+typedef __u64 oid_t;
-+
-+/* define get_key_locality(), set_key_locality() */
-+DEFINE_KEY_FIELD(locality, LOCALITY, oid_t);
-+/* define get_key_type(), set_key_type() */
-+DEFINE_KEY_FIELD(type, TYPE, key_minor_locality);
-+/* define get_key_band(), set_key_band() */
-+DEFINE_KEY_FIELD(band, BAND, __u64);
-+/* define get_key_objectid(), set_key_objectid() */
-+DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t);
-+/* define get_key_fulloid(), set_key_fulloid() */
-+DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t);
-+/* define get_key_offset(), set_key_offset() */
-+DEFINE_KEY_FIELD(offset, OFFSET, __u64);
-+#if (REISER4_LARGE_KEY)
-+/* define get_key_ordering(), set_key_ordering() */
-+DEFINE_KEY_FIELD(ordering, ORDERING, __u64);
-+#else
-+static inline __u64 get_key_ordering(const reiser4_key * key)
-+{
-+ return 0;
-+}
-+
-+static inline void set_key_ordering(reiser4_key * key, __u64 val)
-+{
-+}
-+#endif
-+
-+/* key comparison result */
-+typedef enum { LESS_THAN = -1, /* if first key is less than second */
-+ EQUAL_TO = 0, /* if keys are equal */
-+ GREATER_THAN = +1 /* if first key is greater than second */
-+} cmp_t;
-+
-+void reiser4_key_init(reiser4_key * key);
-+
-+/* minimal and maximal possible keys in the tree. Both return a pointer to static storage. */
-+extern const reiser4_key *reiser4_min_key(void);
-+extern const reiser4_key *reiser4_max_key(void);
-+
-+/* helper macro for keycmp() */
-+#define KEY_DIFF(k1, k2, field) \
-+({ \
-+ typeof (get_key_ ## field (k1)) f1; \
-+ typeof (get_key_ ## field (k2)) f2; \
-+ \
-+ f1 = get_key_ ## field (k1); \
-+ f2 = get_key_ ## field (k2); \
-+ \
-+ (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
-+})
-+
-+/* helper macro for keycmp() */
-+#define KEY_DIFF_EL(k1, k2, off) \
-+({ \
-+ __u64 e1; \
-+ __u64 e2; \
-+ \
-+ e1 = get_key_el(k1, off); \
-+ e2 = get_key_el(k2, off); \
-+ \
-+ (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
-+})
-+
-+/* compare `k1' and `k2'. This function is a heart of "key allocation
-+ policy". All you need to implement new policy is to add yet another
-+ clause here. */
-+static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ cmp_t result;
-+
-+ /*
-+ * This function is the heart of reiser4 tree-routines. Key comparison
-+ * is among most heavily used operations in the file system.
-+ */
-+
-+ assert("nikita-439", k1 != NULL);
-+ assert("nikita-440", k2 != NULL);
-+
-+ /* there is no actual branch here: condition is compile time constant
-+ * and constant folding and propagation ensures that only one branch
-+ * is actually compiled in. */
-+
-+ if (REISER4_PLANA_KEY_ALLOCATION) {
-+ /* if physical order of fields in a key is identical
-+ with logical order, we can implement key comparison
-+ as three 64bit comparisons. */
-+ /* logical order of fields in plan-a:
-+ locality->type->objectid->offset. */
-+ /* compare locality and type at once */
-+ result = KEY_DIFF_EL(k1, k2, 0);
-+ if (result == EQUAL_TO) {
-+ /* compare objectid (and band if it's there) */
-+ result = KEY_DIFF_EL(k1, k2, 1);
-+ /* compare offset */
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, k2, 2);
-+ if (REISER4_LARGE_KEY && result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, k2, 3);
-+ }
-+ }
-+ }
-+ } else if (REISER4_3_5_KEY_ALLOCATION) {
-+ result = KEY_DIFF(k1, k2, locality);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF(k1, k2, objectid);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF(k1, k2, type);
-+ if (result == EQUAL_TO)
-+ result = KEY_DIFF(k1, k2, offset);
-+ }
-+ }
-+ } else
-+ impossible("nikita-441", "Unknown key allocation scheme!");
-+ return result;
-+}
-+
-+/* true if @k1 equals @k2 */
-+static inline int keyeq(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ assert("nikita-1879", k1 != NULL);
-+ assert("nikita-1880", k2 != NULL);
-+ return !memcmp(k1, k2, sizeof *k1);
-+}
-+
-+/* true if @k1 is less than @k2 */
-+static inline int keylt(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ assert("nikita-1952", k1 != NULL);
-+ assert("nikita-1953", k2 != NULL);
-+ return keycmp(k1, k2) == LESS_THAN;
-+}
-+
-+/* true if @k1 is less than or equal to @k2 */
-+static inline int keyle(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ assert("nikita-1954", k1 != NULL);
-+ assert("nikita-1955", k2 != NULL);
-+ return keycmp(k1, k2) != GREATER_THAN;
-+}
-+
-+/* true if @k1 is greater than @k2 */
-+static inline int keygt(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ assert("nikita-1959", k1 != NULL);
-+ assert("nikita-1960", k2 != NULL);
-+ return keycmp(k1, k2) == GREATER_THAN;
-+}
-+
-+/* true if @k1 is greater than or equal to @k2 */
-+static inline int keyge(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ assert("nikita-1956", k1 != NULL);
-+ assert("nikita-1957", k2 != NULL); /* October 4: sputnik launched
-+ * November 3: Laika */
-+ return keycmp(k1, k2) != LESS_THAN;
-+}
-+
-+static inline void prefetchkey(reiser4_key * key)
-+{
-+ prefetch(key);
-+ prefetch(&key->el[KEY_CACHELINE_END]);
-+}
-+
-+/* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
-+ 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
-+/* size of a buffer suitable to hold human readable key representation */
-+#define KEY_BUF_LEN (80)
-+
-+#if REISER4_DEBUG
-+extern void reiser4_print_key(const char *prefix, const reiser4_key * key);
-+#else
-+#define reiser4_print_key(p,k) noop
-+#endif
-+
-+/* __REISER4_KEY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/ktxnmgrd.c linux-2.6.20/fs/reiser4/ktxnmgrd.c
---- linux-2.6.20.orig/fs/reiser4/ktxnmgrd.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/ktxnmgrd.c 2007-05-06 14:50:43.738988223 +0400
-@@ -0,0 +1,215 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Transaction manager daemon. */
-+
-+/*
-+ * ktxnmgrd is a kernel daemon responsible for committing transactions. It is
-+ * needed/important for the following reasons:
-+ *
-+ * 1. in reiser4 atom is not committed immediately when last transaction
-+ * handle closes, unless atom is either too old or too large (see
-+ * atom_should_commit()). This is done to avoid committing too frequently.
-+ * because:
-+ *
-+ * 2. sometimes we don't want to commit atom when closing last transaction
-+ * handle even if it is old and fat enough. For example, because we are at
-+ * this point under directory semaphore, and committing would stall all
-+ * accesses to this directory.
-+ *
-+ * ktxnmgrd bides its time sleeping on a wait queue. When it awakes,
-+ * either due to a (tunable) timeout or because it was explicitly woken up by
-+ * a call to ktxnmgrd_kick(), it scans the list of all atoms and commits the
-+ * eligible ones.
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "tree.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/sched.h> /* for struct task_struct */
-+#include <linux/wait.h>
-+#include <linux/suspend.h>
-+#include <linux/kernel.h>
-+#include <linux/writeback.h>
-+#include <linux/kthread.h>
-+#include <linux/freezer.h>
-+
-+static int scan_mgr(struct super_block *);
-+
-+/*
-+ * change current->comm so that ps, top, and friends will see the changed
-+ * state. This serves no useful purpose whatsoever, but also costs nothing.
-+ * Maybe it will make a lonely system administrator feel less alone at 3 A.M.
-+ */
-+#define set_comm( state ) \
-+ snprintf( current -> comm, sizeof( current -> comm ), \
-+ "%s:%s:%s", __FUNCTION__, (super)->s_id, ( state ) )
-+
-+/**
-+ * ktxnmgrd - kernel txnmgr daemon
-+ * @arg: pointer to super block
-+ *
-+ * The background transaction manager daemon, started as a kernel thread during
-+ * reiser4 initialization.
-+ */
-+static int ktxnmgrd(void *arg)
-+{
-+ struct super_block *super;
-+ ktxnmgrd_context *ctx;
-+ txn_mgr *mgr;
-+ int done = 0;
-+
-+ super = arg;
-+ mgr = &get_super_private(super)->tmgr;
-+
-+ /*
-+ * do_fork() just copies task_struct into the new thread. ->fs_context
-+ * shouldn't be copied of course. This shouldn't be a problem for the
-+ * rest of the code though.
-+ */
-+ current->journal_info = NULL;
-+ ctx = mgr->daemon;
-+ while (1) {
-+ try_to_freeze();
-+ set_comm("wait");
-+ {
-+ DEFINE_WAIT(__wait);
-+
-+ prepare_to_wait(&ctx->wait, &__wait, TASK_INTERRUPTIBLE);
-+ if (kthread_should_stop()) {
-+ done = 1;
-+ } else
-+ schedule_timeout(ctx->timeout);
-+ finish_wait(&ctx->wait, &__wait);
-+ }
-+ if (done)
-+ break;
-+ set_comm("run");
-+ spin_lock(&ctx->guard);
-+ /*
-+ * wait timed out or ktxnmgrd was woken up by explicit request
-+ * to commit something. Scan list of atoms in txnmgr and look
-+ * for too old atoms.
-+ */
-+ do {
-+ ctx->rescan = 0;
-+ scan_mgr(super);
-+ spin_lock(&ctx->guard);
-+ if (ctx->rescan) {
-+ /*
-+ * the list could be modified while ctx
-+ * spinlock was released, we have to repeat
-+ * scanning from the beginning
-+ */
-+ break;
-+ }
-+ } while (ctx->rescan);
-+ spin_unlock(&ctx->guard);
-+ }
-+ return 0;
-+}
-+
-+#undef set_comm
-+
-+/**
-+ * reiser4_init_ktxnmgrd - initialize ktxnmgrd context and start kernel daemon
-+ * @super: pointer to super block
-+ *
-+ * Allocates and initializes ktxnmgrd_context, attaches it to transaction
-+ * manager. Starts kernel txnmgr daemon. This is called on mount.
-+ */
-+int reiser4_init_ktxnmgrd(struct super_block *super)
-+{
-+ txn_mgr *mgr;
-+ ktxnmgrd_context *ctx;
-+
-+ mgr = &get_super_private(super)->tmgr;
-+
-+ assert("zam-1014", mgr->daemon == NULL);
-+
-+ ctx = kmalloc(sizeof(ktxnmgrd_context), reiser4_ctx_gfp_mask_get());
-+ if (ctx == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ assert("nikita-2442", ctx != NULL);
-+
-+ memset(ctx, 0, sizeof *ctx);
-+ init_waitqueue_head(&ctx->wait);
-+
-+ /*kcond_init(&ctx->startup);*/
-+ spin_lock_init(&ctx->guard);
-+ ctx->timeout = REISER4_TXNMGR_TIMEOUT;
-+ ctx->rescan = 1;
-+ mgr->daemon = ctx;
-+
-+ ctx->tsk = kthread_run(ktxnmgrd, super, "ktxnmgrd");
-+ if (IS_ERR(ctx->tsk)) {
-+ int ret = PTR_ERR(ctx->tsk);
-+ mgr->daemon = NULL;
-+ kfree(ctx);
-+ return RETERR(ret);
-+ }
-+ return 0;
-+}
-+
-+void ktxnmgrd_kick(txn_mgr *mgr)
-+{
-+ assert("nikita-3234", mgr != NULL);
-+ assert("nikita-3235", mgr->daemon != NULL);
-+ wake_up(&mgr->daemon->wait);
-+}
-+
-+int is_current_ktxnmgrd(void)
-+{
-+ return (get_current_super_private()->tmgr.daemon->tsk == current);
-+}
-+
-+/**
-+ * scan_mgr - commit atoms which are to be committed
-+ * @super: super block to commit atoms of
-+ *
-+ * Commits old atoms.
-+ */
-+static int scan_mgr(struct super_block *super)
-+{
-+ int ret;
-+ reiser4_context ctx;
-+
-+ init_stack_context(&ctx, super);
-+
-+ ret = commit_some_atoms(&get_super_private(super)->tmgr);
-+
-+ reiser4_exit_context(&ctx);
-+ return ret;
-+}
-+
-+/**
-+ * reiser4_done_ktxnmgrd - stop kernel thread and free ktxnmgrd context
-+ * @super: pointer to super block
-+ *
-+ * This is called on umount. Stops ktxnmgrd and frees its context.
-+ */
-+void reiser4_done_ktxnmgrd(struct super_block *super)
-+{
-+ txn_mgr *mgr;
-+
-+ mgr = &get_super_private(super)->tmgr;
-+ assert("zam-1012", mgr->daemon != NULL);
-+
-+ kthread_stop(mgr->daemon->tsk);
-+ kfree(mgr->daemon);
-+ mgr->daemon = NULL;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/ktxnmgrd.h linux-2.6.20/fs/reiser4/ktxnmgrd.h
---- linux-2.6.20.orig/fs/reiser4/ktxnmgrd.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/ktxnmgrd.h 2007-05-06 14:50:43.738988223 +0400
-@@ -0,0 +1,52 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Transaction manager daemon. See ktxnmgrd.c for comments. */
-+
-+#ifndef __KTXNMGRD_H__
-+#define __KTXNMGRD_H__
-+
-+#include "txnmgr.h"
-+
-+#include <linux/fs.h>
-+#include <linux/wait.h>
-+#include <linux/completion.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* in this structure all data necessary to start up, shut down and communicate
-+ * with ktxnmgrd are kept. */
-+struct ktxnmgrd_context {
-+ /* wait queue head on which ktxnmgrd sleeps */
-+ wait_queue_head_t wait;
-+ /* spin lock protecting all fields of this structure */
-+ spinlock_t guard;
-+ /* timeout of sleeping on ->wait */
-+ signed long timeout;
-+ /* kernel thread running ktxnmgrd */
-+ struct task_struct *tsk;
-+ /* list of all file systems served by this ktxnmgrd */
-+ struct list_head queue;
-+ /* should ktxnmgrd repeat scanning of atoms? */
-+ unsigned int rescan:1;
-+};
-+
-+extern int reiser4_init_ktxnmgrd(struct super_block *);
-+extern void reiser4_done_ktxnmgrd(struct super_block *);
-+
-+extern void ktxnmgrd_kick(txn_mgr * mgr);
-+extern int is_current_ktxnmgrd(void);
-+
-+/* __KTXNMGRD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/lock.c linux-2.6.20/fs/reiser4/lock.c
---- linux-2.6.20.orig/fs/reiser4/lock.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/lock.c 2007-05-06 14:50:43.742989473 +0400
-@@ -0,0 +1,1232 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Traditional deadlock avoidance is achieved by acquiring all locks in a single
-+ order. V4 balances the tree from the bottom up, and searches the tree from
-+ the top down, and that is really the way we want it, so tradition won't work
-+ for us.
-+
-+ Instead we have two lock orderings, a high priority lock ordering, and a low
-+ priority lock ordering. Each node in the tree has a lock in its znode.
-+
-+ Suppose we have a set of processes which lock (R/W) tree nodes. Each process
-+ has a set (maybe empty) of already locked nodes ("process locked set"). Each
-+ process may have a pending lock request to a node locked by another process.
-+ Note: we lock and unlock, but do not transfer locks: it is possible
-+ transferring locks instead would save some bus locking....
-+
-+ Deadlock occurs when we have a loop constructed from process locked sets and
-+ lock request vectors.
-+
-+ NOTE: The reiser4 "tree" is a tree on disk, but its cached representation in
-+ memory is extended with "znodes" with which we connect nodes with their left
-+ and right neighbors using sibling pointers stored in the znodes. When we
-+ perform balancing operations we often go from left to right and from right to
-+ left.
-+
-+ +-P1-+ +-P3-+
-+ |+--+| V1 |+--+|
-+ ||N1|| -------> ||N3||
-+ |+--+| |+--+|
-+ +----+ +----+
-+ ^ |
-+ |V2 |V3
-+ | v
-+ +---------P2---------+
-+ |+--+ +--+|
-+ ||N2| -------- |N4||
-+ |+--+ +--+|
-+ +--------------------+
-+
-+ We solve this by ensuring that only low priority processes lock in top to
-+ bottom order and from right to left, and high priority processes lock from
-+ bottom to top and left to right.
-+
-+ ZAM-FIXME-HANS: order not just node locks in this way, order atom locks, and
-+ kill those damn busy loops.
-+ ANSWER(ZAM): atom locks (which are introduced by ASTAGE_CAPTURE_WAIT atom
-+ stage) cannot be ordered that way. There are no rules what nodes can belong
-+ to the atom and what nodes cannot. We cannot define what is right or left
-+ direction, what is top or bottom. We can take immediate parent or side
-+ neighbor of one node, but nobody guarantees that, say, left neighbor node is
-+ not a far right neighbor for other nodes from the same atom. It breaks
-+ deadlock avoidance rules and hi-low priority locking cannot be applied for
-+ atom locks.
-+
-+ How does it help to avoid deadlocks ?
-+
-+ Suppose we have a deadlock with n processes. Processes from one priority
-+ class never deadlock because they take locks in one consistent
-+ order.
-+
-+ So, any possible deadlock loop must have low priority as well as high
-+ priority processes. There are no other lock priority levels except low and
-+ high. We know that any deadlock loop contains at least one node locked by a
-+ low priority process and requested by a high priority process. If this
-+ situation is caught and resolved it is sufficient to avoid deadlocks.
-+
-+ V4 DEADLOCK PREVENTION ALGORITHM IMPLEMENTATION.
-+
-+ The deadlock prevention algorithm is based on comparing
-+ priorities of node owners (processes which keep znode locked) and
-+ requesters (processes which want to acquire a lock on znode). We
-+ implement a scheme where low-priority owners yield locks to
-+ high-priority requesters. We created a signal passing system that
-+ is used to ask low-priority processes to yield one or more locked
-+ znodes.
-+
-+ The condition when a znode needs to change its owners is described by the
-+ following formula:
-+
-+ #############################################
-+ # #
-+ # (number of high-priority requesters) > 0 #
-+ # AND #
-+ # (numbers of high-priority owners) == 0 #
-+ # #
-+ #############################################
-+
-+ Note that a low-priority process delays node releasing if another
-+ high-priority process owns this node. So, slightly more strictly speaking,
-+ to have a deadlock capable cycle you must have a loop in which a high
-+ priority process is waiting on a low priority process to yield a node, which
-+ is slightly different from saying a high priority process is waiting on a
-+ node owned by a low priority process.
-+
-+ It is enough to avoid deadlocks if we prevent any low-priority process from
-+ falling asleep if its locked set contains a node which satisfies the
-+ deadlock condition.
-+
-+ That condition is implicitly or explicitly checked in all places where new
-+ high-priority requests may be added or removed from node request queue or
-+ high-priority process takes or releases a lock on node. The main
-+ goal of these checks is to never lose the moment when node becomes "has
-+ wrong owners" and send "must-yield-this-lock" signals to its low-pri owners
-+ at that time.
-+
-+ The information about received signals is stored in the per-process
-+ structure (lock stack) and analyzed before a low-priority process goes to
-+ sleep but after a "fast" attempt to lock a node fails. Any signal wakes the
-+ sleeping process up and forces it to re-check lock status and received
-+ signal info. If "must-yield-this-lock" signals were received the locking
-+ primitive (longterm_lock_znode()) fails with -E_DEADLOCK error code.
-+
-+ V4 LOCKING DRAWBACKS
-+
-+ If we have already balanced on one level, and we are propagating our changes
-+ upward to a higher level, it could be very messy to surrender all locks on
-+ the lower level because we put so much computational work into it, and
-+ reverting them to their state before they were locked might be very complex.
-+ We also don't want to acquire all locks before performing balancing because
-+ that would either be almost as much work as the balancing, or it would be
-+ too conservative and lock too much. We want balancing to be done only at
-+ high priority. Yet, we might want to go to the left one node and use some
-+ of its empty space... So we make one attempt at getting the node to the left
-+ using try_lock, and if it fails we do without it, because we didn't really
-+ need it, it was only a nice to have.
-+
-+ LOCK STRUCTURES DESCRIPTION
-+
-+ The following data structures are used in the reiser4 locking
-+ implementation:
-+
-+ All fields related to long-term locking are stored in znode->lock.
-+
-+ The lock stack is a per thread object. It owns all znodes locked by the
-+ thread. One znode may be locked by several threads in case of read lock or
-+ one znode may be write locked by one thread several times. The special link
-+ objects (lock handles) support n<->m relation between znodes and lock
-+ owners.
-+
-+ <Thread 1> <Thread 2>
-+
-+ +---------+ +---------+
-+ | LS1 | | LS2 |
-+ +---------+ +---------+
-+ ^ ^
-+ |---------------+ +----------+
-+ v v v v
-+ +---------+ +---------+ +---------+ +---------+
-+ | LH1 | | LH2 | | LH3 | | LH4 |
-+ +---------+ +---------+ +---------+ +---------+
-+ ^ ^ ^ ^
-+ | +------------+ |
-+ v v v
-+ +---------+ +---------+ +---------+
-+ | Z1 | | Z2 | | Z3 |
-+ +---------+ +---------+ +---------+
-+
-+ Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The
-+ picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and
-+ LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode
-+ Z1 is locked by only one thread, znode has only one lock handle LH1 on its
-+ list, similar situation is for Z3 which is locked by the thread 2 only. Z2
-+ is locked (for read) twice by different threads and two lock handles are on
-+ its list. Each lock handle represents a single locking relation between a
-+ znode and a thread. Locking a znode means establishing such a relation
-+ between the lock stack and the znode by linking a new lock handle into
-+ both the lock stack's and the znode's lists. The lock stack links all lock
-+ handles for all znodes locked by the lock stack. The znode list groups all
-+ lock handles for all lock stacks which locked the znode.
-+
-+ Yet another relation may exist between znode and lock owners. If the lock
-+ procedure cannot immediately take a lock on an object it adds the lock owner
-+ to the special `requestors' list belonging to the znode. That list represents
-+ a queue of pending lock requests. Because one lock owner may request
-+ only one lock object at a time, it is a 1->n relation between a lock object
-+ and lock owners, implemented as described above. Full information
-+ (priority, pointers to lock and link objects) about each lock request is
-+ stored in the lock owner structure, in its `request' field.
-+
-+ SHORT_TERM LOCKING
-+
-+ This is a list of primitive operations over lock stacks / lock handles /
-+ znodes and locking descriptions for them.
-+
-+ 1. locking / unlocking, which is done by two list insertions/deletions: one
-+ to/from the znode's list of lock handles, the other to/from the lock stack's
-+ list of lock handles. The first insertion is protected by the
-+ znode->lock.guard spinlock. The list owned by the lock stack can be
-+ modified only by the thread that owns the lock stack, and nobody else can
-+ modify/read it, so it needs no protection by a spinlock or
-+ anything else.
-+
-+ 2. adding/removing a lock request to/from znode requesters list. The rule is
-+ that znode->lock.guard spinlock should be taken for this.
-+
-+ 3. we can traverse list of lock handles and use references to lock stacks who
-+ locked given znode if znode->lock.guard spinlock is taken.
-+
-+ 4. If a lock stack is associated with a znode as a lock requestor or lock
-+ owner its existence is guaranteed by znode->lock.guard spinlock. Some of
-+ its (lock stack's) fields should be protected from being accessed in parallel
-+ by two or more threads. Please look at the lock_stack structure definition
-+ for information on how those fields are protected. */
-+
-+/* Znode lock and capturing intertwining. */
-+/* In the current implementation we capture formatted nodes before locking
-+ them. Take a look at longterm_lock_znode(): the reiser4_try_capture() request
-+ precedes locking requests. The longterm_lock_znode function unconditionally
-+ captures znode before even checking locking conditions.
-+
-+ Another variant is to capture znode after locking it. It was not tested, but
-+ at least one deadlock condition is supposed to be there. One thread has
-+ locked a znode (Node-1) and calls reiser4_try_capture() for it.
-+ reiser4_try_capture() sleeps because znode's atom has CAPTURE_WAIT state.
-+ Second thread is a flushing thread, its current atom is the atom Node-1
-+ belongs to. Second thread wants to lock Node-1 and sleeps because Node-1
-+ is locked by the first thread. The described situation is a deadlock. */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+#include "jnode.h"
-+#include "tree.h"
-+#include "plugin/node/node.h"
-+#include "super.h"
-+
-+#include <linux/spinlock.h>
-+
-+#if REISER4_DEBUG
-+static int request_is_deadlock_safe(znode *, znode_lock_mode,
-+ znode_lock_request);
-+#endif
-+
-+/* Returns a lock owner associated with current thread */
-+lock_stack *get_current_lock_stack(void)
-+{
-+ return &get_current_context()->stack;
-+}
-+
-+/* Wakes up all low priority owners informing them about possible deadlock */
-+static void wake_up_all_lopri_owners(znode * node)
-+{
-+ lock_handle *handle;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+ list_for_each_entry(handle, &node->lock.owners, owners_link) {
-+ assert("nikita-1832", handle->node == node);
-+ /* count this signal in owner->nr_signaled */
-+ if (!handle->signaled) {
-+ handle->signaled = 1;
-+ atomic_inc(&handle->owner->nr_signaled);
-+ /* Wake up a single process */
-+ reiser4_wake_up(handle->owner);
-+ }
-+ }
-+}
-+
-+/* Adds a lock to a lock owner, which means creating a link to the lock and
-+ putting the link into the two lists all links are on (the doubly linked list
-+ that forms the lock_stack, and the doubly linked list of links attached
-+ to a lock).
-+*/
-+static inline void
-+link_object(lock_handle * handle, lock_stack * owner, znode * node)
-+{
-+ assert("jmacd-810", handle->owner == NULL);
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ handle->owner = owner;
-+ handle->node = node;
-+
-+ assert("reiser4-4",
-+ ergo(list_empty_careful(&owner->locks), owner->nr_locks == 0));
-+
-+ /* add lock handle to the end of lock_stack's list of locks */
-+ list_add_tail(&handle->locks_link, &owner->locks);
-+ ON_DEBUG(owner->nr_locks++);
-+ reiser4_ctx_gfp_mask_set();
-+
-+ /* add lock handle to the head of znode's list of owners */
-+ list_add(&handle->owners_link, &node->lock.owners);
-+ handle->signaled = 0;
-+}
-+
-+/* Breaks a relation between a lock and its owner */
-+static inline void unlink_object(lock_handle * handle)
-+{
-+ assert("zam-354", handle->owner != NULL);
-+ assert("nikita-1608", handle->node != NULL);
-+ assert_spin_locked(&(handle->node->lock.guard));
-+ assert("nikita-1829", handle->owner == get_current_lock_stack());
-+ assert("reiser4-5", handle->owner->nr_locks > 0);
-+
-+ /* remove lock handle from lock_stack's list of locks */
-+ list_del(&handle->locks_link);
-+ ON_DEBUG(handle->owner->nr_locks--);
-+ reiser4_ctx_gfp_mask_set();
-+ assert("reiser4-6",
-+ ergo(list_empty_careful(&handle->owner->locks),
-+ handle->owner->nr_locks == 0));
-+ /* remove lock handle from znode's list of owners */
-+ list_del(&handle->owners_link);
-+ /* indicates that lock handle is free now */
-+ handle->node = NULL;
-+#if REISER4_DEBUG
-+ INIT_LIST_HEAD(&handle->locks_link);
-+ INIT_LIST_HEAD(&handle->owners_link);
-+ handle->owner = NULL;
-+#endif
-+}
-+
-+/* Actually locks an object knowing that we are able to do this */
-+static void lock_object(lock_stack * owner)
-+{
-+ lock_request *request;
-+ znode *node;
-+
-+ request = &owner->request;
-+ node = request->node;
-+ assert_spin_locked(&(node->lock.guard));
-+ if (request->mode == ZNODE_READ_LOCK) {
-+ node->lock.nr_readers++;
-+ } else {
-+ /* check that we have not switched from read to write lock */
-+ assert("nikita-1840", node->lock.nr_readers <= 0);
-+ /* We allow recursive locking; a node can be locked several
-+ times for write by same process */
-+ node->lock.nr_readers--;
-+ }
-+
-+ link_object(request->handle, owner, node);
-+
-+ if (owner->curpri) {
-+ node->lock.nr_hipri_owners++;
-+ }
-+}
-+
-+/* Check for recursive write locking */
-+static int recursive(lock_stack * owner)
-+{
-+ int ret;
-+ znode *node;
-+ lock_handle *lh;
-+
-+ node = owner->request.node;
-+
-+ /* Owners list is not empty for a locked node */
-+ assert("zam-314", !list_empty_careful(&node->lock.owners));
-+ assert("nikita-1841", owner == get_current_lock_stack());
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ lh = list_entry(node->lock.owners.next, lock_handle, owners_link);
-+ ret = (lh->owner == owner);
-+
-+ /* Recursive read locking should be done usual way */
-+ assert("zam-315", !ret || owner->request.mode == ZNODE_WRITE_LOCK);
-+ /* mixing of read/write locks is not allowed */
-+ assert("zam-341", !ret || znode_is_wlocked(node));
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+/* Returns true if the lock is held by the calling thread. */
-+int znode_is_any_locked(const znode * node)
-+{
-+ lock_handle *handle;
-+ lock_stack *stack;
-+ int ret;
-+
-+ if (!znode_is_locked(node)) {
-+ return 0;
-+ }
-+
-+ stack = get_current_lock_stack();
-+
-+ spin_lock_stack(stack);
-+
-+ ret = 0;
-+
-+ list_for_each_entry(handle, &stack->locks, locks_link) {
-+ if (handle->node == node) {
-+ ret = 1;
-+ break;
-+ }
-+ }
-+
-+ spin_unlock_stack(stack);
-+
-+ return ret;
-+}
-+
-+#endif
-+
-+/* Returns true if a write lock is held by the calling thread. */
-+int znode_is_write_locked(const znode * node)
-+{
-+ lock_stack *stack;
-+ lock_handle *handle;
-+
-+ assert("jmacd-8765", node != NULL);
-+
-+ if (!znode_is_wlocked(node)) {
-+ return 0;
-+ }
-+
-+ stack = get_current_lock_stack();
-+
-+ /*
-+ * When znode is write locked, all owner handles point to the same lock
-+ * stack. Get pointer to lock stack from the first lock handle from
-+ * znode's owner list
-+ */
-+ handle = list_entry(node->lock.owners.next, lock_handle, owners_link);
-+
-+ return (handle->owner == stack);
-+}
-+
-+/* This "deadlock" condition is the essential part of reiser4 locking
-+ implementation. This condition is checked explicitly by calling
-+ check_deadlock_condition() or implicitly in all places where znode lock
-+ state (set of owners and request queue) is changed. Locking code is
-+ designed to use this condition to trigger procedure of passing object from
-+ low priority owner(s) to high priority one(s).
-+
-+ The procedure results in passing an event (setting lock_handle->signaled
-+ flag) and counting this event in nr_signaled field of owner's lock stack
-+ object and wakeup owner's process.
-+*/
-+static inline int check_deadlock_condition(znode * node)
-+{
-+ assert_spin_locked(&(node->lock.guard));
-+ return node->lock.nr_hipri_requests > 0
-+ && node->lock.nr_hipri_owners == 0;
-+}
-+
-+static int check_livelock_condition(znode * node, znode_lock_mode mode)
-+{
-+ zlock * lock = &node->lock;
-+
-+ return mode == ZNODE_READ_LOCK &&
-+ lock -> nr_readers >= 0 && lock->nr_hipri_write_requests > 0;
-+}
-+
-+/* checks lock/request compatibility */
-+static int can_lock_object(lock_stack * owner)
-+{
-+ znode *node = owner->request.node;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* See if the node is disconnected. */
-+ if (unlikely(ZF_ISSET(node, JNODE_IS_DYING)))
-+ return RETERR(-EINVAL);
-+
-+ /* Do not ever try to take a lock if we are going in low priority
-+ direction and the node has a high priority request without high
-+ priority owners. */
-+ if (unlikely(!owner->curpri && check_deadlock_condition(node)))
-+ return RETERR(-E_REPEAT);
-+ if (unlikely(owner->curpri && check_livelock_condition(node, owner->request.mode)))
-+ return RETERR(-E_REPEAT);
-+ if (unlikely(!is_lock_compatible(node, owner->request.mode)))
-+ return RETERR(-E_REPEAT);
-+ return 0;
-+}
-+
-+/* Setting of a high priority to the process. It clears "signaled" flags
-+ because znode locked by high-priority process can't satisfy our "deadlock
-+ condition". */
-+static void set_high_priority(lock_stack * owner)
-+{
-+ assert("nikita-1846", owner == get_current_lock_stack());
-+ /* Do nothing if current priority is already high */
-+ if (!owner->curpri) {
-+ /* We don't need locking for owner->locks list, because, this
-+ * function is only called with the lock stack of the current
-+ * thread, and no other thread can play with owner->locks list
-+ * and/or change ->node pointers of lock handles in this list.
-+ *
-+ * (Interrupts also are not involved.)
-+ */
-+ lock_handle *item = list_entry(owner->locks.next, lock_handle, locks_link);
-+ while (&owner->locks != &item->locks_link) {
-+ znode *node = item->node;
-+
-+ spin_lock_zlock(&node->lock);
-+
-+ node->lock.nr_hipri_owners++;
-+
-+ /* we can safely set signaled to zero, because
-+ previous statement (nr_hipri_owners ++) guarantees
-+ that signaled will never be set again. */
-+ item->signaled = 0;
-+ spin_unlock_zlock(&node->lock);
-+
-+ item = list_entry(item->locks_link.next, lock_handle, locks_link);
-+ }
-+ owner->curpri = 1;
-+ atomic_set(&owner->nr_signaled, 0);
-+ }
-+}
-+
-+/* Sets a low priority to the process. */
-+static void set_low_priority(lock_stack * owner)
-+{
-+ assert("nikita-3075", owner == get_current_lock_stack());
-+ /* Do nothing if current priority is already low */
-+ if (owner->curpri) {
-+ /* scan all locks (lock handles) held by @owner, which is
-+ actually current thread, and check whether we are reaching
-+ deadlock possibility anywhere.
-+ */
-+ lock_handle *handle = list_entry(owner->locks.next, lock_handle, locks_link);
-+ while (&owner->locks != &handle->locks_link) {
-+ znode *node = handle->node;
-+ spin_lock_zlock(&node->lock);
-+ /* this thread just was hipri owner of @node, so
-+ nr_hipri_owners has to be greater than zero. */
-+ assert("nikita-1835", node->lock.nr_hipri_owners > 0);
-+ node->lock.nr_hipri_owners--;
-+ /* If we have deadlock condition, adjust the nr_signaled
-+ field. It is enough to set the "signaled" flag only for
-+ the current process; other low-pri owners will be
-+ signaled and woken up after the current process unlocks
-+ this object and any high-priority requestor takes
-+ control. */
-+ if (check_deadlock_condition(node)
-+ && !handle->signaled) {
-+ handle->signaled = 1;
-+ atomic_inc(&owner->nr_signaled);
-+ }
-+ spin_unlock_zlock(&node->lock);
-+ handle = list_entry(handle->locks_link.next, lock_handle, locks_link);
-+ }
-+ owner->curpri = 0;
-+ }
-+}
-+
-+static void remove_lock_request(lock_stack * requestor)
-+{
-+ zlock * lock = &requestor->request.node->lock;
-+
-+ if (requestor->curpri) {
-+ assert("nikita-1838", lock->nr_hipri_requests > 0);
-+ lock->nr_hipri_requests--;
-+ if (requestor->request.mode == ZNODE_WRITE_LOCK)
-+ lock->nr_hipri_write_requests --;
-+ }
-+ list_del(&requestor->requestors_link);
-+}
-+
-+static void invalidate_all_lock_requests(znode * node)
-+{
-+ lock_stack *requestor, *tmp;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) {
-+ remove_lock_request(requestor);
-+ requestor->request.ret_code = -EINVAL;
-+ reiser4_wake_up(requestor);
-+ requestor->request.mode = ZNODE_NO_LOCK;
-+ }
-+}
-+
-+static void dispatch_lock_requests(znode * node)
-+{
-+ lock_stack *requestor, *tmp;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) {
-+ if (znode_is_write_locked(node))
-+ break;
-+ if (!can_lock_object(requestor)) {
-+ lock_object(requestor);
-+ remove_lock_request(requestor);
-+ requestor->request.ret_code = 0;
-+ reiser4_wake_up(requestor);
-+ requestor->request.mode = ZNODE_NO_LOCK;
-+ }
-+ }
-+}
-+
-+/* release long-term lock, acquired by longterm_lock_znode() */
-+void longterm_unlock_znode(lock_handle * handle)
-+{
-+ znode *node = handle->node;
-+ lock_stack *oldowner = handle->owner;
-+ int hipri;
-+ int readers;
-+ int rdelta;
-+ int youdie;
-+
-+ /*
-+ * this is time-critical and highly optimized code. Modify carefully.
-+ */
-+
-+ assert("jmacd-1021", handle != NULL);
-+ assert("jmacd-1022", handle->owner != NULL);
-+ assert("nikita-1392", LOCK_CNT_GTZ(long_term_locked_znode));
-+
-+ assert("zam-130", oldowner == get_current_lock_stack());
-+
-+ LOCK_CNT_DEC(long_term_locked_znode);
-+
-+ /*
-+ * to minimize amount of operations performed under lock, pre-compute
-+ * all variables used within critical section. This makes code
-+ * obscure.
-+ */
-+
-+ /* was this lock of hi or lo priority */
-+ hipri = oldowner->curpri ? 1 : 0;
-+ /* number of readers */
-+ readers = node->lock.nr_readers;
-+ /* +1 if write lock, -1 if read lock */
-+ rdelta = (readers > 0) ? -1 : +1;
-+ /* true if node is to die and write lock is released */
-+ youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0);
-+
-+ spin_lock_zlock(&node->lock);
-+
-+ assert("zam-101", znode_is_locked(node));
-+
-+ /* Adjust a number of high priority owners of this lock */
-+ assert("nikita-1836", node->lock.nr_hipri_owners >= hipri);
-+ node->lock.nr_hipri_owners -= hipri;
-+
-+ /* Handle znode deallocation on last write-lock release. */
-+ if (znode_is_wlocked_once(node)) {
-+ if (youdie) {
-+ forget_znode(handle);
-+ assert("nikita-2191", znode_invariant(node));
-+ zput(node);
-+ return;
-+ }
-+ }
-+
-+ if (handle->signaled)
-+ atomic_dec(&oldowner->nr_signaled);
-+
-+ /* Unlocking means owner<->object link deletion */
-+ unlink_object(handle);
-+
-+ /* This is enough to be sure whether an object is completely
-+ unlocked. */
-+ node->lock.nr_readers += rdelta;
-+
-+ /* If the node is locked it must have an owners list. Likewise, if
-+ the node is unlocked it must have an empty owners list. */
-+ assert("zam-319", equi(znode_is_locked(node),
-+ !list_empty_careful(&node->lock.owners)));
-+
-+#if REISER4_DEBUG
-+ if (!znode_is_locked(node))
-+ ++node->times_locked;
-+#endif
-+
-+ /* If there are pending lock requests we wake up a requestor */
-+ if (!znode_is_wlocked(node))
-+ dispatch_lock_requests(node);
-+ if (check_deadlock_condition(node))
-+ wake_up_all_lopri_owners(node);
-+ spin_unlock_zlock(&node->lock);
-+
-+ /* minus one reference from handle->node */
-+ assert("nikita-2190", znode_invariant(node));
-+ ON_DEBUG(check_lock_data());
-+ ON_DEBUG(check_lock_node_data(node));
-+ zput(node);
-+}
-+
-+/* final portion of longterm-lock */
-+static int
-+lock_tail(lock_stack * owner, int ok, znode_lock_mode mode)
-+{
-+ znode *node = owner->request.node;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* If we broke with (ok == 0) it means we can_lock, now do it. */
-+ if (ok == 0) {
-+ lock_object(owner);
-+ owner->request.mode = 0;
-+ /* count a reference from lockhandle->node
-+
-+ znode was already referenced at the entry to this function,
-+ hence taking spin-lock here is not necessary (see comment
-+ in the zref()).
-+ */
-+ zref(node);
-+
-+ LOCK_CNT_INC(long_term_locked_znode);
-+ }
-+ spin_unlock_zlock(&node->lock);
-+ ON_DEBUG(check_lock_data());
-+ ON_DEBUG(check_lock_node_data(node));
-+ return ok;
-+}
-+
-+/*
-+ * version of longterm_lock_znode() optimized for the most common case: read
-+ * lock without any special flags. This is the kind of lock that any tree
-+ * traversal takes on the root node of the tree, which is very frequent.
-+ */
-+static int longterm_lock_tryfast(lock_stack * owner)
-+{
-+ int result;
-+ znode *node;
-+ zlock *lock;
-+
-+ node = owner->request.node;
-+ lock = &node->lock;
-+
-+ assert("nikita-3340", reiser4_schedulable());
-+ assert("nikita-3341", request_is_deadlock_safe(node,
-+ ZNODE_READ_LOCK,
-+ ZNODE_LOCK_LOPRI));
-+ spin_lock_zlock(lock);
-+ result = can_lock_object(owner);
-+ spin_unlock_zlock(lock);
-+
-+ if (likely(result != -EINVAL)) {
-+ spin_lock_znode(node);
-+ result = reiser4_try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0);
-+ spin_unlock_znode(node);
-+ spin_lock_zlock(lock);
-+ if (unlikely(result != 0)) {
-+ owner->request.mode = 0;
-+ } else {
-+ result = can_lock_object(owner);
-+ if (unlikely(result == -E_REPEAT)) {
-+ /* fall back to longterm_lock_znode() */
-+ spin_unlock_zlock(lock);
-+ return 1;
-+ }
-+ }
-+ return lock_tail(owner, result, ZNODE_READ_LOCK);
-+ } else
-+ return 1;
-+}
-+
-+/* locks given lock object */
-+int longterm_lock_znode(
-+ /* local link object (allocated by lock owner thread, usually on its own
-+ * stack) */
-+ lock_handle * handle,
-+ /* znode we want to lock. */
-+ znode * node,
-+ /* {ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}; */
-+ znode_lock_mode mode,
-+ /* {0, -EINVAL, -E_DEADLOCK}, see return codes description. */
-+ znode_lock_request request) {
-+ int ret;
-+ int hipri = (request & ZNODE_LOCK_HIPRI) != 0;
-+ int non_blocking = 0;
-+ int has_atom;
-+ txn_capture cap_flags;
-+ zlock *lock;
-+ txn_handle *txnh;
-+ tree_level level;
-+
-+ /* Get current process context */
-+ lock_stack *owner = get_current_lock_stack();
-+
-+ /* Check that the lock handle is initialized and isn't already being
-+ * used. */
-+ assert("jmacd-808", handle->owner == NULL);
-+ assert("nikita-3026", reiser4_schedulable());
-+ assert("nikita-3219", request_is_deadlock_safe(node, mode, request));
-+ assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0);
-+ /* long term locks are not allowed in the VM contexts (->writepage(),
-+ * prune_{d,i}cache()).
-+ *
-+ * FIXME this doesn't work due to unused-dentry-with-unlinked-inode
-+ * bug caused by d_splice_alias() only working for directories.
-+ */
-+ assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0));
-+ assert ("zam-1055", mode != ZNODE_NO_LOCK);
-+
-+ cap_flags = 0;
-+ if (request & ZNODE_LOCK_NONBLOCK) {
-+ cap_flags |= TXN_CAPTURE_NONBLOCKING;
-+ non_blocking = 1;
-+ }
-+
-+ if (request & ZNODE_LOCK_DONT_FUSE)
-+ cap_flags |= TXN_CAPTURE_DONT_FUSE;
-+
-+ /* If we are changing our process priority we must adjust a number
-+ of high priority owners for each znode that we already lock */
-+ if (hipri) {
-+ set_high_priority(owner);
-+ } else {
-+ set_low_priority(owner);
-+ }
-+
-+ level = znode_get_level(node);
-+
-+ /* Fill request structure with our values. */
-+ owner->request.mode = mode;
-+ owner->request.handle = handle;
-+ owner->request.node = node;
-+
-+ txnh = get_current_context()->trans;
-+ lock = &node->lock;
-+
-+ if (mode == ZNODE_READ_LOCK && request == 0) {
-+ ret = longterm_lock_tryfast(owner);
-+ if (ret <= 0)
-+ return ret;
-+ }
-+
-+ has_atom = (txnh->atom != NULL);
-+
-+ /* Synchronize on node's zlock guard lock. */
-+ spin_lock_zlock(lock);
-+
-+ if (znode_is_locked(node) &&
-+ mode == ZNODE_WRITE_LOCK && recursive(owner))
-+ return lock_tail(owner, 0, mode);
-+
-+ for (;;) {
-+ /* Check the lock's availability: if it is unavailable we get
-+ -E_REPEAT, 0 indicates "can_lock", otherwise the node is
-+ invalid. */
-+ ret = can_lock_object(owner);
-+
-+ if (unlikely(ret == -EINVAL)) {
-+ /* @node is dying. Leave it alone. */
-+ break;
-+ }
-+
-+ if (unlikely(ret == -E_REPEAT && non_blocking)) {
-+ /* either locking of @node by the current thread will
-+ * lead to the deadlock, or lock modes are
-+ * incompatible. */
-+ break;
-+ }
-+
-+ assert("nikita-1844", (ret == 0)
-+ || ((ret == -E_REPEAT) && !non_blocking));
-+ /* If we can get the lock... Try to capture first before
-+ taking the lock. */
-+
-+ /* first handle commonest case where node and txnh are already
-+ * in the same atom. */
-+ /* safe to do without taking locks, because:
-+ *
-+ * 1. read of aligned word is atomic with respect to writes to
-+ * this word
-+ *
-+ * 2. false negatives are handled in reiser4_try_capture().
-+ *
-+ * 3. false positives are impossible.
-+ *
-+ * PROOF: left as an exercise to the curious reader.
-+ *
-+ * Just kidding. Here is one:
-+ *
-+ * At the time T0 txnh->atom is stored in txnh_atom.
-+ *
-+ * At the time T1 node->atom is stored in node_atom.
-+ *
-+ * At the time T2 we observe that
-+ *
-+ * txnh_atom != NULL && node_atom == txnh_atom.
-+ *
-+ * Imagine that at this moment we acquire node and txnh spin
-+ * lock in this order. Suppose that under spin lock we have
-+ *
-+ * node->atom != txnh->atom, (S1)
-+ *
-+ * at the time T3.
-+ *
-+ * txnh->atom != NULL still, because txnh is open by the
-+ * current thread.
-+ *
-+ * Suppose node->atom == NULL, that is, node was un-captured
-+ * between T1, and T3. But un-capturing of formatted node is
-+ * always preceded by the call to reiser4_invalidate_lock(),
-+ * which marks znode as JNODE_IS_DYING under zlock spin
-+ * lock. Contradiction, because can_lock_object() above checks
-+ * for JNODE_IS_DYING. Hence, node->atom != NULL at T3.
-+ *
-+ * Suppose that node->atom != node_atom, that is, atom, node
-+ * belongs to was fused into another atom: node_atom was fused
-+ * into node->atom. Atom of txnh was equal to node_atom at T2,
-+ * which means that under spin lock, txnh->atom == node->atom,
-+ * because txnh->atom can only follow fusion
-+ * chain. Contradicts S1.
-+ *
-+ * The same for hypothesis txnh->atom != txnh_atom. Hence,
-+ * node->atom == node_atom == txnh_atom == txnh->atom. Again
-+ * contradicts S1. Hence S1 is false. QED.
-+ *
-+ */
-+
-+ if (likely(has_atom && ZJNODE(node)->atom == txnh->atom)) {
-+ ;
-+ } else {
-+ /*
-+ * unlock zlock spin lock here. It is possible for
-+ * longterm_unlock_znode() to sneak in here, but there
-+ * is no harm: reiser4_invalidate_lock() will mark znode
-+ * as JNODE_IS_DYING and this will be noted by
-+ * can_lock_object() below.
-+ */
-+ spin_unlock_zlock(lock);
-+ spin_lock_znode(node);
-+ ret = reiser4_try_capture(ZJNODE(node), mode, cap_flags);
-+ spin_unlock_znode(node);
-+ spin_lock_zlock(lock);
-+ if (unlikely(ret != 0)) {
-+ /* In the failure case, the txnmgr releases
-+ the znode's lock (or in some cases, it was
-+ released a while ago). There's no need to
-+ reacquire it so we should return here,
-+ avoid releasing the lock. */
-+ owner->request.mode = 0;
-+ break;
-+ }
-+
-+ /* Check the lock's availability again -- this is
-+ because under some circumstances the capture code
-+ has to release and reacquire the znode spinlock. */
-+ ret = can_lock_object(owner);
-+ }
-+
-+ /* This time, a return of (ret == 0) means we can lock, so we
-+ should break out of the loop. */
-+ if (likely(ret != -E_REPEAT || non_blocking))
-+ break;
-+
-+ /* Lock is unavailable, we have to wait. */
-+ ret = reiser4_prepare_to_sleep(owner);
-+ if (unlikely(ret != 0))
-+ break;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+ if (hipri) {
-+ /* If we are going in high priority direction then
-+ increase high priority requests counter for the
-+ node */
-+ lock->nr_hipri_requests++;
-+ if (mode == ZNODE_WRITE_LOCK)
-+ lock->nr_hipri_write_requests ++;
-+ /* If there are no high priority owners for a node,
-+ then immediately wake up low priority owners, so
-+ they can detect possible deadlock */
-+ if (lock->nr_hipri_owners == 0)
-+ wake_up_all_lopri_owners(node);
-+ }
-+ list_add_tail(&owner->requestors_link, &lock->requestors);
-+
-+ /* Ok, here we have prepared a lock request, so unlock
-+ a znode ... */
-+ spin_unlock_zlock(lock);
-+ /* ... and sleep */
-+ reiser4_go_to_sleep(owner);
-+ if (owner->request.mode == ZNODE_NO_LOCK)
-+ goto request_is_done;
-+ spin_lock_zlock(lock);
-+ if (owner->request.mode == ZNODE_NO_LOCK) {
-+ spin_unlock_zlock(lock);
-+ request_is_done:
-+ if (owner->request.ret_code == 0) {
-+ LOCK_CNT_INC(long_term_locked_znode);
-+ zref(node);
-+ }
-+ return owner->request.ret_code;
-+ }
-+ remove_lock_request(owner);
-+ }
-+
-+ return lock_tail(owner, ret, mode);
-+}
-+
-+/* lock object invalidation means marking the znode as JNODE_IS_DYING and
-+ waiting for all other processes to cancel their lock requests. */
-+void reiser4_invalidate_lock(lock_handle * handle /* path to lock
-+ * owner and lock
-+ * object is being
-+ * invalidated. */ )
-+{
-+ znode *node = handle->node;
-+ lock_stack *owner = handle->owner;
-+
-+ assert("zam-325", owner == get_current_lock_stack());
-+ assert("zam-103", znode_is_write_locked(node));
-+ assert("nikita-1393", !ZF_ISSET(node, JNODE_LEFT_CONNECTED));
-+ assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED));
-+ assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("nikita-3097", znode_is_wlocked_once(node));
-+ assert_spin_locked(&(node->lock.guard)); /* caller must already hold the zlock spin lock */
-+
-+ if (handle->signaled)
-+ atomic_dec(&owner->nr_signaled); /* this handle no longer counts as signaled */
-+
-+ ZF_SET(node, JNODE_IS_DYING);
-+ unlink_object(handle);
-+ node->lock.nr_readers = 0; /* the single write lock asserted above is gone */
-+
-+ invalidate_all_lock_requests(node);
-+ spin_unlock_zlock(&node->lock); /* drops the zlock spin lock that was held on entry */
-+}
-+
-+/* Initializes lock_stack: its lists, spin lock, priority and wait queue. */
-+void init_lock_stack(lock_stack * owner /* pointer to
-+ * allocated
-+ * structure. */ )
-+{
-+ INIT_LIST_HEAD(&owner->locks);
-+ INIT_LIST_HEAD(&owner->requestors_link);
-+ spin_lock_init(&owner->sguard);
-+ owner->curpri = 1; /* non-zero curpri is treated as high priority, see move_lh_internal() */
-+ init_waitqueue_head(&owner->wait); /* NOTE(review): ->wakeup and ->nr_signaled are not set here; presumably pre-zeroed by the caller - confirm */
-+}
-+
-+/* Initializes lock object: zeroes it, then sets up its spin lock and lists. */
-+void reiser4_init_lock(zlock * lock /* pointer to allocated
-+ * uninitialized lock object
-+ * structure. */ )
-+{
-+ memset(lock, 0, sizeof(zlock));
-+ spin_lock_init(&lock->guard);
-+ INIT_LIST_HEAD(&lock->requestors);
-+ INIT_LIST_HEAD(&lock->owners);
-+}
-+
-+/* Transfer a lock handle (presumably so that variables can be moved between stack and
-+ heap locations): @new takes over @old (move) or duplicates it (copy, !@unlink_old). */
-+static void
-+move_lh_internal(lock_handle * new, lock_handle * old, int unlink_old)
-+{
-+ znode *node = old->node;
-+ lock_stack *owner = old->owner;
-+ int signaled;
-+
-+ /* locks_list, modified by link_object() is not protected by
-+ anything. This is valid because only current thread ever modifies
-+ locks_list of its lock_stack.
-+ */
-+ assert("nikita-1827", owner == get_current_lock_stack());
-+ assert("nikita-1831", new->owner == NULL);
-+
-+ spin_lock_zlock(&node->lock);
-+
-+ signaled = old->signaled;
-+ if (unlink_old) {
-+ unlink_object(old);
-+ } else {
-+ if (node->lock.nr_readers > 0) {
-+ node->lock.nr_readers += 1; /* read lock: one more reader */
-+ } else {
-+ node->lock.nr_readers -= 1; /* write lock: the count is kept negative */
-+ }
-+ if (signaled) {
-+ atomic_inc(&owner->nr_signaled); /* the copy inherits ->signaled, so count it too */
-+ }
-+ if (owner->curpri) {
-+ node->lock.nr_hipri_owners += 1;
-+ }
-+ LOCK_CNT_INC(long_term_locked_znode);
-+
-+ zref(node); /* presumably takes a node reference for the extra handle - confirm */
-+ }
-+ link_object(new, owner, node);
-+ new->signaled = signaled;
-+
-+ spin_unlock_zlock(&node->lock);
-+}
-+
-+void move_lh(lock_handle * new, lock_handle * old)
-+{
-+ move_lh_internal(new, old, /*unlink_old */ 1);
-+}
-+
-+void copy_lh(lock_handle * new, lock_handle * old)
-+{
-+ move_lh_internal(new, old, /*unlink_old */ 0);
-+}
-+
-+/* after getting -E_DEADLOCK we unlock znodes until this function returns false (i.e. nr_signaled == 0) */
-+int reiser4_check_deadlock(void)
-+{
-+ lock_stack *owner = get_current_lock_stack();
-+ return atomic_read(&owner->nr_signaled) != 0;
-+}
-+
-+/* Before going to sleep we re-check "release lock" requests which might come from threads with hi-pri lock
-+ priorities. Returns 0 if the caller may sleep, -E_DEADLOCK otherwise. */
-+int reiser4_prepare_to_sleep(lock_stack * owner)
-+{
-+ assert("nikita-1847", owner == get_current_lock_stack());
-+
-+ /* We return -E_DEADLOCK if one or more "give me the lock" messages are
-+ * counted in nr_signaled */
-+ if (unlikely(atomic_read(&owner->nr_signaled) != 0)) {
-+ assert("zam-959", !owner->curpri); /* a signaled owner is expected to have curpri == 0 */
-+ return RETERR(-E_DEADLOCK);
-+ }
-+ return 0;
-+}
-+
-+/* Wakes up the thread sleeping on @owner: sets ->wakeup (the condition reiser4_go_to_sleep() waits for) first */
-+void __reiser4_wake_up(lock_stack * owner)
-+{
-+ atomic_set(&owner->wakeup, 1);
-+ wake_up(&owner->wait);
-+}
-+
-+/* Puts a thread to sleep until ->wakeup of @owner becomes non-zero, then clears that flag */
-+void reiser4_go_to_sleep(lock_stack * owner)
-+{
-+ /* Well, we might sleep here, so holding of any spinlocks is no-no */
-+ assert("nikita-3027", reiser4_schedulable());
-+
-+ wait_event(owner->wait, atomic_read(&owner->wakeup));
-+ atomic_set(&owner->wakeup, 0); /* re-arm for the next sleep */
-+}
-+
-+int lock_stack_isclean(lock_stack * owner)
-+{
-+ if (list_empty_careful(&owner->locks)) {
-+ assert("zam-353", atomic_read(&owner->nr_signaled) == 0);
-+ return 1;
-+ }
-+
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/*
-+ * debugging functions
-+ */
-+
-+static void list_check(struct list_head *head)
-+{
-+ struct list_head *pos;
-+
-+ list_for_each(pos, head)
-+ assert("", (pos->prev != NULL && pos->next != NULL &&
-+ pos->prev->next == pos && pos->next->prev == pos));
-+}
-+
-+/* check consistency of locking data-structures hanging off the @stack */
-+static void check_lock_stack(lock_stack * stack)
-+{
-+ spin_lock_stack(stack);
-+ /* check that stack->locks is not corrupted */
-+ list_check(&stack->locks);
-+ spin_unlock_stack(stack);
-+}
-+
-+/* check consistency of locking data structures */
-+void check_lock_data(void)
-+{
-+ check_lock_stack(&get_current_context()->stack);
-+}
-+
-+/* check consistency of locking data structures for @node */
-+void check_lock_node_data(znode * node)
-+{
-+ spin_lock_zlock(&node->lock);
-+ list_check(&node->lock.owners);
-+ list_check(&node->lock.requestors);
-+ spin_unlock_zlock(&node->lock);
-+}
-+
-+/* check that given lock request is dead lock safe. This check is, of course,
-+ * not exhaustive. Returns 1 if no problem was found, 0 otherwise. */
-+static int
-+request_is_deadlock_safe(znode * node, znode_lock_mode mode,
-+ znode_lock_request request)
-+{
-+ lock_stack *owner;
-+
-+ owner = get_current_lock_stack();
-+ /*
-+ * check that hipri lock request is not issued when there are locked
-+ * nodes at the higher levels.
-+ */
-+ if (request & ZNODE_LOCK_HIPRI && !(request & ZNODE_LOCK_NONBLOCK) &&
-+ znode_get_level(node) != 0) {
-+ lock_handle *item;
-+
-+ list_for_each_entry(item, &owner->locks, locks_link) {
-+ znode *other;
-+
-+ other = item->node;
-+
-+ if (znode_get_level(other) == 0)
-+ continue;
-+ if (znode_get_level(other) > znode_get_level(node))
-+ return 0; /* a node at a higher level is already locked by this thread */
-+ }
-+ }
-+ return 1;
-+}
-+
-+#endif
-+
-+/* return pointer to static storage with name of lock_mode. For
-+ debugging only: all unknown modes share one static buffer between calls */
-+const char *lock_mode_name(znode_lock_mode lock /* lock mode to get name of */ )
-+{
-+ if (lock == ZNODE_READ_LOCK)
-+ return "read";
-+ else if (lock == ZNODE_WRITE_LOCK)
-+ return "write";
-+ else {
-+ static char buf[30]; /* overwritten by every call that reaches this branch */
-+
-+ sprintf(buf, "unknown: %i", lock);
-+ return buf;
-+ }
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 79
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/lock.h linux-2.6.20/fs/reiser4/lock.h
---- linux-2.6.20.orig/fs/reiser4/lock.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/lock.h 2007-05-06 14:50:43.742989473 +0400
-@@ -0,0 +1,249 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Long term locking data structures. See lock.c for details. */
-+
-+#ifndef __LOCK_H__
-+#define __LOCK_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/node/node.h"
-+#include "txnmgr.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-+#include <asm/atomic.h>
-+#include <linux/wait.h>
-+
-+/* Per-znode lock object */
-+struct zlock {
-+ spinlock_t guard; /* the "zlock spin lock", taken by spin_lock_zlock() */
-+ /* The number of readers if positive; the number of recursively taken
-+ write locks if negative. Protected by zlock spin lock. */
-+ int nr_readers;
-+ /* A number of processes (lock_stacks) that have this object
-+ locked with high priority */
-+ unsigned nr_hipri_owners;
-+ /* A number of attempts to lock znode in high priority direction */
-+ unsigned nr_hipri_requests;
-+ /* A number of those high priority attempts that ask for a write
-+ lock (ZNODE_WRITE_LOCK) */
-+ unsigned nr_hipri_write_requests;
-+ struct list_head owners; /* lock_handles of all lock_stacks holding this lock (->owners_link) */
-+ /* A linked list of lock_stacks that wait for this lock */
-+ struct list_head requestors;
-+};
-+
-+static inline void spin_lock_zlock(zlock *lock)
-+{
-+ /* check that zlock is not locked */
-+ assert("", LOCK_CNT_NIL(spin_locked_zlock));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ spin_lock(&lock->guard);
-+
-+ LOCK_CNT_INC(spin_locked_zlock);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_unlock_zlock(zlock *lock)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_zlock));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_zlock);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&lock->guard);
-+}
-+
-+#define lock_is_locked(lock) ((lock)->nr_readers != 0)
-+#define lock_is_rlocked(lock) ((lock)->nr_readers > 0)
-+#define lock_is_wlocked(lock) ((lock)->nr_readers < 0)
-+#define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1)
-+#define lock_can_be_rlocked(lock) ((lock)->nr_readers >=0)
-+#define lock_mode_compatible(lock, mode) \
-+ (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \
-+ ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock)))
-+
-+/* Since we have R/W znode locks we need additional bidirectional `link'
-+ objects to implement n<->m relationship between lock owners and lock
-+ objects. We call them `lock handles'.
-+
-+ Locking: see lock.c/"SHORT-TERM LOCKING"
-+*/
-+struct lock_handle {
-+ /* This flag indicates that a signal to yield a lock was passed to
-+ lock owner and counted in owner->nr_signaled
-+
-+ Locking: this is accessed under spin lock on ->node.
-+ */
-+ int signaled;
-+ /* A link to owner of a lock */
-+ lock_stack *owner;
-+ /* A link to znode locked */
-+ znode *node;
-+ /* A list of all locks for a process */
-+ struct list_head locks_link;
-+ /* A list of all owners for a znode */
-+ struct list_head owners_link;
-+};
-+
-+typedef struct lock_request {
-+ /* A pointer to uninitialized link object */
-+ lock_handle *handle;
-+ /* A pointer to the object we want to lock */
-+ znode *node;
-+ /* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK) */
-+ znode_lock_mode mode;
-+ /* how dispatch_lock_requests() returns lock request result code */
-+ int ret_code;
-+} lock_request;
-+
-+/* A lock stack structure for accumulating locks owned by a process */
-+struct lock_stack {
-+ /* A guard lock protecting a lock stack */
-+ spinlock_t sguard;
-+ /* number of znodes which were requested by high priority processes */
-+ atomic_t nr_signaled;
-+ /* Current priority of a process
-+
-+ This is only accessed by the current thread and thus requires no
-+ locking.
-+ */
-+ int curpri;
-+ /* A list of all locks owned by this process. Elements can be added to
-+ * this list only by the current thread. ->node pointers in this list
-+ * can be only changed by the current thread. */
-+ struct list_head locks;
-+ /* When lock_stack waits for the lock, it puts itself on double-linked
-+ requestors list of that lock */
-+ struct list_head requestors_link;
-+ /* Current lock request info.
-+
-+ This is only accessed by the current thread and thus requires no
-+ locking.
-+ */
-+ lock_request request;
-+ /* the following two fields are the lock stack's
-+ * synchronization object to use with the standard linux/wait.h
-+ * interface. See reiser4_go_to_sleep and __reiser4_wake_up for
-+ * usage details. */
-+ wait_queue_head_t wait;
-+ atomic_t wakeup;
-+#if REISER4_DEBUG
-+ int nr_locks; /* number of lock handles in the above list */
-+#endif
-+};
-+
-+/*
-+ User-visible znode locking functions
-+*/
-+
-+extern int longterm_lock_znode(lock_handle * handle,
-+ znode * node,
-+ znode_lock_mode mode,
-+ znode_lock_request request);
-+
-+extern void longterm_unlock_znode(lock_handle * handle);
-+
-+extern int reiser4_check_deadlock(void);
-+
-+extern lock_stack *get_current_lock_stack(void);
-+
-+extern void init_lock_stack(lock_stack * owner);
-+extern void reiser4_init_lock(zlock * lock);
-+
-+static inline void init_lh(lock_handle *lh)
-+{
-+#if REISER4_DEBUG
-+ memset(lh, 0, sizeof *lh);
-+ INIT_LIST_HEAD(&lh->locks_link);
-+ INIT_LIST_HEAD(&lh->owners_link);
-+#else
-+ lh->node = NULL;
-+#endif
-+}
-+
-+static inline void done_lh(lock_handle *lh)
-+{
-+ assert("zam-342", lh != NULL);
-+ if (lh->node != NULL)
-+ longterm_unlock_znode(lh);
-+}
-+
-+extern void move_lh(lock_handle * new, lock_handle * old);
-+extern void copy_lh(lock_handle * new, lock_handle * old);
-+
-+extern int reiser4_prepare_to_sleep(lock_stack * owner);
-+extern void reiser4_go_to_sleep(lock_stack * owner);
-+extern void __reiser4_wake_up(lock_stack * owner);
-+
-+extern int lock_stack_isclean(lock_stack * owner);
-+
-+/* zlock object state check macros: only used in assertions. Both forms imply that the
-+ lock is held by the current thread. */
-+extern int znode_is_write_locked(const znode *);
-+extern void reiser4_invalidate_lock(lock_handle *);
-+
-+/* lock ordering is: first take zlock spin lock, then lock stack spin lock */
-+#define spin_ordering_pred_stack(stack) \
-+ (LOCK_CNT_NIL(spin_locked_stack) && \
-+ LOCK_CNT_NIL(spin_locked_txnmgr) && \
-+ LOCK_CNT_NIL(spin_locked_inode) && \
-+ LOCK_CNT_NIL(rw_locked_cbk_cache) && \
-+ LOCK_CNT_NIL(spin_locked_super_eflush) )
-+
-+static inline void spin_lock_stack(lock_stack *stack)
-+{
-+ assert("", spin_ordering_pred_stack(stack));
-+ spin_lock(&(stack->sguard));
-+ LOCK_CNT_INC(spin_locked_stack);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_unlock_stack(lock_stack *stack)
-+{
-+ assert_spin_locked(&(stack->sguard));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+ LOCK_CNT_DEC(spin_locked_stack);
-+ LOCK_CNT_DEC(spin_locked);
-+ spin_unlock(&(stack->sguard));
-+}
-+
-+static inline void reiser4_wake_up(lock_stack * owner)
-+{
-+ spin_lock_stack(owner);
-+ __reiser4_wake_up(owner);
-+ spin_unlock_stack(owner);
-+}
-+
-+const char *lock_mode_name(znode_lock_mode lock);
-+
-+#if REISER4_DEBUG
-+extern void check_lock_data(void);
-+extern void check_lock_node_data(znode * node);
-+#else /* !REISER4_DEBUG: the checks expand to noop */
-+#define check_lock_data() noop
-+#define check_lock_node_data(node) noop /* same arity as the debug prototype, so callers build either way */
-+#endif
-+
-+/* __LOCK_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/Makefile linux-2.6.20/fs/reiser4/Makefile
---- linux-2.6.20.orig/fs/reiser4/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/Makefile 2007-05-06 14:50:43.742989473 +0400
-@@ -0,0 +1,99 @@
-+#
-+# reiser4/Makefile
-+#
-+
-+obj-$(CONFIG_REISER4_FS) += reiser4.o
-+
-+reiser4-y := \
-+ debug.o \
-+ jnode.o \
-+ znode.o \
-+ key.o \
-+ pool.o \
-+ tree_mod.o \
-+ estimate.o \
-+ carry.o \
-+ carry_ops.o \
-+ lock.o \
-+ tree.o \
-+ context.o \
-+ tap.o \
-+ coord.o \
-+ block_alloc.o \
-+ txnmgr.o \
-+ kassign.o \
-+ flush.o \
-+ wander.o \
-+ eottl.o \
-+ search.o \
-+ page_cache.o \
-+ seal.o \
-+ dscale.o \
-+ flush_queue.o \
-+ ktxnmgrd.o \
-+ blocknrset.o \
-+ super.o \
-+ super_ops.o \
-+ fsdata.o \
-+ export_ops.o \
-+ oid.o \
-+ tree_walk.o \
-+ inode.o \
-+ vfs_ops.o \
-+ as_ops.o \
-+ entd.o\
-+ readahead.o \
-+ status_flags.o \
-+ init_super.o \
-+ safe_link.o \
-+ \
-+ plugin/plugin.o \
-+ plugin/plugin_set.o \
-+ plugin/node/node.o \
-+ plugin/object.o \
-+ plugin/cluster.o \
-+ plugin/inode_ops.o \
-+ plugin/inode_ops_rename.o \
-+ plugin/file_ops.o \
-+ plugin/file_ops_readdir.o \
-+ plugin/file_plugin_common.o \
-+ plugin/file/file.o \
-+ plugin/file/tail_conversion.o \
-+ plugin/file/file_conversion.o \
-+ plugin/file/symlink.o \
-+ plugin/file/cryptcompress.o \
-+ plugin/dir_plugin_common.o \
-+ plugin/dir/hashed_dir.o \
-+ plugin/dir/seekable_dir.o \
-+ plugin/node/node40.o \
-+ \
-+ plugin/crypto/cipher.o \
-+ plugin/crypto/digest.o \
-+ \
-+ plugin/compress/minilzo.o \
-+ plugin/compress/compress.o \
-+ plugin/compress/compress_mode.o \
-+ \
-+ plugin/item/static_stat.o \
-+ plugin/item/sde.o \
-+ plugin/item/cde.o \
-+ plugin/item/blackbox.o \
-+ plugin/item/internal.o \
-+ plugin/item/tail.o \
-+ plugin/item/ctail.o \
-+ plugin/item/extent.o \
-+ plugin/item/extent_item_ops.o \
-+ plugin/item/extent_file_ops.o \
-+ plugin/item/extent_flush_ops.o \
-+ \
-+ plugin/hash.o \
-+ plugin/fibration.o \
-+ plugin/tail_policy.o \
-+ plugin/item/item.o \
-+ \
-+ plugin/security/perm.o \
-+ plugin/space/bitmap.o \
-+ \
-+ plugin/disk_format/disk_format40.o \
-+ plugin/disk_format/disk_format.o
-+
-diff -urN linux-2.6.20.orig/fs/reiser4/oid.c linux-2.6.20/fs/reiser4/oid.c
---- linux-2.6.20.orig/fs/reiser4/oid.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/oid.c 2007-05-06 14:50:43.742989473 +0400
-@@ -0,0 +1,141 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+
-+/* we used to have an oid allocation plugin. It was removed because it
-+ was recognized as providing an unneeded level of abstraction. If anyone
-+ ever finds it useful - look at yet_unneeded_abstractions/oid
-+*/
-+
-+/*
-+ * initialize in-memory data for oid allocator at @super. @nr_files and @next
-+ * are provided by disk format plugin that reads them from the disk during
-+ * mount.
-+ */
-+int oid_init_allocator(struct super_block *super, oid_t nr_files, oid_t next)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(super);
-+
-+ sbinfo->next_to_use = next;
-+ sbinfo->oids_in_use = nr_files;
-+ return 0;
-+}
-+
-+/*
-+ * allocate oid and return it. ABSOLUTE_MAX_OID is returned when allocator
-+ * runs out of oids.
-+ */
-+oid_t oid_allocate(struct super_block * super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ oid_t oid;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ if (sbinfo->next_to_use != ABSOLUTE_MAX_OID) {
-+ oid = sbinfo->next_to_use++;
-+ sbinfo->oids_in_use++;
-+ } else
-+ oid = ABSOLUTE_MAX_OID;
-+ spin_unlock_reiser4_super(sbinfo);
-+ return oid;
-+}
-+
-+/*
-+ * Tell oid allocator that @oid is now free (only the in-use count is lowered).
-+ */
-+int oid_release(struct super_block *super, oid_t oid UNUSED_ARG)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ sbinfo->oids_in_use--;
-+ spin_unlock_reiser4_super(sbinfo);
-+ return 0; /* always succeeds */
-+}
-+
-+/*
-+ * return next @oid that would be allocated (i.e., returned by oid_allocate())
-+ * without actually allocating it. This is used by disk format plugin to save
-+ * oid allocator state on the disk.
-+ */
-+oid_t oid_next(const struct super_block * super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ oid_t oid;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ oid = sbinfo->next_to_use;
-+ spin_unlock_reiser4_super(sbinfo);
-+ return oid;
-+}
-+
-+/*
-+ * returns number of currently used oids, or -1 if it does not fit into a
-+ * positive long. This is used by statfs(2) to report number of "inodes" and
-+ * by disk format plugin to save oid allocator state on the disk.
-+ */
-+long oids_used(const struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ oid_t used;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ used = sbinfo->oids_in_use;
-+ spin_unlock_reiser4_super(sbinfo);
-+ if (used < (__u64) (~0UL >> 1)) /* ~0UL >> 1 is LONG_MAX for both 32- and 64-bit long; no sign extension */
-+ return (long)used;
-+ else
-+ return (long)-1;
-+}
-+
-+/*
-+ * Count oid as allocated in atom. This is done after call to oid_allocate()
-+ * at the point when we are irrevocably committed to creation of the new file
-+ * (i.e., when oid allocation cannot be any longer rolled back due to some
-+ * error).
-+ */
-+void oid_count_allocated(void)
-+{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ atom->nr_objects_created++;
-+ spin_unlock_atom(atom);
-+}
-+
-+/*
-+ * Count oid as free in atom. This is done after call to oid_release() at the
-+ * point when we are irrevocably committed to the deletion of the file (i.e.,
-+ * when oid release cannot be any longer rolled back due to some error).
-+ */
-+void oid_count_released(void)
-+{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ atom->nr_objects_deleted++;
-+ spin_unlock_atom(atom);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/page_cache.c linux-2.6.20/fs/reiser4/page_cache.c
---- linux-2.6.20.orig/fs/reiser4/page_cache.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/page_cache.c 2007-05-06 14:50:43.742989473 +0400
-@@ -0,0 +1,736 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Memory pressure hooks. Fake inodes handling. */
-+
-+/* GLOSSARY
-+
-+ . Formatted and unformatted nodes.
-+ Elements of reiser4 balanced tree to store data and metadata.
-+ Unformatted nodes are pointed to by extent pointers. Such nodes
-+ are used to store data of large objects. Unlike unformatted nodes,
-+ formatted ones have associated format described by node4X plugin.
-+
-+ . Jnode (or journal node)
-+ The in-memory header which is used to track formatted and unformatted
-+ nodes, bitmap nodes, etc. In particular, jnodes are used to track
-+ transactional information associated with each block(see reiser4/jnode.c
-+ for details).
-+
-+ . Znode
-+ The in-memory header which is used to track formatted nodes. Contains
-+ embedded jnode (see reiser4/znode.c for details).
-+*/
-+
-+/* We store all file system meta data (and data, of course) in the page cache.
-+
-+ What does this mean? Instead of using bread/brelse we create a special
-+ "fake" inode (one per super block) and store content of formatted nodes
-+ into pages bound to this inode in the page cache. In newer kernels bread()
-+ already uses inode attached to block device (bd_inode). Advantage of having
-+ our own fake inode is that we can install appropriate methods in its
-+ address_space operations. Such methods are called by VM on memory pressure
-+ (or during background page flushing) and we can use them to react
-+ appropriately.
-+
-+ In initial version we only support one block per page. Support for multiple
-+ blocks per page is complicated by relocation.
-+
-+ To each page, used by reiser4, jnode is attached. jnode is analogous to
-+ buffer head. Difference is that jnode is bound to the page permanently:
-+ jnode cannot be removed from memory until its backing page is.
-+
-+ jnode contains pointer to page (->pg field) and page contains pointer to
-+ jnode in ->private field. Pointer from jnode to page is protected by
-+ jnode's spinlock and pointer from page to jnode is protected by page lock
-+ (PG_locked bit). Lock ordering is: first take page lock, then jnode spin
-+ lock. To go in the reverse direction use jnode_lock_page() function that uses
-+ standard try-lock-and-release device.
-+
-+ Properties:
-+
-+ 1. when jnode-to-page mapping is established (by jnode_attach_page()), page
-+ reference counter is increased.
-+
-+ 2. when jnode-to-page mapping is destroyed (by page_clear_jnode()), page
-+ reference counter is decreased.
-+
-+ 3. on jload() reference counter on jnode page is increased, page is
-+ kmapped and `referenced'.
-+
-+ 4. on jrelse() inverse operations are performed.
-+
-+ 5. kmapping/kunmapping of unformatted pages is done by read/write methods.
-+
-+ DEADLOCKS RELATED TO MEMORY PRESSURE. [OUTDATED. Only interesting
-+ historically.]
-+
-+ [In the following discussion, `lock' invariably means long term lock on
-+ znode.] (What about page locks?)
-+
-+ There is some special class of deadlock possibilities related to memory
-+ pressure. Locks acquired by other reiser4 threads are accounted for in
-+ deadlock prevention mechanism (lock.c), but when ->vm_writeback() is
-+ invoked additional hidden arc is added to the locking graph: thread that
-+ tries to allocate memory waits for ->vm_writeback() to finish. If this
-+ thread keeps lock and ->vm_writeback() tries to acquire this lock, deadlock
-+ prevention is useless.
-+
-+ Another related problem is possibility for ->vm_writeback() to run out of
-+ memory itself. This is not a problem for ext2 and friends, because their
-+ ->vm_writeback() don't allocate much memory, but reiser4 flush is
-+ definitely able to allocate huge amounts of memory.
-+
-+ It seems that there is no reliable way to cope with the problems above.
-+ Instead it was decided that ->vm_writeback() (as invoked in the kswapd
-+ context) wouldn't perform any flushing itself, but rather should just wake
-+ up some auxiliary thread dedicated for this purpose (or, the same thread
-+ that does periodic commit of old atoms (ktxnmgrd.c)).
-+
-+ Details:
-+
-+ 1. Page is called `reclaimable' against particular reiser4 mount F if this
-+ page can be ultimately released by try_to_free_pages() under presumptions
-+ that:
-+
-+ a. ->vm_writeback() for F is no-op, and
-+
-+ b. none of the threads accessing F are making any progress, and
-+
-+ c. other reiser4 mounts obey the same memory reservation protocol as F
-+ (described below).
-+
-+ For example, clean un-pinned page, or page occupied by ext2 data are
-+ reclaimable against any reiser4 mount.
-+
-+ When there is more than one reiser4 mount in a system, condition (c) makes
-+ reclaim-ability not easily verifiable beyond trivial cases mentioned above.
-+
-+ THIS COMMENT IS VALID FOR "MANY BLOCKS ON PAGE" CASE
-+
-+ Fake inode is used to bind formatted nodes and each node is indexed within
-+ fake inode by its block number. If block size is smaller than page size, it
-+ may so happen that block mapped to the page with formatted node is occupied
-+ by unformatted node or is unallocated. This leads to some complications,
-+ because flushing whole page can lead to an incorrect overwrite of
-+ unformatted node that, moreover, can be cached in some other place as
-+ part of the file body. To avoid this, buffers for unformatted nodes are
-+ never marked dirty. Also pages in the fake inode are never marked dirty. This
-+ rules out usage of ->writepage() as memory pressure hook. Instead
-+ ->releasepage() is used.
-+
-+ Josh is concerned that page->buffer is going to die. This should not pose
-+ significant problem though, because we need to add some data structures to
-+ the page anyway (jnode) and all necessary book keeping can be put there.
-+
-+*/
-+
-+/* Life cycle of pages/nodes.
-+
-+ jnode contains reference to page and page contains reference back to
-+ jnode. This reference is counted in page ->count. Thus, page bound to jnode
-+ cannot be released back into free pool.
-+
-+ 1. Formatted nodes.
-+
-+ 1. formatted node is represented by znode. When new znode is created its
-+ ->pg pointer is NULL initially.
-+
-+ 2. when node content is loaded into znode (by call to zload()) for the
-+ first time following happens (in call to ->read_node() or
-+ ->allocate_node()):
-+
-+ 1. new page is added to the page cache.
-+
-+ 2. this page is attached to znode and its ->count is increased.
-+
-+ 3. page is kmapped.
-+
-+ 3. if more calls to zload() follow (without corresponding zrelses), page
-+ counter is left intact and in its stead ->d_count is increased in znode.
-+
-+ 4. each call to zrelse decreases ->d_count. When ->d_count drops to zero
-+ ->release_node() is called and page is kunmapped as result.
-+
-+ 5. at some moment node can be captured by a transaction. Its ->x_count
-+ is then increased by transaction manager.
-+
-+ 6. if node is removed from the tree (empty node with JNODE_HEARD_BANSHEE
-+ bit set) following will happen (also see comment at the top of znode.c):
-+
-+ 1. when last lock is released, node will be uncaptured from
-+ transaction. This releases the reference that the transaction manager
-+ acquired at step 5.
-+
-+ 2. when last reference is released, zput() detects that node is
-+ actually deleted and calls ->delete_node()
-+ operation. page_cache_delete_node() implementation detaches jnode from
-+ page and releases page.
-+
-+ 7. otherwise (node wasn't removed from the tree), last reference to
-+ znode will be released after transaction manager committed transaction
-+ node was in. This implies squallocing of this node (see
-+ flush.c). Nothing special happens at this point. Znode is still in the
-+ hash table and page is still attached to it.
-+
-+ 8. znode is actually removed from the memory because of the memory
-+ pressure, or during umount (znodes_tree_done()). Anyway, znode is
-+ removed by the call to zdrop(). At this moment, page is detached from
-+ znode and removed from the inode address space.
-+
-+*/
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "entd.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/swap.h> /* for struct page */
-+#include <linux/pagemap.h>
-+#include <linux/bio.h>
-+#include <linux/writeback.h>
-+#include <linux/blkdev.h>
-+
-+static struct bio *page_bio(struct page *, jnode *, int rw, gfp_t gfp);
-+
-+static struct address_space_operations formatted_fake_as_ops;
-+
-+static const oid_t fake_ino = 0x1;
-+static const oid_t bitmap_ino = 0x2;
-+static const oid_t cc_ino = 0x3;
-+
-+static void
-+init_fake_inode(struct super_block *super, struct inode *fake,
-+ struct inode **pfake)
-+{
-+ assert("nikita-2168", fake->i_state & I_NEW); /* @fake must still carry I_NEW */
-+ fake->i_mapping->a_ops = &formatted_fake_as_ops; /* every fake inode gets the same address space operations */
-+ *pfake = fake; /* hand the inode back through @pfake; @super is not used here */
-+ /* NOTE-NIKITA something else? */
-+ unlock_new_inode(fake);
-+}
-+
-+/**
-+ * reiser4_init_formatted_fake - iget inodes for formatted nodes and bitmaps
-+ * @super: super block to init fake inodes for
-+ *
-+ * Initializes fake inode to which formatted nodes are bound in the page cache,
-+ * inode for bitmaps and the cc_ino inode. Returns 0 on success, -ENOMEM otherwise.
-+ */
-+int reiser4_init_formatted_fake(struct super_block *super)
-+{
-+ struct inode *fake;
-+ struct inode *bitmap;
-+ struct inode *cc;
-+ reiser4_super_info_data *sinfo;
-+
-+ assert("nikita-1703", super != NULL);
-+
-+ sinfo = get_super_private_nocheck(super);
-+ fake = iget_locked(super, oid_to_ino(fake_ino));
-+
-+ if (fake != NULL) {
-+ init_fake_inode(super, fake, &sinfo->fake);
-+
-+ bitmap = iget_locked(super, oid_to_ino(bitmap_ino));
-+ if (bitmap != NULL) {
-+ init_fake_inode(super, bitmap, &sinfo->bitmap);
-+
-+ cc = iget_locked(super, oid_to_ino(cc_ino));
-+ if (cc != NULL) {
-+ init_fake_inode(super, cc, &sinfo->cc);
-+ return 0;
-+ } else {
-+ iput(sinfo->fake); /* third iget failed: undo the first two */
-+ iput(sinfo->bitmap);
-+ sinfo->fake = NULL;
-+ sinfo->bitmap = NULL;
-+ }
-+ } else {
-+ iput(sinfo->fake); /* second iget failed: undo the first */
-+ sinfo->fake = NULL;
-+ }
-+ }
-+ return RETERR(-ENOMEM);
-+}
-+
-+/**
-+ * reiser4_done_formatted_fake - release inode used by formatted nodes and bitmaps
-+ * @super: super block to init fake inode for
-+ *
-+ * Releases inodes which were used as address spaces of bitmap and formatted
-+ * nodes.
-+ */
-+void reiser4_done_formatted_fake(struct super_block *super)
-+{
-+ reiser4_super_info_data *sinfo;
-+
-+ sinfo = get_super_private_nocheck(super);
-+
-+ if (sinfo->fake != NULL) {
-+ iput(sinfo->fake);
-+ sinfo->fake = NULL;
-+ }
-+
-+ if (sinfo->bitmap != NULL) {
-+ iput(sinfo->bitmap);
-+ sinfo->bitmap = NULL;
-+ }
-+
-+ if (sinfo->cc != NULL) {
-+ iput(sinfo->cc);
-+ sinfo->cc = NULL;
-+ }
-+ return;
-+}
-+
-+void reiser4_wait_page_writeback(struct page *page)
-+{
-+ assert("zam-783", PageLocked(page));
-+
-+ do {
-+ unlock_page(page);
-+ wait_on_page_writeback(page);
-+ lock_page(page);
-+ } while (PageWriteback(page));
-+}
-+
-+/* return tree @page is in */
-+reiser4_tree *reiser4_tree_by_page(const struct page *page /* page to query */ )
-+{
-+ assert("nikita-2461", page != NULL);
-+ return &get_super_private(page->mapping->host->i_sb)->tree;
-+}
-+
-+/* completion handler for single page bio-based read.
-+
-+ mpage_end_io_read() would also do. But it's static.
-+
-+*/
-+static int
-+end_bio_single_page_read(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
-+ int err UNUSED_ARG)
-+{
-+ struct page *page;
-+
-+ if (bio->bi_size != 0) {
-+ warning("nikita-3332", "Truncated single page read: %i",
-+ bio->bi_size);
-+ return 1;
-+ }
-+
-+ page = bio->bi_io_vec[0].bv_page;
-+
-+ if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+ SetPageUptodate(page);
-+ } else {
-+ ClearPageUptodate(page);
-+ SetPageError(page);
-+ }
-+ unlock_page(page);
-+ bio_put(bio);
-+ return 0;
-+}
-+
-+/* completion handler for single page bio-based write.
-+
-+ mpage_end_io_write() would also do. But it's static.
-+
-+*/
-+static int
-+end_bio_single_page_write(struct bio *bio, unsigned int bytes_done UNUSED_ARG,
-+ int err UNUSED_ARG)
-+{
-+ struct page *page;
-+
-+ if (bio->bi_size != 0) {
-+ warning("nikita-3333", "Truncated single page write: %i",
-+ bio->bi_size);
-+ return 1;
-+ }
-+
-+ page = bio->bi_io_vec[0].bv_page;
-+
-+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-+ SetPageError(page);
-+ end_page_writeback(page);
-+ bio_put(bio);
-+ return 0;
-+}
-+
-+/* ->readpage() method for formatted nodes */
-+static int formatted_readpage(struct file *f UNUSED_ARG,
-+ struct page *page /* page to read */ )
-+{
-+ assert("nikita-2412", PagePrivate(page) && jprivate(page));
-+ return reiser4_page_io(page, jprivate(page), READ,
-+ reiser4_ctx_gfp_mask_get());
-+}
-+
-+/**
-+ * reiser4_page_io - submit single-page bio request
-+ * @page: page to perform io for
-+ * @node: jnode of page
-+ * @rw: read or write
-+ * @gfp: gfp mask for bio allocation
-+ *
-+ * Submits single page read or write.
-+ */
-+int reiser4_page_io(struct page *page, jnode *node, int rw, gfp_t gfp)
-+{
-+ struct bio *bio;
-+ int result;
-+
-+ assert("nikita-2094", page != NULL);
-+ assert("nikita-2226", PageLocked(page));
-+ assert("nikita-2634", node != NULL);
-+ assert("nikita-2893", rw == READ || rw == WRITE);
-+
-+ if (rw) {
-+ if (unlikely(page->mapping->host->i_sb->s_flags & MS_RDONLY)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ }
-+
-+ bio = page_bio(page, node, rw, gfp);
-+ if (!IS_ERR(bio)) {
-+ if (rw == WRITE) {
-+ SetPageWriteback(page);
-+ unlock_page(page);
-+ }
-+ reiser4_submit_bio(rw, bio);
-+ result = 0;
-+ } else {
-+ unlock_page(page);
-+ result = PTR_ERR(bio);
-+ }
-+
-+ return result;
-+}
-+
-+/* helper function to construct bio for page */
-+static struct bio *page_bio(struct page *page, jnode * node, int rw, gfp_t gfp)
-+{
-+ struct bio *bio;
-+ assert("nikita-2092", page != NULL);
-+ assert("nikita-2633", node != NULL);
-+
-+ /* Simple implementation in the assumption that blocksize == pagesize.
-+
-+ We only have to submit one block, but submit_bh() will allocate bio
-+ anyway, so lets use all the bells-and-whistles of bio code.
-+ */
-+
-+ bio = bio_alloc(gfp, 1);
-+ if (bio != NULL) {
-+ int blksz;
-+ struct super_block *super;
-+ reiser4_block_nr blocknr;
-+
-+ super = page->mapping->host->i_sb;
-+ assert("nikita-2029", super != NULL);
-+ blksz = super->s_blocksize;
-+ assert("nikita-2028", blksz == (int)PAGE_CACHE_SIZE);
-+
-+ spin_lock_jnode(node);
-+ blocknr = *jnode_get_io_block(node);
-+ spin_unlock_jnode(node);
-+
-+ assert("nikita-2275", blocknr != (reiser4_block_nr) 0);
-+ assert("nikita-2276", !reiser4_blocknr_is_fake(&blocknr));
-+
-+ bio->bi_bdev = super->s_bdev;
-+ /* fill bio->bi_sector before calling bio_add_page(), because
-+ * q->merge_bvec_fn may want to inspect it (see
-+ * drivers/md/linear.c:linear_mergeable_bvec() for example. */
-+ bio->bi_sector = blocknr * (blksz >> 9);
-+
-+ if (!bio_add_page(bio, page, blksz, 0)) {
-+ warning("nikita-3452",
-+ "Single page bio cannot be constructed");
-+ return ERR_PTR(RETERR(-EINVAL));
-+ }
-+
-+ /* bio -> bi_idx is filled by bio_init() */
-+ bio->bi_end_io = (rw == READ) ?
-+ end_bio_single_page_read : end_bio_single_page_write;
-+
-+ return bio;
-+ } else
-+ return ERR_PTR(RETERR(-ENOMEM));
-+}
-+
-+/* this function is internally called by jnode_make_dirty() */
-+int reiser4_set_page_dirty_internal(struct page *page)
-+{
-+ struct address_space *mapping;
-+
-+ mapping = page->mapping;
-+ BUG_ON(mapping == NULL);
-+
-+ if (!TestSetPageDirty(page)) {
-+ if (mapping_cap_account_dirty(mapping))
-+ inc_zone_page_state(page, NR_FILE_DIRTY);
-+
-+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-+ }
-+
-+ /* znode must be dirty ? */
-+ if (mapping->host == reiser4_get_super_fake(mapping->host->i_sb))
-+ assert("", JF_ISSET(jprivate(page), JNODE_DIRTY));
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * can_hit_entd
-+ *
-+ * This is used on
-+ */
-+static int can_hit_entd(reiser4_context *ctx, struct super_block *s)
-+{
-+ if (ctx == NULL || ((unsigned long)ctx->magic) != context_magic)
-+ return 1;
-+ if (ctx->super != s)
-+ return 1;
-+ if (get_super_private(s)->entd.tsk == current)
-+ return 0;
-+ if (!lock_stack_isclean(&ctx->stack))
-+ return 0;
-+ if (ctx->trans->atom != NULL)
-+ return 0;
-+ return 1;
-+}
-+
-+#endif
-+
-+/**
-+ * reiser4_writepage - writepage of struct address_space_operations
-+ * @page: page to write
-+ * @wbc:
-+ *
-+ *
-+ */
-+/* Common memory pressure notification. */
-+int reiser4_writepage(struct page *page,
-+ struct writeback_control *wbc)
-+{
-+ struct super_block *s;
-+ reiser4_context *ctx;
-+
-+ assert("vs-828", PageLocked(page));
-+
-+ s = page->mapping->host->i_sb;
-+ ctx = get_current_context_check();
-+
-+ assert("", can_hit_entd(ctx, s));
-+
-+ return write_page_by_ent(page, wbc);
-+}
-+
-+/* ->set_page_dirty() method of formatted address_space */
-+static int formatted_set_page_dirty(struct page *page)
-+{
-+ assert("nikita-2173", page != NULL);
-+ BUG();
-+ return __set_page_dirty_nobuffers(page);
-+}
-+
-+/* writepages method of address space operations in reiser4 is used to involve
-+ into transactions pages which are dirtied via mmap. Only regular files can
-+ have such pages. Fake inode is used to access formatted nodes via page
-+ cache. As formatted nodes can never be mmaped, fake inode's writepages has
-+ nothing to do */
-+static int
-+writepages_fake(struct address_space *mapping, struct writeback_control *wbc)
-+{
-+ return 0;
-+}
-+
-+/* address space operations for the fake inode */
-+static struct address_space_operations formatted_fake_as_ops = {
-+ /* Perform a writeback of a single page as a memory-freeing
-+ * operation. */
-+ .writepage = reiser4_writepage,
-+ /* this is called to read formatted node */
-+ .readpage = formatted_readpage,
-+ /* ->sync_page() method of fake inode address space operations. Called
-+ from wait_on_page() and lock_page().
-+
-+ This is most annoyingly misnomered method. Actually it is called
-+ from wait_on_page_bit() and lock_page() and its purpose is to
-+ actually start io by jabbing device drivers.
-+ */
-+ .sync_page = block_sync_page,
-+ /* Write back some dirty pages from this mapping. Called from sync.
-+ called during sync (pdflush) */
-+ .writepages = writepages_fake,
-+ /* Set a page dirty */
-+ .set_page_dirty = formatted_set_page_dirty,
-+ /* used for read-ahead. Not applicable */
-+ .readpages = NULL,
-+ .prepare_write = NULL,
-+ .commit_write = NULL,
-+ .bmap = NULL,
-+ /* called just before page is being detached from inode mapping and
-+ removed from memory. Called on truncate, cut/squeeze, and
-+ umount. */
-+ .invalidatepage = reiser4_invalidatepage,
-+ /* this is called by shrink_cache() so that file system can try to
-+ release objects (jnodes, buffers, journal heads) attached to page
-+ and, may be made page itself free-able.
-+ */
-+ .releasepage = reiser4_releasepage,
-+ .direct_IO = NULL
-+};
-+
-+/* called just before page is released (no longer used by reiser4). Callers:
-+ jdelete() and extent2tail(). */
-+void reiser4_drop_page(struct page *page)
-+{
-+ assert("nikita-2181", PageLocked(page));
-+ clear_page_dirty_for_io(page);
-+ ClearPageUptodate(page);
-+#if defined(PG_skipped)
-+ ClearPageSkipped(page);
-+#endif
-+ unlock_page(page);
-+}
-+
-+#define JNODE_GANG_SIZE (16)
-+
-+/* find all jnodes from range specified and invalidate them */
-+static int
-+truncate_jnodes_range(struct inode *inode, pgoff_t from, pgoff_t count)
-+{
-+ reiser4_inode *info;
-+ int truncated_jnodes;
-+ reiser4_tree *tree;
-+ unsigned long index;
-+ unsigned long end;
-+
-+ if (inode_file_plugin(inode) ==
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
-+ /* No need to get rid of jnodes here: if the single jnode of
-+ page cluster did not have page, then it was found and killed
-+ before in
-+ truncate_page_cluster_cryptcompress()->jput()->jput_final(),
-+ otherwise it will be dropped by reiser4_invalidatepage() */
-+ return 0;
-+ truncated_jnodes = 0;
-+
-+ info = reiser4_inode_data(inode);
-+ tree = reiser4_tree_by_inode(inode);
-+
-+ index = from;
-+ end = from + count;
-+
-+ while (1) {
-+ jnode *gang[JNODE_GANG_SIZE];
-+ int taken;
-+ int i;
-+ jnode *node;
-+
-+ assert("nikita-3466", index <= end);
-+
-+ read_lock_tree(tree);
-+ taken =
-+ radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info),
-+ (void **)gang, index,
-+ JNODE_GANG_SIZE);
-+ for (i = 0; i < taken; ++i) {
-+ node = gang[i];
-+ if (index_jnode(node) < end)
-+ jref(node);
-+ else
-+ gang[i] = NULL;
-+ }
-+ read_unlock_tree(tree);
-+
-+ for (i = 0; i < taken; ++i) {
-+ node = gang[i];
-+ if (node != NULL) {
-+ index = max(index, index_jnode(node));
-+ spin_lock_jnode(node);
-+ assert("edward-1457", node->pg == NULL);
-+ /* this is always called after
-+ truncate_inode_pages_range(). Therefore, here
-+ jnode can not have page. New pages can not be
-+ created because truncate_jnodes_range goes
-+ under exclusive access on file obtained,
-+ where as new page creation requires
-+ non-exclusive access obtained */
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ reiser4_uncapture_jnode(node);
-+ unhash_unformatted_jnode(node);
-+ truncated_jnodes++;
-+ jput(node);
-+ } else
-+ break;
-+ }
-+ if (i != taken || taken == 0)
-+ break;
-+ }
-+ return truncated_jnodes;
-+}
-+
-+/* Truncating files in reiser4: problems and solutions.
-+
-+ VFS calls fs's truncate after it has called truncate_inode_pages()
-+ to get rid of pages corresponding to part of file being truncated.
-+ In reiser4 it may cause existence of unallocated extents which do
-+ not have jnodes. Flush code does not expect that. Solution of this
-+ problem is straightforward. As vfs's truncate is implemented using
-+ setattr operation, it seems reasonable to have ->setattr() that
-+ will cut file body. However, flush code also does not expect dirty
-+ pages without parent items, so it is impossible to cut all items,
-+ then truncate all pages in two steps. We resolve this problem by
-+ cutting items one-by-one. Each such fine-grained step performed
-+ under longterm znode lock calls at the end ->kill_hook() method of
-+ a killed item to remove its binded pages and jnodes.
-+
-+ The following function is a common part of mentioned kill hooks.
-+ Also, this is called before tail-to-extent conversion (to not manage
-+ few copies of the data).
-+*/
-+void reiser4_invalidate_pages(struct address_space *mapping, pgoff_t from,
-+ unsigned long count, int even_cows)
-+{
-+ loff_t from_bytes, count_bytes;
-+
-+ if (count == 0)
-+ return;
-+ from_bytes = ((loff_t) from) << PAGE_CACHE_SHIFT;
-+ count_bytes = ((loff_t) count) << PAGE_CACHE_SHIFT;
-+
-+ unmap_mapping_range(mapping, from_bytes, count_bytes, even_cows);
-+ truncate_inode_pages_range(mapping, from_bytes,
-+ from_bytes + count_bytes - 1);
-+ truncate_jnodes_range(mapping->host, from, count);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/page_cache.h linux-2.6.20/fs/reiser4/page_cache.h
---- linux-2.6.20.orig/fs/reiser4/page_cache.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/page_cache.h 2007-05-06 14:50:43.746990723 +0400
-@@ -0,0 +1,68 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Memory pressure hooks. Fake inodes handling. See page_cache.c. */
-+
-+#if !defined( __REISER4_PAGE_CACHE_H__ )
-+#define __REISER4_PAGE_CACHE_H__
-+
-+#include "forward.h"
-+#include "context.h" /* for reiser4_ctx_gfp_mask_get() */
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/pagemap.h> /* for lock_page() */
-+#include <linux/vmalloc.h> /* for __vmalloc() */
-+
-+extern int reiser4_init_formatted_fake(struct super_block *);
-+extern void reiser4_done_formatted_fake(struct super_block *);
-+
-+extern reiser4_tree *reiser4_tree_by_page(const struct page *);
-+
-+extern int reiser4_set_page_dirty_internal(struct page *);
-+
-+#define reiser4_submit_bio(rw, bio) submit_bio((rw), (bio))
-+
-+extern void reiser4_wait_page_writeback(struct page *);
-+static inline void lock_and_wait_page_writeback(struct page *page)
-+{
-+ lock_page(page);
-+ if (unlikely(PageWriteback(page)))
-+ reiser4_wait_page_writeback(page);
-+}
-+
-+#define jprivate(page) ((jnode *)page_private(page))
-+
-+extern int reiser4_page_io(struct page *, jnode *, int rw, gfp_t);
-+extern void reiser4_drop_page(struct page *);
-+extern void reiser4_invalidate_pages(struct address_space *, pgoff_t from,
-+ unsigned long count, int even_cows);
-+extern void capture_reiser4_inodes(struct super_block *,
-+ struct writeback_control *);
-+static inline void * reiser4_vmalloc (unsigned long size)
-+{
-+ return __vmalloc(size,
-+ reiser4_ctx_gfp_mask_get() | __GFP_HIGHMEM,
-+ PAGE_KERNEL);
-+}
-+
-+#define PAGECACHE_TAG_REISER4_MOVED PAGECACHE_TAG_DIRTY
-+
-+#if REISER4_DEBUG
-+extern void print_page(const char *prefix, struct page *page);
-+#else
-+#define print_page(prf, p) noop
-+#endif
-+
-+/* __REISER4_PAGE_CACHE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/cluster.c linux-2.6.20/fs/reiser4/plugin/cluster.c
---- linux-2.6.20.orig/fs/reiser4/plugin/cluster.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/cluster.c 2007-05-06 14:50:43.746990723 +0400
-@@ -0,0 +1,71 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Contains reiser4 cluster plugins (see
-+ http://www.namesys.com/cryptcompress_design.html
-+ "Concepts of clustering" for details). */
-+
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../inode.h"
-+
-+static int change_cluster(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ assert("edward-1324", inode != NULL);
-+ assert("edward-1325", plugin != NULL);
-+ assert("edward-1326", is_reiser4_inode(inode));
-+ assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE);
-+
-+ /* Can't change the cluster plugin for already existent regular files. */
-+ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+ return RETERR(-EINVAL);
-+
-+ /* If matches, nothing to change. */
-+ if (inode_hash_plugin(inode) != NULL &&
-+ inode_hash_plugin(inode)->h.id == plugin->h.id)
-+ return 0;
-+
-+ return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_CLUSTER, plugin);
-+}
-+
-+static reiser4_plugin_ops cluster_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = &change_cluster
-+};
-+
-+#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \
-+ [CLUSTER_ ## ID ## _ID] = { \
-+ .h = { \
-+ .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \
-+ .id = CLUSTER_ ## ID ## _ID, \
-+ .pops = &cluster_plugin_ops, \
-+ .label = LABEL, \
-+ .desc = DESC, \
-+ .linkage = {NULL, NULL} \
-+ }, \
-+ .shift = SHIFT \
-+ }
-+
-+cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = {
-+ SUPPORT_CLUSTER(16, 64K, "64K", "Large"),
-+ SUPPORT_CLUSTER(15, 32K, "32K", "Big"),
-+ SUPPORT_CLUSTER(14, 16K, "16K", "Average"),
-+ SUPPORT_CLUSTER(13, 8K, "8K", "Small"),
-+ SUPPORT_CLUSTER(12, 4K, "4K", "Minimal")
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/cluster.h linux-2.6.20/fs/reiser4/plugin/cluster.h
---- linux-2.6.20.orig/fs/reiser4/plugin/cluster.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/cluster.h 2007-05-06 14:50:43.746990723 +0400
-@@ -0,0 +1,343 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This file contains page/cluster index translators and offset modulators
-+ See http://www.namesys.com/cryptcompress_design.html for details */
-+
-+#if !defined( __FS_REISER4_CLUSTER_H__ )
-+#define __FS_REISER4_CLUSTER_H__
-+
-+#include "../inode.h"
-+
-+static inline int inode_cluster_shift(struct inode *inode)
-+{
-+ assert("edward-92", inode != NULL);
-+ assert("edward-93", reiser4_inode_data(inode) != NULL);
-+
-+ return inode_cluster_plugin(inode)->shift;
-+}
-+
-+static inline unsigned cluster_nrpages_shift(struct inode *inode)
-+{
-+ return inode_cluster_shift(inode) - PAGE_CACHE_SHIFT;
-+}
-+
-+/* cluster size in page units */
-+static inline unsigned cluster_nrpages(struct inode *inode)
-+{
-+ return 1U << cluster_nrpages_shift(inode);
-+}
-+
-+static inline size_t inode_cluster_size(struct inode *inode)
-+{
-+ assert("edward-96", inode != NULL);
-+
-+ return 1U << inode_cluster_shift(inode);
-+}
-+
-+static inline cloff_t pg_to_clust(pgoff_t idx, struct inode *inode)
-+{
-+ return idx >> cluster_nrpages_shift(inode);
-+}
-+
-+static inline pgoff_t clust_to_pg(cloff_t idx, struct inode *inode)
-+{
-+ return idx << cluster_nrpages_shift(inode);
-+}
-+
-+static inline pgoff_t pg_to_clust_to_pg(pgoff_t idx, struct inode *inode)
-+{
-+ return clust_to_pg(pg_to_clust(idx, inode), inode);
-+}
-+
-+static inline pgoff_t off_to_pg(loff_t off)
-+{
-+ return (off >> PAGE_CACHE_SHIFT);
-+}
-+
-+static inline loff_t pg_to_off(pgoff_t idx)
-+{
-+ return ((loff_t) (idx) << PAGE_CACHE_SHIFT);
-+}
-+
-+static inline cloff_t off_to_clust(loff_t off, struct inode *inode)
-+{
-+ return off >> inode_cluster_shift(inode);
-+}
-+
-+static inline loff_t clust_to_off(cloff_t idx, struct inode *inode)
-+{
-+ return (loff_t) idx << inode_cluster_shift(inode);
-+}
-+
-+static inline unsigned long count_to_nr(loff_t count, unsigned shift)
-+{
-+ return (count + (1UL << shift) - 1) >> shift;
-+}
-+
-+/* number of pages occupied by @count bytes */
-+static inline pgoff_t count_to_nrpages(loff_t count)
-+{
-+ return count_to_nr(count, PAGE_CACHE_SHIFT);
-+}
-+
-+/* number of clusters occupied by @count bytes */
-+static inline cloff_t count_to_nrclust(loff_t count, struct inode *inode)
-+{
-+ return count_to_nr(count, inode_cluster_shift(inode));
-+}
-+
-+/* number of clusters occupied by @count pages */
-+static inline cloff_t pgcount_to_nrclust(pgoff_t count, struct inode *inode)
-+{
-+ return count_to_nr(count, cluster_nrpages_shift(inode));
-+}
-+
-+static inline loff_t off_to_clust_to_off(loff_t off, struct inode *inode)
-+{
-+ return clust_to_off(off_to_clust(off, inode), inode);
-+}
-+
-+static inline pgoff_t off_to_clust_to_pg(loff_t off, struct inode *inode)
-+{
-+ return clust_to_pg(off_to_clust(off, inode), inode);
-+}
-+
-+static inline unsigned off_to_pgoff(loff_t off)
-+{
-+ return off & (PAGE_CACHE_SIZE - 1);
-+}
-+
-+static inline unsigned off_to_cloff(loff_t off, struct inode *inode)
-+{
-+ return off & ((loff_t) (inode_cluster_size(inode)) - 1);
-+}
-+
-+static inline unsigned
-+pg_to_off_to_cloff(unsigned long idx, struct inode *inode)
-+{
-+ return off_to_cloff(pg_to_off(idx), inode);
-+}
-+
-+/* if @size != 0, returns index of the page
-+ which contains the last byte of the file */
-+static inline pgoff_t size_to_pg(loff_t size)
-+{
-+ return (size ? off_to_pg(size - 1) : 0);
-+}
-+
-+/* minimal index of the page which doesn't contain
-+ file data */
-+static inline pgoff_t size_to_next_pg(loff_t size)
-+{
-+ return (size ? off_to_pg(size - 1) + 1 : 0);
-+}
-+
-+/* how many bytes of file of size @cnt can be contained
-+ in page of index @idx */
-+static inline unsigned cnt_to_pgcnt(loff_t cnt, pgoff_t idx)
-+{
-+ if (idx > off_to_pg(cnt))
-+ return 0;
-+ if (idx < off_to_pg(cnt))
-+ return PAGE_CACHE_SIZE;
-+ return off_to_pgoff(cnt);
-+}
-+
-+/* how many bytes of file of size @cnt can be contained
-+ in logical cluster of index @idx */
-+static inline unsigned cnt_to_clcnt(loff_t cnt, cloff_t idx,
-+ struct inode *inode)
-+{
-+ if (idx > off_to_clust(cnt, inode))
-+ return 0;
-+ if (idx < off_to_clust(cnt, inode))
-+ return inode_cluster_size(inode);
-+ return off_to_cloff(cnt, inode);
-+}
-+
-+static inline unsigned
-+fsize_to_count(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ assert("edward-288", clust != NULL);
-+ assert("edward-289", inode != NULL);
-+
-+ return cnt_to_clcnt(inode->i_size, clust->index, inode);
-+}
-+
-+static inline int
-+cluster_is_complete(reiser4_cluster_t * clust, struct inode * inode)
-+{
-+ return clust->tc.lsize == inode_cluster_size(inode);
-+}
-+
-+static inline void reiser4_slide_init(reiser4_slide_t * win)
-+{
-+ assert("edward-1084", win != NULL);
-+ memset(win, 0, sizeof *win);
-+}
-+
-+static inline tfm_action
-+cluster_get_tfm_act(tfm_cluster_t * tc)
-+{
-+ assert("edward-1356", tc != NULL);
-+ return tc->act;
-+}
-+
-+static inline void
-+cluster_set_tfm_act(tfm_cluster_t * tc, tfm_action act)
-+{
-+ assert("edward-1356", tc != NULL);
-+ tc->act = act;
-+}
-+
-+static inline void
-+cluster_init_act (reiser4_cluster_t * clust, tfm_action act, reiser4_slide_t * window){
-+ assert("edward-84", clust != NULL);
-+ memset(clust, 0, sizeof *clust);
-+ cluster_set_tfm_act(&clust->tc, act);
-+ clust->dstat = INVAL_DISK_CLUSTER;
-+ clust->win = window;
-+}
-+
-+static inline void
-+cluster_init_read(reiser4_cluster_t * clust, reiser4_slide_t * window)
-+{
-+ cluster_init_act (clust, TFMA_READ, window);
-+}
-+
-+static inline void
-+cluster_init_write(reiser4_cluster_t * clust, reiser4_slide_t * window)
-+{
-+ cluster_init_act (clust, TFMA_WRITE, window);
-+}
-+
-+static inline int dclust_get_extension_dsize(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.dsize;
-+}
-+
-+static inline void dclust_set_extension_dsize(hint_t * hint, int dsize)
-+{
-+ hint->ext_coord.extension.ctail.dsize = dsize;
-+}
-+
-+static inline int dclust_get_extension_shift(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.shift;
-+}
-+
-+static inline int dclust_get_extension_ncount(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.ncount;
-+}
-+
-+static inline void dclust_inc_extension_ncount(hint_t * hint)
-+{
-+ hint->ext_coord.extension.ctail.ncount ++;
-+}
-+
-+static inline void dclust_init_extension(hint_t * hint)
-+{
-+ memset(&hint->ext_coord.extension.ctail, 0,
-+ sizeof(hint->ext_coord.extension.ctail));
-+}
-+
-+static inline int hint_is_unprepped_dclust(hint_t * hint)
-+{
-+ assert("edward-1451", hint_is_valid(hint));
-+ return dclust_get_extension_shift(hint) == (int)UCTAIL_SHIFT;
-+}
-+
-+static inline void coord_set_between_clusters(coord_t * coord)
-+{
-+#if REISER4_DEBUG
-+ int result;
-+ result = zload(coord->node);
-+ assert("edward-1296", !result);
-+#endif
-+ if (!coord_is_between_items(coord)) {
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ }
-+#if REISER4_DEBUG
-+ zrelse(coord->node);
-+#endif
-+}
-+
-+int reiser4_inflate_cluster(reiser4_cluster_t *, struct inode *);
-+int find_disk_cluster(reiser4_cluster_t *, struct inode *, int read,
-+ znode_lock_mode mode);
-+int flush_cluster_pages(reiser4_cluster_t *, jnode *, struct inode *);
-+int reiser4_deflate_cluster(reiser4_cluster_t *, struct inode *);
-+void truncate_page_cluster_cryptcompress(struct inode *inode, cloff_t start,
-+ int even_cows);
-+void invalidate_hint_cluster(reiser4_cluster_t * clust);
-+void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode,
-+ znode_lock_mode mode);
-+int get_disk_cluster_locked(reiser4_cluster_t * clust, struct inode *inode,
-+ znode_lock_mode lock_mode);
-+void reset_cluster_params(reiser4_cluster_t * clust);
-+int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page,
-+ int count);
-+int prepare_page_cluster(struct inode *inode, reiser4_cluster_t * clust,
-+ int capture);
-+void reiser4_release_cluster_pages(reiser4_cluster_t *);
-+void put_cluster_handle(reiser4_cluster_t * clust);
-+int grab_tfm_stream(struct inode *inode, tfm_cluster_t * tc, tfm_stream_id id);
-+int tfm_cluster_is_uptodate(tfm_cluster_t * tc);
-+void tfm_cluster_set_uptodate(tfm_cluster_t * tc);
-+void tfm_cluster_clr_uptodate(tfm_cluster_t * tc);
-+
-+/* move cluster handle to the target position
-+ specified by the page of index @pgidx
-+*/
-+static inline void move_cluster_forward(reiser4_cluster_t * clust,
-+ struct inode *inode,
-+ pgoff_t pgidx)
-+{
-+ assert("edward-1297", clust != NULL);
-+ assert("edward-1298", inode != NULL);
-+
-+ reset_cluster_params(clust);
-+ if (clust->index_valid &&
-+ /* Hole in the indices. Hint became invalid and can not be
-+ used by find_cluster_item() even if seal/node versions
-+ will coincide */
-+ pg_to_clust(pgidx, inode) != clust->index + 1) {
-+ reiser4_unset_hint(clust->hint);
-+ invalidate_hint_cluster(clust);
-+ }
-+ clust->index = pg_to_clust(pgidx, inode);
-+ clust->index_valid = 1;
-+}
-+
-+static inline int
-+alloc_clust_pages(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ assert("edward-791", clust != NULL);
-+ assert("edward-792", inode != NULL);
-+ clust->pages =
-+ kmalloc(sizeof(*clust->pages) << inode_cluster_shift(inode),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+static inline void free_clust_pages(reiser4_cluster_t * clust)
-+{
-+ kfree(clust->pages);
-+}
-+
-+#endif /* __FS_REISER4_CLUSTER_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/compress.c linux-2.6.20/fs/reiser4/plugin/compress/compress.c
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/compress.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/compress.c 2007-05-06 14:50:43.746990723 +0400
-@@ -0,0 +1,381 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* reiser4 compression transform plugins */
-+
-+#include "../../debug.h"
-+#include "../../inode.h"
-+#include "../plugin.h"
-+#include "minilzo.h"
-+
-+#include <linux/zlib.h>
-+#include <linux/types.h>
-+#include <linux/hardirq.h>
-+
-+static int change_compression(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ assert("edward-1316", inode != NULL);
-+ assert("edward-1317", plugin != NULL);
-+ assert("edward-1318", is_reiser4_inode(inode));
-+ assert("edward-1319",
-+ plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE);
-+
-+ /* cannot change compression plugin of already existing regular object */
-+ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+ return RETERR(-EINVAL);
-+
-+ /* If matches, nothing to change. */
-+ if (inode_hash_plugin(inode) != NULL &&
-+ inode_hash_plugin(inode)->h.id == plugin->h.id)
-+ return 0;
-+
-+ return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_COMPRESSION, plugin);
-+}
-+
-+static reiser4_plugin_ops compression_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = &change_compression
-+};
-+
-+/******************************************************************************/
-+/* gzip1 compression */
-+/******************************************************************************/
-+
-+#define GZIP1_DEF_LEVEL Z_BEST_SPEED
-+#define GZIP1_DEF_WINBITS 15
-+#define GZIP1_DEF_MEMLEVEL MAX_MEM_LEVEL
-+
-+static int gzip1_init(void)
-+{
-+ int ret = -EINVAL;
-+#if REISER4_ZLIB
-+ ret = 0;
-+#endif
-+ if (ret == -EINVAL)
-+ warning("edward-1337", "Zlib not compiled into kernel");
-+ return ret;
-+}
-+
-+static int gzip1_overrun(unsigned src_len UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+static coa_t gzip1_alloc(tfm_action act)
-+{
-+ coa_t coa = NULL;
-+#if REISER4_ZLIB
-+ int ret = 0;
-+ switch (act) {
-+ case TFMA_WRITE: /* compress */
-+ coa = reiser4_vmalloc(zlib_deflate_workspacesize());
-+ if (!coa) {
-+ ret = -ENOMEM;
-+ break;
-+ }
-+ memset(coa, 0, zlib_deflate_workspacesize());
-+ break;
-+ case TFMA_READ: /* decompress */
-+ coa = reiser4_vmalloc(zlib_inflate_workspacesize());
-+ if (!coa) {
-+ ret = -ENOMEM;
-+ break;
-+ }
-+ memset(coa, 0, zlib_inflate_workspacesize());
-+ break;
-+ default:
-+ impossible("edward-767",
-+ "trying to alloc workspace for unknown tfm action");
-+ }
-+ if (ret) {
-+ warning("edward-768",
-+ "alloc workspace for gzip1 (tfm action = %d) failed\n",
-+ act);
-+ return ERR_PTR(ret);
-+ }
-+#endif
-+ return coa;
-+}
-+
-+static void gzip1_free(coa_t coa, tfm_action act)
-+{
-+ assert("edward-769", coa != NULL);
-+
-+ switch (act) {
-+ case TFMA_WRITE: /* compress */
-+ vfree(coa);
-+ break;
-+ case TFMA_READ: /* decompress */
-+ vfree(coa);
-+ break;
-+ default:
-+ impossible("edward-770", "unknown tfm action");
-+ }
-+ return;
-+}
-+
-+static int gzip1_min_size_deflate(void)
-+{
-+ return 64;
-+}
-+
-+static void
-+gzip1_compress(coa_t coa, __u8 * src_first, unsigned src_len,
-+	       __u8 * dst_first, unsigned *dst_len)
-+{
-+	/*
-+	 * Deflate @src_len bytes at @src_first into @dst_first.  *@dst_len is
-+	 * the output room on entry and the produced size on success; on any
-+	 * failure it is set to @src_len.  No-op without REISER4_ZLIB.
-+	 */
-+#if REISER4_ZLIB
-+	int status = 0;
-+	struct z_stream_s zs;
-+
-+	memset(&zs, 0, sizeof(zs));
-+	assert("edward-842", coa != NULL);
-+	assert("edward-875", src_len != 0);
-+	zs.workspace = coa;
-+	status = zlib_deflateInit2(&zs, GZIP1_DEF_LEVEL, Z_DEFLATED,
-+				   -GZIP1_DEF_WINBITS, GZIP1_DEF_MEMLEVEL,
-+				   Z_DEFAULT_STRATEGY);
-+	if (status != Z_OK) {
-+		warning("edward-771", "zlib_deflateInit2 returned %d\n", status);
-+		goto give_up;
-+	}
-+	status = zlib_deflateReset(&zs);
-+	if (status != Z_OK) {
-+		warning("edward-772", "zlib_deflateReset returned %d\n", status);
-+		goto give_up;
-+	}
-+	zs.next_in = src_first;
-+	zs.avail_in = src_len;
-+	zs.next_out = dst_first;
-+	zs.avail_out = *dst_len;
-+
-+	status = zlib_deflate(&zs, Z_FINISH);
-+	if (status == Z_STREAM_END) {
-+		*dst_len = zs.total_out;
-+		return;
-+	}
-+	if (status != Z_OK)	/* Z_OK is not logged; presumably "output did not fit" */
-+		warning("edward-773", "zlib_deflate returned %d\n", status);
-+give_up:
-+	*dst_len = src_len;
-+#endif
-+}
-+
-+static void
-+gzip1_decompress(coa_t coa, __u8 * src_first, unsigned src_len,
-+		 __u8 * dst_first, unsigned *dst_len)
-+{
-+	/*
-+	 * Inflate @src_len bytes at @src_first into @dst_first.  *@dst_len is
-+	 * the output room on entry and the produced size on success; on failure
-+	 * it is left untouched and a warning is logged.  No-op without REISER4_ZLIB.
-+	 */
-+#if REISER4_ZLIB
-+	int status = 0;
-+	struct z_stream_s zs;
-+	memset(&zs, 0, sizeof(zs));
-+	assert("edward-843", coa != NULL);
-+	assert("edward-876", src_len != 0);
-+
-+	zs.workspace = coa;
-+	status = zlib_inflateInit2(&zs, -GZIP1_DEF_WINBITS);
-+	if (status != Z_OK) {
-+		warning("edward-774", "zlib_inflateInit2 returned %d\n", status);
-+		return;
-+	}
-+	status = zlib_inflateReset(&zs);
-+	if (status != Z_OK) {
-+		warning("edward-775", "zlib_inflateReset returned %d\n", status);
-+		return;
-+	}
-+	zs.next_in = src_first;
-+	zs.avail_in = src_len;
-+	zs.next_out = dst_first;
-+	zs.avail_out = *dst_len;
-+
-+	status = zlib_inflate(&zs, Z_SYNC_FLUSH);
-+	/*
-+	 * Work around a bug in zlib, which sometimes wants to taste an extra
-+	 * byte when being used in the (undocumented) raw deflate mode.
-+	 * (From USAGI).
-+	 */
-+	if (status == Z_OK && zs.avail_in == 0 && zs.avail_out != 0) {
-+		u8 pad = 0;
-+		zs.next_in = &pad;
-+		zs.avail_in = 1;
-+		status = zlib_inflate(&zs, Z_FINISH);
-+	}
-+	if (status == Z_STREAM_END)
-+		*dst_len = zs.total_out;
-+	else
-+		warning("edward-776", "zlib_inflate returned %d\n", status);
-+#endif
-+}
-+
-+/******************************************************************************/
-+/* lzo1 compression */
-+/******************************************************************************/
-+
-+static int lzo1_init(void)
-+{
-+	/* lzo_init() is the lzoconf.h macro around __lzo_init2(); its status is logged if bad and returned as is */
-+	int status = lzo_init();
-+	if (status != LZO_E_OK)
-+		warning("edward-848", "lzo_init() failed with ret = %d\n", status);
-+	return status;
-+}
-+
-+static int lzo1_overrun(unsigned in_len)
-+{
-+ return in_len / 64 + 16 + 3; /* grows with @in_len; NOTE(review): looks like the LZO1X worst-case expansion bound - confirm */
-+}
-+
-+#define LZO_HEAP_SIZE(size) /* @size rounded up to whole lzo_align_t units, in bytes; fully parenthesised */ \
-+	(sizeof(lzo_align_t) * (((size) + (sizeof(lzo_align_t) - 1)) / sizeof(lzo_align_t)))
-+
-+static coa_t lzo1_alloc(tfm_action act)
-+{
-+	/* zeroed LZO1X-1 work memory for the compress side; the decompress side gets NULL */
-+	coa_t workspace = NULL;
-+	int err = 0;
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+		workspace = reiser4_vmalloc(LZO_HEAP_SIZE(LZO1X_1_MEM_COMPRESS));
-+		if (workspace)
-+			memset(workspace, 0, LZO_HEAP_SIZE(LZO1X_1_MEM_COMPRESS));
-+		else
-+			err = -ENOMEM;
-+		break;
-+	case TFMA_READ:	/* decompress: nothing to allocate */
-+		break;
-+	default:
-+		impossible("edward-877",
-+			   "trying to alloc workspace for unknown tfm action");
-+	}
-+	if (err != 0) {
-+		warning("edward-878",
-+			"alloc workspace for lzo1 (tfm action = %d) failed\n",
-+			act);
-+		return ERR_PTR(err);
-+	}
-+	return workspace;
-+}
-+
-+static void lzo1_free(coa_t coa, tfm_action act)
-+{
-+	/* Only the compress side owns a workspace (see lzo1_alloc()). */
-+	assert("edward-879", coa != NULL);
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+		vfree(coa);
-+		break;
-+	case TFMA_READ:	/* decompress */
-+		impossible("edward-1304",
-+			   "trying to free non-allocated workspace");
-+		break;	/* if impossible() returns, do not also report "unknown tfm action" */
-+	default:
-+		impossible("edward-880", "unknown tfm action");
-+	}
-+}
-+
-+static int lzo1_min_size_deflate(void)
-+{
-+ return 256; /* fixed threshold for lzo1; presumably in bytes - TODO confirm */
-+}
-+
-+static void
-+lzo1_compress(coa_t coa, __u8 * src_first, unsigned src_len,
-+	      __u8 * dst_first, unsigned *dst_len)
-+{
-+	/*
-+	 * Run lzo1x_1_compress() with @coa as work memory, passing @dst_len
-+	 * through to it.  If the call fails, or *@dst_len comes back not
-+	 * smaller than @src_len, *@dst_len is set to @src_len instead.
-+	 */
-+	int rc;
-+
-+	assert("edward-846", coa != NULL);
-+	assert("edward-847", src_len != 0);
-+
-+	rc = lzo1x_1_compress(src_first, src_len, dst_first, dst_len, coa);
-+	if (rc != LZO_E_OK) {
-+		warning("edward-849", "lzo1x_1_compress failed\n");
-+		*dst_len = src_len;
-+	} else if (*dst_len >= src_len) {
-+		/* incompressible data: fall back without logging */
-+		*dst_len = src_len;
-+	}
-+}
-+
-+static void
-+lzo1_decompress(coa_t coa, __u8 * src_first, unsigned src_len,
-+		__u8 * dst_first, unsigned *dst_len)
-+{
-+	/* no work memory on this path: @coa must be NULL; a failure is only logged */
-+	int rc;
-+
-+	assert("edward-851", coa == NULL);
-+	assert("edward-852", src_len != 0);
-+
-+	rc = lzo1x_decompress(src_first, src_len, dst_first, dst_len, NULL);
-+	if (rc != LZO_E_OK)
-+		warning("edward-853", "lzo1x_1_decompress failed\n");
-+}
-+
-+compression_plugin compression_plugins[LAST_COMPRESSION_ID] = { /* built-in compression transforms, indexed by reiser4_compression_id */
-+ [LZO1_COMPRESSION_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = LZO1_COMPRESSION_ID,
-+ .pops = &compression_plugin_ops,
-+ .label = "lzo1",
-+ .desc = "lzo1 compression transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = lzo1_init,
-+ .overrun = lzo1_overrun,
-+ .alloc = lzo1_alloc, /* allocates only for TFMA_WRITE */
-+ .free = lzo1_free,
-+ .min_size_deflate = lzo1_min_size_deflate,
-+ .checksum = reiser4_adler32, /* same checksum routine as gzip1 */
-+ .compress = lzo1_compress,
-+ .decompress = lzo1_decompress
-+ },
-+ [GZIP1_COMPRESSION_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = GZIP1_COMPRESSION_ID,
-+ .pops = &compression_plugin_ops,
-+ .label = "gzip1",
-+ .desc = "gzip1 compression transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = gzip1_init,
-+ .overrun = gzip1_overrun,
-+ .alloc = gzip1_alloc, /* allocates for both TFMA_WRITE and TFMA_READ */
-+ .free = gzip1_free,
-+ .min_size_deflate = gzip1_min_size_deflate,
-+ .checksum = reiser4_adler32, /* same checksum routine as lzo1 */
-+ .compress = gzip1_compress,
-+ .decompress = gzip1_decompress
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/compress.h linux-2.6.20/fs/reiser4/plugin/compress/compress.h
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/compress.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/compress.h 2007-05-06 14:50:43.746990723 +0400
-@@ -0,0 +1,38 @@
-+#if !defined( __FS_REISER4_COMPRESS_H__ )
-+#define __FS_REISER4_COMPRESS_H__
-+
-+#include <linux/types.h>
-+#include <linux/string.h>
-+
-+typedef enum {
-+ TFMA_READ, /* decompress direction */
-+ TFMA_WRITE, /* compress direction */
-+ TFMA_LAST /* number of actions; sizes coa_set */
-+} tfm_action;
-+
-+/* builtin compression plugins */
-+
-+typedef enum {
-+ LZO1_COMPRESSION_ID,
-+ GZIP1_COMPRESSION_ID,
-+ LAST_COMPRESSION_ID, /* number of built-in plugins; sizes coa_set */
-+} reiser4_compression_id;
-+
-+typedef unsigned long cloff_t; /* passed as the index argument of the mode hooks; presumably "cluster offset" - confirm */
-+typedef void *coa_t; /* opaque workspace pointer handed to the compress/decompress methods */
-+typedef coa_t coa_set[LAST_COMPRESSION_ID][TFMA_LAST]; /* one workspace slot per (plugin, action) pair */
-+
-+__u32 reiser4_adler32(char *data, __u32 len);
-+
-+#endif /* __FS_REISER4_COMPRESS_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/compress_mode.c linux-2.6.20/fs/reiser4/plugin/compress/compress_mode.c
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/compress_mode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/compress_mode.c 2007-05-06 14:50:43.750991972 +0400
-@@ -0,0 +1,162 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* This file contains Reiser4 compression mode plugins.
-+
-+ A compression mode plugin is a set of handlers called by the compressor
-+ at flush time. They implement heuristics, including ones that avoid
-+ compressing incompressible data; see
-+ http://www.namesys.com/cryptcompress_design.html for more details.
-+*/
-+#include "../../inode.h"
-+#include "../plugin.h"
-+
-+static int should_deflate_none(struct inode * inode, cloff_t index)
-+{
-+ return 0; /* "none" mode: never ask for deflation; both arguments are ignored */
-+}
-+
-+static int should_deflate_common(struct inode * inode, cloff_t index)
-+{
-+ return compression_is_on(cryptcompress_inode_data(inode)); /* follows the inode's on/off state; @index is ignored */
-+}
-+
-+static int discard_hook_ultim(struct inode *inode, cloff_t index)
-+{
-+ turn_off_compression(cryptcompress_inode_data(inode)); /* the "ultim" mode installs no accept_hook to turn it back on */
-+ return 0; /* always reports success; @index is ignored */
-+}
-+
-+static int discard_hook_lattd(struct inode *inode, cloff_t index)
-+{
-+ cryptcompress_info_t * info = cryptcompress_inode_data(inode);
-+
-+ assert("edward-1462",
-+ get_lattice_factor(info) >= MIN_LATTICE_FACTOR &&
-+ get_lattice_factor(info) <= MAX_LATTICE_FACTOR);
-+
-+ turn_off_compression(info);
-+ if (get_lattice_factor(info) < MAX_LATTICE_FACTOR) /* back off: double the factor while it is below the maximum */
-+ set_lattice_factor(info, get_lattice_factor(info) << 1);
-+ return 0; /* always reports success; @index is ignored */
-+}
-+
-+static int accept_hook_lattd(struct inode *inode, cloff_t index)
-+{
-+ turn_on_compression(cryptcompress_inode_data(inode)); /* switch compression (back) on for this inode */
-+ set_lattice_factor(cryptcompress_inode_data(inode), MIN_LATTICE_FACTOR); /* and reset the factor to MIN_LATTICE_FACTOR */
-+ return 0; /* always reports success; @index is ignored */
-+}
-+
-+/* Check on dynamic lattice: an adaptive compression mode which
-+ defines the following behavior:
-+
-+ Compression is on: try to compress everything and turn
-+ it off whenever a cluster is incompressible.
-+
-+ Compression is off: try to compress clusters of indexes
-+ k * FACTOR (k = 0, 1, 2, ...) and turn it on if one of
-+ them is compressible. If incompressible, then increase FACTOR */
-+
-+/* Tell whether @index lies on the one-dimensional lattice of sparse factor
-+   @factor, i.e. is a multiple of it; a zero factor admits only index 0. */
-+static int is_on_lattice(cloff_t index, int factor)
-+{
-+	return factor == 0 ? index == 0 : index % factor == 0;
-+}
-+
-+static int should_deflate_lattd(struct inode * inode, cloff_t index)
-+{
-+	if (should_deflate_common(inode, index))
-+		return 1;	/* compression is currently on */
-+	/* compression is off: only clusters sitting on the lattice are tried */
-+	return is_on_lattice(index, get_lattice_factor(cryptcompress_inode_data(inode)));
-+}
-+
-+/* compression mode plugins, indexed by compression mode id */
-+compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = {
-+ [NONE_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = NONE_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "none",
-+ .desc = "Compress nothing",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_none,
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ },
-+ /* Check-on-dynamic-lattice adaptive compression mode */
-+ [LATTD_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = LATTD_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "lattd",
-+ .desc = "Check on dynamic lattice",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_lattd,
-+ .accept_hook = accept_hook_lattd, /* the only mode here with an accept hook */
-+ .discard_hook = discard_hook_lattd
-+ },
-+ /* Check-ultimately compression mode:
-+ Turn off compression forever as soon as we meet
-+ incompressible data */
-+ [ULTIM_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = ULTIM_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "ultim",
-+ .desc = "Check ultimately",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_common,
-+ .accept_hook = NULL,
-+ .discard_hook = discard_hook_ultim
-+ },
-+ /* Force-to-compress-everything compression mode */
-+ [FORCE_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = FORCE_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "force",
-+ .desc = "Force to compress everything",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = NULL, /* NOTE(review): no predicate; presumably callers treat NULL as "always deflate" - confirm */
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ },
-+ /* Convert-to-extent compression mode.
-+ In this mode items will be converted to extents and management
-+ will be passed to (classic) unix file plugin as soon as ->write()
-+ detects that the first complete logical cluster (of index #0) is
-+ incompressible. */
-+ [CONVX_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = CONVX_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "conv",
-+ .desc = "Convert to extent",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_common,
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/lzoconf.h linux-2.6.20/fs/reiser4/plugin/compress/lzoconf.h
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/lzoconf.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/lzoconf.h 2007-05-06 14:50:43.750991972 +0400
-@@ -0,0 +1,216 @@
-+/* lzoconf.h -- configuration for the LZO real-time data compression library
-+ adapted for reiser4 compression transform plugin.
-+
-+ This file is part of the LZO real-time data compression library
-+ and not included in any proprietary licenses of reiser4.
-+
-+ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-+ All Rights Reserved.
-+
-+ The LZO library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU General Public License as
-+ published by the Free Software Foundation; either version 2 of
-+ the License, or (at your option) any later version.
-+
-+ The LZO library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with the LZO library; see the file COPYING.
-+ If not, write to the Free Software Foundation, Inc.,
-+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+
-+ Markus F.X.J. Oberhumer
-+ <markus@oberhumer.com>
-+ http://www.oberhumer.com/opensource/lzo/
-+ */
-+
-+#include <linux/kernel.h> /* for UINT_MAX, ULONG_MAX - edward */
-+
-+#ifndef __LZOCONF_H
-+#define __LZOCONF_H
-+
-+#define LZO_VERSION 0x1080
-+#define LZO_VERSION_STRING "1.08"
-+#define LZO_VERSION_DATE "Jul 12 2002"
-+
-+/* internal Autoconf configuration file - only used when building LZO */
-+
-+/***********************************************************************
-+// LZO requires a conforming <limits.h>
-+************************************************************************/
-+
-+#define CHAR_BIT 8
-+#define USHRT_MAX 0xffff
-+
-+/* workaround a cpp bug under hpux 10.20 */
-+#define LZO_0xffffffffL 4294967295ul
-+
-+/***********************************************************************
-+// architecture defines
-+************************************************************************/
-+
-+#if !defined(__LZO_i386)
-+# if defined(__i386__) || defined(__386__) || defined(_M_IX86)
-+# define __LZO_i386
-+# endif
-+#endif
-+
-+/* memory checkers */
-+#if !defined(__LZO_CHECKER)
-+# if defined(__BOUNDS_CHECKING_ON)
-+# define __LZO_CHECKER
-+# elif defined(__CHECKER__)
-+# define __LZO_CHECKER
-+# elif defined(__INSURE__)
-+# define __LZO_CHECKER
-+# elif defined(__PURIFY__)
-+# define __LZO_CHECKER
-+# endif
-+#endif
-+
-+/***********************************************************************
-+// integral and pointer types
-+************************************************************************/
-+
-+/* Integral types with 32 bits or more */
-+#if !defined(LZO_UINT32_MAX)
-+# if (UINT_MAX >= LZO_0xffffffffL)
-+ typedef unsigned int lzo_uint32;
-+ typedef int lzo_int32;
-+# define LZO_UINT32_MAX UINT_MAX
-+# define LZO_INT32_MAX INT_MAX
-+# define LZO_INT32_MIN INT_MIN
-+# elif (ULONG_MAX >= LZO_0xffffffffL)
-+ typedef unsigned long lzo_uint32;
-+ typedef long lzo_int32;
-+# define LZO_UINT32_MAX ULONG_MAX
-+# define LZO_INT32_MAX LONG_MAX
-+# define LZO_INT32_MIN LONG_MIN
-+# else
-+# error "lzo_uint32"
-+# endif
-+#endif
-+
-+/* lzo_uint is used like size_t */
-+#if !defined(LZO_UINT_MAX)
-+# if (UINT_MAX >= LZO_0xffffffffL)
-+ typedef unsigned int lzo_uint;
-+ typedef int lzo_int;
-+# define LZO_UINT_MAX UINT_MAX
-+# define LZO_INT_MAX INT_MAX
-+# define LZO_INT_MIN INT_MIN
-+# elif (ULONG_MAX >= LZO_0xffffffffL)
-+ typedef unsigned long lzo_uint;
-+ typedef long lzo_int;
-+# define LZO_UINT_MAX ULONG_MAX
-+# define LZO_INT_MAX LONG_MAX
-+# define LZO_INT_MIN LONG_MIN
-+# else
-+# error "lzo_uint"
-+# endif
-+#endif
-+
-+ typedef int lzo_bool;
-+
-+/***********************************************************************
-+// memory models
-+************************************************************************/
-+
-+/* Memory model that allows to access memory at offsets of lzo_uint. */
-+#if !defined(__LZO_MMODEL)
-+# if (LZO_UINT_MAX <= UINT_MAX)
-+# define __LZO_MMODEL
-+# else
-+# error "__LZO_MMODEL"
-+# endif
-+#endif
-+
-+/* no typedef here because of const-pointer issues */
-+#define lzo_byte unsigned char __LZO_MMODEL
-+#define lzo_bytep unsigned char __LZO_MMODEL *
-+#define lzo_charp char __LZO_MMODEL *
-+#define lzo_voidp void __LZO_MMODEL *
-+#define lzo_shortp short __LZO_MMODEL *
-+#define lzo_ushortp unsigned short __LZO_MMODEL *
-+#define lzo_uint32p lzo_uint32 __LZO_MMODEL *
-+#define lzo_int32p lzo_int32 __LZO_MMODEL *
-+#define lzo_uintp lzo_uint __LZO_MMODEL *
-+#define lzo_intp lzo_int __LZO_MMODEL *
-+#define lzo_voidpp lzo_voidp __LZO_MMODEL *
-+#define lzo_bytepp lzo_bytep __LZO_MMODEL *
-+
-+#ifndef lzo_sizeof_dict_t
-+# define lzo_sizeof_dict_t sizeof(lzo_bytep)
-+#endif
-+
-+typedef int (*lzo_compress_t) (const lzo_byte * src, lzo_uint src_len,
-+ lzo_byte * dst, lzo_uintp dst_len,
-+ lzo_voidp wrkmem);
-+
-+
-+/***********************************************************************
-+// error codes and prototypes
-+************************************************************************/
-+
-+/* Error codes for the compression/decompression functions. Negative
-+ * values are errors, positive values will be used for special but
-+ * normal events.
-+ */
-+#define LZO_E_OK 0
-+#define LZO_E_ERROR (-1)
-+#define LZO_E_OUT_OF_MEMORY (-2) /* not used right now */
-+#define LZO_E_NOT_COMPRESSIBLE (-3) /* not used right now */
-+#define LZO_E_INPUT_OVERRUN (-4)
-+#define LZO_E_OUTPUT_OVERRUN (-5)
-+#define LZO_E_LOOKBEHIND_OVERRUN (-6)
-+#define LZO_E_EOF_NOT_FOUND (-7)
-+#define LZO_E_INPUT_NOT_CONSUMED (-8)
-+
-+/* lzo_init() should be the first function you call.
-+ * Check the return code !
-+ *
-+ * lzo_init() is a macro to allow checking that the library and the
-+ * compiler's view of various types are consistent.
-+ */
-+#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\
-+ (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\
-+ (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\
-+ (int)sizeof(lzo_compress_t))
-+ extern int __lzo_init2(unsigned, int, int, int, int, int, int,
-+ int, int, int);
-+
-+/* checksum functions */
-+extern lzo_uint32 lzo_crc32(lzo_uint32 _c, const lzo_byte * _buf,
-+ lzo_uint _len);
-+/* misc. */
-+ typedef union {
-+ lzo_bytep p;
-+ lzo_uint u;
-+ } __lzo_pu_u;
-+ typedef union {
-+ lzo_bytep p;
-+ lzo_uint32 u32;
-+ } __lzo_pu32_u;
-+ typedef union {
-+ void *vp;
-+ lzo_bytep bp;
-+ lzo_uint32 u32;
-+ long l;
-+ } lzo_align_t;
-+
-+#define LZO_PTR_ALIGN_UP(_ptr,_size) \
-+ ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size)))
-+
-+/* deprecated - only for backward compatibility */
-+#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size)
-+
-+#endif /* already included */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/Makefile linux-2.6.20/fs/reiser4/plugin/compress/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/Makefile 2007-05-06 14:50:43.750991972 +0400
-@@ -0,0 +1,6 @@
-+# compression plugins: three objects linked into compress_plugins.o
-+obj-$(CONFIG_REISER4_FS) += compress_plugins.o
-+
-+compress_plugins-objs := \
-+	compress.o minilzo.o \
-+	compress_mode.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/minilzo.c linux-2.6.20/fs/reiser4/plugin/compress/minilzo.c
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/minilzo.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/minilzo.c 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,1967 @@
-+/* minilzo.c -- mini subset of the LZO real-time data compression library
-+ adapted for reiser4 compression transform plugin.
-+
-+ This file is part of the LZO real-time data compression library
-+ and not included in any proprietary licenses of reiser4.
-+
-+ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-+ All Rights Reserved.
-+
-+ The LZO library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU General Public License as
-+ published by the Free Software Foundation; either version 2 of
-+ the License, or (at your option) any later version.
-+
-+ The LZO library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with the LZO library; see the file COPYING.
-+ If not, write to the Free Software Foundation, Inc.,
-+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+
-+ Markus F.X.J. Oberhumer
-+ <markus@oberhumer.com>
-+ http://www.oberhumer.com/opensource/lzo/
-+ */
-+
-+/*
-+ * NOTE:
-+ * the full LZO package can be found at
-+ * http://www.oberhumer.com/opensource/lzo/
-+ */
-+
-+#include "../../debug.h" /* for reiser4 assert macro -edward */
-+
-+#define __LZO_IN_MINILZO
-+#define LZO_BUILD
-+
-+#include "minilzo.h"
-+
-+#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1080)
-+# error "version mismatch in miniLZO source files"
-+#endif
-+
-+#ifndef __LZO_CONF_H
-+#define __LZO_CONF_H
-+
-+# define BOUNDS_CHECKING_OFF_DURING(stmt) stmt
-+# define BOUNDS_CHECKING_OFF_IN_EXPR(expr) (expr)
-+
-+# define HAVE_MEMCMP
-+# define HAVE_MEMCPY
-+# define HAVE_MEMMOVE
-+# define HAVE_MEMSET
-+
-+#undef NDEBUG
-+#if !defined(LZO_DEBUG)
-+# define NDEBUG
-+#endif
-+#if defined(LZO_DEBUG) || !defined(NDEBUG)
-+# if !defined(NO_STDIO_H)
-+# include <stdio.h>
-+# endif
-+#endif
-+
-+#if !defined(LZO_COMPILE_TIME_ASSERT)
-+# define LZO_COMPILE_TIME_ASSERT(expr) \
-+ { typedef int __lzo_compile_time_assert_fail[1 - 2 * !(expr)]; }
-+#endif
-+
-+#if !defined(LZO_UNUSED)
-+# if 1
-+# define LZO_UNUSED(var) ((void)&var)
-+# elif 0
-+# define LZO_UNUSED(var) { typedef int __lzo_unused[sizeof(var) ? 2 : 1]; }
-+# else
-+# define LZO_UNUSED(parm) (parm = parm)
-+# endif
-+#endif
-+
-+#if defined(NO_MEMCMP)
-+# undef HAVE_MEMCMP
-+#endif
-+
-+#if !defined(HAVE_MEMSET)
-+# undef memset
-+# define memset lzo_memset
-+#endif
-+
-+# define LZO_BYTE(x) ((unsigned char) ((x) & 0xff))
-+
-+#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b))
-+#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b))
-+#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
-+#define LZO_MIN3(a,b,c) ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c))
-+
-+#define lzo_sizeof(type) ((lzo_uint) (sizeof(type)))
-+
-+#define LZO_HIGH(array) ((lzo_uint) (sizeof(array)/sizeof(*(array))))
-+
-+#define LZO_SIZE(bits) (1u << (bits))
-+#define LZO_MASK(bits) (LZO_SIZE(bits) - 1)
-+
-+#define LZO_LSIZE(bits) (1ul << (bits))
-+#define LZO_LMASK(bits) (LZO_LSIZE(bits) - 1)
-+
-+#define LZO_USIZE(bits) ((lzo_uint) 1 << (bits))
-+#define LZO_UMASK(bits) (LZO_USIZE(bits) - 1)
-+
-+#define LZO_STYPE_MAX(b) (((1l << (8*(b)-2)) - 1l) + (1l << (8*(b)-2)))
-+#define LZO_UTYPE_MAX(b) (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1)))
-+
-+#if !defined(SIZEOF_UNSIGNED)
-+# if (UINT_MAX == 0xffff)
-+# define SIZEOF_UNSIGNED 2
-+# elif (UINT_MAX == LZO_0xffffffffL)
-+# define SIZEOF_UNSIGNED 4
-+# elif (UINT_MAX >= LZO_0xffffffffL)
-+# define SIZEOF_UNSIGNED 8
-+# else
-+# error "SIZEOF_UNSIGNED"
-+# endif
-+#endif
-+
-+#if !defined(SIZEOF_UNSIGNED_LONG)
-+# if (ULONG_MAX == LZO_0xffffffffL)
-+# define SIZEOF_UNSIGNED_LONG 4
-+# elif (ULONG_MAX >= LZO_0xffffffffL)
-+# define SIZEOF_UNSIGNED_LONG 8
-+# else
-+# error "SIZEOF_UNSIGNED_LONG"
-+# endif
-+#endif
-+
-+#if !defined(SIZEOF_SIZE_T)
-+# define SIZEOF_SIZE_T SIZEOF_UNSIGNED
-+#endif
-+#if !defined(SIZE_T_MAX)
-+# define SIZE_T_MAX LZO_UTYPE_MAX(SIZEOF_SIZE_T)
-+#endif
-+
-+#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL)
-+# if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff)
-+# define LZO_UNALIGNED_OK_2
-+# endif
-+# if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL)
-+# define LZO_UNALIGNED_OK_4
-+# endif
-+#endif
-+
-+#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4)
-+# if !defined(LZO_UNALIGNED_OK)
-+# define LZO_UNALIGNED_OK
-+# endif
-+#endif
-+
-+#if defined(__LZO_NO_UNALIGNED)
-+# undef LZO_UNALIGNED_OK
-+# undef LZO_UNALIGNED_OK_2
-+# undef LZO_UNALIGNED_OK_4
-+#endif
-+
-+#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff)
-+# error "LZO_UNALIGNED_OK_2 must not be defined on this system"
-+#endif
-+#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
-+# error "LZO_UNALIGNED_OK_4 must not be defined on this system"
-+#endif
-+
-+#if defined(__LZO_NO_ALIGNED)
-+# undef LZO_ALIGNED_OK_4
-+#endif
-+
-+#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
-+# error "LZO_ALIGNED_OK_4 must not be defined on this system"
-+#endif
-+
-+#define LZO_LITTLE_ENDIAN 1234
-+#define LZO_BIG_ENDIAN 4321
-+#define LZO_PDP_ENDIAN 3412
-+
-+#if !defined(LZO_BYTE_ORDER)
-+# if defined(MFX_BYTE_ORDER)
-+# define LZO_BYTE_ORDER MFX_BYTE_ORDER
-+# elif defined(__LZO_i386)
-+# define LZO_BYTE_ORDER LZO_LITTLE_ENDIAN
-+# elif defined(BYTE_ORDER)
-+# define LZO_BYTE_ORDER BYTE_ORDER
-+# elif defined(__BYTE_ORDER)
-+# define LZO_BYTE_ORDER __BYTE_ORDER
-+# endif
-+#endif
-+
-+#if defined(LZO_BYTE_ORDER)
-+# if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \
-+ (LZO_BYTE_ORDER != LZO_BIG_ENDIAN)
-+# error "invalid LZO_BYTE_ORDER"
-+# endif
-+#endif
-+
-+#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER)
-+# error "LZO_BYTE_ORDER is not defined"
-+#endif
-+
-+#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY
-+
-+#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER)
-+# if defined(__GNUC__) && defined(__i386__)
-+# if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY)
-+# define LZO_OPTIMIZE_GNUC_i386
-+# endif
-+# endif
-+#endif
-+
-+extern const lzo_uint32 _lzo_crc32_table[256];
-+
-+#define _LZO_STRINGIZE(x) #x
-+#define _LZO_MEXPAND(x) _LZO_STRINGIZE(x)
-+
-+#define _LZO_CONCAT2(a,b) a ## b
-+#define _LZO_CONCAT3(a,b,c) a ## b ## c
-+#define _LZO_CONCAT4(a,b,c,d) a ## b ## c ## d
-+#define _LZO_CONCAT5(a,b,c,d,e) a ## b ## c ## d ## e
-+
-+#define _LZO_ECONCAT2(a,b) _LZO_CONCAT2(a,b)
-+#define _LZO_ECONCAT3(a,b,c) _LZO_CONCAT3(a,b,c)
-+#define _LZO_ECONCAT4(a,b,c,d) _LZO_CONCAT4(a,b,c,d)
-+#define _LZO_ECONCAT5(a,b,c,d,e) _LZO_CONCAT5(a,b,c,d,e)
-+
-+#ifndef __LZO_PTR_H
-+#define __LZO_PTR_H
-+
-+#if !defined(lzo_ptrdiff_t)
-+# if (UINT_MAX >= LZO_0xffffffffL)
-+typedef ptrdiff_t lzo_ptrdiff_t;
-+# else
-+typedef long lzo_ptrdiff_t;
-+# endif
-+#endif
-+
-+#if !defined(__LZO_HAVE_PTR_T)
-+# if defined(lzo_ptr_t)
-+# define __LZO_HAVE_PTR_T
-+# endif
-+#endif
-+#if !defined(__LZO_HAVE_PTR_T)
-+# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG)
-+# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG)
-+typedef unsigned long lzo_ptr_t;
-+typedef long lzo_sptr_t;
-+# define __LZO_HAVE_PTR_T
-+# endif
-+# endif
-+#endif
-+#if !defined(__LZO_HAVE_PTR_T)
-+# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED)
-+# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED)
-+typedef unsigned int lzo_ptr_t;
-+typedef int lzo_sptr_t;
-+# define __LZO_HAVE_PTR_T
-+# endif
-+# endif
-+#endif
-+#if !defined(__LZO_HAVE_PTR_T)
-+# if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT)
-+# if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT)
-+typedef unsigned short lzo_ptr_t;
-+typedef short lzo_sptr_t;
-+# define __LZO_HAVE_PTR_T
-+# endif
-+# endif
-+#endif
-+#if !defined(__LZO_HAVE_PTR_T)
-+# if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P)
-+# error "no suitable type for lzo_ptr_t"
-+# else
-+typedef unsigned long lzo_ptr_t;
-+typedef long lzo_sptr_t;
-+# define __LZO_HAVE_PTR_T
-+# endif
-+#endif
-+
-+#define PTR(a) ((lzo_ptr_t) (a))
-+#define PTR_LINEAR(a) PTR(a)
-+#define PTR_ALIGNED_4(a) ((PTR_LINEAR(a) & 3) == 0)
-+#define PTR_ALIGNED_8(a) ((PTR_LINEAR(a) & 7) == 0)
-+#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0)
-+#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0)
-+
-+#define PTR_LT(a,b) (PTR(a) < PTR(b))
-+#define PTR_GE(a,b) (PTR(a) >= PTR(b))
-+#define PTR_DIFF(a,b) ((lzo_ptrdiff_t) (PTR(a) - PTR(b)))
-+#define pd(a,b) ((lzo_uint) ((a)-(b)))
-+
-+typedef union {
-+ char a_char;
-+ unsigned char a_uchar;
-+ short a_short;
-+ unsigned short a_ushort;
-+ int a_int;
-+ unsigned int a_uint;
-+ long a_long;
-+ unsigned long a_ulong;
-+ lzo_int a_lzo_int;
-+ lzo_uint a_lzo_uint;
-+ lzo_int32 a_lzo_int32;
-+ lzo_uint32 a_lzo_uint32;
-+ ptrdiff_t a_ptrdiff_t;
-+ lzo_ptrdiff_t a_lzo_ptrdiff_t;
-+ lzo_ptr_t a_lzo_ptr_t;
-+ lzo_voidp a_lzo_voidp;
-+ void *a_void_p;
-+ lzo_bytep a_lzo_bytep;
-+ lzo_bytepp a_lzo_bytepp;
-+ lzo_uintp a_lzo_uintp;
-+ lzo_uint *a_lzo_uint_p;
-+ lzo_uint32p a_lzo_uint32p;
-+ lzo_uint32 *a_lzo_uint32_p;
-+ unsigned char *a_uchar_p;
-+ char *a_char_p;
-+} lzo_full_align_t;
-+
-+#endif
-+#define LZO_DETERMINISTIC
-+#define LZO_DICT_USE_PTR
-+# define lzo_dict_t const lzo_bytep
-+# define lzo_dict_p lzo_dict_t __LZO_MMODEL *
-+#if !defined(lzo_moff_t)
-+#define lzo_moff_t lzo_uint
-+#endif
-+#endif
-+static lzo_ptr_t __lzo_ptr_linear(const lzo_voidp ptr)
-+{
-+ return PTR_LINEAR(ptr); /* PTR_LINEAR expands to PTR, a plain cast to lzo_ptr_t */
-+}
-+
-+static unsigned __lzo_align_gap(const lzo_voidp ptr, lzo_uint size)
-+{
-+ lzo_ptr_t p, s, n;
-+
-+ assert("lzo-01", size > 0);
-+
-+ p = __lzo_ptr_linear(ptr); /* pointer value as an integer */
-+ s = (lzo_ptr_t) (size - 1);
-+ n = (((p + s) / size) * size) - p; /* distance from p up to the next multiple of size; 0 if already aligned */
-+
-+ assert("lzo-02", (long)n >= 0);
-+ assert("lzo-03", n <= s); /* the gap is always smaller than size */
-+
-+ return (unsigned)n;
-+}
-+
-+#ifndef __LZO_UTIL_H
-+#define __LZO_UTIL_H
-+
-+#ifndef __LZO_CONF_H
-+#endif
-+
-+#if 1 && defined(HAVE_MEMCPY)
-+#define MEMCPY8_DS(dest,src,len) \
-+ memcpy(dest,src,len); \
-+ dest += len; \
-+ src += len
-+#endif
-+
-+#if !defined(MEMCPY8_DS)
-+
-+#define MEMCPY8_DS(dest,src,len) \
-+ { register lzo_uint __l = (len) / 8; \
-+ do { \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ *dest++ = *src++; \
-+ } while (--__l > 0); }
-+
-+#endif
-+
-+#define MEMCPY_DS(dest,src,len) \
-+ do *dest++ = *src++; \
-+ while (--len > 0)
-+
-+#define MEMMOVE_DS(dest,src,len) \
-+ do *dest++ = *src++; \
-+ while (--len > 0)
-+
-+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
-+
-+#define BZERO8_PTR(s,l,n) memset((s),0,(lzo_uint)(l)*(n))
-+
-+#else
-+
-+#define BZERO8_PTR(s,l,n) \
-+ lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
-+
-+#endif
-+#endif
-+
-+/* If you use the LZO library in a product, you *must* keep this
-+ * copyright string in the executable of your product.
-+ */
-+
-+static const lzo_byte __lzo_copyright[] =
-+#if !defined(__LZO_IN_MINLZO) /* NOTE(review): spelled MINLZO, while this file defines __LZO_IN_MINILZO; unless defined elsewhere, the short string is always used - confirm intent */
-+ LZO_VERSION_STRING;
-+#else
-+ "\n\n\n"
-+ "LZO real-time data compression library.\n"
-+ "Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer\n"
-+ "<markus.oberhumer@jk.uni-linz.ac.at>\n"
-+ "http://www.oberhumer.com/opensource/lzo/\n"
-+ "\n"
-+ "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n"
-+ "LZO build date: " __DATE__ " " __TIME__ "\n\n"
-+ "LZO special compilation options:\n"
-+#ifdef __cplusplus
-+ " __cplusplus\n"
-+#endif
-+#if defined(__PIC__)
-+ " __PIC__\n"
-+#elif defined(__pic__)
-+ " __pic__\n"
-+#endif
-+#if (UINT_MAX < LZO_0xffffffffL)
-+ " 16BIT\n"
-+#endif
-+#if defined(__LZO_STRICT_16BIT)
-+ " __LZO_STRICT_16BIT\n"
-+#endif
-+#if (UINT_MAX > LZO_0xffffffffL)
-+ " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n"
-+#endif
-+#if (ULONG_MAX > LZO_0xffffffffL)
-+ " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n"
-+#endif
-+#if defined(LZO_BYTE_ORDER)
-+ " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n"
-+#endif
-+#if defined(LZO_UNALIGNED_OK_2)
-+ " LZO_UNALIGNED_OK_2\n"
-+#endif
-+#if defined(LZO_UNALIGNED_OK_4)
-+ " LZO_UNALIGNED_OK_4\n"
-+#endif
-+#if defined(LZO_ALIGNED_OK_4)
-+ " LZO_ALIGNED_OK_4\n"
-+#endif
-+#if defined(LZO_DICT_USE_PTR)
-+ " LZO_DICT_USE_PTR\n"
-+#endif
-+#if defined(__LZO_QUERY_COMPRESS)
-+ " __LZO_QUERY_COMPRESS\n"
-+#endif
-+#if defined(__LZO_QUERY_DECOMPRESS)
-+ " __LZO_QUERY_DECOMPRESS\n"
-+#endif
-+#if defined(__LZO_IN_MINILZO)
-+ " __LZO_IN_MINILZO\n"
-+#endif
-+ "\n\n" "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__
-+#if defined(__GNUC__) && defined(__VERSION__)
-+ " by gcc " __VERSION__
-+#elif defined(__BORLANDC__)
-+ " by Borland C " _LZO_MEXPAND(__BORLANDC__)
-+#elif defined(_MSC_VER)
-+ " by Microsoft C " _LZO_MEXPAND(_MSC_VER)
-+#elif defined(__PUREC__)
-+ " by Pure C " _LZO_MEXPAND(__PUREC__)
-+#elif defined(__SC__)
-+ " by Symantec C " _LZO_MEXPAND(__SC__)
-+#elif defined(__TURBOC__)
-+ " by Turbo C " _LZO_MEXPAND(__TURBOC__)
-+#elif defined(__WATCOMC__)
-+ " by Watcom C " _LZO_MEXPAND(__WATCOMC__)
-+#endif
-+ " $\n"
-+ "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer $\n";
-+#endif /* __LZO_IN_MINLZO */
-+
-+#define LZO_BASE 65521u
-+#define LZO_NMAX 5552
-+
-+#define LZO_DO1(buf,i) {s1 += buf[i]; s2 += s1;}
-+#define LZO_DO2(buf,i) LZO_DO1(buf,i); LZO_DO1(buf,i+1);
-+#define LZO_DO4(buf,i) LZO_DO2(buf,i); LZO_DO2(buf,i+2);
-+#define LZO_DO8(buf,i) LZO_DO4(buf,i); LZO_DO4(buf,i+4);
-+#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8);
-+
-+# define IS_SIGNED(type) (((type) (-1)) < ((type) 0))
-+# define IS_UNSIGNED(type) (((type) (-1)) > ((type) 0))
-+
-+#define IS_POWER_OF_2(x) (((x) & ((x) - 1)) == 0)
-+
-+static lzo_bool schedule_insns_bug(void);
-+static lzo_bool strength_reduce_bug(int *);
-+
-+# define __lzo_assert(x) ((x) ? 1 : 0)
-+
-+#undef COMPILE_TIME_ASSERT
-+
-+# define COMPILE_TIME_ASSERT(expr) LZO_COMPILE_TIME_ASSERT(expr)
-+
-+static lzo_bool basic_integral_check(void)
-+{
-+ lzo_bool r = 1;
-+
-+ COMPILE_TIME_ASSERT(CHAR_BIT == 8);
-+ COMPILE_TIME_ASSERT(sizeof(char) == 1);
-+ COMPILE_TIME_ASSERT(sizeof(short) >= 2);
-+ COMPILE_TIME_ASSERT(sizeof(long) >= 4);
-+ COMPILE_TIME_ASSERT(sizeof(int) >= sizeof(short));
-+ COMPILE_TIME_ASSERT(sizeof(long) >= sizeof(int));
-+
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint) == sizeof(lzo_int));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == sizeof(lzo_int32));
-+
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= 4);
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= sizeof(unsigned));
-+#if defined(__LZO_STRICT_16BIT)
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint) == 2);
-+#else
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= 4);
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= sizeof(unsigned));
-+#endif
-+
-+#if (USHRT_MAX == 65535u)
-+ COMPILE_TIME_ASSERT(sizeof(short) == 2);
-+#elif (USHRT_MAX == LZO_0xffffffffL)
-+ COMPILE_TIME_ASSERT(sizeof(short) == 4);
-+#elif (USHRT_MAX >= LZO_0xffffffffL)
-+ COMPILE_TIME_ASSERT(sizeof(short) > 4);
-+#endif
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned char));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned short));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned long));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(short));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(int));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(long));
-+
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint32));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int32));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int));
-+
-+ COMPILE_TIME_ASSERT(INT_MAX == LZO_STYPE_MAX(sizeof(int)));
-+ COMPILE_TIME_ASSERT(UINT_MAX == LZO_UTYPE_MAX(sizeof(unsigned)));
-+ COMPILE_TIME_ASSERT(LONG_MAX == LZO_STYPE_MAX(sizeof(long)));
-+ COMPILE_TIME_ASSERT(ULONG_MAX == LZO_UTYPE_MAX(sizeof(unsigned long)));
-+ COMPILE_TIME_ASSERT(USHRT_MAX == LZO_UTYPE_MAX(sizeof(unsigned short)));
-+ COMPILE_TIME_ASSERT(LZO_UINT32_MAX ==
-+ LZO_UTYPE_MAX(sizeof(lzo_uint32)));
-+ COMPILE_TIME_ASSERT(LZO_UINT_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint)));
-+
-+ r &= __lzo_assert(LZO_BYTE(257) == 1);
-+
-+ return r;
-+}
-+
-+static lzo_bool basic_ptr_check(void)
-+{
-+ lzo_bool r = 1;
-+
-+ COMPILE_TIME_ASSERT(sizeof(char *) >= sizeof(int));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_byte *) >= sizeof(char *));
-+
-+ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_byte *));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_voidpp));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_bytepp));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_voidp) >= sizeof(lzo_uint));
-+
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_voidp));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_sptr_t));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) >= sizeof(lzo_uint));
-+
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= 4);
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t));
-+
-+ COMPILE_TIME_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
-+ COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(lzo_uint));
-+
-+#if defined(SIZEOF_CHAR_P)
-+ COMPILE_TIME_ASSERT(SIZEOF_CHAR_P == sizeof(char *));
-+#endif
-+#if defined(SIZEOF_PTRDIFF_T)
-+ COMPILE_TIME_ASSERT(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t));
-+#endif
-+
-+ COMPILE_TIME_ASSERT(IS_SIGNED(ptrdiff_t));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(size_t));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_ptrdiff_t));
-+ COMPILE_TIME_ASSERT(IS_SIGNED(lzo_sptr_t));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_ptr_t));
-+ COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_moff_t));
-+
-+ return r;
-+}
-+
-+static lzo_bool ptr_check(void)
-+{
-+ lzo_bool r = 1;
-+ int i;
-+ char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_full_align_t)];
-+ lzo_bytep wrkmem;
-+ lzo_bytepp dict;
-+ unsigned char x[4 * sizeof(lzo_full_align_t)];
-+ long d;
-+ lzo_full_align_t a;
-+ lzo_full_align_t u;
-+
-+ for (i = 0; i < (int)sizeof(x); i++)
-+ x[i] = LZO_BYTE(i);
-+
-+ wrkmem =
-+ LZO_PTR_ALIGN_UP((lzo_byte *) _wrkmem, sizeof(lzo_full_align_t));
-+
-+ u.a_lzo_bytep = wrkmem;
-+ dict = u.a_lzo_bytepp;
-+
-+ d = (long)((const lzo_bytep)dict - (const lzo_bytep)_wrkmem);
-+ r &= __lzo_assert(d >= 0);
-+ r &= __lzo_assert(d < (long)sizeof(lzo_full_align_t));
-+
-+ memset(&a, 0, sizeof(a));
-+ r &= __lzo_assert(a.a_lzo_voidp == NULL);
-+
-+ memset(&a, 0xff, sizeof(a));
-+ r &= __lzo_assert(a.a_ushort == USHRT_MAX);
-+ r &= __lzo_assert(a.a_uint == UINT_MAX);
-+ r &= __lzo_assert(a.a_ulong == ULONG_MAX);
-+ r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX);
-+ r &= __lzo_assert(a.a_lzo_uint32 == LZO_UINT32_MAX);
-+
-+ if (r == 1) {
-+ for (i = 0; i < 8; i++)
-+ r &= __lzo_assert((const lzo_voidp)(&dict[i]) ==
-+ (const
-+ lzo_voidp)(&wrkmem[i *
-+ sizeof(lzo_byte
-+ *)]));
-+ }
-+
-+ memset(&a, 0, sizeof(a));
-+ r &= __lzo_assert(a.a_char_p == NULL);
-+ r &= __lzo_assert(a.a_lzo_bytep == NULL);
-+ r &= __lzo_assert(NULL == (void *)0);
-+ if (r == 1) {
-+ for (i = 0; i < 10; i++)
-+ dict[i] = wrkmem;
-+ BZERO8_PTR(dict + 1, sizeof(dict[0]), 8);
-+ r &= __lzo_assert(dict[0] == wrkmem);
-+ for (i = 1; i < 9; i++)
-+ r &= __lzo_assert(dict[i] == NULL);
-+ r &= __lzo_assert(dict[9] == wrkmem);
-+ }
-+
-+ if (r == 1) {
-+ unsigned k = 1;
-+ const unsigned n = (unsigned)sizeof(lzo_uint32);
-+ lzo_byte *p0;
-+ lzo_byte *p1;
-+
-+ k += __lzo_align_gap(&x[k], n);
-+ p0 = (lzo_bytep) & x[k];
-+#if defined(PTR_LINEAR)
-+ r &= __lzo_assert((PTR_LINEAR(p0) & (n - 1)) == 0);
-+#else
-+ r &= __lzo_assert(n == 4);
-+ r &= __lzo_assert(PTR_ALIGNED_4(p0));
-+#endif
-+
-+ r &= __lzo_assert(k >= 1);
-+ p1 = (lzo_bytep) & x[1];
-+ r &= __lzo_assert(PTR_GE(p0, p1));
-+
-+ r &= __lzo_assert(k < 1 + n);
-+ p1 = (lzo_bytep) & x[1 + n];
-+ r &= __lzo_assert(PTR_LT(p0, p1));
-+
-+ if (r == 1) {
-+ lzo_uint32 v0, v1;
-+
-+ u.a_uchar_p = &x[k];
-+ v0 = *u.a_lzo_uint32_p;
-+ u.a_uchar_p = &x[k + n];
-+ v1 = *u.a_lzo_uint32_p;
-+
-+ r &= __lzo_assert(v0 > 0);
-+ r &= __lzo_assert(v1 > 0);
-+ }
-+ }
-+
-+ return r;
-+}
-+
-+static int _lzo_config_check(void)
-+{
-+ lzo_bool r = 1;
-+ int i;
-+ union {
-+ lzo_uint32 a;
-+ unsigned short b;
-+ lzo_uint32 aa[4];
-+ unsigned char x[4 * sizeof(lzo_full_align_t)];
-+ } u;
-+
-+ COMPILE_TIME_ASSERT((int)((unsigned char)((signed char)-1)) == 255);
-+ COMPILE_TIME_ASSERT((((unsigned char)128) << (int)(8 * sizeof(int) - 8))
-+ < 0);
-+
-+ r &= basic_integral_check();
-+ r &= basic_ptr_check();
-+ if (r != 1)
-+ return LZO_E_ERROR;
-+
-+ u.a = 0;
-+ u.b = 0;
-+ for (i = 0; i < (int)sizeof(u.x); i++)
-+ u.x[i] = LZO_BYTE(i);
-+
-+#if defined(LZO_BYTE_ORDER)
-+ if (r == 1) {
-+# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-+ lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL);
-+ unsigned short b = (unsigned short)(u.b & 0xffff);
-+ r &= __lzo_assert(a == 0x03020100L);
-+ r &= __lzo_assert(b == 0x0100);
-+# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-+ lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32);
-+ unsigned short b = u.b >> (8 * sizeof(u.b) - 16);
-+ r &= __lzo_assert(a == 0x00010203L);
-+ r &= __lzo_assert(b == 0x0001);
-+# else
-+# error "invalid LZO_BYTE_ORDER"
-+# endif
-+ }
-+#endif
-+
-+#if defined(LZO_UNALIGNED_OK_2)
-+ COMPILE_TIME_ASSERT(sizeof(short) == 2);
-+ if (r == 1) {
-+ unsigned short b[4];
-+
-+ for (i = 0; i < 4; i++)
-+ b[i] = *(const unsigned short *)&u.x[i];
-+
-+# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-+ r &= __lzo_assert(b[0] == 0x0100);
-+ r &= __lzo_assert(b[1] == 0x0201);
-+ r &= __lzo_assert(b[2] == 0x0302);
-+ r &= __lzo_assert(b[3] == 0x0403);
-+# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-+ r &= __lzo_assert(b[0] == 0x0001);
-+ r &= __lzo_assert(b[1] == 0x0102);
-+ r &= __lzo_assert(b[2] == 0x0203);
-+ r &= __lzo_assert(b[3] == 0x0304);
-+# endif
-+ }
-+#endif
-+
-+#if defined(LZO_UNALIGNED_OK_4)
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4);
-+ if (r == 1) {
-+ lzo_uint32 a[4];
-+
-+ for (i = 0; i < 4; i++)
-+ a[i] = *(const lzo_uint32 *)&u.x[i];
-+
-+# if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-+ r &= __lzo_assert(a[0] == 0x03020100L);
-+ r &= __lzo_assert(a[1] == 0x04030201L);
-+ r &= __lzo_assert(a[2] == 0x05040302L);
-+ r &= __lzo_assert(a[3] == 0x06050403L);
-+# elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
-+ r &= __lzo_assert(a[0] == 0x00010203L);
-+ r &= __lzo_assert(a[1] == 0x01020304L);
-+ r &= __lzo_assert(a[2] == 0x02030405L);
-+ r &= __lzo_assert(a[3] == 0x03040506L);
-+# endif
-+ }
-+#endif
-+
-+#if defined(LZO_ALIGNED_OK_4)
-+ COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4);
-+#endif
-+
-+ COMPILE_TIME_ASSERT(lzo_sizeof_dict_t == sizeof(lzo_dict_t));
-+
-+ if (r == 1) {
-+ r &= __lzo_assert(!schedule_insns_bug());
-+ }
-+
-+ if (r == 1) {
-+ static int x[3];
-+ static unsigned xn = 3;
-+ register unsigned j;
-+
-+ for (j = 0; j < xn; j++)
-+ x[j] = (int)j - 3;
-+ r &= __lzo_assert(!strength_reduce_bug(x));
-+ }
-+
-+ if (r == 1) {
-+ r &= ptr_check();
-+ }
-+
-+ return r == 1 ? LZO_E_OK : LZO_E_ERROR;
-+}
-+
-+static lzo_bool schedule_insns_bug(void)
-+{
-+#if defined(__LZO_CHECKER)
-+ return 0;
-+#else
-+ const int clone[] = { 1, 2, 0 };
-+ const int *q;
-+ q = clone;
-+ return (*q) ? 0 : 1;
-+#endif
-+}
-+
-+static lzo_bool strength_reduce_bug(int *x)
-+{
-+ return x[0] != -3 || x[1] != -2 || x[2] != -1;
-+}
-+
-+#undef COMPILE_TIME_ASSERT
-+
-+int __lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5,
-+ int s6, int s7, int s8, int s9)
-+{
-+ int r;
-+
-+ if (v == 0)
-+ return LZO_E_ERROR;
-+
-+ r = (s1 == -1 || s1 == (int)sizeof(short)) &&
-+ (s2 == -1 || s2 == (int)sizeof(int)) &&
-+ (s3 == -1 || s3 == (int)sizeof(long)) &&
-+ (s4 == -1 || s4 == (int)sizeof(lzo_uint32)) &&
-+ (s5 == -1 || s5 == (int)sizeof(lzo_uint)) &&
-+ (s6 == -1 || s6 == (int)lzo_sizeof_dict_t) &&
-+ (s7 == -1 || s7 == (int)sizeof(char *)) &&
-+ (s8 == -1 || s8 == (int)sizeof(lzo_voidp)) &&
-+ (s9 == -1 || s9 == (int)sizeof(lzo_compress_t));
-+ if (!r)
-+ return LZO_E_ERROR;
-+
-+ r = _lzo_config_check();
-+ if (r != LZO_E_OK)
-+ return r;
-+
-+ return r;
-+}
-+
-+#define do_compress _lzo1x_1_do_compress
-+
-+#define LZO_NEED_DICT_H
-+#define D_BITS 14
-+#define D_INDEX1(d,p) d = DM((0x21*DX3(p,5,5,6)) >> 5)
-+#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
-+
-+#ifndef __LZO_CONFIG1X_H
-+#define __LZO_CONFIG1X_H
-+
-+#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z)
-+# define LZO1X
-+#endif
-+
-+#define LZO_EOF_CODE
-+#undef LZO_DETERMINISTIC
-+
-+#define M1_MAX_OFFSET 0x0400
-+#ifndef M2_MAX_OFFSET
-+#define M2_MAX_OFFSET 0x0800
-+#endif
-+#define M3_MAX_OFFSET 0x4000
-+#define M4_MAX_OFFSET 0xbfff
-+
-+#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET)
-+
-+#define M1_MIN_LEN 2
-+#define M1_MAX_LEN 2
-+#define M2_MIN_LEN 3
-+#ifndef M2_MAX_LEN
-+#define M2_MAX_LEN 8
-+#endif
-+#define M3_MIN_LEN 3
-+#define M3_MAX_LEN 33
-+#define M4_MIN_LEN 3
-+#define M4_MAX_LEN 9
-+
-+#define M1_MARKER 0
-+#define M2_MARKER 64
-+#define M3_MARKER 32
-+#define M4_MARKER 16
-+
-+#ifndef MIN_LOOKAHEAD
-+#define MIN_LOOKAHEAD (M2_MAX_LEN + 1)
-+#endif
-+
-+#if defined(LZO_NEED_DICT_H)
-+
-+#ifndef LZO_HASH
-+#define LZO_HASH LZO_HASH_LZO_INCREMENTAL_B
-+#endif
-+#define DL_MIN_LEN M2_MIN_LEN
-+
-+#ifndef __LZO_DICT_H
-+#define __LZO_DICT_H
-+
-+#if !defined(D_BITS) && defined(DBITS)
-+# define D_BITS DBITS
-+#endif
-+#if !defined(D_BITS)
-+# error "D_BITS is not defined"
-+#endif
-+#if (D_BITS < 16)
-+# define D_SIZE LZO_SIZE(D_BITS)
-+# define D_MASK LZO_MASK(D_BITS)
-+#else
-+# define D_SIZE LZO_USIZE(D_BITS)
-+# define D_MASK LZO_UMASK(D_BITS)
-+#endif
-+#define D_HIGH ((D_MASK >> 1) + 1)
-+
-+#if !defined(DD_BITS)
-+# define DD_BITS 0
-+#endif
-+#define DD_SIZE LZO_SIZE(DD_BITS)
-+#define DD_MASK LZO_MASK(DD_BITS)
-+
-+#if !defined(DL_BITS)
-+# define DL_BITS (D_BITS - DD_BITS)
-+#endif
-+#if (DL_BITS < 16)
-+# define DL_SIZE LZO_SIZE(DL_BITS)
-+# define DL_MASK LZO_MASK(DL_BITS)
-+#else
-+# define DL_SIZE LZO_USIZE(DL_BITS)
-+# define DL_MASK LZO_UMASK(DL_BITS)
-+#endif
-+
-+#if (D_BITS != DL_BITS + DD_BITS)
-+# error "D_BITS does not match"
-+#endif
-+#if (D_BITS < 8 || D_BITS > 18)
-+# error "invalid D_BITS"
-+#endif
-+#if (DL_BITS < 8 || DL_BITS > 20)
-+# error "invalid DL_BITS"
-+#endif
-+#if (DD_BITS < 0 || DD_BITS > 6)
-+# error "invalid DD_BITS"
-+#endif
-+
-+#if !defined(DL_MIN_LEN)
-+# define DL_MIN_LEN 3
-+#endif
-+#if !defined(DL_SHIFT)
-+# define DL_SHIFT ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN)
-+#endif
-+
-+#define LZO_HASH_GZIP 1
-+#define LZO_HASH_GZIP_INCREMENTAL 2
-+#define LZO_HASH_LZO_INCREMENTAL_A 3
-+#define LZO_HASH_LZO_INCREMENTAL_B 4
-+
-+#if !defined(LZO_HASH)
-+# error "choose a hashing strategy"
-+#endif
-+
-+#if (DL_MIN_LEN == 3)
-+# define _DV2_A(p,shift1,shift2) \
-+ (((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2])
-+# define _DV2_B(p,shift1,shift2) \
-+ (((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0])
-+# define _DV3_B(p,shift1,shift2,shift3) \
-+ ((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0])
-+#elif (DL_MIN_LEN == 2)
-+# define _DV2_A(p,shift1,shift2) \
-+ (( (lzo_uint32)(p[0]) << shift1) ^ p[1])
-+# define _DV2_B(p,shift1,shift2) \
-+ (( (lzo_uint32)(p[1]) << shift1) ^ p[2])
-+#else
-+# error "invalid DL_MIN_LEN"
-+#endif
-+#define _DV_A(p,shift) _DV2_A(p,shift,shift)
-+#define _DV_B(p,shift) _DV2_B(p,shift,shift)
-+#define DA2(p,s1,s2) \
-+ (((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0])
-+#define DS2(p,s1,s2) \
-+ (((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0])
-+#define DX2(p,s1,s2) \
-+ (((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
-+#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
-+#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
-+#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])
-+#define DMS(v,s) ((lzo_uint) (((v) & (D_MASK >> (s))) << (s)))
-+#define DM(v) DMS(v,0)
-+
-+#if (LZO_HASH == LZO_HASH_GZIP)
-+# define _DINDEX(dv,p) (_DV_A((p),DL_SHIFT))
-+
-+#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL)
-+# define __LZO_HASH_INCREMENTAL
-+# define DVAL_FIRST(dv,p) dv = _DV_A((p),DL_SHIFT)
-+# define DVAL_NEXT(dv,p) dv = (((dv) << DL_SHIFT) ^ p[2])
-+# define _DINDEX(dv,p) (dv)
-+# define DVAL_LOOKAHEAD DL_MIN_LEN
-+
-+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A)
-+# define __LZO_HASH_INCREMENTAL
-+# define DVAL_FIRST(dv,p) dv = _DV_A((p),5)
-+# define DVAL_NEXT(dv,p) \
-+ dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2])
-+# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5)
-+# define DVAL_LOOKAHEAD DL_MIN_LEN
-+
-+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B)
-+# define __LZO_HASH_INCREMENTAL
-+# define DVAL_FIRST(dv,p) dv = _DV_B((p),5)
-+# define DVAL_NEXT(dv,p) \
-+ dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5)))
-+# define _DINDEX(dv,p) ((0x9f5f * (dv)) >> 5)
-+# define DVAL_LOOKAHEAD DL_MIN_LEN
-+
-+#else
-+# error "choose a hashing strategy"
-+#endif
-+
-+#ifndef DINDEX
-+#define DINDEX(dv,p) ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS)
-+#endif
-+#if !defined(DINDEX1) && defined(D_INDEX1)
-+#define DINDEX1 D_INDEX1
-+#endif
-+#if !defined(DINDEX2) && defined(D_INDEX2)
-+#define DINDEX2 D_INDEX2
-+#endif
-+
-+#if !defined(__LZO_HASH_INCREMENTAL)
-+# define DVAL_FIRST(dv,p) ((void) 0)
-+# define DVAL_NEXT(dv,p) ((void) 0)
-+# define DVAL_LOOKAHEAD 0
-+#endif
-+
-+#if !defined(DVAL_ASSERT)
-+#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG)
-+static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte * p)
-+{
-+ lzo_uint32 df;
-+ DVAL_FIRST(df, (p));
-+ assert(DINDEX(dv, p) == DINDEX(df, p));
-+}
-+#else
-+# define DVAL_ASSERT(dv,p) ((void) 0)
-+#endif
-+#endif
-+
-+# define DENTRY(p,in) (p)
-+# define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex]
-+
-+#if (DD_BITS == 0)
-+
-+# define UPDATE_D(dict,drun,dv,p,in) dict[ DINDEX(dv,p) ] = DENTRY(p,in)
-+# define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in)
-+# define UPDATE_P(ptr,drun,p,in) (ptr)[0] = DENTRY(p,in)
-+
-+#else
-+
-+# define UPDATE_D(dict,drun,dv,p,in) \
-+ dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
-+# define UPDATE_I(dict,drun,index,p,in) \
-+ dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
-+# define UPDATE_P(ptr,drun,p,in) \
-+ (ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK
-+
-+#endif
-+
-+#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
-+ (m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset)
-+
-+#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
-+ (BOUNDS_CHECKING_OFF_IN_EXPR( \
-+ (PTR_LT(m_pos,in) || \
-+ (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \
-+ m_off > max_offset) ))
-+
-+#if defined(LZO_DETERMINISTIC)
-+# define LZO_CHECK_MPOS LZO_CHECK_MPOS_DET
-+#else
-+# define LZO_CHECK_MPOS LZO_CHECK_MPOS_NON_DET
-+#endif
-+#endif
-+#endif
-+#endif
-+#define DO_COMPRESS lzo1x_1_compress
-+static
-+lzo_uint do_compress(const lzo_byte * in, lzo_uint in_len,
-+ lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
-+{
-+ register const lzo_byte *ip;
-+ lzo_byte *op;
-+ const lzo_byte *const in_end = in + in_len;
-+ const lzo_byte *const ip_end = in + in_len - M2_MAX_LEN - 5;
-+ const lzo_byte *ii;
-+ lzo_dict_p const dict = (lzo_dict_p) wrkmem;
-+
-+ op = out;
-+ ip = in;
-+ ii = ip;
-+
-+ ip += 4;
-+ for (;;) {
-+ register const lzo_byte *m_pos;
-+
-+ lzo_moff_t m_off;
-+ lzo_uint m_len;
-+ lzo_uint dindex;
-+
-+ DINDEX1(dindex, ip);
-+ GINDEX(m_pos, m_off, dict, dindex, in);
-+ if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET))
-+ goto literal;
-+#if 1
-+ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-+ goto try_match;
-+ DINDEX2(dindex, ip);
-+#endif
-+ GINDEX(m_pos, m_off, dict, dindex, in);
-+ if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET))
-+ goto literal;
-+ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-+ goto try_match;
-+ goto literal;
-+
-+ try_match:
-+#if 1 && defined(LZO_UNALIGNED_OK_2)
-+ if (*(const lzo_ushortp)m_pos != *(const lzo_ushortp)ip) {
-+#else
-+ if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) {
-+#endif
-+ ;
-+ } else {
-+ if (m_pos[2] == ip[2]) {
-+ goto match;
-+ } else {
-+ ;
-+ }
-+ }
-+
-+ literal:
-+ UPDATE_I(dict, 0, dindex, ip, in);
-+ ++ip;
-+ if (ip >= ip_end)
-+ break;
-+ continue;
-+
-+ match:
-+ UPDATE_I(dict, 0, dindex, ip, in);
-+ if (pd(ip, ii) > 0) {
-+ register lzo_uint t = pd(ip, ii);
-+
-+ if (t <= 3) {
-+ assert("lzo-04", op - 2 > out);
-+ op[-2] |= LZO_BYTE(t);
-+ } else if (t <= 18)
-+ *op++ = LZO_BYTE(t - 3);
-+ else {
-+ register lzo_uint tt = t - 18;
-+
-+ *op++ = 0;
-+ while (tt > 255) {
-+ tt -= 255;
-+ *op++ = 0;
-+ }
-+ assert("lzo-05", tt > 0);
-+ *op++ = LZO_BYTE(tt);
-+ }
-+ do
-+ *op++ = *ii++;
-+ while (--t > 0);
-+ }
-+
-+ assert("lzo-06", ii == ip);
-+ ip += 3;
-+ if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++
-+ || m_pos[6] != *ip++ || m_pos[7] != *ip++
-+ || m_pos[8] != *ip++
-+#ifdef LZO1Y
-+ || m_pos[9] != *ip++ || m_pos[10] != *ip++
-+ || m_pos[11] != *ip++ || m_pos[12] != *ip++
-+ || m_pos[13] != *ip++ || m_pos[14] != *ip++
-+#endif
-+ ) {
-+ --ip;
-+ m_len = ip - ii;
-+ assert("lzo-07", m_len >= 3);
-+ assert("lzo-08", m_len <= M2_MAX_LEN);
-+
-+ if (m_off <= M2_MAX_OFFSET) {
-+ m_off -= 1;
-+#if defined(LZO1X)
-+ *op++ =
-+ LZO_BYTE(((m_len -
-+ 1) << 5) | ((m_off & 7) << 2));
-+ *op++ = LZO_BYTE(m_off >> 3);
-+#elif defined(LZO1Y)
-+ *op++ =
-+ LZO_BYTE(((m_len +
-+ 1) << 4) | ((m_off & 3) << 2));
-+ *op++ = LZO_BYTE(m_off >> 2);
-+#endif
-+ } else if (m_off <= M3_MAX_OFFSET) {
-+ m_off -= 1;
-+ *op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
-+ goto m3_m4_offset;
-+ } else
-+#if defined(LZO1X)
-+ {
-+ m_off -= 0x4000;
-+ assert("lzo-09", m_off > 0);
-+ assert("lzo-10", m_off <= 0x7fff);
-+ *op++ = LZO_BYTE(M4_MARKER |
-+ ((m_off & 0x4000) >> 11) |
-+ (m_len - 2));
-+ goto m3_m4_offset;
-+ }
-+#elif defined(LZO1Y)
-+ goto m4_match;
-+#endif
-+ } else {
-+ {
-+ const lzo_byte *end = in_end;
-+ const lzo_byte *m = m_pos + M2_MAX_LEN + 1;
-+ while (ip < end && *m == *ip)
-+ m++, ip++;
-+ m_len = (ip - ii);
-+ }
-+ assert("lzo-11", m_len > M2_MAX_LEN);
-+
-+ if (m_off <= M3_MAX_OFFSET) {
-+ m_off -= 1;
-+ if (m_len <= 33)
-+ *op++ =
-+ LZO_BYTE(M3_MARKER | (m_len - 2));
-+ else {
-+ m_len -= 33;
-+ *op++ = M3_MARKER | 0;
-+ goto m3_m4_len;
-+ }
-+ } else {
-+#if defined(LZO1Y)
-+ m4_match:
-+#endif
-+ m_off -= 0x4000;
-+ assert("lzo-12", m_off > 0);
-+ assert("lzo-13", m_off <= 0x7fff);
-+ if (m_len <= M4_MAX_LEN)
-+ *op++ = LZO_BYTE(M4_MARKER |
-+ ((m_off & 0x4000) >>
-+ 11) | (m_len - 2));
-+ else {
-+ m_len -= M4_MAX_LEN;
-+ *op++ =
-+ LZO_BYTE(M4_MARKER |
-+ ((m_off & 0x4000) >> 11));
-+ m3_m4_len:
-+ while (m_len > 255) {
-+ m_len -= 255;
-+ *op++ = 0;
-+ }
-+ assert("lzo-14", m_len > 0);
-+ *op++ = LZO_BYTE(m_len);
-+ }
-+ }
-+
-+ m3_m4_offset:
-+ *op++ = LZO_BYTE((m_off & 63) << 2);
-+ *op++ = LZO_BYTE(m_off >> 6);
-+ }
-+
-+ ii = ip;
-+ if (ip >= ip_end)
-+ break;
-+ }
-+
-+ *out_len = op - out;
-+ return pd(in_end, ii);
-+}
-+
-+int DO_COMPRESS(const lzo_byte * in, lzo_uint in_len,
-+ lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
-+{
-+ lzo_byte *op = out;
-+ lzo_uint t;
-+
-+#if defined(__LZO_QUERY_COMPRESS)
-+ if (__LZO_IS_COMPRESS_QUERY(in, in_len, out, out_len, wrkmem))
-+ return __LZO_QUERY_COMPRESS(in, in_len, out, out_len, wrkmem,
-+ D_SIZE, lzo_sizeof(lzo_dict_t));
-+#endif
-+
-+ if (in_len <= M2_MAX_LEN + 5)
-+ t = in_len;
-+ else {
-+ t = do_compress(in, in_len, op, out_len, wrkmem);
-+ op += *out_len;
-+ }
-+
-+ if (t > 0) {
-+ const lzo_byte *ii = in + in_len - t;
-+
-+ if (op == out && t <= 238)
-+ *op++ = LZO_BYTE(17 + t);
-+ else if (t <= 3)
-+ op[-2] |= LZO_BYTE(t);
-+ else if (t <= 18)
-+ *op++ = LZO_BYTE(t - 3);
-+ else {
-+ lzo_uint tt = t - 18;
-+
-+ *op++ = 0;
-+ while (tt > 255) {
-+ tt -= 255;
-+ *op++ = 0;
-+ }
-+ assert("lzo-15", tt > 0);
-+ *op++ = LZO_BYTE(tt);
-+ }
-+ do
-+ *op++ = *ii++;
-+ while (--t > 0);
-+ }
-+
-+ *op++ = M4_MARKER | 1;
-+ *op++ = 0;
-+ *op++ = 0;
-+
-+ *out_len = op - out;
-+ return LZO_E_OK;
-+}
-+
-+#undef do_compress
-+#undef DO_COMPRESS
-+#undef LZO_HASH
-+
-+#undef LZO_TEST_DECOMPRESS_OVERRUN
-+#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT
-+#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT
-+#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-+#undef DO_DECOMPRESS
-+#define DO_DECOMPRESS lzo1x_decompress
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2
-+# endif
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2
-+# endif
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-+# endif
-+#endif
-+
-+#undef TEST_IP
-+#undef TEST_OP
-+#undef TEST_LOOKBEHIND
-+#undef NEED_IP
-+#undef NEED_OP
-+#undef HAVE_TEST_IP
-+#undef HAVE_TEST_OP
-+#undef HAVE_NEED_IP
-+#undef HAVE_NEED_OP
-+#undef HAVE_ANY_IP
-+#undef HAVE_ANY_OP
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
-+# define TEST_IP (ip < ip_end)
-+# endif
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
-+# define NEED_IP(x) \
-+ if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun
-+# endif
-+#endif
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
-+# define TEST_OP (op <= op_end)
-+# endif
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
-+# undef TEST_OP
-+# define NEED_OP(x) \
-+ if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun
-+# endif
-+#endif
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-+# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun
-+#else
-+# define TEST_LOOKBEHIND(m_pos,op) ((void) 0)
-+#endif
-+
-+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
-+# define TEST_IP (ip < ip_end)
-+#endif
-+
-+#if defined(TEST_IP)
-+# define HAVE_TEST_IP
-+#else
-+# define TEST_IP 1
-+#endif
-+#if defined(TEST_OP)
-+# define HAVE_TEST_OP
-+#else
-+# define TEST_OP 1
-+#endif
-+
-+#if defined(NEED_IP)
-+# define HAVE_NEED_IP
-+#else
-+# define NEED_IP(x) ((void) 0)
-+#endif
-+#if defined(NEED_OP)
-+# define HAVE_NEED_OP
-+#else
-+# define NEED_OP(x) ((void) 0)
-+#endif
-+
-+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
-+# define HAVE_ANY_IP
-+#endif
-+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
-+# define HAVE_ANY_OP
-+#endif
-+
-+#undef __COPY4
-+#define __COPY4(dst,src) * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src)
-+
-+#undef COPY4
-+#if defined(LZO_UNALIGNED_OK_4)
-+# define COPY4(dst,src) __COPY4(dst,src)
-+#elif defined(LZO_ALIGNED_OK_4)
-+# define COPY4(dst,src) __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src))
-+#endif
-+
-+#if defined(DO_DECOMPRESS)
-+int DO_DECOMPRESS(const lzo_byte * in, lzo_uint in_len,
-+ lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
-+#endif
-+{
-+ register lzo_byte *op;
-+ register const lzo_byte *ip;
-+ register lzo_uint t;
-+#if defined(COPY_DICT)
-+ lzo_uint m_off;
-+ const lzo_byte *dict_end;
-+#else
-+ register const lzo_byte *m_pos;
-+#endif
-+
-+ const lzo_byte *const ip_end = in + in_len;
-+#if defined(HAVE_ANY_OP)
-+ lzo_byte *const op_end = out + *out_len;
-+#endif
-+#if defined(LZO1Z)
-+ lzo_uint last_m_off = 0;
-+#endif
-+
-+ LZO_UNUSED(wrkmem);
-+
-+#if defined(__LZO_QUERY_DECOMPRESS)
-+ if (__LZO_IS_DECOMPRESS_QUERY(in, in_len, out, out_len, wrkmem))
-+ return __LZO_QUERY_DECOMPRESS(in, in_len, out, out_len, wrkmem,
-+ 0, 0);
-+#endif
-+
-+#if defined(COPY_DICT)
-+ if (dict) {
-+ if (dict_len > M4_MAX_OFFSET) {
-+ dict += dict_len - M4_MAX_OFFSET;
-+ dict_len = M4_MAX_OFFSET;
-+ }
-+ dict_end = dict + dict_len;
-+ } else {
-+ dict_len = 0;
-+ dict_end = NULL;
-+ }
-+#endif
-+
-+ *out_len = 0;
-+
-+ op = out;
-+ ip = in;
-+
-+ if (*ip > 17) {
-+ t = *ip++ - 17;
-+ if (t < 4)
-+ goto match_next;
-+ assert("lzo-16", t > 0);
-+ NEED_OP(t);
-+ NEED_IP(t + 1);
-+ do
-+ *op++ = *ip++;
-+ while (--t > 0);
-+ goto first_literal_run;
-+ }
-+
-+ while (TEST_IP && TEST_OP) {
-+ t = *ip++;
-+ if (t >= 16)
-+ goto match;
-+ if (t == 0) {
-+ NEED_IP(1);
-+ while (*ip == 0) {
-+ t += 255;
-+ ip++;
-+ NEED_IP(1);
-+ }
-+ t += 15 + *ip++;
-+ }
-+ assert("lzo-17", t > 0);
-+ NEED_OP(t + 3);
-+ NEED_IP(t + 4);
-+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-+#if !defined(LZO_UNALIGNED_OK_4)
-+ if (PTR_ALIGNED2_4(op, ip)) {
-+#endif
-+ COPY4(op, ip);
-+ op += 4;
-+ ip += 4;
-+ if (--t > 0) {
-+ if (t >= 4) {
-+ do {
-+ COPY4(op, ip);
-+ op += 4;
-+ ip += 4;
-+ t -= 4;
-+ } while (t >= 4);
-+ if (t > 0)
-+ do
-+ *op++ = *ip++;
-+ while (--t > 0);
-+ } else
-+ do
-+ *op++ = *ip++;
-+ while (--t > 0);
-+ }
-+#if !defined(LZO_UNALIGNED_OK_4)
-+ } else
-+#endif
-+#endif
-+#if !defined(LZO_UNALIGNED_OK_4)
-+ {
-+ *op++ = *ip++;
-+ *op++ = *ip++;
-+ *op++ = *ip++;
-+ do
-+ *op++ = *ip++;
-+ while (--t > 0);
-+ }
-+#endif
-+
-+ first_literal_run:
-+
-+ t = *ip++;
-+ if (t >= 16)
-+ goto match;
-+#if defined(COPY_DICT)
-+#if defined(LZO1Z)
-+ m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-+ last_m_off = m_off;
-+#else
-+ m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
-+#endif
-+ NEED_OP(3);
-+ t = 3;
-+ COPY_DICT(t, m_off)
-+#else
-+#if defined(LZO1Z)
-+ t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
-+ m_pos = op - t;
-+ last_m_off = t;
-+#else
-+ m_pos = op - (1 + M2_MAX_OFFSET);
-+ m_pos -= t >> 2;
-+ m_pos -= *ip++ << 2;
-+#endif
-+ TEST_LOOKBEHIND(m_pos, out);
-+ NEED_OP(3);
-+ *op++ = *m_pos++;
-+ *op++ = *m_pos++;
-+ *op++ = *m_pos;
-+#endif
-+ goto match_done;
-+
-+ while (TEST_IP && TEST_OP) {
-+ match:
-+ if (t >= 64) {
-+#if defined(COPY_DICT)
-+#if defined(LZO1X)
-+ m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
-+ t = (t >> 5) - 1;
-+#elif defined(LZO1Y)
-+ m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
-+ t = (t >> 4) - 3;
-+#elif defined(LZO1Z)
-+ m_off = t & 0x1f;
-+ if (m_off >= 0x1c)
-+ m_off = last_m_off;
-+ else {
-+ m_off = 1 + (m_off << 6) + (*ip++ >> 2);
-+ last_m_off = m_off;
-+ }
-+ t = (t >> 5) - 1;
-+#endif
-+#else
-+#if defined(LZO1X)
-+ m_pos = op - 1;
-+ m_pos -= (t >> 2) & 7;
-+ m_pos -= *ip++ << 3;
-+ t = (t >> 5) - 1;
-+#elif defined(LZO1Y)
-+ m_pos = op - 1;
-+ m_pos -= (t >> 2) & 3;
-+ m_pos -= *ip++ << 2;
-+ t = (t >> 4) - 3;
-+#elif defined(LZO1Z)
-+ {
-+ lzo_uint off = t & 0x1f;
-+ m_pos = op;
-+ if (off >= 0x1c) {
-+ assert(last_m_off > 0);
-+ m_pos -= last_m_off;
-+ } else {
-+ off =
-+ 1 + (off << 6) +
-+ (*ip++ >> 2);
-+ m_pos -= off;
-+ last_m_off = off;
-+ }
-+ }
-+ t = (t >> 5) - 1;
-+#endif
-+ TEST_LOOKBEHIND(m_pos, out);
-+ assert("lzo-18", t > 0);
-+ NEED_OP(t + 3 - 1);
-+ goto copy_match;
-+#endif
-+ } else if (t >= 32) {
-+ t &= 31;
-+ if (t == 0) {
-+ NEED_IP(1);
-+ while (*ip == 0) {
-+ t += 255;
-+ ip++;
-+ NEED_IP(1);
-+ }
-+ t += 31 + *ip++;
-+ }
-+#if defined(COPY_DICT)
-+#if defined(LZO1Z)
-+ m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
-+ last_m_off = m_off;
-+#else
-+ m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
-+#endif
-+#else
-+#if defined(LZO1Z)
-+ {
-+ lzo_uint off =
-+ 1 + (ip[0] << 6) + (ip[1] >> 2);
-+ m_pos = op - off;
-+ last_m_off = off;
-+ }
-+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-+ m_pos = op - 1;
-+ m_pos -= (*(const lzo_ushortp)ip) >> 2;
-+#else
-+ m_pos = op - 1;
-+ m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-+#endif
-+#endif
-+ ip += 2;
-+ } else if (t >= 16) {
-+#if defined(COPY_DICT)
-+ m_off = (t & 8) << 11;
-+#else
-+ m_pos = op;
-+ m_pos -= (t & 8) << 11;
-+#endif
-+ t &= 7;
-+ if (t == 0) {
-+ NEED_IP(1);
-+ while (*ip == 0) {
-+ t += 255;
-+ ip++;
-+ NEED_IP(1);
-+ }
-+ t += 7 + *ip++;
-+ }
-+#if defined(COPY_DICT)
-+#if defined(LZO1Z)
-+ m_off += (ip[0] << 6) + (ip[1] >> 2);
-+#else
-+ m_off += (ip[0] >> 2) + (ip[1] << 6);
-+#endif
-+ ip += 2;
-+ if (m_off == 0)
-+ goto eof_found;
-+ m_off += 0x4000;
-+#if defined(LZO1Z)
-+ last_m_off = m_off;
-+#endif
-+#else
-+#if defined(LZO1Z)
-+ m_pos -= (ip[0] << 6) + (ip[1] >> 2);
-+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
-+ m_pos -= (*(const lzo_ushortp)ip) >> 2;
-+#else
-+ m_pos -= (ip[0] >> 2) + (ip[1] << 6);
-+#endif
-+ ip += 2;
-+ if (m_pos == op)
-+ goto eof_found;
-+ m_pos -= 0x4000;
-+#if defined(LZO1Z)
-+ last_m_off = op - m_pos;
-+#endif
-+#endif
-+ } else {
-+#if defined(COPY_DICT)
-+#if defined(LZO1Z)
-+ m_off = 1 + (t << 6) + (*ip++ >> 2);
-+ last_m_off = m_off;
-+#else
-+ m_off = 1 + (t >> 2) + (*ip++ << 2);
-+#endif
-+ NEED_OP(2);
-+ t = 2;
-+ COPY_DICT(t, m_off)
-+#else
-+#if defined(LZO1Z)
-+ t = 1 + (t << 6) + (*ip++ >> 2);
-+ m_pos = op - t;
-+ last_m_off = t;
-+#else
-+ m_pos = op - 1;
-+ m_pos -= t >> 2;
-+ m_pos -= *ip++ << 2;
-+#endif
-+ TEST_LOOKBEHIND(m_pos, out);
-+ NEED_OP(2);
-+ *op++ = *m_pos++;
-+ *op++ = *m_pos;
-+#endif
-+ goto match_done;
-+ }
-+
-+#if defined(COPY_DICT)
-+
-+ NEED_OP(t + 3 - 1);
-+ t += 3 - 1;
-+ COPY_DICT(t, m_off)
-+#else
-+
-+ TEST_LOOKBEHIND(m_pos, out);
-+ assert("lzo-19", t > 0);
-+ NEED_OP(t + 3 - 1);
-+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
-+#if !defined(LZO_UNALIGNED_OK_4)
-+ if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op, m_pos)) {
-+ assert((op - m_pos) >= 4);
-+#else
-+ if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
-+#endif
-+ COPY4(op, m_pos);
-+ op += 4;
-+ m_pos += 4;
-+ t -= 4 - (3 - 1);
-+ do {
-+ COPY4(op, m_pos);
-+ op += 4;
-+ m_pos += 4;
-+ t -= 4;
-+ } while (t >= 4);
-+ if (t > 0)
-+ do
-+ *op++ = *m_pos++;
-+ while (--t > 0);
-+ } else
-+#endif
-+ {
-+ copy_match:
-+ *op++ = *m_pos++;
-+ *op++ = *m_pos++;
-+ do
-+ *op++ = *m_pos++;
-+ while (--t > 0);
-+ }
-+
-+#endif
-+
-+ match_done:
-+#if defined(LZO1Z)
-+ t = ip[-1] & 3;
-+#else
-+ t = ip[-2] & 3;
-+#endif
-+ if (t == 0)
-+ break;
-+
-+ match_next:
-+ assert("lzo-20", t > 0);
-+ NEED_OP(t);
-+ NEED_IP(t + 1);
-+ do
-+ *op++ = *ip++;
-+ while (--t > 0);
-+ t = *ip++;
-+ }
-+ }
-+
-+#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
-+ *out_len = op - out;
-+ return LZO_E_EOF_NOT_FOUND;
-+#endif
-+
-+ eof_found:
-+ assert("lzo-21", t == 1);
-+ *out_len = op - out;
-+ return (ip == ip_end ? LZO_E_OK :
-+ (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
-+
-+#if defined(HAVE_NEED_IP)
-+ input_overrun:
-+ *out_len = op - out;
-+ return LZO_E_INPUT_OVERRUN;
-+#endif
-+
-+#if defined(HAVE_NEED_OP)
-+ output_overrun:
-+ *out_len = op - out;
-+ return LZO_E_OUTPUT_OVERRUN;
-+#endif
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-+ lookbehind_overrun:
-+ *out_len = op - out;
-+ return LZO_E_LOOKBEHIND_OVERRUN;
-+#endif
-+}
-+
-+#define LZO_TEST_DECOMPRESS_OVERRUN
-+#undef DO_DECOMPRESS
-+#define DO_DECOMPRESS lzo1x_decompress_safe
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_INPUT 2
-+# endif
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT 2
-+# endif
-+# if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-+# define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
-+# endif
-+#endif
-+
-+#undef TEST_IP
-+#undef TEST_OP
-+#undef TEST_LOOKBEHIND
-+#undef NEED_IP
-+#undef NEED_OP
-+#undef HAVE_TEST_IP
-+#undef HAVE_TEST_OP
-+#undef HAVE_NEED_IP
-+#undef HAVE_NEED_OP
-+#undef HAVE_ANY_IP
-+#undef HAVE_ANY_OP
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
-+# define TEST_IP (ip < ip_end)
-+# endif
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
-+# define NEED_IP(x) \
-+ if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x)) goto input_overrun
-+# endif
-+#endif
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
-+# define TEST_OP (op <= op_end)
-+# endif
-+# if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
-+# undef TEST_OP
-+# define NEED_OP(x) \
-+ if ((lzo_uint)(op_end - op) < (lzo_uint)(x)) goto output_overrun
-+# endif
-+#endif
-+
-+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
-+# define TEST_LOOKBEHIND(m_pos,out) if (m_pos < out) goto lookbehind_overrun
-+#else
-+# define TEST_LOOKBEHIND(m_pos,op) ((void) 0)
-+#endif
-+
-+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
-+# define TEST_IP (ip < ip_end)
-+#endif
-+
-+#if defined(TEST_IP)
-+# define HAVE_TEST_IP
-+#else
-+# define TEST_IP 1
-+#endif
-+#if defined(TEST_OP)
-+# define HAVE_TEST_OP
-+#else
-+# define TEST_OP 1
-+#endif
-+
-+#if defined(NEED_IP)
-+# define HAVE_NEED_IP
-+#else
-+# define NEED_IP(x) ((void) 0)
-+#endif
-+#if defined(NEED_OP)
-+# define HAVE_NEED_OP
-+#else
-+# define NEED_OP(x) ((void) 0)
-+#endif
-+
-+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
-+# define HAVE_ANY_IP
-+#endif
-+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
-+# define HAVE_ANY_OP
-+#endif
-+
-+#undef __COPY4
-+#define __COPY4(dst,src) * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src)
-+
-+#undef COPY4
-+#if defined(LZO_UNALIGNED_OK_4)
-+# define COPY4(dst,src) __COPY4(dst,src)
-+#elif defined(LZO_ALIGNED_OK_4)
-+# define COPY4(dst,src) __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src))
-+#endif
-+
-+/***** End of minilzo.c *****/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/compress/minilzo.h linux-2.6.20/fs/reiser4/plugin/compress/minilzo.h
---- linux-2.6.20.orig/fs/reiser4/plugin/compress/minilzo.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/compress/minilzo.h 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,70 @@
-+/* minilzo.h -- mini subset of the LZO real-time data compression library
-+ adopted for reiser4 compression transform plugin.
-+
-+ This file is part of the LZO real-time data compression library
-+ and not included in any proprietary licenses of reiser4.
-+
-+ Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
-+ Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
-+ All Rights Reserved.
-+
-+ The LZO library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU General Public License as
-+ published by the Free Software Foundation; either version 2 of
-+ the License, or (at your option) any later version.
-+
-+ The LZO library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with the LZO library; see the file COPYING.
-+ If not, write to the Free Software Foundation, Inc.,
-+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+
-+ Markus F.X.J. Oberhumer
-+ <markus@oberhumer.com>
-+ http://www.oberhumer.com/opensource/lzo/
-+ */
-+
-+/*
-+ * NOTE:
-+ * the full LZO package can be found at
-+ * http://www.oberhumer.com/opensource/lzo/
-+ */
-+
-+#ifndef __MINILZO_H
-+#define __MINILZO_H
-+
-+#define MINILZO_VERSION 0x1080
-+
-+#include "lzoconf.h"
-+
-+/* Memory required for the wrkmem parameter.
-+ * When the required size is 0, you can also pass a NULL pointer.
-+ */
-+
-+#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS
-+#define LZO1X_1_MEM_COMPRESS ((lzo_uint32) (16384L * lzo_sizeof_dict_t))
-+#define LZO1X_MEM_DECOMPRESS (0)
-+
-+/* compression */
-+extern int lzo1x_1_compress(const lzo_byte * src, lzo_uint src_len,
-+ lzo_byte * dst, lzo_uintp dst_len,
-+ lzo_voidp wrkmem);
-+/* decompression */
-+extern int lzo1x_decompress(const lzo_byte * src, lzo_uint src_len,
-+ lzo_byte * dst, lzo_uintp dst_len,
-+ lzo_voidp wrkmem /* NOT USED */);
-+/* safe decompression with overrun testing */
-+extern int lzo1x_decompress_safe(const lzo_byte * src, lzo_uint src_len,
-+ lzo_byte * dst, lzo_uintp dst_len,
-+ lzo_voidp wrkmem /* NOT USED */ );
-+
-+#endif /* already included */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/crypto/cipher.c linux-2.6.20/fs/reiser4/plugin/crypto/cipher.c
---- linux-2.6.20.orig/fs/reiser4/plugin/crypto/cipher.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/crypto/cipher.c 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,37 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser,
-+ licensing governed by reiser4/README */
-+/* Reiser4 cipher transform plugins */
-+
-+#include "../../debug.h"
-+#include "../plugin.h"
-+
-+cipher_plugin cipher_plugins[LAST_CIPHER_ID] = {
-+ [NONE_CIPHER_ID] = {
-+ .h = {
-+ .type_id = REISER4_CIPHER_PLUGIN_TYPE,
-+ .id = NONE_CIPHER_ID,
-+ .pops = NULL,
-+ .label = "none",
-+ .desc = "no cipher transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .alloc = NULL,
-+ .free = NULL,
-+ .scale = NULL,
-+ .align_stream = NULL,
-+ .setkey = NULL,
-+ .encrypt = NULL,
-+ .decrypt = NULL
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/crypto/cipher.h linux-2.6.20/fs/reiser4/plugin/crypto/cipher.h
---- linux-2.6.20.orig/fs/reiser4/plugin/crypto/cipher.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/crypto/cipher.h 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,55 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* This file contains definitions for the objects operated
-+ by reiser4 key manager, which is something like keyring
-+ wrapped by appropriate reiser4 plugin */
-+
-+#if !defined( __FS_REISER4_CRYPT_H__ )
-+#define __FS_REISER4_CRYPT_H__
-+
-+#include <linux/crypto.h>
-+
-+/* key info imported from user space */
-+typedef struct crypto_data {
-+ int keysize; /* uninstantiated key size */
-+ __u8 * key; /* uninstantiated key */
-+ int keyid_size; /* size of passphrase */
-+ __u8 * keyid; /* passphrase */
-+} crypto_data_t;
-+
-+/* This object contains all needed infrastructure to implement
-+ cipher transform. This is operated (allocating, inheriting,
-+ validating, binding to host inode, etc..) by reiser4 key manager.
-+
-+ This info can be allocated in two cases:
-+ 1. importing a key from user space.
-+ 2. reading inode from disk */
-+typedef struct crypto_stat {
-+ struct inode * host;
-+ struct crypto_hash * digest;
-+ struct crypto_blkcipher * cipher;
-+#if 0
-+ cipher_key_plugin * kplug; /* key manager */
-+#endif
-+ __u8 * keyid; /* key fingerprint, created by digest plugin,
-+ using uninstantiated key and passphrase.
-+ supposed to be stored in disk stat-data */
-+ int inst; /* this indicates if the cipher key is
-+ instantiated (case 1 above) */
-+ int keysize; /* uninstantiated key size (bytes), supposed
-+ to be stored in disk stat-data */
-+ int keyload_count; /* number of the objects which has this
-+ crypto-stat attached */
-+} crypto_stat_t;
-+
-+#endif /* __FS_REISER4_CRYPT_H__ */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/crypto/digest.c linux-2.6.20/fs/reiser4/plugin/crypto/digest.c
---- linux-2.6.20.orig/fs/reiser4/plugin/crypto/digest.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/crypto/digest.c 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,58 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* reiser4 digest transform plugin (is used by cryptcompress object plugin) */
-+/* EDWARD-FIXME-HANS: and it does what? a digest is a what? */
-+#include "../../debug.h"
-+#include "../plugin_header.h"
-+#include "../plugin.h"
-+#include "../file/cryptcompress.h"
-+
-+#include <linux/types.h>
-+
-+extern digest_plugin digest_plugins[LAST_DIGEST_ID];
-+
-+static struct crypto_hash * alloc_sha256 (void)
-+{
-+#if REISER4_SHA256
-+ return crypto_alloc_hash ("sha256", 0, CRYPTO_ALG_ASYNC);
-+#else
-+ warning("edward-1418", "sha256 unsupported");
-+ return ERR_PTR(-EINVAL);
-+#endif
-+}
-+
-+static void free_sha256 (struct crypto_hash * tfm)
-+{
-+#if REISER4_SHA256
-+ crypto_free_hash(tfm);
-+#endif
-+ return;
-+}
-+
-+/* digest plugins */
-+digest_plugin digest_plugins[LAST_DIGEST_ID] = {
-+ [SHA256_32_DIGEST_ID] = {
-+ .h = {
-+ .type_id = REISER4_DIGEST_PLUGIN_TYPE,
-+ .id = SHA256_32_DIGEST_ID,
-+ .pops = NULL,
-+ .label = "sha256_32",
-+ .desc = "sha256_32 digest transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fipsize = sizeof(__u32),
-+ .alloc = alloc_sha256,
-+ .free = free_sha256
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/dir/dir.h linux-2.6.20/fs/reiser4/plugin/dir/dir.h
---- linux-2.6.20.orig/fs/reiser4/plugin/dir/dir.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/dir/dir.h 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,36 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* this file contains declarations of methods implementing directory plugins */
-+
-+#if !defined( __REISER4_DIR_H__ )
-+#define __REISER4_DIR_H__
-+
-+/*#include "../../key.h"
-+
-+#include <linux/fs.h>*/
-+
-+/* declarations of functions implementing HASHED_DIR_PLUGIN_ID dir plugin */
-+
-+/* "hashed" directory methods of dir plugin */
-+void build_entry_key_hashed(const struct inode *, const struct qstr *,
-+ reiser4_key *);
-+
-+/* declarations of functions implementing SEEKABLE_HASHED_DIR_PLUGIN_ID dir plugin */
-+
-+/* "seekable" directory methods of dir plugin */
-+void build_entry_key_seekable(const struct inode *, const struct qstr *,
-+ reiser4_key *);
-+
-+/* __REISER4_DIR_H__ */
-+#endif
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/dir/hashed_dir.c linux-2.6.20/fs/reiser4/plugin/dir/hashed_dir.c
---- linux-2.6.20.orig/fs/reiser4/plugin/dir/hashed_dir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/dir/hashed_dir.c 2007-05-06 14:50:43.754993222 +0400
-@@ -0,0 +1,81 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Directory plugin using hashes (see fs/reiser4/plugin/hash.c) to map file
-+ names to the files. */
-+
-+/*
-+ * Hashed directory logically consists of persistent directory
-+ * entries. Directory entry is a pair of a file name and a key of stat-data of
-+ * a file that has this name in the given directory.
-+ *
-+ * Directory entries are stored in the tree in the form of directory
-+ * items. Directory item should implement dir_entry_ops portion of item plugin
-+ * interface (see plugin/item/item.h). Hashed directory interacts with
-+ * directory item plugin exclusively through dir_entry_ops operations.
-+ *
-+ * Currently there are two implementations of directory items: "simple
-+ * directory item" (plugin/item/sde.[ch]), and "compound directory item"
-+ * (plugin/item/cde.[ch]) with the latter being the default.
-+ *
-+ * There is, however some delicate way through which directory code interferes
-+ * with item plugin: key assignment policy. A key for a directory item is
-+ * chosen by directory code, and as described in kassign.c, this key contains
-+ * a portion of file name. Directory item uses this knowledge to avoid storing
-+ * this portion of file name twice: in the key and in the directory item body.
-+ *
-+ */
-+
-+#include "../../inode.h"
-+
-+void complete_entry_key(const struct inode *, const char *name,
-+ int len, reiser4_key * result);
-+
-+/* this is implementation of build_entry_key method of dir
-+ plugin for HASHED_DIR_PLUGIN_ID
-+ */
-+void build_entry_key_hashed(const struct inode *dir, /* directory where entry is
-+ * (or will be) in.*/
-+ const struct qstr *qname, /* name of file referenced
-+ * by this entry */
-+ reiser4_key * result /* resulting key of directory
-+ * entry */ )
-+{
-+ const char *name;
-+ int len;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1140", qname != NULL);
-+ assert("nikita-1141", qname->name != NULL);
-+ assert("nikita-1142", result != NULL);
-+
-+ name = qname->name;
-+ len = qname->len;
-+
-+ assert("nikita-2867", strlen(name) == len);
-+
-+ reiser4_key_init(result);
-+ /* locality of directory entry's key is objectid of parent
-+ directory */
-+ set_key_locality(result, get_inode_oid(dir));
-+ /* minor packing locality is constant */
-+ set_key_type(result, KEY_FILE_NAME_MINOR);
-+ /* dot is special case---we always want it to be first entry in
-+ a directory. Actually, we just want to have smallest
-+ directory entry.
-+ */
-+ if (len == 1 && name[0] == '.')
-+ return;
-+
-+ /* initialize part of entry key which depends on file name */
-+ complete_entry_key(dir, name, len, result);
-+}
-+
-+/* Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/dir/Makefile linux-2.6.20/fs/reiser4/plugin/dir/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/dir/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/dir/Makefile 2007-05-06 14:50:43.758994472 +0400
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += dir_plugins.o
-+
-+dir_plugins-objs := \
-+ hashed_dir.o \
-+ seekable_dir.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/dir/seekable_dir.c linux-2.6.20/fs/reiser4/plugin/dir/seekable_dir.c
---- linux-2.6.20.orig/fs/reiser4/plugin/dir/seekable_dir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/dir/seekable_dir.c 2007-05-06 14:50:43.758994472 +0400
-@@ -0,0 +1,46 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../../inode.h"
-+
-+/* this is implementation of build_entry_key method of dir
-+ plugin for SEEKABLE_HASHED_DIR_PLUGIN_ID
-+ This is for directories where we want repeatable and restartable readdir()
-+ even in case 32bit user level struct dirent (readdir(3)).
-+*/
-+void
-+build_entry_key_seekable(const struct inode *dir, const struct qstr *name,
-+ reiser4_key * result)
-+{
-+ oid_t objectid;
-+
-+ assert("nikita-2283", dir != NULL);
-+ assert("nikita-2284", name != NULL);
-+ assert("nikita-2285", name->name != NULL);
-+ assert("nikita-2286", result != NULL);
-+
-+ reiser4_key_init(result);
-+ /* locality of directory entry's key is objectid of parent
-+ directory */
-+ set_key_locality(result, get_inode_oid(dir));
-+ /* minor packing locality is constant */
-+ set_key_type(result, KEY_FILE_NAME_MINOR);
-+ /* dot is special case---we always want it to be first entry in
-+ a directory. Actually, we just want to have smallest
-+ directory entry.
-+ */
-+ if ((name->len == 1) && (name->name[0] == '.'))
-+ return;
-+
-+ /* objectid of key is 31 lowest bits of hash. */
-+ objectid =
-+ inode_hash_plugin(dir)->hash(name->name,
-+ (int)name->len) & 0x7fffffff;
-+
-+ assert("nikita-2303", !(objectid & ~KEY_OBJECTID_MASK));
-+ set_key_objectid(result, objectid);
-+
-+ /* offset is always 0. */
-+ set_key_offset(result, (__u64) 0);
-+ return;
-+}
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/dir_plugin_common.c linux-2.6.20/fs/reiser4/plugin/dir_plugin_common.c
---- linux-2.6.20.orig/fs/reiser4/plugin/dir_plugin_common.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/dir_plugin_common.c 2007-05-06 14:50:43.758994472 +0400
-@@ -0,0 +1,872 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for most of methods of
-+ directory plugin
-+*/
-+
-+#include "../inode.h"
-+
-+int reiser4_find_entry(struct inode *dir, struct dentry *name,
-+ lock_handle *, znode_lock_mode, reiser4_dir_entry_desc *);
-+int reiser4_lookup_name(struct inode *parent, struct dentry *dentry, reiser4_key * key);
-+void check_light_weight(struct inode *inode, struct inode *parent);
-+
-+/* this is common implementation of get_parent method of dir plugin
-+ this is used by NFS kernel server to "climb" up directory tree to
-+ check permissions
-+ */
-+struct dentry *get_parent_common(struct inode *child)
-+{
-+ struct super_block *s;
-+ struct inode *parent;
-+ struct dentry dotdot;
-+ struct dentry *dentry;
-+ reiser4_key key;
-+ int result;
-+
-+ /*
-+ * lookup dotdot entry.
-+ */
-+
-+ s = child->i_sb;
-+ memset(&dotdot, 0, sizeof(dotdot));
-+ dotdot.d_name.name = "..";
-+ dotdot.d_name.len = 2;
-+ dotdot.d_op = &get_super_private(s)->ops.dentry;
-+
-+ result = reiser4_lookup_name(child, &dotdot, &key);
-+ if (result != 0)
-+ return ERR_PTR(result);
-+
-+ parent = reiser4_iget(s, &key, 1);
-+ if (!IS_ERR(parent)) {
-+ /*
-+ * FIXME-NIKITA dubious: attributes are inherited from @child
-+ * to @parent. But:
-+ *
-+ * (*) this is the only this we can do
-+ *
-+ * (*) attributes of light-weight object are inherited
-+ * from a parent through which object was looked up first,
-+ * so it is ambiguous anyway.
-+ *
-+ */
-+ check_light_weight(parent, child);
-+ reiser4_iget_complete(parent);
-+ dentry = d_alloc_anon(parent);
-+ if (dentry == NULL) {
-+ iput(parent);
-+ dentry = ERR_PTR(RETERR(-ENOMEM));
-+ } else
-+ dentry->d_op = &get_super_private(s)->ops.dentry;
-+ } else if (PTR_ERR(parent) == -ENOENT)
-+ dentry = ERR_PTR(RETERR(-ESTALE));
-+ else
-+ dentry = (void *)parent;
-+ return dentry;
-+}
-+
-+/* this is common implementation of is_name_acceptable method of dir
-+ plugin
-+ */
-+int is_name_acceptable_common(const struct inode *inode, /* directory to check */
-+ const char *name UNUSED_ARG, /* name to check */
-+ int len /* @name's length */ )
-+{
-+ assert("nikita-733", inode != NULL);
-+ assert("nikita-734", name != NULL);
-+ assert("nikita-735", len > 0);
-+
-+ return len <= reiser4_max_filename_len(inode);
-+}
-+
-+/* there is no common implementation of build_entry_key method of dir
-+ plugin. See plugin/dir/hashed_dir.c:build_entry_key_hashed() or
-+ plugin/dir/seekable.c:build_entry_key_seekable() for example
-+*/
-+
-+/* this is common implementation of build_readdir_key method of dir
-+ plugin
-+ see reiser4_readdir_common for more details
-+*/
-+int build_readdir_key_common(struct file *dir /* directory being read */ ,
-+ reiser4_key * result /* where to store key */ )
-+{
-+ reiser4_file_fsdata *fdata;
-+ struct inode *inode;
-+
-+ assert("nikita-1361", dir != NULL);
-+ assert("nikita-1362", result != NULL);
-+ assert("nikita-1363", dir->f_dentry != NULL);
-+ inode = dir->f_dentry->d_inode;
-+ assert("nikita-1373", inode != NULL);
-+
-+ fdata = reiser4_get_file_fsdata(dir);
-+ if (IS_ERR(fdata))
-+ return PTR_ERR(fdata);
-+ assert("nikita-1364", fdata != NULL);
-+ return extract_key_from_de_id(get_inode_oid(inode),
-+ &fdata->dir.readdir.position.
-+ dir_entry_key, result);
-+
-+}
-+
-+void reiser4_adjust_dir_file(struct inode *, const struct dentry *, int offset,
-+ int adj);
-+
-+/* this is common implementation of add_entry method of dir plugin
-+*/
-+int reiser4_add_entry_common(struct inode *object, /* directory to add new name
-+ * in */
-+ struct dentry *where, /* new name */
-+ reiser4_object_create_data * data, /* parameters of
-+ * new object */
-+ reiser4_dir_entry_desc * entry /* parameters of
-+ * new directory
-+ * entry */)
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ reiser4_dentry_fsdata *fsdata;
-+ reiser4_block_nr reserve;
-+
-+ assert("nikita-1114", object != NULL);
-+ assert("nikita-1250", where != NULL);
-+
-+ fsdata = reiser4_get_dentry_fsdata(where);
-+ if (unlikely(IS_ERR(fsdata)))
-+ return PTR_ERR(fsdata);
-+
-+ reserve = inode_dir_plugin(object)->estimate.add_entry(object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ init_lh(&lh);
-+ coord = &fsdata->dec.entry_coord;
-+ coord_clear_iplug(coord);
-+
-+ /* check for this entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(object, where, &lh, ZNODE_WRITE_LOCK,
-+ entry);
-+ if (likely(result == -ENOENT)) {
-+ /* add new entry. Just pass control to the directory
-+ item plugin. */
-+ assert("nikita-1709", inode_dir_item_plugin(object));
-+ assert("nikita-2230", coord->node == lh.node);
-+ reiser4_seal_done(&fsdata->dec.entry_seal);
-+ result =
-+ inode_dir_item_plugin(object)->s.dir.add_entry(object,
-+ coord, &lh,
-+ where,
-+ entry);
-+ if (result == 0) {
-+ reiser4_adjust_dir_file(object, where,
-+ fsdata->dec.pos + 1, +1);
-+ INODE_INC_FIELD(object, i_size);
-+ }
-+ } else if (result == 0) {
-+ assert("nikita-2232", coord->node == lh.node);
-+ result = RETERR(-EEXIST);
-+ }
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+/**
-+ * rem_entry - remove entry from directory item
-+ * @dir:
-+ * @dentry:
-+ * @entry:
-+ * @coord:
-+ * @lh:
-+ *
-+ * Checks that coordinate @coord is set properly and calls item plugin
-+ * method to cut entry.
-+ */
-+static int
-+rem_entry(struct inode *dir, struct dentry *dentry,
-+ reiser4_dir_entry_desc * entry, coord_t * coord, lock_handle * lh)
-+{
-+ item_plugin *iplug;
-+ struct inode *child;
-+
-+ iplug = inode_dir_item_plugin(dir);
-+ child = dentry->d_inode;
-+ assert("nikita-3399", child != NULL);
-+
-+ /* check that we are really destroying an entry for @child */
-+ if (REISER4_DEBUG) {
-+ int result;
-+ reiser4_key key;
-+
-+ result = iplug->s.dir.extract_key(coord, &key);
-+ if (result != 0)
-+ return result;
-+ if (get_key_objectid(&key) != get_inode_oid(child)) {
-+ warning("nikita-3397",
-+ "rem_entry: %#llx != %#llx\n",
-+ get_key_objectid(&key),
-+ (unsigned long long)get_inode_oid(child));
-+ return RETERR(-EIO);
-+ }
-+ }
-+ return iplug->s.dir.rem_entry(dir, &dentry->d_name, coord, lh, entry);
-+}
-+
-+/**
-+ * reiser4_rem_entry_common - remove entry from a directory
-+ * @dir: directory to remove entry from
-+ * @where: name that is being removed
-+ * @entry: description of entry being removed
-+ *
-+ * This is common implementation of rem_entry method of dir plugin.
-+ */
-+int reiser4_rem_entry_common(struct inode *dir,
-+ struct dentry *dentry,
-+ reiser4_dir_entry_desc *entry)
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ reiser4_dentry_fsdata *fsdata;
-+ __u64 tograb;
-+
-+ assert("nikita-1124", dir != NULL);
-+ assert("nikita-1125", dentry != NULL);
-+
-+ tograb = inode_dir_plugin(dir)->estimate.rem_entry(dir);
-+ result = reiser4_grab_space(tograb, BA_CAN_COMMIT | BA_RESERVED);
-+ if (result != 0)
-+ return RETERR(-ENOSPC);
-+
-+ init_lh(&lh);
-+
-+ /* check for this entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(dir, dentry, &lh, ZNODE_WRITE_LOCK, entry);
-+ fsdata = reiser4_get_dentry_fsdata(dentry);
-+ if (IS_ERR(fsdata)) {
-+ done_lh(&lh);
-+ return PTR_ERR(fsdata);
-+ }
-+
-+ coord = &fsdata->dec.entry_coord;
-+
-+ assert("nikita-3404",
-+ get_inode_oid(dentry->d_inode) != get_inode_oid(dir) ||
-+ dir->i_size <= 1);
-+
-+ coord_clear_iplug(coord);
-+ if (result == 0) {
-+ /* remove entry. Just pass control to the directory item
-+ plugin. */
-+ assert("vs-542", inode_dir_item_plugin(dir));
-+ reiser4_seal_done(&fsdata->dec.entry_seal);
-+ reiser4_adjust_dir_file(dir, dentry, fsdata->dec.pos, -1);
-+ result =
-+ WITH_COORD(coord,
-+ rem_entry(dir, dentry, entry, coord, &lh));
-+ if (result == 0) {
-+ if (dir->i_size >= 1)
-+ INODE_DEC_FIELD(dir, i_size);
-+ else {
-+ warning("nikita-2509", "Dir %llu is runt",
-+ (unsigned long long)
-+ get_inode_oid(dir));
-+ result = RETERR(-EIO);
-+ }
-+
-+ assert("nikita-3405", dentry->d_inode->i_nlink != 1 ||
-+ dentry->d_inode->i_size != 2 ||
-+ inode_dir_plugin(dentry->d_inode) == NULL);
-+ }
-+ }
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+static reiser4_block_nr estimate_init(struct inode *parent,
-+ struct inode *object);
-+static int create_dot_dotdot(struct inode *object, struct inode *parent);
-+
-+/* this is common implementation of init method of dir plugin
-+ create "." and ".." entries
-+*/
-+int reiser4_dir_init_common(struct inode *object, /* new directory */
-+ struct inode *parent, /* parent directory */
-+ reiser4_object_create_data * data /* info passed
-+ * to us, this
-+ * is filled by
-+ * reiser4()
-+ * syscall in
-+ * particular */)
-+{
-+ reiser4_block_nr reserve;
-+
-+ assert("nikita-680", object != NULL);
-+ assert("nikita-681", S_ISDIR(object->i_mode));
-+ assert("nikita-682", parent != NULL);
-+ assert("nikita-684", data != NULL);
-+ assert("nikita-686", data->id == DIRECTORY_FILE_PLUGIN_ID);
-+ assert("nikita-687", object->i_mode & S_IFDIR);
-+
-+ reserve = estimate_init(parent, object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ return create_dot_dotdot(object, parent);
-+}
-+
-+/* this is common implementation of done method of dir plugin
-+ remove "." entry
-+*/
-+int reiser4_dir_done_common(struct inode *object /* object being deleted */ )
-+{
-+ int result;
-+ reiser4_block_nr reserve;
-+ struct dentry goodby_dots;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-1449", object != NULL);
-+
-+ if (reiser4_inode_get_flag(object, REISER4_NO_SD))
-+ return 0;
-+
-+ /* of course, this can be rewritten to sweep everything in one
-+ reiser4_cut_tree(). */
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* FIXME: this done method is called from reiser4_delete_dir_common which
-+ * reserved space already */
-+ reserve = inode_dir_plugin(object)->estimate.rem_entry(object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT | BA_RESERVED))
-+ return RETERR(-ENOSPC);
-+
-+ memset(&goodby_dots, 0, sizeof goodby_dots);
-+ entry.obj = goodby_dots.d_inode = object;
-+ goodby_dots.d_name.name = ".";
-+ goodby_dots.d_name.len = 1;
-+ result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
-+ reiser4_free_dentry_fsdata(&goodby_dots);
-+ if (unlikely(result != 0 && result != -ENOMEM && result != -ENOENT))
-+ /* only worth a warning
-+
-+ "values of \ eB\ f will give rise to dom!\n"
-+ -- v6src/s2/mv.c:89
-+ */
-+ warning("nikita-2252", "Cannot remove dot of %lli: %i",
-+ (unsigned long long)get_inode_oid(object), result);
-+ return 0;
-+}
-+
-+/* this is common implementation of attach method of dir plugin
-+*/
-+int reiser4_attach_common(struct inode *child UNUSED_ARG,
-+ struct inode *parent UNUSED_ARG)
-+{
-+ assert("nikita-2647", child != NULL);
-+ assert("nikita-2648", parent != NULL);
-+
-+ return 0;
-+}
-+
-+/* this is common implementation of detach method of dir plugin
-+ remove "..", decrease nlink on parent
-+*/
-+int reiser4_detach_common(struct inode *object, struct inode *parent)
-+{
-+ int result;
-+ struct dentry goodby_dots;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-2885", object != NULL);
-+ assert("nikita-2886", !reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* NOTE-NIKITA this only works if @parent is -the- parent of
-+ @object, viz. object whose key is stored in dotdot
-+ entry. Wouldn't work with hard-links on directories. */
-+ memset(&goodby_dots, 0, sizeof goodby_dots);
-+ entry.obj = goodby_dots.d_inode = parent;
-+ goodby_dots.d_name.name = "..";
-+ goodby_dots.d_name.len = 2;
-+ result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
-+ reiser4_free_dentry_fsdata(&goodby_dots);
-+ if (result == 0) {
-+ /* the dot should be the only entry remaining at this time... */
-+ assert("nikita-3400",
-+ object->i_size == 1 && object->i_nlink <= 2);
-+#if 0
-+ /* and, together with the only name directory can have, they
-+ * provides for the last 2 remaining references. If we get
-+ * here as part of error handling during mkdir, @object
-+ * possibly has no name yet, so its nlink == 1. If we get here
-+ * from rename (targeting empty directory), it has no name
-+ * already, so its nlink == 1. */
-+ assert("nikita-3401",
-+ object->i_nlink == 2 || object->i_nlink == 1);
-+#endif
-+
-+ /* decrement nlink of directory removed ".." pointed
-+ to */
-+ reiser4_del_nlink(parent, NULL, 0);
-+ }
-+ return result;
-+}
-+
-+/* this is common implementation of estimate.add_entry method of
-+ dir plugin
-+ estimation of adding entry which supposes that entry is inserting a
-+ unit into item
-+*/
-+reiser4_block_nr estimate_add_entry_common(const struct inode * inode)
-+{
-+ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
-+}
-+
-+/* this is common implementation of estimate.rem_entry method of dir
-+ plugin
-+*/
-+reiser4_block_nr estimate_rem_entry_common(const struct inode * inode)
-+{
-+ return estimate_one_item_removal(reiser4_tree_by_inode(inode));
-+}
-+
-+/* this is common implementation of estimate.unlink method of dir
-+ plugin
-+*/
-+reiser4_block_nr
-+dir_estimate_unlink_common(const struct inode * parent,
-+ const struct inode * object)
-+{
-+ reiser4_block_nr res;
-+
-+ /* hashed_rem_entry(object) */
-+ res = inode_dir_plugin(object)->estimate.rem_entry(object);
-+ /* del_nlink(parent) */
-+ res += 2 * inode_file_plugin(parent)->estimate.update(parent);
-+
-+ return res;
-+}
-+
-+/*
-+ * helper for inode_ops ->lookup() and dir plugin's ->get_parent()
-+ * methods: if @inode is a light-weight file, setup its credentials
-+ * that are not stored in the stat-data in this case
-+ */
-+void check_light_weight(struct inode *inode, struct inode *parent)
-+{
-+ if (reiser4_inode_get_flag(inode, REISER4_LIGHT_WEIGHT)) {
-+ inode->i_uid = parent->i_uid;
-+ inode->i_gid = parent->i_gid;
-+ /* clear light-weight flag. If inode would be read by any
-+ other name, [ug]id wouldn't change. */
-+ reiser4_inode_clr_flag(inode, REISER4_LIGHT_WEIGHT);
-+ }
-+}
-+
-+/* looks for name specified in @dentry in directory @parent and if name is
-+ found - key of object found entry points to is stored in @entry->key */
-+int reiser4_lookup_name(struct inode *parent, /* inode of directory to lookup for
-+ * name in */
-+ struct dentry *dentry, /* name to look for */
-+ reiser4_key * key /* place to store key */ )
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ const char *name;
-+ int len;
-+ reiser4_dir_entry_desc entry;
-+ reiser4_dentry_fsdata *fsdata;
-+
-+ assert("nikita-1247", parent != NULL);
-+ assert("nikita-1248", dentry != NULL);
-+ assert("nikita-1123", dentry->d_name.name != NULL);
-+ assert("vs-1486",
-+ dentry->d_op == &get_super_private(parent->i_sb)->ops.dentry);
-+
-+ name = dentry->d_name.name;
-+ len = dentry->d_name.len;
-+
-+ if (!inode_dir_plugin(parent)->is_name_acceptable(parent, name, len))
-+ /* some arbitrary error code to return */
-+ return RETERR(-ENAMETOOLONG);
-+
-+ fsdata = reiser4_get_dentry_fsdata(dentry);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+
-+ coord = &fsdata->dec.entry_coord;
-+ coord_clear_iplug(coord);
-+ init_lh(&lh);
-+
-+ /* find entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(parent, dentry, &lh, ZNODE_READ_LOCK,
-+ &entry);
-+ if (result == 0) {
-+ /* entry was found, extract object key from it. */
-+ result =
-+ WITH_COORD(coord,
-+ item_plugin_by_coord(coord)->s.dir.
-+ extract_key(coord, key));
-+ }
-+ done_lh(&lh);
-+ return result;
-+
-+}
-+
-+/* helper for reiser4_dir_init_common(): estimate number of blocks to reserve */
-+static reiser4_block_nr
-+estimate_init(struct inode *parent, struct inode *object)
-+{
-+ reiser4_block_nr res = 0;
-+
-+ assert("vpf-321", parent != NULL);
-+ assert("vpf-322", object != NULL);
-+
-+ /* hashed_add_entry(object) */
-+ res += inode_dir_plugin(object)->estimate.add_entry(object);
-+ /* reiser4_add_nlink(object) */
-+ res += inode_file_plugin(object)->estimate.update(object);
-+ /* hashed_add_entry(object) */
-+ res += inode_dir_plugin(object)->estimate.add_entry(object);
-+ /* reiser4_add_nlink(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+
-+ return 0;
-+}
-+
-+/* helper function for reiser4_dir_init_common(). Create "." and ".." */
-+static int create_dot_dotdot(struct inode *object /* object to create dot and
-+ * dotdot for */ ,
-+ struct inode *parent /* parent of @object */)
-+{
-+ int result;
-+ struct dentry dots_entry;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-688", object != NULL);
-+ assert("nikita-689", S_ISDIR(object->i_mode));
-+ assert("nikita-691", parent != NULL);
-+
-+ /* We store dot and dotdot as normal directory entries. This is
-+ not necessary, because almost all information stored in them
-+ is already in the stat-data of directory, the only thing
-+ being missed is objectid of grand-parent directory that can
-+ easily be added there as extension.
-+
-+ But it is done the way it is done, because not storing dot
-+ and dotdot will lead to the following complications:
-+
-+ . special case handling in ->lookup().
-+ . addition of another extension to the sd.
-+ . dependency on key allocation policy for stat data.
-+
-+ */
-+
-+ memset(&entry, 0, sizeof entry);
-+ memset(&dots_entry, 0, sizeof dots_entry);
-+ entry.obj = dots_entry.d_inode = object;
-+ dots_entry.d_name.name = ".";
-+ dots_entry.d_name.len = 1;
-+ result = reiser4_add_entry_common(object, &dots_entry, NULL, &entry);
-+ reiser4_free_dentry_fsdata(&dots_entry);
-+
-+ if (result == 0) {
-+ result = reiser4_add_nlink(object, object, 0);
-+ if (result == 0) {
-+ entry.obj = dots_entry.d_inode = parent;
-+ dots_entry.d_name.name = "..";
-+ dots_entry.d_name.len = 2;
-+ result = reiser4_add_entry_common(object,
-+ &dots_entry, NULL, &entry);
-+ reiser4_free_dentry_fsdata(&dots_entry);
-+ /* if creation of ".." failed, iput() will delete
-+ object with ".". */
-+ if (result == 0) {
-+ result = reiser4_add_nlink(parent, object, 0);
-+ if (result != 0)
-+ /*
-+ * if we failed to bump i_nlink, try
-+ * to remove ".."
-+ */
-+ reiser4_detach_common(object, parent);
-+ }
-+ }
-+ }
-+
-+ if (result != 0) {
-+ /*
-+ * in the case of error, at least update stat-data so that,
-+ * ->i_nlink updates are not lingering.
-+ */
-+ reiser4_update_sd(object);
-+ reiser4_update_sd(parent);
-+ }
-+
-+ return result;
-+}
-+
-+/*
-+ * return 0 iff @coord contains a directory entry for the file with the name
-+ * @name.
-+ */
-+static int
-+check_item(const struct inode *dir, const coord_t * coord, const char *name)
-+{
-+ item_plugin *iplug;
-+ char buf[DE_NAME_BUF_LEN];
-+
-+ iplug = item_plugin_by_coord(coord);
-+ if (iplug == NULL) {
-+ warning("nikita-1135", "Cannot get item plugin");
-+ print_coord("coord", coord, 1);
-+ return RETERR(-EIO);
-+ } else if (item_id_by_coord(coord) !=
-+ item_id_by_plugin(inode_dir_item_plugin(dir))) {
-+ /* item id of current item does not match to id of items a
-+ directory is built of */
-+ warning("nikita-1136", "Wrong item plugin");
-+ print_coord("coord", coord, 1);
-+ return RETERR(-EIO);
-+ }
-+ assert("nikita-1137", iplug->s.dir.extract_name);
-+
-+ /* Compare name stored in this entry with name we are looking for.
-+
-+ NOTE-NIKITA Here should go code for support of something like
-+ unicode, code tables, etc.
-+ */
-+ return !!strcmp(name, iplug->s.dir.extract_name(coord, buf));
-+}
-+
-+static int
-+check_entry(const struct inode *dir, coord_t * coord, const struct qstr *name)
-+{
-+ return WITH_COORD(coord, check_item(dir, coord, name->name));
-+}
-+
-+/*
-+ * argument package used by entry_actor to scan entries with identical keys.
-+ */
-+typedef struct entry_actor_args {
-+ /* name we are looking for */
-+ const char *name;
-+ /* key of directory entry. entry_actor() scans through sequence of
-+ * items/units having the same key */
-+ reiser4_key *key;
-+ /* how many entries with duplicate key was scanned so far. */
-+ int non_uniq;
-+#if REISER4_USE_COLLISION_LIMIT
-+ /* scan limit */
-+ int max_non_uniq;
-+#endif
-+ /* return parameter: set to true, if ->name wasn't found */
-+ int not_found;
-+ /* what type of lock to take when moving to the next node during
-+ * scan */
-+ znode_lock_mode mode;
-+
-+ /* last coord that was visited during scan */
-+ coord_t last_coord;
-+ /* last node locked during scan */
-+ lock_handle last_lh;
-+ /* inode of directory */
-+ const struct inode *inode;
-+} entry_actor_args;
-+
-+/* Function called by reiser4_find_entry() to look for given name
-+ in the directory. */
-+static int entry_actor(reiser4_tree * tree UNUSED_ARG /* tree being scanned */ ,
-+ coord_t * coord /* current coord */ ,
-+ lock_handle * lh /* current lock handle */ ,
-+ void *entry_actor_arg /* argument to scan */ )
-+{
-+ reiser4_key unit_key;
-+ entry_actor_args *args;
-+
-+ assert("nikita-1131", tree != NULL);
-+ assert("nikita-1132", coord != NULL);
-+ assert("nikita-1133", entry_actor_arg != NULL);
-+
-+ args = entry_actor_arg;
-+ ++args->non_uniq;
-+#if REISER4_USE_COLLISION_LIMIT
-+ if (args->non_uniq > args->max_non_uniq) {
-+ args->not_found = 1;
-+ /* hash collision overflow. */
-+ return RETERR(-EBUSY);
-+ }
-+#endif
-+
-+ /*
-+ * did we just reach the end of the sequence of items/units with
-+ * identical keys?
-+ */
-+ if (!keyeq(args->key, unit_key_by_coord(coord, &unit_key))) {
-+ assert("nikita-1791",
-+ keylt(args->key, unit_key_by_coord(coord, &unit_key)));
-+ args->not_found = 1;
-+ args->last_coord.between = AFTER_UNIT;
-+ return 0;
-+ }
-+
-+ coord_dup(&args->last_coord, coord);
-+ /*
-+ * did scan just moved to the next node?
-+ */
-+ if (args->last_lh.node != lh->node) {
-+ int lock_result;
-+
-+ /*
-+ * if so, lock new node with the mode requested by the caller
-+ */
-+ done_lh(&args->last_lh);
-+ assert("nikita-1896", znode_is_any_locked(lh->node));
-+ lock_result = longterm_lock_znode(&args->last_lh, lh->node,
-+ args->mode, ZNODE_LOCK_HIPRI);
-+ if (lock_result != 0)
-+ return lock_result;
-+ }
-+ return check_item(args->inode, coord, args->name);
-+}
-+
-+/* Look for given @name within directory @dir.
-+
-+ This is called during lookup, creation and removal of directory
-+ entries and on reiser4_rename_common
-+
-+ First calculate key that directory entry for @name would have. Search
-+ for this key in the tree. If such key is found, scan all items with
-+ the same key, checking name in each directory entry along the way.
-+*/
-+int reiser4_find_entry(struct inode *dir, /* directory to scan */
-+ struct dentry *de, /* name to search for */
-+ lock_handle * lh, /* resulting lock handle */
-+ znode_lock_mode mode, /* required lock mode */
-+ reiser4_dir_entry_desc * entry /* parameters of found
-+ directory entry */)
-+{
-+ const struct qstr *name;
-+ seal_t *seal;
-+ coord_t *coord;
-+ int result;
-+ __u32 flags;
-+ de_location *dec;
-+ reiser4_dentry_fsdata *fsdata;
-+
-+ assert("nikita-1130", lh != NULL);
-+ assert("nikita-1128", dir != NULL);
-+
-+ name = &de->d_name;
-+ assert("nikita-1129", name != NULL);
-+
-+ /* dentry private data don't require lock, because dentry
-+ manipulations are protected by i_mutex on parent.
-+
-+ This is not so for inodes, because there is no -the- parent in
-+ inode case.
-+ */
-+ fsdata = reiser4_get_dentry_fsdata(de);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+ dec = &fsdata->dec;
-+
-+ coord = &dec->entry_coord;
-+ coord_clear_iplug(coord);
-+ seal = &dec->entry_seal;
-+ /* compose key of directory entry for @name */
-+ inode_dir_plugin(dir)->build_entry_key(dir, name, &entry->key);
-+
-+ if (reiser4_seal_is_set(seal)) {
-+ /* check seal */
-+ result = reiser4_seal_validate(seal, coord, &entry->key,
-+ lh, mode, ZNODE_LOCK_LOPRI);
-+ if (result == 0) {
-+ /* key was found. Check that it is really item we are
-+ looking for. */
-+ result = check_entry(dir, coord, name);
-+ if (result == 0)
-+ return 0;
-+ }
-+ }
-+ flags = (mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
-+ /*
-+ * find place in the tree where directory item should be located.
-+ */
-+ result = reiser4_object_lookup(dir, &entry->key, coord, lh, mode,
-+ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL,
-+ flags, NULL /*ra_info */ );
-+ if (result == CBK_COORD_FOUND) {
-+ entry_actor_args arg;
-+
-+ /* fast path: no hash collisions */
-+ result = check_entry(dir, coord, name);
-+ if (result == 0) {
-+ reiser4_seal_init(seal, coord, &entry->key);
-+ dec->pos = 0;
-+ } else if (result > 0) {
-+ /* Iterate through all units with the same keys. */
-+ arg.name = name->name;
-+ arg.key = &entry->key;
-+ arg.not_found = 0;
-+ arg.non_uniq = 0;
-+#if REISER4_USE_COLLISION_LIMIT
-+ arg.max_non_uniq = max_hash_collisions(dir);
-+ assert("nikita-2851", arg.max_non_uniq > 1);
-+#endif
-+ arg.mode = mode;
-+ arg.inode = dir;
-+ coord_init_zero(&arg.last_coord);
-+ init_lh(&arg.last_lh);
-+
-+ result = reiser4_iterate_tree
-+ (reiser4_tree_by_inode(dir),
-+ coord, lh,
-+ entry_actor, &arg, mode, 1);
-+ /* if end of the tree or extent was reached during
-+ scanning. */
-+ if (arg.not_found || (result == -E_NO_NEIGHBOR)) {
-+ /* step back */
-+ done_lh(lh);
-+
-+ result = zload(arg.last_coord.node);
-+ if (result == 0) {
-+ coord_clear_iplug(&arg.last_coord);
-+ coord_dup(coord, &arg.last_coord);
-+ move_lh(lh, &arg.last_lh);
-+ result = RETERR(-ENOENT);
-+ zrelse(arg.last_coord.node);
-+ --arg.non_uniq;
-+ }
-+ }
-+
-+ done_lh(&arg.last_lh);
-+ if (result == 0)
-+ reiser4_seal_init(seal, coord, &entry->key);
-+
-+ if (result == 0 || result == -ENOENT) {
-+ assert("nikita-2580", arg.non_uniq > 0);
-+ dec->pos = arg.non_uniq - 1;
-+ }
-+ }
-+ } else
-+ dec->pos = -1;
-+ return result;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format40.c linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format40.c
---- linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format40.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format40.c 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,655 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../key.h"
-+#include "../node/node.h"
-+#include "../space/space_allocator.h"
-+#include "disk_format40.h"
-+#include "../plugin.h"
-+#include "../../txnmgr.h"
-+#include "../../jnode.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../wander.h"
-+#include "../../inode.h"
-+#include "../../ktxnmgrd.h"
-+#include "../../status_flags.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/buffer_head.h>
-+
-+/* reiser 4.0 default disk layout */
-+
-+/* Amount of free blocks needed to perform release_format40 when fs gets
-+ mounted RW: 1 for SB, 1 for non-leaves in overwrite set, 2 for tx header
-+ & tx record. */
-+#define RELEASE_RESERVED 4
-+
-+/* The greatest supported format40 version number */
-+#define FORMAT40_VERSION PLUGIN_LIBRARY_VERSION
-+
-+/* This flag indicates that backup should be updated
-+ (the update is performed by fsck) */
-+#define FORMAT40_UPDATE_BACKUP (1 << 31)
-+
-+/* functions to access fields of format40_disk_super_block */
-+static __u64 get_format40_block_count(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->block_count));
-+}
-+
-+static __u64 get_format40_free_blocks(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->free_blocks));
-+}
-+
-+static __u64 get_format40_root_block(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->root_block));
-+}
-+
-+static __u16 get_format40_tree_height(const format40_disk_super_block * sb)
-+{
-+ return le16_to_cpu(get_unaligned(&sb->tree_height));
-+}
-+
-+static __u64 get_format40_file_count(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->file_count));
-+}
-+
-+static __u64 get_format40_oid(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->oid));
-+}
-+
-+static __u32 get_format40_mkfs_id(const format40_disk_super_block * sb)
-+{
-+ return le32_to_cpu(get_unaligned(&sb->mkfs_id));
-+}
-+
-+static __u64 get_format40_flags(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->flags));
-+}
-+
-+static __u32 get_format40_version(const format40_disk_super_block * sb)
-+{
-+ return le32_to_cpu(get_unaligned(&sb->version)) &
-+ ~FORMAT40_UPDATE_BACKUP;
-+}
-+
-+static int update_backup_version(const format40_disk_super_block * sb)
-+{
-+ return (le32_to_cpu(get_unaligned(&sb->version)) &
-+ FORMAT40_UPDATE_BACKUP);
-+}
-+
-+static int update_disk_version(const format40_disk_super_block * sb)
-+{
-+ return (get_format40_version(sb) < FORMAT40_VERSION);
-+}
-+
-+static int incomplete_compatibility(const format40_disk_super_block * sb)
-+{
-+ return (get_format40_version(sb) > FORMAT40_VERSION);
-+}
-+
-+static format40_super_info *get_sb_info(struct super_block *super)
-+{
-+ return &get_super_private(super)->u.format40;
-+}
-+
-+static int consult_diskmap(struct super_block *s)
-+{
-+ format40_super_info *info;
-+ journal_location *jloc;
-+
-+ info = get_sb_info(s);
-+ jloc = &get_super_private(s)->jloc;
-+ /* Default format-specific locations, if there is nothing in
-+ * diskmap */
-+ jloc->footer = FORMAT40_JOURNAL_FOOTER_BLOCKNR;
-+ jloc->header = FORMAT40_JOURNAL_HEADER_BLOCKNR;
-+ info->loc.super = FORMAT40_OFFSET / s->s_blocksize;
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JF,
-+ &jloc->footer);
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JH,
-+ &jloc->header);
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_SUPER,
-+ &info->loc.super);
-+#endif
-+ return 0;
-+}
-+
-+/* find any valid super block of disk_format40 (even if the first
-+ super block is destroyed), will change block numbers of actual journal header/footer (jf/jh)
-+ if needed */
-+static struct buffer_head *find_a_disk_format40_super_block(struct super_block
-+ *s)
-+{
-+ struct buffer_head *super_bh;
-+ format40_disk_super_block *disk_sb;
-+ format40_super_info *info;
-+
-+ assert("umka-487", s != NULL);
-+
-+ info = get_sb_info(s);
-+
-+ super_bh = sb_bread(s, info->loc.super);
-+ if (super_bh == NULL)
-+ return ERR_PTR(RETERR(-EIO));
-+
-+ disk_sb = (format40_disk_super_block *) super_bh->b_data;
-+ if (strncmp(disk_sb->magic, FORMAT40_MAGIC, sizeof(FORMAT40_MAGIC))) {
-+ brelse(super_bh);
-+ return ERR_PTR(RETERR(-EINVAL));
-+ }
-+
-+ reiser4_set_block_count(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)));
-+ reiser4_set_data_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)) -
-+ le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
-+ reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
-+
-+ return super_bh;
-+}
-+
-+/* find the most recent version of super block. This is called after journal is
-+ replayed */
-+static struct buffer_head *read_super_block(struct super_block *s UNUSED_ARG)
-+{
-+ /* Here the most recent superblock copy has to be read. However, as
-+ journal replay isn't complete, we are using
-+ find_a_disk_format40_super_block() function. */
-+ return find_a_disk_format40_super_block(s);
-+}
-+
-+static int get_super_jnode(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ jnode *sb_jnode;
-+ int ret;
-+
-+ sb_jnode = reiser4_alloc_io_head(&get_sb_info(s)->loc.super);
-+
-+ ret = jload(sb_jnode);
-+
-+ if (ret) {
-+ reiser4_drop_io_head(sb_jnode);
-+ return ret;
-+ }
-+
-+ pin_jnode_data(sb_jnode);
-+ jrelse(sb_jnode);
-+
-+ sbinfo->u.format40.sb_jnode = sb_jnode;
-+
-+ return 0;
-+}
-+
-+static void done_super_jnode(struct super_block *s)
-+{
-+ jnode *sb_jnode = get_super_private(s)->u.format40.sb_jnode;
-+
-+ if (sb_jnode) {
-+ unpin_jnode_data(sb_jnode);
-+ reiser4_drop_io_head(sb_jnode);
-+ }
-+}
-+
-+typedef enum format40_init_stage {
-+ NONE_DONE = 0,
-+ CONSULT_DISKMAP,
-+ FIND_A_SUPER,
-+ INIT_JOURNAL_INFO,
-+ INIT_STATUS,
-+ JOURNAL_REPLAY,
-+ READ_SUPER,
-+ KEY_CHECK,
-+ INIT_OID,
-+ INIT_TREE,
-+ JOURNAL_RECOVER,
-+ INIT_SA,
-+ INIT_JNODE,
-+ ALL_DONE
-+} format40_init_stage;
-+
-+static format40_disk_super_block *copy_sb(const struct buffer_head *super_bh)
-+{
-+ format40_disk_super_block *sb_copy;
-+
-+ sb_copy = kmalloc(sizeof(format40_disk_super_block),
-+ reiser4_ctx_gfp_mask_get());
-+ if (sb_copy == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ memcpy(sb_copy, ((format40_disk_super_block *) super_bh->b_data),
-+ sizeof(format40_disk_super_block));
-+ return sb_copy;
-+}
-+
-+static int check_key_format(const format40_disk_super_block *sb_copy)
-+{
-+ if (!equi(REISER4_LARGE_KEY,
-+ get_format40_flags(sb_copy) & (1 << FORMAT40_LARGE_KEYS))) {
-+ warning("nikita-3228", "Key format mismatch. "
-+ "Only %s keys are supported.",
-+ REISER4_LARGE_KEY ? "large" : "small");
-+ return RETERR(-EINVAL);
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * try_init_format40
-+ * @super:
-+ * @stage:
-+ *
-+ */
-+static int try_init_format40(struct super_block *super,
-+ format40_init_stage *stage)
-+{
-+ int result;
-+ struct buffer_head *super_bh;
-+ reiser4_super_info_data *sbinfo;
-+ format40_disk_super_block *sb_copy;
-+ tree_level height;
-+ reiser4_block_nr root_block;
-+ node_plugin *nplug;
-+
-+ assert("vs-475", super != NULL);
-+ assert("vs-474", get_super_private(super));
-+
-+ *stage = NONE_DONE;
-+
-+ result = consult_diskmap(super);
-+ if (result)
-+ return result;
-+ *stage = CONSULT_DISKMAP;
-+
-+ super_bh = find_a_disk_format40_super_block(super);
-+ if (IS_ERR(super_bh))
-+ return PTR_ERR(super_bh);
-+ brelse(super_bh);
-+ *stage = FIND_A_SUPER;
-+
-+ /* ok, we are sure that filesystem format is a format40 format */
-+
-+ /* map jnodes for journal control blocks (header, footer) to disk */
-+ result = reiser4_init_journal_info(super);
-+ if (result)
-+ return result;
-+ *stage = INIT_JOURNAL_INFO;
-+
-+ /* ok, we are sure that filesystem format is a format40 format */
-+ /* Now check it's state */
-+ result = reiser4_status_init(FORMAT40_STATUS_BLOCKNR);
-+ if (result != 0 && result != -EINVAL)
-+ /* -EINVAL means there is no magic, so probably just old
-+ * fs. */
-+ return result;
-+ *stage = INIT_STATUS;
-+
-+ result = reiser4_status_query(NULL, NULL);
-+ if (result == REISER4_STATUS_MOUNT_WARN)
-+ notice("vpf-1363", "Warning: mounting %s with errors.",
-+ super->s_id);
-+ if (result == REISER4_STATUS_MOUNT_RO)
-+ notice("vpf-1364", "Warning: mounting %s with fatal errors,"
-+ " forcing read-only mount.", super->s_id);
-+ result = reiser4_journal_replay(super);
-+ if (result)
-+ return result;
-+ *stage = JOURNAL_REPLAY;
-+
-+ super_bh = read_super_block(super);
-+ if (IS_ERR(super_bh))
-+ return PTR_ERR(super_bh);
-+ *stage = READ_SUPER;
-+
-+ /* allocate and make a copy of format40_disk_super_block */
-+ sb_copy = copy_sb(super_bh);
-+ brelse(super_bh);
-+
-+ if (IS_ERR(sb_copy))
-+ return PTR_ERR(sb_copy);
-+ printk("reiser4: %s: found disk format 4.0.%u.\n",
-+ super->s_id,
-+ get_format40_version(sb_copy));
-+ if (incomplete_compatibility(sb_copy))
-+ printk("reiser4: Warning: The last completely supported "
-+ "version of disk format40 is %u. Some objects of "
-+ "the semantic tree can be unaccessible.\n",
-+ FORMAT40_VERSION);
-+ /* make sure that key format of kernel and filesystem match */
-+ result = check_key_format(sb_copy);
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = KEY_CHECK;
-+
-+ result = oid_init_allocator(super, get_format40_file_count(sb_copy),
-+ get_format40_oid(sb_copy));
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = INIT_OID;
-+
-+ /* get things necessary to init reiser4_tree */
-+ root_block = get_format40_root_block(sb_copy);
-+ height = get_format40_tree_height(sb_copy);
-+ nplug = node_plugin_by_id(NODE40_ID);
-+
-+ /* initialize reiser4_super_info_data */
-+ sbinfo = get_super_private(super);
-+ assert("", sbinfo->tree.super == super);
-+ /* init reiser4_tree for the filesystem */
-+ result = reiser4_init_tree(&sbinfo->tree, &root_block, height, nplug);
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = INIT_TREE;
-+
-+ /*
-+ * initialize reiser4_super_info_data with data from format40 super
-+ * block
-+ */
-+ sbinfo->default_uid = 0;
-+ sbinfo->default_gid = 0;
-+ sbinfo->mkfs_id = get_format40_mkfs_id(sb_copy);
-+ /* number of blocks in filesystem and reserved space */
-+ reiser4_set_block_count(super, get_format40_block_count(sb_copy));
-+ sbinfo->blocks_free = get_format40_free_blocks(sb_copy);
-+ sbinfo->version = get_format40_version(sb_copy);
-+ kfree(sb_copy);
-+
-+ if (update_backup_version(sb_copy))
-+ printk("reiser4: Warning: metadata backup is not updated. "
-+ "Please run 'fsck.reiser4 --fix' on %s.\n",
-+ super->s_id);
-+
-+ sbinfo->fsuid = 0;
-+ sbinfo->fs_flags |= (1 << REISER4_ADG); /* hard links for directories
-+ * are not supported */
-+ sbinfo->fs_flags |= (1 << REISER4_ONE_NODE_PLUGIN); /* all nodes in
-+ * layout 40 are
-+ * of one
-+ * plugin */
-+ /* sbinfo->tmgr is initialized already */
-+
-+ /* recover sb data which were logged separately from sb block */
-+
-+ /* NOTE-NIKITA: reiser4_journal_recover_sb_data() calls
-+ * oid_init_allocator() and reiser4_set_free_blocks() with new
-+ * data. What's the reason to call them above? */
-+ result = reiser4_journal_recover_sb_data(super);
-+ if (result != 0)
-+ return result;
-+ *stage = JOURNAL_RECOVER;
-+
-+ /*
-+ * Set number of used blocks. The number of used blocks is not stored
-+ * neither in on-disk super block nor in the journal footer blocks. At
-+ * this moment actual values of total blocks and free block counters
-+ * are set in the reiser4 super block (in-memory structure) and we can
-+ * calculate number of used blocks from them.
-+ */
-+ reiser4_set_data_blocks(super,
-+ reiser4_block_count(super) -
-+ reiser4_free_blocks(super));
-+
-+#if REISER4_DEBUG
-+ sbinfo->min_blocks_used = 16 /* reserved area */ +
-+ 2 /* super blocks */ +
-+ 2 /* journal footer and header */ ;
-+#endif
-+
-+ /* init disk space allocator */
-+ result = sa_init_allocator(reiser4_get_space_allocator(super),
-+ super, NULL);
-+ if (result)
-+ return result;
-+ *stage = INIT_SA;
-+
-+ result = get_super_jnode(super);
-+ if (result == 0)
-+ *stage = ALL_DONE;
-+ return result;
-+}
-+
-+/* plugin->u.format.get_ready */
-+int init_format_format40(struct super_block *s, void *data UNUSED_ARG)
-+{
-+ int result;
-+ format40_init_stage stage;
-+
-+ result = try_init_format40(s, &stage);
-+ switch (stage) {
-+ case ALL_DONE:
-+ assert("nikita-3458", result == 0);
-+ break;
-+ case INIT_JNODE:
-+ done_super_jnode(s);
-+ case INIT_SA:
-+ sa_destroy_allocator(reiser4_get_space_allocator(s), s);
-+ case JOURNAL_RECOVER:
-+ case INIT_TREE:
-+ reiser4_done_tree(&get_super_private(s)->tree);
-+ case INIT_OID:
-+ case KEY_CHECK:
-+ case READ_SUPER:
-+ case JOURNAL_REPLAY:
-+ case INIT_STATUS:
-+ reiser4_status_finish();
-+ case INIT_JOURNAL_INFO:
-+ reiser4_done_journal_info(s);
-+ case FIND_A_SUPER:
-+ case CONSULT_DISKMAP:
-+ case NONE_DONE:
-+ break;
-+ default:
-+ impossible("nikita-3457", "init stage: %i", stage);
-+ }
-+
-+ if (!rofs_super(s) && reiser4_free_blocks(s) < RELEASE_RESERVED)
-+ return RETERR(-ENOSPC);
-+
-+ return result;
-+}
-+
-+static void pack_format40_super(const struct super_block *s, char *data)
-+{
-+ format40_disk_super_block *super_data =
-+ (format40_disk_super_block *) data;
-+
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("zam-591", data != NULL);
-+
-+ put_unaligned(cpu_to_le64(reiser4_free_committed_blocks(s)),
-+ &super_data->free_blocks);
-+
-+ put_unaligned(cpu_to_le64(sbinfo->tree.root_block),
-+ &super_data->root_block);
-+
-+ put_unaligned(cpu_to_le64(oid_next(s)),
-+ &super_data->oid);
-+
-+ put_unaligned(cpu_to_le64(oids_used(s)),
-+ &super_data->file_count);
-+
-+ put_unaligned(cpu_to_le16(sbinfo->tree.height),
-+ &super_data->tree_height);
-+
-+ if (update_disk_version(super_data)) {
-+ __u32 version = FORMAT40_VERSION | FORMAT40_UPDATE_BACKUP;
-+
-+ put_unaligned(cpu_to_le32(version), &super_data->version);
-+ }
-+}
-+
-+/* plugin->u.format.log_super
-+ return a jnode which should be added to transaction when the super block
-+ gets logged */
-+jnode *log_super_format40(struct super_block *s)
-+{
-+ jnode *sb_jnode;
-+
-+ sb_jnode = get_super_private(s)->u.format40.sb_jnode;
-+
-+ jload(sb_jnode);
-+
-+ pack_format40_super(s, jdata(sb_jnode));
-+
-+ jrelse(sb_jnode);
-+
-+ return sb_jnode;
-+}
-+
-+/* plugin->u.format.release */
-+int release_format40(struct super_block *s)
-+{
-+ int ret;
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(s);
-+ assert("zam-579", sbinfo != NULL);
-+
-+ if (!rofs_super(s)) {
-+ ret = reiser4_capture_super_block(s);
-+ if (ret != 0)
-+ warning("vs-898",
-+ "reiser4_capture_super_block failed: %d",
-+ ret);
-+
-+ ret = txnmgr_force_commit_all(s, 1);
-+ if (ret != 0)
-+ warning("jmacd-74438", "txn_force failed: %d", ret);
-+
-+ all_grabbed2free();
-+ }
-+
-+ sa_destroy_allocator(&sbinfo->space_allocator, s);
-+ reiser4_done_journal_info(s);
-+ done_super_jnode(s);
-+
-+ rcu_barrier();
-+ reiser4_done_tree(&sbinfo->tree);
-+ /* call finish_rcu(), because some znode were "released" in
-+ * reiser4_done_tree(). */
-+ rcu_barrier();
-+
-+ return 0;
-+}
-+
-+#define FORMAT40_ROOT_LOCALITY 41
-+#define FORMAT40_ROOT_OBJECTID 42
-+
-+/* plugin->u.format.root_dir_key */
-+const reiser4_key *root_dir_key_format40(const struct super_block *super
-+ UNUSED_ARG)
-+{
-+ static const reiser4_key FORMAT40_ROOT_DIR_KEY = {
-+ .el = {
-+ __constant_cpu_to_le64((FORMAT40_ROOT_LOCALITY << 4) | KEY_SD_MINOR),
-+#if REISER4_LARGE_KEY
-+ ON_LARGE_KEY(0ull,)
-+#endif
-+ __constant_cpu_to_le64(FORMAT40_ROOT_OBJECTID),
-+ 0ull
-+ }
-+ };
-+
-+ return &FORMAT40_ROOT_DIR_KEY;
-+}
-+
-+/* plugin->u.format.check_open.
-+ Check the opened object for validness. For now it checks for the valid oid &
-+ locality only, can be improved later and it its work may depend on the mount
-+ options. */
-+int check_open_format40(const struct inode *object)
-+{
-+ oid_t max, oid;
-+
-+ max = oid_next(object->i_sb) - 1;
-+
-+ /* Check the oid. */
-+ oid = get_inode_oid(object);
-+ if (oid > max) {
-+ warning("vpf-1360", "The object with the oid %llu "
-+ "greater then the max used oid %llu found.",
-+ (unsigned long long)oid, (unsigned long long)max);
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ /* Check the locality. */
-+ oid = reiser4_inode_data(object)->locality_id;
-+ if (oid > max) {
-+ warning("vpf-1361", "The object with the locality %llu "
-+ "greater then the max used oid %llu found.",
-+ (unsigned long long)oid, (unsigned long long)max);
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.format.version_update.
-+ Perform all version update operations from the on-disk
-+ format40_disk_super_block.version on disk to FORMAT40_VERSION.
-+ */
-+int version_update_format40(struct super_block *super) {
-+ txn_handle * trans;
-+ lock_handle lh;
-+ txn_atom *atom;
-+ int ret;
-+
-+ /* Nothing to do if RO mount or the on-disk version is not less. */
-+ if (super->s_flags & MS_RDONLY)
-+ return 0;
-+
-+ if (get_super_private(super)->version >= FORMAT40_VERSION)
-+ return 0;
-+
-+ printk("reiser4: Updating disk format to 4.0.%u. The reiser4 metadata "
-+ "backup is left unchanged. Please run 'fsck.reiser4 --fix' "
-+ "on %s to update it too.\n", FORMAT40_VERSION, super->s_id);
-+
-+ /* Mark the uber znode dirty to call log_super on write_logs. */
-+ init_lh(&lh);
-+ ret = get_uber_znode(reiser4_get_tree(super), ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_HIPRI, &lh);
-+ if (ret != 0)
-+ return ret;
-+
-+ znode_make_dirty(lh.node);
-+ done_lh(&lh);
-+
-+ /* Update the backup blocks. */
-+
-+ /* Force write_logs immediately. */
-+ trans = get_current_context()->trans;
-+ atom = get_current_atom_locked();
-+ assert("vpf-1906", atom != NULL);
-+
-+ spin_lock_txnh(trans);
-+ return force_commit_atom(trans);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format40.h linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format40.h
---- linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format40.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format40.h 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,109 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* this file contains:
-+ - definition of ondisk super block of standart disk layout for
-+ reiser 4.0 (layout 40)
-+ - definition of layout 40 specific portion of in-core super block
-+ - declarations of functions implementing methods of layout plugin
-+ for layout 40
-+ - declarations of functions used to get/set fields in layout 40 super block
-+*/
-+
-+#ifndef __DISK_FORMAT40_H__
-+#define __DISK_FORMAT40_H__
-+
-+/* magic for default reiser4 layout */
-+#define FORMAT40_MAGIC "ReIsEr40FoRmAt"
-+#define FORMAT40_OFFSET (REISER4_MASTER_OFFSET + PAGE_CACHE_SIZE)
-+
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+typedef enum {
-+ FORMAT40_LARGE_KEYS
-+} format40_flags;
-+
-+/* ondisk super block for format 40. It is 512 bytes long */
-+typedef struct format40_disk_super_block {
-+ /* 0 */ d64 block_count;
-+ /* number of block in a filesystem */
-+ /* 8 */ d64 free_blocks;
-+ /* number of free blocks */
-+ /* 16 */ d64 root_block;
-+ /* filesystem tree root block */
-+ /* 24 */ d64 oid;
-+ /* smallest free objectid */
-+ /* 32 */ d64 file_count;
-+ /* number of files in a filesystem */
-+ /* 40 */ d64 flushes;
-+ /* number of times super block was
-+ flushed. Needed if format 40
-+ will have few super blocks */
-+ /* 48 */ d32 mkfs_id;
-+ /* unique identifier of fs */
-+ /* 52 */ char magic[16];
-+ /* magic string ReIsEr40FoRmAt */
-+ /* 68 */ d16 tree_height;
-+ /* height of filesystem tree */
-+ /* 70 */ d16 formatting_policy;
-+ /* not used anymore */
-+ /* 72 */ d64 flags;
-+ /* 80 */ d32 version;
-+ /* on-disk format version number
-+ initially assigned by mkfs as the greatest format40
-+ version number supported by reiser4progs and updated
-+ in mount time in accordance with the greatest format40
-+ version number supported by kernel.
-+ Is used by fsck to catch possible corruption and
-+ for various compatibility issues */
-+ /* 84 */ char not_used[428];
-+} format40_disk_super_block;
-+
-+/* format 40 specific part of reiser4_super_info_data */
-+typedef struct format40_super_info {
-+/* format40_disk_super_block actual_sb; */
-+ jnode *sb_jnode;
-+ struct {
-+ reiser4_block_nr super;
-+ } loc;
-+} format40_super_info;
-+
-+/* Defines for journal header and footer respectively. */
-+#define FORMAT40_JOURNAL_HEADER_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 3)
-+
-+#define FORMAT40_JOURNAL_FOOTER_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 4)
-+
-+#define FORMAT40_STATUS_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 5)
-+
-+/* Diskmap declarations */
-+#define FORMAT40_PLUGIN_DISKMAP_ID ((REISER4_FORMAT_PLUGIN_TYPE<<16) | (FORMAT40_ID))
-+#define FORMAT40_SUPER 1
-+#define FORMAT40_JH 2
-+#define FORMAT40_JF 3
-+
-+/* declarations of functions implementing methods of layout plugin for
-+ format 40. The functions theirself are in disk_format40.c */
-+extern int init_format_format40(struct super_block *, void *data);
-+extern const reiser4_key *root_dir_key_format40(const struct super_block *);
-+extern int release_format40(struct super_block *s);
-+extern jnode *log_super_format40(struct super_block *s);
-+extern int check_open_format40(const struct inode *object);
-+extern int version_update_format40(struct super_block *super);
-+
-+/* __DISK_FORMAT40_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format.c linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format.c
---- linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format.c 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,38 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../plugin_header.h"
-+#include "disk_format40.h"
-+#include "disk_format.h"
-+#include "../plugin.h"
-+
-+/* initialization of disk layout plugins */
-+disk_format_plugin format_plugins[LAST_FORMAT_ID] = {
-+ [FORMAT40_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMAT_PLUGIN_TYPE,
-+ .id = FORMAT40_ID,
-+ .pops = NULL,
-+ .label = "reiser40",
-+ .desc = "standard disk layout for reiser40",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init_format = init_format_format40,
-+ .root_dir_key = root_dir_key_format40,
-+ .release = release_format40,
-+ .log_super = log_super_format40,
-+ .check_open = check_open_format40,
-+ .version_update = version_update_format40
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format.h linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format.h
---- linux-2.6.20.orig/fs/reiser4/plugin/disk_format/disk_format.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/disk_format/disk_format.h 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,27 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* identifiers for disk layouts, they are also used as indexes in array of disk
-+ plugins */
-+
-+#if !defined( __REISER4_DISK_FORMAT_H__ )
-+#define __REISER4_DISK_FORMAT_H__
-+
-+typedef enum {
-+ /* standard reiser4 disk layout plugin id */
-+ FORMAT40_ID,
-+ LAST_FORMAT_ID
-+} disk_format_id;
-+
-+/* __REISER4_DISK_FORMAT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/disk_format/Makefile linux-2.6.20/fs/reiser4/plugin/disk_format/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/disk_format/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/disk_format/Makefile 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += df_plugins.o
-+
-+df_plugins-objs := \
-+ disk_format40.o \
-+ disk_format.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/fibration.c linux-2.6.20/fs/reiser4/plugin/fibration.c
---- linux-2.6.20.orig/fs/reiser4/plugin/fibration.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/fibration.c 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,175 @@
-+/* Copyright 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Directory fibrations */
-+
-+/*
-+ * Suppose we have a directory tree with sources of some project. During
-+ * compilation .o files are created within this tree. This makes access
-+ * to the original source files less efficient, because source files are
-+ * now "diluted" by object files: default directory plugin uses prefix
-+ * of a file name as a part of the key for directory entry (and this
-+ * part is also inherited by the key of file body). This means that
-+ * foo.o will be located close to foo.c and foo.h in the tree.
-+ *
-+ * To avoid this effect directory plugin fill highest 7 (unused
-+ * originally) bits of the second component of the directory entry key
-+ * by bit-pattern depending on the file name (see
-+ * fs/reiser4/kassign.c:build_entry_key_common()). These bits are called
-+ * "fibre". Fibre of the file name key is inherited by key of stat data
-+ * and keys of file body (in the case of REISER4_LARGE_KEY).
-+ *
-+ * Fibre for a given file is chosen by per-directory fibration
-+ * plugin. Names within given fibre are ordered lexicographically.
-+ */
-+
-+#include "../debug.h"
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../super.h"
-+#include "../inode.h"
-+
-+#include <linux/types.h>
-+
-+static const int fibre_shift = 57;
-+
-+#define FIBRE_NO(n) (((__u64)(n)) << fibre_shift)
-+
-+/*
-+ * Trivial fibration: all files of directory are just ordered
-+ * lexicographically.
-+ */
-+static __u64 fibre_trivial(const struct inode *dir, const char *name, int len)
-+{
-+ return FIBRE_NO(0);
-+}
-+
-+/*
-+ * dot-o fibration: place .o files after all others.
-+ */
-+static __u64 fibre_dot_o(const struct inode *dir, const char *name, int len)
-+{
-+ /* special treatment for .*\.o */
-+ if (len > 2 && name[len - 1] == 'o' && name[len - 2] == '.')
-+ return FIBRE_NO(1);
-+ else
-+ return FIBRE_NO(0);
-+}
-+
-+/*
-+ * ext.1 fibration: subdivide directory into 128 fibrations one for each
-+ * 7bit extension character (file "foo.h" goes into fibre "h"), plus
-+ * default fibre for the rest.
-+ */
-+static __u64 fibre_ext_1(const struct inode *dir, const char *name, int len)
-+{
-+ if (len > 2 && name[len - 2] == '.')
-+ return FIBRE_NO(name[len - 1]);
-+ else
-+ return FIBRE_NO(0);
-+}
-+
-+/*
-+ * ext.3 fibration: try to separate files with different 3-character
-+ * extensions from each other.
-+ */
-+static __u64 fibre_ext_3(const struct inode *dir, const char *name, int len)
-+{
-+ if (len > 4 && name[len - 4] == '.')
-+ return FIBRE_NO(name[len - 3] + name[len - 2] + name[len - 1]);
-+ else
-+ return FIBRE_NO(0);
-+}
-+
-+static int change_fibration(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ int result;
-+
-+ assert("nikita-3503", inode != NULL);
-+ assert("nikita-3504", plugin != NULL);
-+
-+ assert("nikita-3505", is_reiser4_inode(inode));
-+ assert("nikita-3506", inode_dir_plugin(inode) != NULL);
-+ assert("nikita-3507",
-+ plugin->h.type_id == REISER4_FIBRATION_PLUGIN_TYPE);
-+
-+ result = 0;
-+ if (inode_fibration_plugin(inode) == NULL ||
-+ inode_fibration_plugin(inode)->h.id != plugin->h.id) {
-+ if (is_dir_empty(inode) == 0)
-+ result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_FIBRATION, plugin);
-+ else
-+ result = RETERR(-ENOTEMPTY);
-+
-+ }
-+ return result;
-+}
-+
-+static reiser4_plugin_ops fibration_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_fibration
-+};
-+
-+/* fibration plugins */
-+fibration_plugin fibration_plugins[LAST_FIBRATION_ID] = {
-+ [FIBRATION_LEXICOGRAPHIC] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_LEXICOGRAPHIC,
-+ .pops = &fibration_plugin_ops,
-+ .label = "lexicographic",
-+ .desc = "no fibration",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_trivial
-+ },
-+ [FIBRATION_DOT_O] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_DOT_O,
-+ .pops = &fibration_plugin_ops,
-+ .label = "dot-o",
-+ .desc = "fibrate .o files separately",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_dot_o
-+ },
-+ [FIBRATION_EXT_1] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_EXT_1,
-+ .pops = &fibration_plugin_ops,
-+ .label = "ext-1",
-+ .desc = "fibrate file by single character extension",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_ext_1
-+ },
-+ [FIBRATION_EXT_3] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_EXT_3,
-+ .pops = &fibration_plugin_ops,
-+ .label = "ext-3",
-+ .desc = "fibrate file by three character extension",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_ext_3
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/fibration.h linux-2.6.20/fs/reiser4/plugin/fibration.h
---- linux-2.6.20.orig/fs/reiser4/plugin/fibration.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/fibration.h 2007-05-06 14:50:43.762995722 +0400
-@@ -0,0 +1,37 @@
-+/* Copyright 2004 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Fibration plugin used by hashed directory plugin to segment content
-+ * of directory. See fs/reiser4/plugin/fibration.c for more on this. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_FIBRATION_H__ )
-+#define __FS_REISER4_PLUGIN_FIBRATION_H__
-+
-+#include "plugin_header.h"
-+
-+typedef struct fibration_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+
-+ __u64(*fibre) (const struct inode * dir, const char *name, int len);
-+} fibration_plugin;
-+
-+typedef enum {
-+ FIBRATION_LEXICOGRAPHIC,
-+ FIBRATION_DOT_O,
-+ FIBRATION_EXT_1,
-+ FIBRATION_EXT_3,
-+ LAST_FIBRATION_ID
-+} reiser4_fibration_id;
-+
-+/* __FS_REISER4_PLUGIN_FIBRATION_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/cryptcompress.c linux-2.6.20/fs/reiser4/plugin/file/cryptcompress.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/cryptcompress.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/cryptcompress.c 2007-05-06 14:50:43.770998222 +0400
-@@ -0,0 +1,3760 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* This file contains implementations of inode/file/address_space/file plugin
-+ * operations specific for cryptcompress file plugin which manages files with
-+ * compressed and encrypted bodies. "Cryptcompress file" is built of items of
-+ * CTAIL_ID (see http://www.namesys.com/cryptcompress_design.html for details).
-+ */
-+
-+#include "../../inode.h"
-+#include "../cluster.h"
-+#include "../object.h"
-+#include "../../tree_walk.h"
-+#include "cryptcompress.h"
-+
-+#include <asm/scatterlist.h>
-+#include <linux/pagevec.h>
-+#include <asm/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/writeback.h>
-+#include <linux/random.h>
-+
-+/* get cryptcompress specific portion of inode */
-+cryptcompress_info_t *cryptcompress_inode_data(const struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->file_plugin_data.cryptcompress_info;
-+}
-+
-+/* plugin->u.file.init_inode_data */
-+void init_inode_data_cryptcompress(struct inode *inode,
-+ reiser4_object_create_data * crd,
-+ int create)
-+{
-+ cryptcompress_info_t *data;
-+
-+ data = cryptcompress_inode_data(inode);
-+ assert("edward-685", data != NULL);
-+
-+ memset(data, 0, sizeof(*data));
-+
-+ turn_on_compression(data);
-+ set_lattice_factor(data, MIN_LATTICE_FACTOR);
-+ init_inode_ordering(inode, crd, create);
-+}
-+
-+#if REISER4_DEBUG
-+int cryptcompress_inode_ok(struct inode *inode)
-+{
-+ if (!(reiser4_inode_data(inode)->plugin_mask & (1 << PSET_FILE)))
-+ return 0;
-+ if (!cluster_shift_ok(inode_cluster_shift(inode)))
-+ return 0;
-+ return 1;
-+}
-+#endif
-+
-+/* The following is a part of reiser4 cipher key manager
-+ which is called when opening/creating a cryptcompress file */
-+
-+/* get/set cipher key info */
-+crypto_stat_t * inode_crypto_stat (struct inode * inode)
-+{
-+ assert("edward-90", inode != NULL);
-+ assert("edward-91", reiser4_inode_data(inode) != NULL);
-+ return cryptcompress_inode_data(inode)->crypt;
-+}
-+
-+static void set_inode_crypto_stat (struct inode * inode, crypto_stat_t * stat)
-+{
-+ cryptcompress_inode_data(inode)->crypt = stat;
-+}
-+
-+/* allocate a cipher key info */
-+crypto_stat_t * reiser4_alloc_crypto_stat (struct inode * inode)
-+{
-+ crypto_stat_t * info;
-+ int fipsize;
-+
-+ info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
-+ if (!info)
-+ return ERR_PTR(-ENOMEM);
-+ memset(info, 0, sizeof (*info));
-+ fipsize = inode_digest_plugin(inode)->fipsize;
-+ info->keyid = kmalloc(fipsize, reiser4_ctx_gfp_mask_get());
-+ if (!info->keyid) {
-+ kfree(info);
-+ return ERR_PTR(-ENOMEM);
-+ }
-+ info->host = inode;
-+ return info;
-+}
-+
-+#if 0
-+/* allocate/free low-level info for cipher and digest
-+ transforms */
-+static int alloc_crypto_tfms(crypto_stat_t * info)
-+{
-+ struct crypto_blkcipher * ctfm = NULL;
-+ struct crypto_hash * dtfm = NULL;
-+ cipher_plugin * cplug = inode_cipher_plugin(info->host);
-+ digest_plugin * dplug = inode_digest_plugin(info->host);
-+
-+ if (cplug->alloc) {
-+ ctfm = cplug->alloc();
-+ if (IS_ERR(ctfm)) {
-+ warning("edward-1364",
-+ "Can not allocate info for %s\n",
-+ cplug->h.desc);
-+ return RETERR(PTR_ERR(ctfm));
-+ }
-+ }
-+ info_set_cipher(info, ctfm);
-+ if (dplug->alloc) {
-+ dtfm = dplug->alloc();
-+ if (IS_ERR(dtfm)) {
-+ warning("edward-1365",
-+ "Can not allocate info for %s\n",
-+ dplug->h.desc);
-+ goto unhappy_with_digest;
-+ }
-+ }
-+ info_set_digest(info, dtfm);
-+ return 0;
-+ unhappy_with_digest:
-+ if (cplug->free) {
-+ cplug->free(ctfm);
-+ info_set_cipher(info, NULL);
-+ }
-+ return RETERR(PTR_ERR(dtfm));
-+}
-+#endif
-+
-+static void
-+free_crypto_tfms(crypto_stat_t * info)
-+{
-+ assert("edward-1366", info != NULL);
-+ if (!info_get_cipher(info)) {
-+ assert("edward-1601", !info_get_digest(info));
-+ return;
-+ }
-+ inode_cipher_plugin(info->host)->free(info_get_cipher(info));
-+ info_set_cipher(info, NULL);
-+ inode_digest_plugin(info->host)->free(info_get_digest(info));
-+ info_set_digest(info, NULL);
-+ return;
-+}
-+
-+#if 0
-+/* create a key fingerprint for disk stat-data */
-+static int create_keyid (crypto_stat_t * info, crypto_data_t * data)
-+{
-+ int ret = -ENOMEM;
-+ size_t blk, pad;
-+ __u8 * dmem;
-+ __u8 * cmem;
-+ struct hash_desc ddesc;
-+ struct blkcipher_desc cdesc;
-+ struct scatterlist sg;
-+
-+ assert("edward-1367", info != NULL);
-+ assert("edward-1368", info->keyid != NULL);
-+
-+ ddesc.tfm = info_get_digest(info);
-+ ddesc.flags = 0;
-+ cdesc.tfm = info_get_cipher(info);
-+ cdesc.flags = 0;
-+
-+ dmem = kmalloc((size_t)crypto_hash_digestsize(ddesc.tfm),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!dmem)
-+ goto exit1;
-+
-+ blk = crypto_blkcipher_blocksize(cdesc.tfm);
-+
-+ pad = data->keyid_size % blk;
-+ pad = (pad ? blk - pad : 0);
-+
-+ cmem = kmalloc((size_t)data->keyid_size + pad,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!cmem)
-+ goto exit2;
-+ memcpy(cmem, data->keyid, data->keyid_size);
-+ memset(cmem + data->keyid_size, 0, pad);
-+
-+ sg.page = virt_to_page(cmem);
-+ sg.offset = offset_in_page(cmem);
-+ sg.length = data->keyid_size + pad;
-+
-+ ret = crypto_blkcipher_encrypt(&cdesc, &sg, &sg,
-+ data->keyid_size + pad);
-+ if (ret) {
-+ warning("edward-1369",
-+ "encryption failed flags=%x\n", cdesc.flags);
-+ goto exit3;
-+ }
-+ ret = crypto_hash_digest(&ddesc, &sg, sg.length, dmem);
-+ if (ret) {
-+ warning("edward-1602",
-+ "digest failed flags=%x\n", ddesc.flags);
-+ goto exit3;
-+ }
-+ memcpy(info->keyid, dmem, inode_digest_plugin(info->host)->fipsize);
-+ exit3:
-+ kfree(cmem);
-+ exit2:
-+ kfree(dmem);
-+ exit1:
-+ return ret;
-+}
-+#endif
-+
-+static void destroy_keyid(crypto_stat_t * info)
-+{
-+ assert("edward-1370", info != NULL);
-+ assert("edward-1371", info->keyid != NULL);
-+ kfree(info->keyid);
-+ return;
-+}
-+
-+static void __free_crypto_stat (struct inode * inode)
-+{
-+ crypto_stat_t * info = inode_crypto_stat(inode);
-+ assert("edward-1372", info != NULL);
-+
-+ free_crypto_tfms(info);
-+ destroy_keyid(info);
-+ kfree(info);
-+}
-+
-+#if 0
-+static void instantiate_crypto_stat(crypto_stat_t * info)
-+{
-+ assert("edward-1373", info != NULL);
-+ assert("edward-1374", info->inst == 0);
-+ info->inst = 1;
-+}
-+#endif
-+
-+static void uninstantiate_crypto_stat(crypto_stat_t * info)
-+{
-+ assert("edward-1375", info != NULL);
-+ info->inst = 0;
-+}
-+
-+static int crypto_stat_instantiated(crypto_stat_t * info)
-+{
-+ return info->inst;
-+}
-+
-+static int inode_has_cipher_key(struct inode * inode)
-+{
-+ assert("edward-1376", inode != NULL);
-+ return inode_crypto_stat(inode) &&
-+ crypto_stat_instantiated(inode_crypto_stat(inode));
-+}
-+
-+static void free_crypto_stat (struct inode * inode)
-+{
-+ uninstantiate_crypto_stat(inode_crypto_stat(inode));
-+ __free_crypto_stat(inode);
-+}
-+
-+static int need_cipher(struct inode * inode)
-+{
-+ return inode_cipher_plugin(inode) !=
-+ cipher_plugin_by_id(NONE_CIPHER_ID);
-+}
-+
-+/* Create a crypto-stat and attach result to the @object.
-+ If success is returned, then low-level cipher info contains
-+ an instantiated key */
-+#if 0
-+crypto_stat_t *
-+create_crypto_stat(struct inode * object,
-+ crypto_data_t * data /* this contains a (uninstantiated)
-+ cipher key imported from user
-+ space */)
-+{
-+ int ret;
-+ crypto_stat_t * info;
-+
-+ assert("edward-1377", data != NULL);
-+ assert("edward-1378", need_cipher(object));
-+
-+ if (inode_file_plugin(object) !=
-+ file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID))
-+ return ERR_PTR(-EINVAL);
-+
-+ info = reiser4_alloc_crypto_stat(object);
-+ if (IS_ERR(info))
-+ return info;
-+ ret = alloc_crypto_tfms(info);
-+ if (ret)
-+ goto err;
-+ /* instantiating a key */
-+ ret = crypto_blkcipher_setkey(info_get_cipher(info),
-+ data->key,
-+ data->keysize);
-+ if (ret) {
-+ warning("edward-1379",
-+ "setkey failed flags=%x\n",
-+ crypto_blkcipher_get_flags(info_get_cipher(info)));
-+ goto err;
-+ }
-+ info->keysize = data->keysize;
-+ ret = create_keyid(info, data);
-+ if (ret)
-+ goto err;
-+ instantiate_crypto_stat(info);
-+ return info;
-+ err:
-+ __free_crypto_stat(object);
-+ return ERR_PTR(ret);
-+}
-+#endif
-+
-+/* increment/decrement a load counter when
-+ attaching/detaching the crypto-stat to any object */
-+static void load_crypto_stat(crypto_stat_t * info)
-+{
-+ assert("edward-1380", info != NULL);
-+ inc_keyload_count(info);
-+}
-+
-+static void unload_crypto_stat(struct inode * inode)
-+{
-+ crypto_stat_t * info = inode_crypto_stat(inode);
-+ assert("edward-1381", info->keyload_count > 0);
-+
-+ dec_keyload_count(inode_crypto_stat(inode));
-+ if (info->keyload_count == 0)
-+ /* final release */
-+ free_crypto_stat(inode);
-+}
-+
-+/* attach/detach an existing crypto-stat */
-+void reiser4_attach_crypto_stat(struct inode * inode, crypto_stat_t * info)
-+{
-+ assert("edward-1382", inode != NULL);
-+ assert("edward-1383", info != NULL);
-+ assert("edward-1384", inode_crypto_stat(inode) == NULL);
-+
-+ set_inode_crypto_stat(inode, info);
-+ load_crypto_stat(info);
-+}
-+
-+/* returns true, if crypto stat can be attached to the @host */
-+#if REISER4_DEBUG
-+static int host_allows_crypto_stat(struct inode * host)
-+{
-+ int ret;
-+ file_plugin * fplug = inode_file_plugin(host);
-+
-+ switch (fplug->h.id) {
-+ case CRYPTCOMPRESS_FILE_PLUGIN_ID:
-+ ret = 1;
-+ break;
-+ default:
-+ ret = 0;
-+ }
-+ return ret;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+static void reiser4_detach_crypto_stat(struct inode * inode)
-+{
-+ assert("edward-1385", inode != NULL);
-+ assert("edward-1386", host_allows_crypto_stat(inode));
-+
-+ if (inode_crypto_stat(inode))
-+ unload_crypto_stat(inode);
-+ set_inode_crypto_stat(inode, NULL);
-+}
-+
-+#if 0
-+
-+/* compare fingerprints of @child and @parent */
-+static int keyid_eq(crypto_stat_t * child, crypto_stat_t * parent)
-+{
-+ return !memcmp(child->keyid, parent->keyid, info_digest_plugin(parent)->fipsize);
-+}
-+
-+/* check if a crypto-stat (which is bound to @parent) can be inherited */
-+int can_inherit_crypto_cryptcompress(struct inode *child, struct inode *parent)
-+{
-+ if (!need_cipher(child))
-+ return 0;
-+ /* the child is created */
-+ if (!inode_crypto_stat(child))
-+ return 1;
-+ /* the child is looked up */
-+ if (!inode_crypto_stat(parent))
-+ return 0;
-+ return (inode_cipher_plugin(child) == inode_cipher_plugin(parent) &&
-+ inode_digest_plugin(child) == inode_digest_plugin(parent) &&
-+ inode_crypto_stat(child)->keysize == inode_crypto_stat(parent)->keysize &&
-+ keyid_eq(inode_crypto_stat(child), inode_crypto_stat(parent)));
-+}
-+#endif
-+
-+/* helper functions for ->create() method of the cryptcompress plugin */
-+static int inode_set_crypto(struct inode * object)
-+{
-+ reiser4_inode * info;
-+ if (!inode_crypto_stat(object)) {
-+ if (need_cipher(object))
-+ return RETERR(-EINVAL);
-+ /* the file is not to be encrypted */
-+ return 0;
-+ }
-+ info = reiser4_inode_data(object);
-+ info->extmask |= (1 << CRYPTO_STAT);
-+ return 0;
-+}
-+
-+static int inode_init_compression(struct inode * object)
-+{
-+ int result = 0;
-+ assert("edward-1461", object != NULL);
-+ if (inode_compression_plugin(object)->init)
-+ result = inode_compression_plugin(object)->init();
-+ return result;
-+}
-+
-+static int inode_check_cluster(struct inode * object)
-+{
-+ assert("edward-696", object != NULL);
-+
-+ if (inode_cluster_size(object) < PAGE_CACHE_SIZE) {
-+ warning("edward-1320", "Can not support '%s' "
-+ "logical clusters (less then page size)",
-+ inode_cluster_plugin(object)->h.label);
-+ return RETERR(-EINVAL);
-+ }
-+ return 0;
-+}
-+
-+/* ->destroy_inode() method of the cryptcompress plugin */
-+void destroy_inode_cryptcompress(struct inode * inode)
-+{
-+ assert("edward-23", cryptcompress_inode_data(inode)->pgcount == 0);
-+ reiser4_detach_crypto_stat(inode);
-+ return;
-+}
-+
-+/* ->create() method of the cryptcompress plugin
-+
-+. install plugins
-+. attach crypto info if specified
-+. attach compression info if specified
-+. attach cluster info
-+*/
-+int
-+create_cryptcompress(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data * data)
-+{
-+ int result;
-+ reiser4_inode *info;
-+
-+ assert("edward-23", object != NULL);
-+ assert("edward-24", parent != NULL);
-+ assert("edward-30", data != NULL);
-+ assert("edward-26", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+ assert("edward-27", data->id == CRYPTCOMPRESS_FILE_PLUGIN_ID);
-+
-+ info = reiser4_inode_data(object);
-+
-+ assert("edward-29", info != NULL);
-+
-+ /* set file bit */
-+ info->plugin_mask |= (1 << PSET_FILE);
-+
-+ /* set crypto */
-+ result = inode_set_crypto(object);
-+ if (result)
-+ goto error;
-+ /* set compression */
-+ result = inode_init_compression(object);
-+ if (result)
-+ goto error;
-+ /* set cluster */
-+ result = inode_check_cluster(object);
-+ if (result)
-+ goto error;
-+
-+ /* save everything in disk stat-data */
-+ result = write_sd_by_inode_common(object);
-+ if (!result)
-+ return 0;
-+ error:
-+ reiser4_detach_crypto_stat(object);
-+ return result;
-+}
-+
-+/* ->open() method of the cryptcompress plugin */
-+int open_object_cryptcompress(struct inode * inode, struct file * file)
-+{
-+ int result;
-+ struct inode * parent;
-+
-+ assert("edward-1394", inode != NULL);
-+ assert("edward-1395", file != NULL);
-+ assert("edward-1396", file != NULL);
-+ assert("edward-1397", file->f_dentry->d_inode == inode);
-+ assert("edward-1398", file->f_dentry->d_parent != NULL);
-+ assert("edward-1399", file->f_dentry->d_parent->d_inode != NULL);
-+ assert("edward-698",
-+ inode_file_plugin(inode) ==
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+ result = inode_check_cluster(inode);
-+ if (result)
-+ return result;
-+ result = inode_init_compression(inode);
-+ if (result)
-+ return result;
-+ if (!need_cipher(inode))
-+ /* the file is not to be ciphered */
-+ return 0;
-+ parent = file->f_dentry->d_parent->d_inode;
-+ if (!inode_has_cipher_key(inode))
-+ return RETERR(-EINVAL);
-+ return 0;
-+}
-+
-+/* returns a blocksize, the attribute of a cipher algorithm */
-+static unsigned int
-+cipher_blocksize(struct inode * inode)
-+{
-+ assert("edward-758", need_cipher(inode));
-+ assert("edward-1400", inode_crypto_stat(inode) != NULL);
-+ return crypto_blkcipher_blocksize
-+ (info_get_cipher(inode_crypto_stat(inode)));
-+}
-+
-+/* returns offset translated by scale factor of the crypto-algorithm */
-+static loff_t inode_scaled_offset (struct inode * inode,
-+ const loff_t src_off /* input offset */)
-+{
-+ assert("edward-97", inode != NULL);
-+
-+ if (!need_cipher(inode) ||
-+ src_off == get_key_offset(reiser4_min_key()) ||
-+ src_off == get_key_offset(reiser4_max_key()))
-+ return src_off;
-+
-+ return inode_cipher_plugin(inode)->scale(inode,
-+ cipher_blocksize(inode),
-+ src_off);
-+}
-+
-+/* returns disk cluster size */
-+size_t inode_scaled_cluster_size(struct inode * inode)
-+{
-+ assert("edward-110", inode != NULL);
-+
-+ return inode_scaled_offset(inode, inode_cluster_size(inode));
-+}
-+
-+static int new_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ return (clust_to_off(clust->index, inode) >= inode->i_size);
-+}
-+
-+/* set number of cluster pages */
-+static void set_cluster_nrpages(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ reiser4_slide_t *win;
-+
-+ assert("edward-180", clust != NULL);
-+ assert("edward-1040", inode != NULL);
-+
-+ win = clust->win;
-+ if (!win) {
-+ /* NOTE-EDWARD: i_size should be protected */
-+ clust->nr_pages =
-+ count_to_nrpages(fsize_to_count(clust, inode));
-+ return;
-+ }
-+ assert("edward-1176", clust->op != PCL_UNKNOWN);
-+ assert("edward-1064", win->off + win->count + win->delta != 0);
-+
-+ if (win->stat == HOLE_WINDOW &&
-+ win->off == 0 && win->count == inode_cluster_size(inode)) {
-+ /* special case: we start write hole from fake cluster */
-+ clust->nr_pages = 0;
-+ return;
-+ }
-+ clust->nr_pages =
-+ count_to_nrpages(max_count(win->off + win->count + win->delta,
-+ fsize_to_count(clust, inode)));
-+ return;
-+}
-+
-+/* ->key_by_inode() method of the cryptcompress plugin */
-+/* see plugin/plugin.h for details */
-+int
-+key_by_inode_cryptcompress(struct inode *inode, loff_t off, reiser4_key * key)
-+{
-+ loff_t clust_off;
-+
-+ assert("edward-64", inode != 0);
-+ // assert("edward-112", ergo(off != get_key_offset(reiser4_max_key()), !off_to_cloff(off, inode)));
-+ /* don't come here with other offsets */
-+
-+ clust_off =
-+ (off ==
-+ get_key_offset(reiser4_max_key())? get_key_offset(reiser4_max_key()) :
-+ off_to_clust_to_off(off, inode));
-+
-+ key_by_inode_and_offset_common(inode, 0, key);
-+ set_key_offset(key,
-+ (__u64) (!inode_crypto_stat(inode) ? clust_off :
-+ inode_scaled_offset(inode, clust_off)));
-+ return 0;
-+}
-+
-+/* plugin->flow_by_inode */
-+int
-+flow_by_inode_cryptcompress(struct inode *inode /* file to build flow for */ ,
-+ const char __user *buf /* user level buffer */ ,
-+ int user /* 1 if @buf is of user space, 0 - if it is
-+ kernel space */ ,
-+ loff_t size /* buffer size */ ,
-+ loff_t off /* offset to start io from */ ,
-+ rw_op op /* READ or WRITE */ ,
-+ flow_t * f /* resulting flow */ )
-+{
-+ assert("edward-436", f != NULL);
-+ assert("edward-149", inode != NULL);
-+ assert("edward-150", inode_file_plugin(inode) != NULL);
-+
-+ f->length = size;
-+ memcpy(&f->data, &buf, sizeof(buf));
-+ f->user = user;
-+ f->op = op;
-+
-+ if (op == WRITE_OP && user == 1)
-+ return 0;
-+ return key_by_inode_cryptcompress(inode, off, &f->key);
-+}
-+
-+static int
-+cryptcompress_hint_validate(hint_t * hint, const reiser4_key * key,
-+ znode_lock_mode lock_mode)
-+{
-+ coord_t *coord;
-+
-+ assert("edward-704", hint != NULL);
-+ assert("edward-1089", !hint_is_valid(hint));
-+ assert("edward-706", hint->lh.owner == NULL);
-+
-+ coord = &hint->ext_coord.coord;
-+
-+ if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
-+ /* hint either not set or set by different operation */
-+ return RETERR(-E_REPEAT);
-+
-+ if (get_key_offset(key) != hint->offset)
-+ /* hint is set for different key */
-+ return RETERR(-E_REPEAT);
-+
-+ assert("edward-707", reiser4_schedulable());
-+
-+ return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord,
-+ key, &hint->lh, lock_mode,
-+ ZNODE_LOCK_LOPRI);
-+}
-+
-+/* reserve disk space when writing a logical cluster */
-+static int reserve4cluster(struct inode *inode, reiser4_cluster_t *clust)
-+{
-+ int result = 0;
-+
-+ assert("edward-965", reiser4_schedulable());
-+ assert("edward-439", inode != NULL);
-+ assert("edward-440", clust != NULL);
-+ assert("edward-441", clust->pages != NULL);
-+
-+ if (clust->nr_pages == 0) {
-+ assert("edward-1152", clust->win != NULL);
-+ assert("edward-1153", clust->win->stat == HOLE_WINDOW);
-+ /* don't reserve space for fake disk clusteer */
-+ return 0;
-+ }
-+ assert("edward-442", jprivate(clust->pages[0]) != NULL);
-+
-+ result = reiser4_grab_space_force(estimate_insert_cluster(inode) +
-+ estimate_update_cluster(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ return result;
-+ clust->reserved = 1;
-+ grabbed2cluster_reserved(estimate_insert_cluster(inode) +
-+ estimate_update_cluster(inode));
-+#if REISER4_DEBUG
-+ clust->reserved_prepped = estimate_update_cluster(inode);
-+ clust->reserved_unprepped = estimate_insert_cluster(inode);
-+#endif
-+ /* there can be space grabbed by txnmgr_force_commit_all */
-+ return 0;
-+}
-+
-+/* free reserved disk space if writing a logical cluster fails */
-+static void
-+free_reserved4cluster(struct inode *inode, reiser4_cluster_t * clust, int count)
-+{
-+ assert("edward-967", clust->reserved == 1);
-+
-+ cluster_reserved2free(count);
-+ clust->reserved = 0;
-+}
-+
-+/* The core search procedure of the cryptcompress plugin.
-+ If returned value is not cbk_errored, then current znode is locked */
-+static int find_cluster_item(hint_t * hint,
-+ const reiser4_key * key, /* key of the item we are
-+ looking for */
-+ znode_lock_mode lock_mode /* which lock */ ,
-+ ra_info_t * ra_info, lookup_bias bias, __u32 flags)
-+{
-+ int result;
-+ reiser4_key ikey;
-+ int went_right = 0;
-+ coord_t *coord = &hint->ext_coord.coord;
-+ coord_t orig = *coord;
-+
-+ assert("edward-152", hint != NULL);
-+
-+ if (!hint_is_valid(hint)) {
-+ result = cryptcompress_hint_validate(hint, key, lock_mode);
-+ if (result == -E_REPEAT)
-+ goto traverse_tree;
-+ else if (result) {
-+ assert("edward-1216", 0);
-+ return result;
-+ }
-+ hint_set_valid(hint);
-+ }
-+ assert("edward-709", znode_is_any_locked(coord->node));
-+
-+ /* In-place lookup is going here, it means we just need to
-+ check if next item of the @coord match to the @keyhint) */
-+
-+ if (equal_to_rdk(coord->node, key)) {
-+ result = goto_right_neighbor(coord, &hint->lh);
-+ if (result == -E_NO_NEIGHBOR) {
-+ assert("edward-1217", 0);
-+ return RETERR(-EIO);
-+ }
-+ if (result)
-+ return result;
-+ assert("edward-1218", equal_to_ldk(coord->node, key));
-+ went_right = 1;
-+ } else {
-+ coord->item_pos++;
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ }
-+ result = zload(coord->node);
-+ if (result)
-+ return result;
-+ assert("edward-1219", !node_is_empty(coord->node));
-+
-+ if (!coord_is_existing_item(coord)) {
-+ zrelse(coord->node);
-+ goto not_found;
-+ }
-+ item_key_by_coord(coord, &ikey);
-+ zrelse(coord->node);
-+ if (!keyeq(key, &ikey))
-+ goto not_found;
-+ /* Ok, item is found, update node counts */
-+ if (went_right)
-+ dclust_inc_extension_ncount(hint);
-+ return CBK_COORD_FOUND;
-+
-+ not_found:
-+ assert("edward-1220", coord->item_pos > 0);
-+ //coord->item_pos--;
-+ /* roll back */
-+ *coord = orig;
-+ ON_DEBUG(coord_update_v(coord));
-+ return CBK_COORD_NOTFOUND;
-+
-+ traverse_tree:
-+ assert("edward-713", hint->lh.owner == NULL);
-+ assert("edward-714", reiser4_schedulable());
-+
-+ reiser4_unset_hint(hint);
-+ dclust_init_extension(hint);
-+ coord_init_zero(coord);
-+ result = coord_by_key(current_tree, key, coord, &hint->lh,
-+ lock_mode, bias, LEAF_LEVEL, LEAF_LEVEL,
-+ CBK_UNIQUE | flags, ra_info);
-+ if (cbk_errored(result))
-+ return result;
-+ if(result == CBK_COORD_FOUND)
-+ dclust_inc_extension_ncount(hint);
-+ hint_set_valid(hint);
-+ return result;
-+}
-+
-+/* This function is called by deflate[inflate] manager when
-+ creating a transformed/plain stream to check if we should
-+ create/cut some overhead. If this returns true, then @oh
-+ contains the size of this overhead.
-+ */
-+static int
-+need_cut_or_align(struct inode * inode, reiser4_cluster_t * clust,
-+ rw_op rw, int * oh)
-+{
-+ tfm_cluster_t * tc = &clust->tc;
-+ switch (rw) {
-+ case WRITE_OP: /* estimate align */
-+ *oh = tc->len % cipher_blocksize(inode);
-+ if (*oh != 0)
-+ return 1;
-+ break;
-+ case READ_OP: /* estimate cut */
-+ *oh = *(tfm_output_data(clust) + tc->len - 1);
-+ break;
-+ default:
-+ impossible("edward-1401", "bad option");
-+ }
-+ return (tc->len != tc->lsize);
-+}
-+
-+/* create/cut an overhead of transformed/plain stream */
-+static void
-+align_or_cut_overhead(struct inode * inode, reiser4_cluster_t * clust, rw_op rw)
-+{
-+ int oh;
-+ cipher_plugin * cplug = inode_cipher_plugin(inode);
-+
-+ assert("edward-1402", need_cipher(inode));
-+
-+ if (!need_cut_or_align(inode, clust, rw, &oh))
-+ return;
-+ switch (rw) {
-+ case WRITE_OP: /* do align */
-+ clust->tc.len +=
-+ cplug->align_stream(tfm_input_data(clust) +
-+ clust->tc.len, clust->tc.len,
-+ cipher_blocksize(inode));
-+ *(tfm_input_data(clust) + clust->tc.len - 1) =
-+ cipher_blocksize(inode) - oh;
-+ break;
-+ case READ_OP: /* do cut */
-+ assert("edward-1403", oh <= cipher_blocksize(inode));
-+ clust->tc.len -= oh;
-+ break;
-+ default:
-+ impossible("edward-1404", "bad option");
-+ }
-+ return;
-+}
-+
-+/* the following two functions are to evaluate results
-+ of compression transform */
-+static unsigned
-+max_cipher_overhead(struct inode * inode)
-+{
-+ if (!need_cipher(inode) || !inode_cipher_plugin(inode)->align_stream)
-+ return 0;
-+ return cipher_blocksize(inode);
-+}
-+
-+static int deflate_overhead(struct inode *inode)
-+{
-+ return (inode_compression_plugin(inode)->
-+ checksum ? DC_CHECKSUM_SIZE : 0);
-+}
-+
-+static unsigned deflate_overrun(struct inode * inode, int ilen)
-+{
-+ return coa_overrun(inode_compression_plugin(inode), ilen);
-+}
-+
-+/* Estimating compressibility of a logical cluster by various
-+ policies represented by compression mode plugin.
-+ If this returns false, then compressor won't be called for
-+ the cluster of index @index.
-+*/
-+static int should_compress(tfm_cluster_t * tc, cloff_t index,
-+ struct inode *inode)
-+{
-+ compression_plugin *cplug = inode_compression_plugin(inode);
-+ compression_mode_plugin *mplug = inode_compression_mode_plugin(inode);
-+
-+ assert("edward-1321", tc->len != 0);
-+ assert("edward-1322", cplug != NULL);
-+ assert("edward-1323", mplug != NULL);
-+
-+ return /* estimate by size */
-+ (cplug->min_size_deflate ?
-+ tc->len >= cplug->min_size_deflate() :
-+ 1) &&
-+ /* estimate by compression mode plugin */
-+ (mplug->should_deflate ?
-+ mplug->should_deflate(inode, index) :
-+ 1);
-+}
-+
-+/* Evaluating results of compression transform.
-+ Returns true, if we need to accept this results */
-+static int
-+save_compressed(int size_before, int size_after, struct inode * inode)
-+{
-+ return (size_after + deflate_overhead(inode) +
-+ max_cipher_overhead(inode) < size_before);
-+}
-+
-+/* Guess result of the evaluation above */
-+static int
-+need_inflate(reiser4_cluster_t * clust, struct inode *inode,
-+ int encrypted /* is cluster encrypted */ )
-+{
-+ tfm_cluster_t *tc = &clust->tc;
-+
-+ assert("edward-142", tc != 0);
-+ assert("edward-143", inode != NULL);
-+
-+ return tc->len <
-+ (encrypted ?
-+ inode_scaled_offset(inode, tc->lsize) :
-+ tc->lsize);
-+}
-+
-+/* If results of compression were accepted, then we add
-+ a checksum to catch possible disk cluster corruption.
-+ The following is a format of the data stored in disk clusters:
-+
-+ data This is (transformed) logical cluster.
-+ cipher_overhead This is created by ->align() method
-+ of cipher plugin. May be absent.
-+ checksum (4) This is created by ->checksum method
-+ of compression plugin to check
-+ integrity. May be absent.
-+
-+ Crypto overhead format:
-+
-+ data
-+ control_byte (1) contains aligned overhead size:
-+ 1 <= overhead <= cipher_blksize
-+*/
-+/* Append a checksum at the end of a transformed stream */
-+static void dc_set_checksum(compression_plugin * cplug, tfm_cluster_t * tc)
-+{
-+ __u32 checksum;
-+
-+ assert("edward-1309", tc != NULL);
-+ assert("edward-1310", tc->len > 0);
-+ assert("edward-1311", cplug->checksum != NULL);
-+
-+ checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len);
-+ put_unaligned(cpu_to_le32(checksum),
-+ (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len));
-+ tc->len += (int)DC_CHECKSUM_SIZE;
-+}
-+
-+/* Check a disk cluster checksum.
-+ Returns 0 if checksum is correct, otherwise returns 1 */
-+static int dc_check_checksum(compression_plugin * cplug, tfm_cluster_t * tc)
-+{
-+ assert("edward-1312", tc != NULL);
-+ assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE);
-+ assert("edward-1314", cplug->checksum != NULL);
-+
-+ if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM),
-+ tc->len - (int)DC_CHECKSUM_SIZE) !=
-+ le32_to_cpu(get_unaligned((d32 *)
-+ (tfm_stream_data(tc, INPUT_STREAM)
-+ + tc->len - (int)DC_CHECKSUM_SIZE)))) {
-+ warning("edward-156",
-+ "Bad disk cluster checksum %d, (should be %d) Fsck?\n",
-+ (int)le32_to_cpu
-+ (get_unaligned((d32 *)
-+ (tfm_stream_data(tc, INPUT_STREAM) +
-+ tc->len - (int)DC_CHECKSUM_SIZE))),
-+ (int)cplug->checksum
-+ (tfm_stream_data(tc, INPUT_STREAM),
-+ tc->len - (int)DC_CHECKSUM_SIZE));
-+ return 1;
-+ }
-+ tc->len -= (int)DC_CHECKSUM_SIZE;
-+ return 0;
-+}
-+
-+/* get input/output stream for some transform action */
-+int grab_tfm_stream(struct inode * inode, tfm_cluster_t * tc,
-+ tfm_stream_id id)
-+{
-+ size_t size = inode_scaled_cluster_size(inode);
-+
-+ assert("edward-901", tc != NULL);
-+ assert("edward-1027", inode_compression_plugin(inode) != NULL);
-+
-+ if (cluster_get_tfm_act(tc) == TFMA_WRITE)
-+ size += deflate_overrun(inode, inode_cluster_size(inode));
-+
-+ if (!tfm_stream(tc, id) && id == INPUT_STREAM)
-+ alternate_streams(tc);
-+ if (!tfm_stream(tc, id))
-+ return alloc_tfm_stream(tc, size, id);
-+
-+ assert("edward-902", tfm_stream_is_set(tc, id));
-+
-+ if (tfm_stream_size(tc, id) < size)
-+ return realloc_tfm_stream(tc, size, id);
-+ return 0;
-+}
-+
-+/* Common deflate manager */
-+int reiser4_deflate_cluster(reiser4_cluster_t * clust, struct inode * inode)
-+{
-+ int result = 0;
-+ int compressed = 0;
-+ int encrypted = 0;
-+ tfm_cluster_t * tc = &clust->tc;
-+ compression_plugin * coplug;
-+
-+ assert("edward-401", inode != NULL);
-+ assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM));
-+ assert("edward-1348", cluster_get_tfm_act(tc) == TFMA_WRITE);
-+ assert("edward-498", !tfm_cluster_is_uptodate(tc));
-+
-+ coplug = inode_compression_plugin(inode);
-+ if (should_compress(tc, clust->index, inode)) {
-+ /* try to compress, discard bad results */
-+ __u32 dst_len;
-+ compression_mode_plugin * mplug =
-+ inode_compression_mode_plugin(inode);
-+ assert("edward-602", coplug != NULL);
-+ assert("edward-1423", coplug->compress != NULL);
-+
-+ result = grab_coa(tc, coplug);
-+ if (result) {
-+ warning("edward-1424",
-+ "alloc_coa failed with ret=%d, skipped compression",
-+ result);
-+ goto cipher;
-+ }
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result) {
-+ warning("edward-1425",
-+ "alloc stream failed with ret=%d, skipped compression",
-+ result);
-+ goto cipher;
-+ }
-+ dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
-+ coplug->compress(get_coa(tc, coplug->h.id, tc->act),
-+ tfm_input_data(clust), tc->len,
-+ tfm_output_data(clust), &dst_len);
-+ /* make sure we didn't overwrite extra bytes */
-+ assert("edward-603",
-+ dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
-+
-+ /* evaluate results of compression transform */
-+ if (save_compressed(tc->len, dst_len, inode)) {
-+ /* good result, accept */
-+ tc->len = dst_len;
-+ if (mplug->accept_hook != NULL) {
-+ result = mplug->accept_hook(inode, clust->index);
-+ if (result)
-+ warning("edward-1426",
-+ "accept_hook failed with ret=%d",
-+ result);
-+ }
-+ compressed = 1;
-+ }
-+ else {
-+ /* bad result, discard */
-+#if REISER4_DEBUG
-+ if (cluster_is_complete(clust, inode))
-+ warning("edward-1338",
-+ "incompressible cluster %lu (inode %llu)",
-+ clust->index,
-+ (unsigned long long)get_inode_oid(inode));
-+#endif
-+ if (mplug->discard_hook != NULL &&
-+ cluster_is_complete(clust, inode)) {
-+ result = mplug->discard_hook(inode,
-+ clust->index);
-+ if (result)
-+ warning("edward-1427",
-+ "discard_hook failed with ret=%d",
-+ result);
-+ }
-+ }
-+ }
-+ cipher:
-+ if (need_cipher(inode)) {
-+ cipher_plugin * ciplug;
-+ struct blkcipher_desc desc;
-+ struct scatterlist src;
-+ struct scatterlist dst;
-+
-+ ciplug = inode_cipher_plugin(inode);
-+ desc.tfm = info_get_cipher(inode_crypto_stat(inode));
-+ desc.flags = 0;
-+ if (compressed)
-+ alternate_streams(tc);
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+
-+ align_or_cut_overhead(inode, clust, WRITE_OP);
-+ src.page = virt_to_page(tfm_input_data(clust));
-+ src.offset = offset_in_page(tfm_input_data(clust));
-+ src.length = tc->len;
-+
-+ dst.page = virt_to_page(tfm_output_data(clust));
-+ dst.offset = offset_in_page(tfm_output_data(clust));
-+ dst.length = tc->len;
-+
-+ result = crypto_blkcipher_encrypt(&desc, &dst, &src, tc->len);
-+ if (result) {
-+ warning("edward-1405",
-+ "encryption failed flags=%x\n", desc.flags);
-+ return result;
-+ }
-+ encrypted = 1;
-+ }
-+ if (compressed && coplug->checksum != NULL)
-+ dc_set_checksum(coplug, tc);
-+ if (!compressed && !encrypted)
-+ alternate_streams(tc);
-+ return result;
-+}
-+
-+/* Common inflate manager. */
-+int reiser4_inflate_cluster(reiser4_cluster_t * clust, struct inode * inode)
-+{
-+ int result = 0;
-+ int transformed = 0;
-+ tfm_cluster_t * tc = &clust->tc;
-+ compression_plugin * coplug;
-+
-+ assert("edward-905", inode != NULL);
-+ assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER);
-+ assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM));
-+ assert("edward-1349", tc->act == TFMA_READ);
-+ assert("edward-907", !tfm_cluster_is_uptodate(tc));
-+
-+ /* Handle a checksum (if any) */
-+ coplug = inode_compression_plugin(inode);
-+ if (need_inflate(clust, inode, need_cipher(inode)) &&
-+ coplug->checksum != NULL) {
-+ result = dc_check_checksum(coplug, tc);
-+ if (unlikely(result)) {
-+ warning("edward-1460",
-+ "Inode %llu: disk cluster %lu looks corrupted",
-+ (unsigned long long)get_inode_oid(inode),
-+ clust->index);
-+ return RETERR(-EIO);
-+ }
-+ }
-+ if (need_cipher(inode)) {
-+ cipher_plugin * ciplug;
-+ struct blkcipher_desc desc;
-+ struct scatterlist src;
-+ struct scatterlist dst;
-+
-+ ciplug = inode_cipher_plugin(inode);
-+ desc.tfm = info_get_cipher(inode_crypto_stat(inode));
-+ desc.flags = 0;
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+ assert("edward-909", tfm_cluster_is_set(tc));
-+
-+ src.page = virt_to_page(tfm_input_data(clust));
-+ src.offset = offset_in_page(tfm_input_data(clust));
-+ src.length = tc->len;
-+
-+ dst.page = virt_to_page(tfm_output_data(clust));
-+ dst.offset = offset_in_page(tfm_output_data(clust));
-+ dst.length = tc->len;
-+
-+ result = crypto_blkcipher_decrypt(&desc, &dst, &src, tc->len);
-+ if (result) {
-+ warning("edward-1600", "decrypt failed flags=%x\n",
-+ desc.flags);
-+ return result;
-+ }
-+ align_or_cut_overhead(inode, clust, READ_OP);
-+ transformed = 1;
-+ }
-+ if (need_inflate(clust, inode, 0)) {
-+ unsigned dst_len = inode_cluster_size(inode);
-+ if(transformed)
-+ alternate_streams(tc);
-+
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+ assert("edward-1305", coplug->decompress != NULL);
-+ assert("edward-910", tfm_cluster_is_set(tc));
-+
-+ coplug->decompress(get_coa(tc, coplug->h.id, tc->act),
-+ tfm_input_data(clust), tc->len,
-+ tfm_output_data(clust), &dst_len);
-+ /* check length */
-+ tc->len = dst_len;
-+ assert("edward-157", dst_len == tc->lsize);
-+ transformed = 1;
-+ }
-+ if (!transformed)
-+ alternate_streams(tc);
-+ return result;
-+}
-+
-+/* This is implementation of readpage method of struct
-+ address_space_operations for cryptcompress plugin. */
-+int readpage_cryptcompress(struct file *file, struct page *page)
-+{
-+ reiser4_context *ctx;
-+ reiser4_cluster_t clust;
-+ item_plugin *iplug;
-+ int result;
-+
-+ assert("edward-88", PageLocked(page));
-+ assert("vs-976", !PageUptodate(page));
-+ assert("edward-89", page->mapping && page->mapping->host);
-+
-+ ctx = reiser4_init_context(page->mapping->host->i_sb);
-+ if (IS_ERR(ctx)) {
-+ unlock_page(page);
-+ return PTR_ERR(ctx);
-+ }
-+ assert("edward-113",
-+ ergo(file != NULL,
-+ page->mapping == file->f_dentry->d_inode->i_mapping));
-+
-+ if (PageUptodate(page)) {
-+ warning("edward-1338", "page is already uptodate\n");
-+ unlock_page(page);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+ }
-+ cluster_init_read(&clust, NULL);
-+ clust.file = file;
-+ iplug = item_plugin_by_id(CTAIL_ID);
-+ if (!iplug->s.file.readpage) {
-+ unlock_page(page);
-+ put_cluster_handle(&clust);
-+ reiser4_exit_context(ctx);
-+ return -EINVAL;
-+ }
-+ result = iplug->s.file.readpage(&clust, page);
-+
-+ assert("edward-1459", !PageLocked(page));
-+ assert("edward-64", ergo(result == 0, PageUptodate(page)));
-+ put_cluster_handle(&clust);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* how much pages will be captured */
-+static int cluster_nrpages_to_capture(reiser4_cluster_t * clust)
-+{
-+ switch (clust->op) {
-+ case PCL_APPEND:
-+ return clust->nr_pages;
-+ case PCL_TRUNCATE:
-+ assert("edward-1179", clust->win != NULL);
-+ return count_to_nrpages(clust->win->off + clust->win->count);
-+ default:
-+ impossible("edward-1180", "bad page cluster option");
-+ return 0;
-+ }
-+}
-+
-+static void set_cluster_pages_dirty(reiser4_cluster_t * clust)
-+{
-+ int i;
-+ struct page *pg;
-+ int nrpages = cluster_nrpages_to_capture(clust);
-+
-+ for (i = 0; i < nrpages; i++) {
-+
-+ pg = clust->pages[i];
-+ assert("edward-968", pg != NULL);
-+ lock_page(pg);
-+ assert("edward-1065", PageUptodate(pg));
-+ reiser4_set_page_dirty_internal(pg);
-+ unlock_page(pg);
-+ mark_page_accessed(pg);
-+ }
-+}
-+
-+static void clear_cluster_pages_dirty(reiser4_cluster_t * clust)
-+{
-+ int i;
-+ assert("edward-1275", clust != NULL);
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ assert("edward-1276", clust->pages[i] != NULL);
-+
-+ lock_page(clust->pages[i]);
-+ if (PageDirty(clust->pages[i])) {
-+ assert("edward-1277", PageUptodate(clust->pages[i]));
-+ cancel_dirty_page(clust->pages[i], PAGE_CACHE_SIZE);
-+ }
-+#if REISER4_DEBUG
-+ else
-+ /* Race between flush and write:
-+ some pages became clean when write() (or another
-+ process which modifies data) capture the cluster. */
-+ warning("edward-985", "Page of index %lu (inode %llu)"
-+ " is not dirty\n", clust->pages[i]->index,
-+ (unsigned long long)get_inode_oid(clust->
-+ pages[i]->
-+ mapping->
-+ host));
-+#endif
-+ unlock_page(clust->pages[i]);
-+ }
-+}
-+
-+/* update i_size by window */
-+static void inode_set_new_size(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ loff_t size;
-+ reiser4_slide_t *win;
-+
-+ assert("edward-1181", clust != NULL);
-+ assert("edward-1182", inode != NULL);
-+
-+ win = clust->win;
-+ assert("edward-1183", win != NULL);
-+ assert("edward-1183", win->count != 0);
-+
-+ size = clust_to_off(clust->index, inode) + win->off;
-+
-+ switch (clust->op) {
-+ case PCL_APPEND:
-+ if (size + win->count <= inode->i_size)
-+ /* overwrite only */
-+ return;
-+ size += win->count;
-+ break;
-+ case PCL_TRUNCATE:
-+ break;
-+ default:
-+ impossible("edward-1184", "bad page cluster option");
-+ break;
-+ }
-+ inode_check_scale_nolock(inode, inode->i_size, size);
-+ inode->i_size = size;
-+ return;
-+}
-+
-+/* Check in page cluster modifications.
-+ . Make jnode dirty, if it wasn't;
-+ . Reserve space for a disk cluster update by flush algorithm, if needed;
-+ . Clean up old references (if any).
-+ . Put pages (grabbed in this thread) which will be truncated
-+*/
-+static void
-+make_cluster_jnode_dirty_locked(reiser4_cluster_t * clust, jnode * node,
-+ loff_t * old_isize, struct inode *inode)
-+{
-+ int i;
-+ int old_nrpages;
-+ int new_nrpages = cluster_nrpages_to_capture(clust);
-+
-+ assert("edward-973", new_nrpages > 0);
-+ assert("edward-221", node != NULL);
-+ assert("edward-971", clust->reserved == 1);
-+ assert_spin_locked(&(node->guard));
-+ assert("edward-972", node->page_count <= cluster_nrpages(inode));
-+ assert("edward-1263",
-+ clust->reserved_prepped == estimate_update_cluster(inode));
-+ assert("edward-1264", clust->reserved_unprepped == 0);
-+
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /* someone has modified this cluster, but
-+ the modifications are not committed yet */
-+ old_nrpages =
-+ count_to_nrpages(cnt_to_clcnt(*old_isize,
-+ clust->index, inode));
-+ /* free space which is already reserved */
-+ free_reserved4cluster(inode, clust,
-+ estimate_update_cluster(inode));
-+ /* put old references */
-+ for (i = 0; i < old_nrpages; i++) {
-+ assert("edward-975", clust->pages[i]);
-+ assert("edward-1185", PageUptodate(clust->pages[i]));
-+
-+ page_cache_release(clust->pages[i]);
-+#if REISER4_DEBUG
-+ cryptcompress_inode_data(inode)->pgcount --;
-+#endif
-+ }
-+ } else {
-+ /* no captured pages */
-+ assert("edward-1043", node->page_count == 0);
-+ jnode_make_dirty_locked(node);
-+ clust->reserved = 0;
-+ }
-+ /* put pages that will be truncated (if any) */
-+ for (i = new_nrpages; i < clust->nr_pages; i++) {
-+ assert("edward-1433", clust->pages[i]);
-+ assert("edward-1434", PageUptodate(clust->pages[i]));
-+ page_cache_release(clust->pages[i]);
-+#if REISER4_DEBUG
-+ cryptcompress_inode_data(inode)->pgcount --;
-+#endif
-+ }
-+#if REISER4_DEBUG
-+ clust->reserved_prepped -= estimate_update_cluster(inode);
-+ node->page_count = new_nrpages;
-+#endif
-+ return;
-+}
-+
-+/* This function spawns a transaction and
-+ is called by any thread as a final step in page cluster modification.
-+*/
-+static int try_capture_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ int result = 0;
-+ loff_t old_size;
-+ jnode *node;
-+
-+ assert("edward-1029", clust != NULL);
-+ assert("edward-1030", clust->reserved == 1);
-+ assert("edward-1031", clust->nr_pages != 0);
-+ assert("edward-1032", clust->pages != NULL);
-+ assert("edward-1033", clust->pages[0] != NULL);
-+
-+ node = jprivate(clust->pages[0]);
-+ assert("edward-1035", node != NULL);
-+ assert("edward-1446", jnode_is_cluster_page(node));
-+
-+ spin_lock_jnode(node);
-+
-+ old_size = inode->i_size;
-+ if (clust->win)
-+ inode_set_new_size(clust, inode);
-+
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ if (result)
-+ goto exit;
-+ make_cluster_jnode_dirty_locked(clust, node, &old_size, inode);
-+ exit:
-+ spin_unlock_jnode(node);
-+ jput(node);
-+ return result;
-+}
-+
-+/* Collect unlocked cluster pages for any modifications and attach a jnode.
-+ We allocate only one jnode per cluster, this jnode is binded to the first
-+ page of this cluster, so we have an extra-reference that will exist with
-+ this jnode, other references will be cleaned up in flush time.
-+*/
-+static int
-+grab_cluster_pages_jnode(struct inode *inode, reiser4_cluster_t * clust)
-+{
-+ int i;
-+ int result = 0;
-+ jnode *node = NULL;
-+
-+ assert("edward-182", clust != NULL);
-+ assert("edward-183", clust->pages != NULL);
-+ assert("edward-184", clust->nr_pages <= cluster_nrpages(inode));
-+
-+ if (clust->nr_pages == 0)
-+ return 0;
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+
-+ assert("edward-1044", clust->pages[i] == NULL);
-+
-+ clust->pages[i] =
-+ find_or_create_page(inode->i_mapping,
-+ clust_to_pg(clust->index, inode) + i,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages[i]) {
-+ result = RETERR(-ENOMEM);
-+ break;
-+ }
-+ if (i == 0) {
-+ node = jnode_of_page(clust->pages[i]);
-+ if (IS_ERR(node)) {
-+ result = PTR_ERR(node);
-+ unlock_page(clust->pages[i]);
-+ break;
-+ }
-+ JF_SET(node, JNODE_CLUSTER_PAGE);
-+ unlock_page(clust->pages[i]);
-+ assert("edward-919", node);
-+ continue;
-+ }
-+ unlock_page(clust->pages[i]);
-+ }
-+ if (result) {
-+ while (i)
-+ page_cache_release(clust->pages[--i]);
-+ if (node && !IS_ERR(node))
-+ jput(node);
-+ return result;
-+ }
-+ assert("edward-920", jprivate(clust->pages[0]));
-+#if REISER4_DEBUG
-+ cryptcompress_inode_data(inode)->pgcount += clust->nr_pages;
-+#endif
-+ return 0;
-+}
-+
-+/* Collect unlocked cluster pages only for read (not to modify) */
-+int grab_cluster_pages(struct inode *inode, reiser4_cluster_t * clust)
-+{
-+ int i;
-+ int result = 0;
-+
-+ assert("edward-1428", inode != NULL);
-+ assert("edward-1429", inode->i_mapping != NULL);
-+ assert("edward-787", clust != NULL);
-+ assert("edward-788", clust->pages != NULL);
-+ assert("edward-789", clust->nr_pages != 0);
-+ assert("edward-790", clust->nr_pages <= cluster_nrpages(inode));
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ clust->pages[i] =
-+ find_or_create_page(inode->i_mapping,
-+ clust_to_pg(clust->index, inode) + i,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages[i]) {
-+ result = RETERR(-ENOMEM);
-+ break;
-+ }
-+ unlock_page(clust->pages[i]);
-+ }
-+ if (result)
-+ while (i)
-+ page_cache_release(clust->pages[--i]);
-+ return result;
-+}
-+
-+/* @node might be attached by reiser4_writepage(), not by
-+ cryptcompress plugin code, but emergency flush should
-+ understand that pages of cryptcompress files are not
-+ flushable.
-+*/
-+#if 0
-+int jnode_of_cluster(const jnode * node, struct page * page)
-+{
-+ assert("edward-1339", node != NULL);
-+ assert("edward-1340", page != NULL);
-+ assert("edward-1341", page->mapping != NULL);
-+ assert("edward-1342", page->mapping->host != NULL);
-+ assert("edward-1343",
-+ ergo(jnode_is_unformatted(node),
-+ get_inode_oid(page->mapping->host) ==
-+ node->key.j.objectid));
-+ if (inode_file_plugin(page->mapping->host) ==
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) {
-+#if REISER4_DEBUG
-+ if (!jnode_is_cluster_page(node))
-+ warning("edward-1345",
-+ "inode %llu: cluster page of index %lu became private",
-+ (unsigned long long)get_inode_oid(page->mapping->host),
-+ page->index);
-+#endif
-+ return 1;
-+ }
-+ return 0;
-+}
-+#endif /* 0 */
-+
-+/* put cluster pages */
-+void reiser4_release_cluster_pages(reiser4_cluster_t * clust)
-+{
-+ int i;
-+
-+ assert("edward-447", clust != NULL);
-+ for (i = 0; i < clust->nr_pages; i++) {
-+
-+ assert("edward-449", clust->pages[i] != NULL);
-+
-+ page_cache_release(clust->pages[i]);
-+ }
-+}
-+
-+/* this is called when something is failed */
-+static void reiser4_release_cluster_pages_and_jnode(reiser4_cluster_t * clust)
-+{
-+ jnode *node;
-+
-+ assert("edward-445", clust != NULL);
-+ assert("edward-922", clust->pages != NULL);
-+ assert("edward-446", clust->pages[0] != NULL);
-+
-+ node = jprivate(clust->pages[0]);
-+
-+ assert("edward-447", node != NULL);
-+
-+ reiser4_release_cluster_pages(clust);
-+ jput(node);
-+}
-+
-+#if REISER4_DEBUG
-+static int window_ok(reiser4_slide_t * win, struct inode *inode)
-+{
-+ assert("edward-1115", win != NULL);
-+ assert("edward-1116", ergo(win->delta, win->stat == HOLE_WINDOW));
-+
-+ return (win->off != inode_cluster_size(inode)) &&
-+ (win->off + win->count + win->delta <= inode_cluster_size(inode));
-+}
-+
-+static int cluster_ok(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ assert("edward-279", clust != NULL);
-+
-+ if (!clust->pages)
-+ return 0;
-+ return (clust->win ? window_ok(clust->win, inode) : 1);
-+}
-+#endif
-+
-+/* guess next window stat */
-+static inline window_stat next_window_stat(reiser4_slide_t * win)
-+{
-+ assert("edward-1130", win != NULL);
-+ return ((win->stat == HOLE_WINDOW && win->delta == 0) ?
-+ HOLE_WINDOW : DATA_WINDOW);
-+}
-+
-+/* guess next cluster index and window params */
-+static void
-+update_cluster(struct inode *inode, reiser4_cluster_t * clust, loff_t file_off,
-+ loff_t to_file)
-+{
-+ reiser4_slide_t *win;
-+
-+ assert("edward-185", clust != NULL);
-+ assert("edward-438", clust->pages != NULL);
-+ assert("edward-281", cluster_ok(clust, inode));
-+
-+ win = clust->win;
-+ if (!win)
-+ return;
-+
-+ switch (win->stat) {
-+ case DATA_WINDOW:
-+ /* increment window position */
-+ clust->index++;
-+ win->stat = DATA_WINDOW;
-+ win->off = 0;
-+ win->count = min_count(inode_cluster_size(inode), to_file);
-+ break;
-+ case HOLE_WINDOW:
-+ switch (next_window_stat(win)) {
-+ case HOLE_WINDOW:
-+ /* set window to fit the offset we start write from */
-+ clust->index = off_to_clust(file_off, inode);
-+ win->stat = HOLE_WINDOW;
-+ win->off = 0;
-+ win->count = off_to_cloff(file_off, inode);
-+ win->delta =
-+ min_count(inode_cluster_size(inode) - win->count,
-+ to_file);
-+ break;
-+ case DATA_WINDOW:
-+ /* do not move the window, just change its state,
-+ off+count+delta=inv */
-+ win->stat = DATA_WINDOW;
-+ win->off = win->off + win->count;
-+ win->count = win->delta;
-+ win->delta = 0;
-+ break;
-+ default:
-+ impossible("edward-282", "wrong next window state");
-+ }
-+ break;
-+ default:
-+ impossible("edward-283", "wrong current window state");
-+ }
-+ assert("edward-1068", cluster_ok(clust, inode));
-+}
-+
-+static int update_sd_cryptcompress(struct inode *inode)
-+{
-+ int result = 0;
-+
-+ assert("edward-978", reiser4_schedulable());
-+
-+ result = reiser4_grab_space_force( /* one for stat data update */
-+ estimate_update_common(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ return result;
-+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-+ result = reiser4_update_sd(inode);
-+
-+ return result;
-+}
-+
-+/* NOTE-Edward: this is too similar to reiser4/txnmgr.c:uncapture_jnode() */
-+static void uncapture_cluster_jnode(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert_spin_locked(&(node->guard));
-+
-+ /*jnode_make_clean(node); */
-+ atom = jnode_get_atom(node);
-+ if (atom == NULL) {
-+ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+
-+ reiser4_uncapture_block(node);
-+ spin_unlock_atom(atom);
-+ jput(node);
-+}
-+
-+static void forget_cluster_pages(struct page **pages, int nr)
-+{
-+ int i;
-+ for (i = 0; i < nr; i++) {
-+
-+ assert("edward-1045", pages[i] != NULL);
-+ page_cache_release(pages[i]);
-+ }
-+}
-+
-+/* Check out last modifications we are about to commit,
-+ and prepare input stream for transform operations.
-+*/
-+int
-+flush_cluster_pages(reiser4_cluster_t * clust, jnode * node,
-+ struct inode *inode)
-+{
-+ int result = 0;
-+ int i;
-+ int nr_pages = 0;
-+ tfm_cluster_t *tc = &clust->tc;
-+#if REISER4_DEBUG
-+ int node_pgcount;
-+#endif
-+ assert("edward-980", node != NULL);
-+ assert("edward-236", inode != NULL);
-+ assert("edward-237", clust != NULL);
-+ assert("edward-240", !clust->win);
-+ assert("edward-241", reiser4_schedulable());
-+ assert("edward-718", cryptcompress_inode_ok(inode));
-+
-+ result = grab_tfm_stream(inode, tc, INPUT_STREAM);
-+ if (result) {
-+ warning("edward-1430",
-+ "alloc stream failed with ret=%d", result);
-+ return result;
-+ }
-+ spin_lock_jnode(node);
-+#if REISER4_DEBUG
-+ node_pgcount = node->page_count;
-+#endif
-+ if (!JF_ISSET(node, JNODE_DIRTY)) {
-+ /* race with another flush */
-+#if REISER4_DEBUG
-+ assert("edward-981", node_pgcount == 0);
-+ warning("edward-982", "flush_cluster_pages: jnode is not dirty "
-+ "clust %lu, inode %llu\n",
-+ clust->index, (unsigned long long)get_inode_oid(inode));
-+#endif
-+ spin_unlock_jnode(node);
-+ return RETERR(-E_REPEAT);
-+ }
-+ /* Check out a size of logical cluster and
-+ set a number of cluster pages to commit. */
-+ tc->len = tc->lsize = fsize_to_count(clust, inode);
-+ clust->nr_pages = count_to_nrpages(tc->len);
-+
-+#if REISER4_DEBUG
-+ node->page_count = 0;
-+#endif
-+ cluster_reserved2grabbed(estimate_update_cluster(inode));
-+ uncapture_cluster_jnode(node);
-+
-+ assert("edward-1224", reiser4_schedulable());
-+ /* Check out page cluster for commit */
-+ nr_pages =
-+ find_get_pages(inode->i_mapping, clust_to_pg(clust->index, inode),
-+ clust->nr_pages, clust->pages);
-+ if (nr_pages != clust->nr_pages)
-+ goto checkout_failed;
-+
-+ /* Try to construct input stream from the checked out pages */
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ char *data;
-+
-+ assert("edward-242", clust->pages[i] != NULL);
-+ if (clust->pages[i]->index !=
-+ clust_to_pg(clust->index, inode) + i)
-+ goto checkout_failed;
-+ BUG_ON(!PageUptodate(clust->pages[i]));
-+
-+ /* flush the page into input transform stream */
-+ lock_page(clust->pages[i]);
-+ data = kmap(clust->pages[i]);
-+
-+ assert("edward-986", cnt_to_pgcnt(tc->len, i) != 0);
-+
-+ memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
-+ data, cnt_to_pgcnt(tc->len, i));
-+ kunmap(clust->pages[i]);
-+ unlock_page(clust->pages[i]);
-+ }
-+ /* page cluster flushed successfully */
-+
-+ clear_cluster_pages_dirty(clust);
-+ reiser4_release_cluster_pages(clust);
-+#if REISER4_DEBUG
-+ cryptcompress_inode_data(inode)->pgcount -= clust->nr_pages;
-+#endif
-+ goto out;
-+ checkout_failed:
-+#if REISER4_DEBUG
-+ assert("edward-1282", node_pgcount == 0);
-+ warning("edward-1435", "Inode %llu : checkout page cluster"
-+ "of index %lu failed\n",
-+ (unsigned long long)get_inode_oid(inode), clust->index);
-+#endif /* REISER4_DEBUG */
-+ result = RETERR(-E_REPEAT);
-+ out:
-+ /* put pages that were found here */
-+ forget_cluster_pages(clust->pages, nr_pages);
-+ return result;
-+}
-+
-+/* set hint for the cluster of the index @index */
-+static void set_hint_cluster(struct inode *inode, hint_t * hint,
-+ cloff_t index, znode_lock_mode mode)
-+{
-+ reiser4_key key;
-+ assert("edward-722", cryptcompress_inode_ok(inode));
-+ assert("edward-723",
-+ inode_file_plugin(inode) ==
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+
-+ inode_file_plugin(inode)->key_by_inode(inode,
-+ clust_to_off(index, inode),
-+ &key);
-+
-+ reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, &key);
-+ hint->offset = get_key_offset(&key);
-+ hint->mode = mode;
-+}
-+
-+void invalidate_hint_cluster(reiser4_cluster_t * clust)
-+{
-+ assert("edward-1291", clust != NULL);
-+ assert("edward-1292", clust->hint != NULL);
-+
-+ done_lh(&clust->hint->lh);
-+ hint_clr_valid(clust->hint);
-+}
-+
-+void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode,
-+ znode_lock_mode mode)
-+{
-+ assert("edward-1286", clust != NULL);
-+ assert("edward-1287", clust->hint != NULL);
-+
-+ set_hint_cluster(inode, clust->hint, clust->index + 1, mode);
-+ invalidate_hint_cluster(clust);
-+}
-+
-+static int
-+balance_dirty_page_cluster(reiser4_cluster_t * clust, struct inode *inode,
-+ loff_t off, loff_t to_file)
-+{
-+ int result;
-+
-+ assert("edward-724", inode != NULL);
-+ assert("edward-725", cryptcompress_inode_ok(inode));
-+
-+ /* set next window params */
-+ update_cluster(inode, clust, off, to_file);
-+
-+ result = update_sd_cryptcompress(inode);
-+ if (result)
-+ return result;
-+ assert("edward-726", clust->hint->lh.owner == NULL);
-+
-+ reiser4_throttle_write(inode);
-+ return 0;
-+}
-+
-+/* set zeroes to the cluster, update it, and maybe, try to capture its pages */
-+static int
-+write_hole(struct inode *inode, reiser4_cluster_t * clust, loff_t file_off,
-+ loff_t to_file)
-+{
-+ char *data;
-+ int result = 0;
-+ unsigned cl_off, cl_count = 0;
-+ unsigned to_pg, pg_off;
-+ reiser4_slide_t *win;
-+
-+ assert("edward-190", clust != NULL);
-+ assert("edward-1069", clust->win != NULL);
-+ assert("edward-191", inode != NULL);
-+ assert("edward-727", cryptcompress_inode_ok(inode));
-+ assert("edward-1171", clust->dstat != INVAL_DISK_CLUSTER);
-+ assert("edward-1154",
-+ ergo(clust->dstat != FAKE_DISK_CLUSTER, clust->reserved == 1));
-+
-+ win = clust->win;
-+
-+ assert("edward-1070", win != NULL);
-+ assert("edward-201", win->stat == HOLE_WINDOW);
-+ assert("edward-192", cluster_ok(clust, inode));
-+
-+ if (win->off == 0 && win->count == inode_cluster_size(inode)) {
-+ /* the hole will be represented by fake disk cluster */
-+ update_cluster(inode, clust, file_off, to_file);
-+ return 0;
-+ }
-+ cl_count = win->count; /* number of zeroes to write */
-+ cl_off = win->off;
-+ pg_off = off_to_pgoff(win->off);
-+
-+ while (cl_count) {
-+ struct page *page;
-+ page = clust->pages[off_to_pg(cl_off)];
-+
-+ assert("edward-284", page != NULL);
-+
-+ to_pg = min_count(PAGE_CACHE_SIZE - pg_off, cl_count);
-+ lock_page(page);
-+ data = kmap_atomic(page, KM_USER0);
-+ memset(data + pg_off, 0, to_pg);
-+ flush_dcache_page(page);
-+ kunmap_atomic(data, KM_USER0);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+
-+ cl_off += to_pg;
-+ cl_count -= to_pg;
-+ pg_off = 0;
-+ }
-+ if (!win->delta) {
-+ /* only zeroes, try to capture */
-+
-+ set_cluster_pages_dirty(clust);
-+ result = try_capture_cluster(clust, inode);
-+ if (result)
-+ return result;
-+ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
-+ result =
-+ balance_dirty_page_cluster(clust, inode, file_off, to_file);
-+ } else
-+ update_cluster(inode, clust, file_off, to_file);
-+ return result;
-+}
-+
-+/*
-+ The main disk search procedure for cryptcompress plugins, which
-+ . scans all items of disk cluster with the lock mode @mode
-+ . maybe reads each one (if @read)
-+ . maybe makes its znode dirty (if write lock mode was specified)
-+
-+ NOTE-EDWARD: Callers should handle the case when disk cluster
-+ is incomplete (-EIO)
-+*/
-+int find_disk_cluster(reiser4_cluster_t * clust,
-+ struct inode *inode, int read, znode_lock_mode mode)
-+{
-+ flow_t f;
-+ hint_t *hint;
-+ int result = 0;
-+ unsigned long cl_idx;
-+ ra_info_t ra_info;
-+ file_plugin *fplug;
-+ item_plugin *iplug;
-+ tfm_cluster_t *tc;
-+ int was_grabbed;
-+
-+ assert("edward-138", clust != NULL);
-+ assert("edward-728", clust->hint != NULL);
-+ assert("edward-226", reiser4_schedulable());
-+ assert("edward-137", inode != NULL);
-+ assert("edward-729", cryptcompress_inode_ok(inode));
-+
-+ hint = clust->hint;
-+ cl_idx = clust->index;
-+ fplug = inode_file_plugin(inode);
-+ was_grabbed = get_current_context()->grabbed_blocks;
-+ tc = &clust->tc;
-+
-+ assert("edward-462", !tfm_cluster_is_uptodate(tc));
-+ assert("edward-461", ergo(read, tfm_stream_is_set(tc, INPUT_STREAM)));
-+
-+ dclust_init_extension(hint);
-+
-+ /* set key of the first disk cluster item */
-+ fplug->flow_by_inode(inode,
-+ (read ? (char __user *)tfm_stream_data(tc, INPUT_STREAM) : NULL),
-+ 0 /* kernel space */ ,
-+ inode_scaled_cluster_size(inode),
-+ clust_to_off(cl_idx, inode), READ_OP, &f);
-+ if (mode == ZNODE_WRITE_LOCK) {
-+ /* reserve for flush to make dirty all the leaf nodes
-+ which contain disk cluster */
-+ result =
-+ reiser4_grab_space_force(estimate_dirty_cluster(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ goto out;
-+ }
-+
-+ ra_info.key_to_stop = f.key;
-+ set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
-+
-+ while (f.length) {
-+ result = find_cluster_item(hint, &f.key, mode,
-+ NULL, FIND_EXACT,
-+ (mode == ZNODE_WRITE_LOCK ?
-+ CBK_FOR_INSERT : 0));
-+ switch (result) {
-+ case CBK_COORD_NOTFOUND:
-+ result = 0;
-+ if (inode_scaled_offset
-+ (inode,
-+ clust_to_off(cl_idx,
-+ inode)) == get_key_offset(&f.key)) {
-+ /* first item not found, this is treated
-+ as disk cluster is absent */
-+ clust->dstat = FAKE_DISK_CLUSTER;
-+ goto out;
-+ }
-+ /* we are outside the cluster, stop search here */
-+ assert("edward-146",
-+ f.length != inode_scaled_cluster_size(inode));
-+ goto ok;
-+ case CBK_COORD_FOUND:
-+ assert("edward-148",
-+ hint->ext_coord.coord.between == AT_UNIT);
-+ assert("edward-460",
-+ hint->ext_coord.coord.unit_pos == 0);
-+
-+ coord_clear_iplug(&hint->ext_coord.coord);
-+ result = zload_ra(hint->ext_coord.coord.node, &ra_info);
-+ if (unlikely(result))
-+ goto out;
-+ iplug = item_plugin_by_coord(&hint->ext_coord.coord);
-+ assert("edward-147",
-+ item_id_by_coord(&hint->ext_coord.coord) ==
-+ CTAIL_ID);
-+
-+ result = iplug->s.file.read(NULL, &f, hint);
-+ if (result) {
-+ zrelse(hint->ext_coord.coord.node);
-+ goto out;
-+ }
-+ if (mode == ZNODE_WRITE_LOCK) {
-+ /* Don't make dirty more nodes then it was
-+ estimated (see comments before
-+ estimate_dirty_cluster). Missed nodes will be
-+ read up in flush time if they are evicted from
-+ memory */
-+ if (dclust_get_extension_ncount(hint) <=
-+ estimate_dirty_cluster(inode))
-+ znode_make_dirty(hint->ext_coord.coord.node);
-+
-+ znode_set_convertible(hint->ext_coord.coord.
-+ node);
-+ }
-+ zrelse(hint->ext_coord.coord.node);
-+ break;
-+ default:
-+ goto out;
-+ }
-+ }
-+ ok:
-+ /* at least one item was found */
-+ /* NOTE-EDWARD: Callers should handle the case
-+ when disk cluster is incomplete (-EIO) */
-+ tc->len = inode_scaled_cluster_size(inode) - f.length;
-+ tc->lsize = fsize_to_count(clust, inode);
-+ assert("edward-1196", tc->len > 0);
-+ assert("edward-1406", tc->lsize > 0);
-+
-+ if (hint_is_unprepped_dclust(clust->hint))
-+ clust->dstat = UNPR_DISK_CLUSTER;
-+ else {
-+ dclust_set_extension_dsize(clust->hint, tc->len);
-+ clust->dstat = PREP_DISK_CLUSTER;
-+ }
-+ out:
-+ assert("edward-1339",
-+ get_current_context()->grabbed_blocks >= was_grabbed);
-+ grabbed2free(get_current_context(),
-+ get_current_super_private(),
-+ get_current_context()->grabbed_blocks - was_grabbed);
-+ return result;
-+}
-+
-+int
-+get_disk_cluster_locked(reiser4_cluster_t * clust, struct inode *inode,
-+ znode_lock_mode lock_mode)
-+{
-+ reiser4_key key;
-+ ra_info_t ra_info;
-+
-+ assert("edward-730", reiser4_schedulable());
-+ assert("edward-731", clust != NULL);
-+ assert("edward-732", inode != NULL);
-+
-+ if (hint_is_valid(clust->hint)) {
-+ assert("edward-1293", clust->dstat != INVAL_DISK_CLUSTER);
-+ assert("edward-1294",
-+ znode_is_write_locked(clust->hint->lh.node));
-+ /* already have a valid locked position */
-+ return (clust->dstat ==
-+ FAKE_DISK_CLUSTER ? CBK_COORD_NOTFOUND :
-+ CBK_COORD_FOUND);
-+ }
-+ key_by_inode_cryptcompress(inode, clust_to_off(clust->index, inode),
-+ &key);
-+ ra_info.key_to_stop = key;
-+ set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
-+
-+ return find_cluster_item(clust->hint, &key, lock_mode, NULL, FIND_EXACT,
-+ CBK_FOR_INSERT);
-+}
-+
-+/* Read needed cluster pages before modifying.
-+ If success, @clust->hint contains locked position in the tree.
-+ Also:
-+ . find and set disk cluster state
-+ . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER.
-+*/
-+static int
-+read_some_cluster_pages(struct inode *inode, reiser4_cluster_t * clust)
-+{
-+ int i;
-+ int result = 0;
-+ item_plugin *iplug;
-+ reiser4_slide_t *win = clust->win;
-+ znode_lock_mode mode = ZNODE_WRITE_LOCK;
-+
-+ iplug = item_plugin_by_id(CTAIL_ID);
-+
-+ assert("edward-924", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+#if REISER4_DEBUG
-+ if (clust->nr_pages == 0) {
-+ /* start write hole from fake disk cluster */
-+ assert("edward-1117", win != NULL);
-+ assert("edward-1118", win->stat == HOLE_WINDOW);
-+ assert("edward-1119", new_cluster(clust, inode));
-+ }
-+#endif
-+ if (new_cluster(clust, inode)) {
-+ /*
-+ new page cluster is about to be written, nothing to read,
-+ */
-+ assert("edward-734", reiser4_schedulable());
-+ assert("edward-735", clust->hint->lh.owner == NULL);
-+
-+ if (clust->nr_pages) {
-+ int off;
-+ char *data;
-+ struct page * pg;
-+ assert("edward-1419", clust->pages != NULL);
-+ pg = clust->pages[clust->nr_pages - 1];
-+ assert("edward-1420", pg != NULL);
-+ off = off_to_pgoff(win->off+win->count+win->delta);
-+ if (off) {
-+ lock_page(pg);
-+ data = kmap_atomic(pg, KM_USER0);
-+ memset(data + off, 0, PAGE_CACHE_SIZE - off);
-+ flush_dcache_page(pg);
-+ kunmap_atomic(data, KM_USER0);
-+ unlock_page(pg);
-+ }
-+ }
-+ clust->dstat = FAKE_DISK_CLUSTER;
-+ return 0;
-+ }
-+ /*
-+ Here we should search for disk cluster to figure out its real state.
-+ Also there is one more important reason to do disk search: we need
-+ to make disk cluster _dirty_ if it exists
-+ */
-+
-+ /* if windows is specified, read the only pages
-+ that will be modified partially */
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ struct page *pg = clust->pages[i];
-+
-+ lock_page(pg);
-+ if (PageUptodate(pg)) {
-+ unlock_page(pg);
-+ continue;
-+ }
-+ unlock_page(pg);
-+
-+ if (win &&
-+ i >= count_to_nrpages(win->off) &&
-+ i < off_to_pg(win->off + win->count + win->delta))
-+ /* page will be completely overwritten */
-+ continue;
-+
-+ if (win && (i == clust->nr_pages - 1) &&
-+ /* the last page is
-+ partially modified,
-+ not uptodate .. */
-+ (count_to_nrpages(inode->i_size) <= pg->index)) {
-+ /* .. and appended,
-+ so set zeroes to the rest */
-+ char *data;
-+ int offset;
-+ lock_page(pg);
-+ data = kmap_atomic(pg, KM_USER0);
-+
-+ assert("edward-1260",
-+ count_to_nrpages(win->off + win->count +
-+ win->delta) - 1 == i);
-+
-+ offset =
-+ off_to_pgoff(win->off + win->count + win->delta);
-+ memset(data + offset, 0, PAGE_CACHE_SIZE - offset);
-+ flush_dcache_page(pg);
-+ kunmap_atomic(data, KM_USER0);
-+ unlock_page(pg);
-+ /* still not uptodate */
-+ break;
-+ }
-+ if (!tfm_cluster_is_uptodate(&clust->tc)) {
-+ result = ctail_read_disk_cluster(clust, inode, mode);
-+ if (result)
-+ goto out;
-+ assert("edward-925",
-+ tfm_cluster_is_uptodate(&clust->tc));
-+ }
-+ lock_page(pg);
-+ result = do_readpage_ctail(inode, clust, pg, mode);
-+ unlock_page(pg);
-+ if (result) {
-+ impossible("edward-219",
-+ "do_readpage_ctail returned crap");
-+ goto out;
-+ }
-+ }
-+ if (!tfm_cluster_is_uptodate(&clust->tc)) {
-+ /* disk cluster unclaimed, but we need to make its znodes dirty
-+ to make flush update convert its content */
-+ result = find_disk_cluster(clust, inode, 0 /* do not read items */,
-+ mode);
-+ }
-+ out:
-+ tfm_cluster_clr_uptodate(&clust->tc);
-+ return result;
-+}
-+
-+static int
-+should_create_unprepped_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ assert("edward-737", clust != NULL);
-+
-+ switch (clust->dstat) {
-+ case PREP_DISK_CLUSTER:
-+ case UNPR_DISK_CLUSTER:
-+ return 0;
-+ case FAKE_DISK_CLUSTER:
-+ if (clust->win &&
-+ clust->win->stat == HOLE_WINDOW && clust->nr_pages == 0) {
-+ assert("edward-1172", new_cluster(clust, inode));
-+ return 0;
-+ }
-+ return 1;
-+ default:
-+ impossible("edward-1173", "bad disk cluster state");
-+ return 0;
-+ }
-+}
-+
-+static int
-+cryptcompress_make_unprepped_cluster(reiser4_cluster_t * clust,
-+ struct inode *inode)
-+{
-+ int result;
-+
-+ assert("edward-1123", reiser4_schedulable());
-+ assert("edward-737", clust != NULL);
-+ assert("edward-738", inode != NULL);
-+ assert("edward-739", cryptcompress_inode_ok(inode));
-+ assert("edward-1053", clust->hint != NULL);
-+
-+ if (!should_create_unprepped_cluster(clust, inode)) {
-+ if (clust->reserved) {
-+ cluster_reserved2free(estimate_insert_cluster(inode));
-+#if REISER4_DEBUG
-+ assert("edward-1267",
-+ clust->reserved_unprepped ==
-+ estimate_insert_cluster(inode));
-+ clust->reserved_unprepped -=
-+ estimate_insert_cluster(inode);
-+#endif
-+ }
-+ return 0;
-+ }
-+ assert("edward-1268", clust->reserved);
-+ cluster_reserved2grabbed(estimate_insert_cluster(inode));
-+#if REISER4_DEBUG
-+ assert("edward-1441",
-+ clust->reserved_unprepped == estimate_insert_cluster(inode));
-+ clust->reserved_unprepped -= estimate_insert_cluster(inode);
-+#endif
-+ result = ctail_insert_unprepped_cluster(clust, inode);
-+ if (result)
-+ return result;
-+
-+ inode_add_bytes(inode, inode_cluster_size(inode));
-+
-+ assert("edward-743", cryptcompress_inode_ok(inode));
-+ assert("edward-744", znode_is_write_locked(clust->hint->lh.node));
-+
-+ clust->dstat = UNPR_DISK_CLUSTER;
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+static int jnode_truncate_ok(struct inode *inode, cloff_t index)
-+{
-+ jnode *node;
-+ node =
-+ jlookup(current_tree, get_inode_oid(inode),
-+ clust_to_pg(index, inode));
-+ if (likely(!node))
-+ return 1;
-+ /* someone got this jnode */
-+ warning("edward-1315", "jnode %p is untruncated\n", node);
-+ jput(node);
-+ return (atomic_read(&node->x_count));
-+}
-+#endif
-+
-+/* Collect unlocked cluster pages and jnode (the last is in the
-+ case when the page cluster will be modified and captured) */
-+int
-+prepare_page_cluster(struct inode *inode, reiser4_cluster_t * clust,
-+ int capture)
-+{
-+ assert("edward-177", inode != NULL);
-+ assert("edward-741", cryptcompress_inode_ok(inode));
-+ assert("edward-740", clust->pages != NULL);
-+
-+ set_cluster_nrpages(clust, inode);
-+ reset_cluster_pgset(clust, cluster_nrpages(inode));
-+ return (capture ?
-+ grab_cluster_pages_jnode(inode, clust) :
-+ grab_cluster_pages(inode, clust));
-+}
-+
-+/* Truncate all pages of the cluster of index @index.
-+ This is called by ->kill_hook() method of item plugin */
-+void truncate_page_cluster_cryptcompress(struct inode *inode, cloff_t index,
-+ int even_cows)
-+{
-+ int i;
-+ int found = 0;
-+ int nr_pages;
-+ jnode *node;
-+ struct page *pages[MAX_CLUSTER_NRPAGES];
-+
-+ node =
-+ jlookup(current_tree, get_inode_oid(inode),
-+ clust_to_pg(index, inode));
-+ /* jnode is absent, just drop pages which can not
-+ acquire jnode because of exclusive access */
-+ if (!node)
-+ goto truncate;
-+ /* jnode is present and may be dirty */
-+ nr_pages = count_to_nrpages(cnt_to_clcnt(inode->i_size, index, inode));
-+
-+ found = find_get_pages(inode->i_mapping, clust_to_pg(index, inode),
-+ nr_pages, pages);
-+ spin_lock_jnode(node);
-+
-+ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS)
-+ && index == 0)
-+ /* converting to unix_file in progress */
-+ JF_CLR(node, JNODE_CLUSTER_PAGE);
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /* someone has done modifications which are not
-+ yet committed, so we need to release some resources */
-+
-+ /* free disk space grabbed for disk cluster converting */
-+ cluster_reserved2grabbed(estimate_update_cluster(inode));
-+ grabbed2free(get_current_context(),
-+ get_current_super_private(),
-+ estimate_update_cluster(inode));
-+
-+ assert("edward-1198", found == nr_pages);
-+ assert("edward-1199", node->page_count == nr_pages);
-+#if REISER4_DEBUG
-+ node->page_count = 0;
-+#endif
-+ /* This will clear dirty bit */
-+ uncapture_cluster_jnode(node);
-+
-+ /* put pages grabbed for last uncommitted modifications */
-+ for (i = 0; i < nr_pages; i++) {
-+ assert("edward-1200", PageUptodate(pages[i]));
-+ page_cache_release(pages[i]);
-+#if REISER4_DEBUG
-+ cryptcompress_inode_data(inode)->pgcount --;
-+#endif
-+ }
-+ } else
-+ spin_unlock_jnode(node);
-+ /* FIXME-EDWARD: Use truncate_complete_page in the loop above instead */
-+
-+ jput(node);
-+ /* put pages found here */
-+ forget_cluster_pages(pages, found);
-+ truncate:
-+ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) &&
-+ index == 0)
-+ return;
-+ reiser4_invalidate_pages(inode->i_mapping,
-+ clust_to_pg(index, inode),
-+ cluster_nrpages(inode),
-+ even_cows);
-+ assert("edward-1201",
-+ ergo(!reiser4_inode_get_flag(inode,
-+ REISER4_FILE_CONV_IN_PROGRESS),
-+ jnode_truncate_ok(inode, index)));
-+ return;
-+}
-+
-+/* Prepare cluster handle before(after) modifications
-+ which are supposed to be committed.
-+
-+ . grab cluster pages;
-+ . reserve disk space;
-+ . maybe read pages from disk and set the disk cluster dirty;
-+ . maybe write hole;
-+ . maybe create 'unprepped' disk cluster if the last one is fake
-+ (i.e. is not represenred by any items)
-+*/
-+
-+static int
-+prepare_cluster(struct inode *inode,
-+ loff_t file_off /* write position in the file */ ,
-+ loff_t to_file, /* bytes of users data to write to the file */
-+ reiser4_cluster_t * clust, page_cluster_op op)
-+{
-+ int result = 0;
-+ reiser4_slide_t *win = clust->win;
-+
-+ reset_cluster_params(clust);
-+ cluster_set_tfm_act(&clust->tc, TFMA_READ);
-+#if REISER4_DEBUG
-+ clust->ctx = get_current_context();
-+#endif
-+ assert("edward-1190", op != PCL_UNKNOWN);
-+
-+ clust->op = op;
-+
-+ result = prepare_page_cluster(inode, clust, 1);
-+ if (result)
-+ return result;
-+ assert("edward-1447",
-+ ergo(clust->nr_pages != 0, jprivate(clust->pages[0])));
-+ assert("edward-1448",
-+ ergo(clust->nr_pages != 0,
-+ jnode_is_cluster_page(jprivate(clust->pages[0]))));
-+
-+ result = reserve4cluster(inode, clust);
-+ if (result)
-+ goto err1;
-+ result = read_some_cluster_pages(inode, clust);
-+ if (result) {
-+ free_reserved4cluster(inode,
-+ clust,
-+ estimate_update_cluster(inode) +
-+ estimate_insert_cluster(inode));
-+ goto err1;
-+ }
-+ assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER);
-+
-+ result = cryptcompress_make_unprepped_cluster(clust, inode);
-+ if (result)
-+ goto err2;
-+ if (win && win->stat == HOLE_WINDOW) {
-+ result = write_hole(inode, clust, file_off, to_file);
-+ if (result)
-+ goto err2;
-+ }
-+ return 0;
-+ err2:
-+ free_reserved4cluster(inode, clust,
-+ estimate_update_cluster(inode));
-+ err1:
-+ reiser4_release_cluster_pages_and_jnode(clust);
-+ assert("edward-1125", result == -ENOSPC);
-+ return result;
-+}
-+
-+/* set window by two offsets */
-+static void
-+set_window(reiser4_cluster_t * clust, reiser4_slide_t * win,
-+ struct inode *inode, loff_t o1, loff_t o2)
-+{
-+ assert("edward-295", clust != NULL);
-+ assert("edward-296", inode != NULL);
-+ assert("edward-1071", win != NULL);
-+ assert("edward-297", o1 <= o2);
-+
-+ clust->index = off_to_clust(o1, inode);
-+
-+ win->off = off_to_cloff(o1, inode);
-+ win->count = min_count(inode_cluster_size(inode) - win->off, o2 - o1);
-+ win->delta = 0;
-+
-+ clust->win = win;
-+}
-+
-+static int
-+set_cluster_by_window(struct inode *inode, reiser4_cluster_t * clust,
-+ reiser4_slide_t * win, flow_t * f, loff_t file_off)
-+{
-+ int result;
-+
-+ assert("edward-197", clust != NULL);
-+ assert("edward-1072", win != NULL);
-+ assert("edward-198", inode != NULL);
-+
-+ result = alloc_cluster_pgset(clust, cluster_nrpages(inode));
-+ if (result)
-+ return result;
-+
-+ if (file_off > inode->i_size) {
-+ /* Uhmm, hole in cryptcompress file... */
-+ loff_t hole_size;
-+ hole_size = file_off - inode->i_size;
-+
-+ set_window(clust, win, inode, inode->i_size, file_off);
-+ win->stat = HOLE_WINDOW;
-+ if (win->off + hole_size < inode_cluster_size(inode))
-+ /* there is also user's data to append to the hole */
-+ win->delta =
-+ min_count(inode_cluster_size(inode) -
-+ (win->off + win->count), f->length);
-+ return 0;
-+ }
-+ set_window(clust, win, inode, file_off, file_off + f->length);
-+ win->stat = DATA_WINDOW;
-+ return 0;
-+}
-+
-+int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page,
-+ int count)
-+{
-+ int result = 0;
-+ int (*setting_actor)(reiser4_cluster_t * clust, int count);
-+
-+ assert("edward-1358", clust != NULL);
-+ assert("edward-1359", page != NULL);
-+ assert("edward-1360", page->mapping != NULL);
-+ assert("edward-1361", page->mapping->host != NULL);
-+
-+ setting_actor = (clust->pages ? reset_cluster_pgset : alloc_cluster_pgset);
-+ result = setting_actor(clust, count);
-+ clust->index = pg_to_clust(page->index, page->mapping->host);
-+ return result;
-+}
-+
-+/* reset all the params that not get updated */
-+void reset_cluster_params(reiser4_cluster_t * clust)
-+{
-+ assert("edward-197", clust != NULL);
-+
-+ clust->dstat = INVAL_DISK_CLUSTER;
-+ clust->tc.uptodate = 0;
-+ clust->tc.len = 0;
-+}
-+
-+/* Core write procedure of cryptcompress plugin, which slices user's
-+ flow into logical clusters, maps the last ones to the appropriate
-+ page clusters, and tries to capture them.
-+ If @buf != NULL, returns number of successfully written bytes,
-+ otherwise returns error
-+*/
-+static loff_t
-+write_cryptcompress_flow(struct file *file, struct inode *inode,
-+ const char __user *buf, size_t count, loff_t pos,
-+ int *conv_occured)
-+{
-+ int i;
-+ flow_t f;
-+ hint_t *hint;
-+ int result = 0;
-+ size_t to_write = 0;
-+ loff_t file_off;
-+ reiser4_slide_t win;
-+ reiser4_cluster_t clust;
-+
-+ assert("edward-161", reiser4_schedulable());
-+ assert("edward-748", cryptcompress_inode_ok(inode));
-+ assert("edward-159", current_blocksize == PAGE_CACHE_SIZE);
-+ assert("edward-1274", get_current_context()->grabbed_blocks == 0);
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ result = load_file_hint(file, hint);
-+ if (result) {
-+ kfree(hint);
-+ return result;
-+ }
-+
-+ result =
-+ flow_by_inode_cryptcompress(inode, buf, 1 /* user space */ ,
-+ count, pos, WRITE_OP, &f);
-+ if (result)
-+ goto out;
-+ to_write = f.length;
-+
-+ /* current write position in file */
-+ file_off = pos;
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+
-+ result = set_cluster_by_window(inode, &clust, &win, &f, file_off);
-+ if (result)
-+ goto out;
-+
-+ if (next_window_stat(&win) == HOLE_WINDOW) {
-+ result = write_conversion_hook(file, inode, pos, &clust, NULL);
-+ if (result)
-+ goto out;
-+ result =
-+ prepare_cluster(inode, file_off, f.length, &clust,
-+ PCL_APPEND);
-+ if (result)
-+ goto out;
-+ }
-+ do {
-+ char *src;
-+ unsigned page_off, page_count;
-+
-+ assert("edward-750", reiser4_schedulable());
-+
-+ result = write_conversion_hook(file, inode, pos, &clust,
-+ conv_occured);
-+ if (result || *conv_occured)
-+ goto out;
-+ result =
-+ prepare_cluster(inode, file_off, f.length, &clust,
-+ PCL_APPEND);
-+ if (result)
-+ goto out;
-+
-+ assert("edward-751", cryptcompress_inode_ok(inode));
-+ assert("edward-204", win.stat == DATA_WINDOW);
-+ assert("edward-1288", hint_is_valid(clust.hint));
-+ assert("edward-752",
-+ znode_is_write_locked(hint->ext_coord.coord.node));
-+
-+ put_hint_cluster(&clust, inode, ZNODE_WRITE_LOCK);
-+
-+ /* set write position in page */
-+ page_off = off_to_pgoff(win.off);
-+
-+ /* copy user's data to cluster pages */
-+ for (i = off_to_pg(win.off), src = f.data;
-+ i < count_to_nrpages(win.off + win.count);
-+ i++, src += page_count) {
-+ page_count =
-+ cnt_to_pgcnt(win.off + win.count, i) - page_off;
-+
-+ assert("edward-1039",
-+ page_off + page_count <= PAGE_CACHE_SIZE);
-+ assert("edward-287", clust.pages[i] != NULL);
-+
-+ lock_page(clust.pages[i]);
-+ result =
-+ __copy_from_user((char *)kmap(clust.pages[i]) +
-+ page_off, (char __user *)src, page_count);
-+ kunmap(clust.pages[i]);
-+ if (unlikely(result)) {
-+ unlock_page(clust.pages[i]);
-+ result = -EFAULT;
-+ goto err2;
-+ }
-+ SetPageUptodate(clust.pages[i]);
-+ unlock_page(clust.pages[i]);
-+ page_off = 0;
-+ }
-+ assert("edward-753", cryptcompress_inode_ok(inode));
-+
-+ set_cluster_pages_dirty(&clust);
-+
-+ result = try_capture_cluster(&clust, inode);
-+ if (result)
-+ goto err2;
-+
-+ assert("edward-998", f.user == 1);
-+
-+ move_flow_forward(&f, win.count);
-+
-+ /* disk cluster may be already clean at this point */
-+
-+ /* . update cluster
-+ . set hint for new offset
-+ . unlock znode
-+ . update inode
-+ . balance dirty pages
-+ */
-+ result = balance_dirty_page_cluster(&clust, inode, 0, f.length);
-+ if (result)
-+ goto err1;
-+ assert("edward-755", hint->lh.owner == NULL);
-+ reset_cluster_params(&clust);
-+ continue;
-+ err2:
-+ reiser4_release_cluster_pages_and_jnode(&clust);
-+ err1:
-+ if (clust.reserved)
-+ free_reserved4cluster(inode,
-+ &clust,
-+ estimate_update_cluster(inode));
-+ break;
-+ } while (f.length);
-+ out:
-+ done_lh(&hint->lh);
-+ if (result == -EEXIST)
-+ warning("edward-1407", "write returns EEXIST!\n");
-+
-+ put_cluster_handle(&clust);
-+ save_file_hint(file, hint);
-+ kfree(hint);
-+ if (buf) {
-+ /* if nothing were written - there must be an error */
-+ assert("edward-195", ergo((to_write == f.length),
-+ (result < 0 || *conv_occured)));
-+ return (to_write - f.length) ? (to_write - f.length) : result;
-+ }
-+ return result;
-+}
-+
-+/**
-+ * write_cryptcompress - write of struct file_operations
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to write
-+ * @off: position in file to write to
-+ *
-+ * This is implementation of vfs's write method of struct file_operations for
-+ * cryptcompress plugin.
-+ */
-+ssize_t write_cryptcompress(struct file *file, const char __user *buf,
-+ size_t count, loff_t *off, int *conv)
-+{
-+ ssize_t result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+ loff_t pos = *off;
-+ cryptcompress_info_t *info;
-+
-+ assert("edward-1449", *conv == 0);
-+
-+ inode = file->f_dentry->d_inode;
-+ assert("edward-196", cryptcompress_inode_ok(inode));
-+
-+ info = cryptcompress_inode_data(inode);
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ mutex_lock(&inode->i_mutex);
-+
-+ result = generic_write_checks(file, &pos, &count, 0);
-+ if (unlikely(result != 0))
-+ goto out;
-+ if (unlikely(count == 0))
-+ goto out;
-+ result = remove_suid(file->f_dentry);
-+ if (unlikely(result != 0))
-+ goto out;
-+ /* remove_suid might create a transaction */
-+ reiser4_txn_restart(ctx);
-+
-+ result = write_cryptcompress_flow(file, inode, buf, count, pos, conv);
-+
-+ if (result < 0)
-+ goto out;
-+ /* update position in a file */
-+ *off = pos + result;
-+ out:
-+ mutex_unlock(&inode->i_mutex);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+int readpages_cryptcompress(struct file *file, struct address_space *mapping,
-+ struct list_head *pages, unsigned nr_pages)
-+{
-+ reiser4_context * ctx;
-+ int ret;
-+
-+ ctx = reiser4_init_context(mapping->host->i_sb);
-+ if (IS_ERR(ctx)) {
-+ ret = PTR_ERR(ctx);
-+ goto err;
-+ }
-+ /* crc files can be built of ctail items only */
-+ ret = readpages_ctail(file, mapping, pages);
-+ reiser4_exit_context(ctx);
-+ if (ret) {
-+err:
-+ put_pages_list(pages);
-+ }
-+ return ret;
-+}
-+
-+static reiser4_block_nr cryptcompress_estimate_read(struct inode *inode)
-+{
-+ /* reserve one block to update stat data item */
-+ assert("edward-1193",
-+ inode_file_plugin(inode)->estimate.update ==
-+ estimate_update_common);
-+ return estimate_update_common(inode);
-+}
-+
-+/**
-+ * read_cryptcompress - read of struct file_operations
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to read
-+ * @off: position in file to read from
-+ *
-+ * This is implementation of vfs's read method of struct file_operations for
-+ * cryptcompress plugin.
-+ */
-+ssize_t read_cryptcompress(struct file * file, char __user *buf, size_t size,
-+ loff_t * off)
-+{
-+ ssize_t result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+ cryptcompress_info_t *info;
-+ reiser4_block_nr needed;
-+
-+ inode = file->f_dentry->d_inode;
-+ assert("edward-1194", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ info = cryptcompress_inode_data(inode);
-+ needed = cryptcompress_estimate_read(inode);
-+
-+ result = reiser4_grab_space(needed, BA_CAN_COMMIT);
-+ if (result != 0) {
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ LOCK_CNT_INC(inode_sem_r);
-+
-+ result = do_sync_read(file, buf, size, off);
-+
-+ LOCK_CNT_DEC(inode_sem_r);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return result;
-+}
-+
-+/* If @index > 0, find real disk cluster of the index (@index - 1),
-+ If @index == 0 find the real disk cluster of the object of maximal index.
-+ Keep incremented index of the result in @found.
-+ It succes was returned:
-+ (@index == 0 && @found == 0) means that the object doesn't have real disk
-+ clusters.
-+ (@index != 0 && @found == 0) means that disk cluster of (@index -1) doesn't
-+ exist.
-+*/
-+static int
-+find_real_disk_cluster(struct inode *inode, cloff_t * found, cloff_t index)
-+{
-+ int result;
-+ reiser4_key key;
-+ loff_t offset;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ lookup_bias bias;
-+ coord_t *coord;
-+ item_plugin *iplug;
-+
-+ assert("edward-1131", inode != NULL);
-+ assert("edward-95", cryptcompress_inode_ok(inode));
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ bias = (index ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN);
-+ offset =
-+ (index ? clust_to_off(index, inode) -
-+ 1 : get_key_offset(reiser4_max_key()));
-+
-+ key_by_inode_cryptcompress(inode, offset, &key);
-+
-+ /* find the last item of this object */
-+ result =
-+ find_cluster_item(hint, &key, ZNODE_READ_LOCK, NULL /* ra_info */,
-+ bias, 0);
-+ if (cbk_errored(result)) {
-+ done_lh(lh);
-+ kfree(hint);
-+ return result;
-+ }
-+ if (result == CBK_COORD_NOTFOUND) {
-+ /* no real disk clusters */
-+ done_lh(lh);
-+ kfree(hint);
-+ *found = 0;
-+ return 0;
-+ }
-+ /* disk cluster is found */
-+ coord = &hint->ext_coord.coord;
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (unlikely(result)) {
-+ done_lh(lh);
-+ kfree(hint);
-+ return result;
-+ }
-+ iplug = item_plugin_by_coord(coord);
-+ assert("edward-277", iplug == item_plugin_by_id(CTAIL_ID));
-+ assert("edward-1202", ctail_ok(coord));
-+
-+ item_key_by_coord(coord, &key);
-+ *found = off_to_clust(get_key_offset(&key), inode) + 1;
-+
-+ assert("edward-1132", ergo(index, index == *found));
-+
-+ zrelse(coord->node);
-+ done_lh(lh);
-+ kfree(hint);
-+ return 0;
-+}
-+
-+static int find_fake_appended(struct inode *inode, cloff_t * index)
-+{
-+ return find_real_disk_cluster(inode, index,
-+ 0 /* find last real one */ );
-+}
-+
-+/* Set left coord when unit is not found after node_lookup()
-+ This takes into account that there can be holes in a sequence
-+ of disk clusters */
-+
-+static void adjust_left_coord(coord_t * left_coord)
-+{
-+ switch (left_coord->between) {
-+ case AFTER_UNIT:
-+ left_coord->between = AFTER_ITEM;
-+ case AFTER_ITEM:
-+ case BEFORE_UNIT:
-+ break;
-+ default:
-+ impossible("edward-1204", "bad left coord to cut");
-+ }
-+ return;
-+}
-+
-+#define CRC_CUT_TREE_MIN_ITERATIONS 64
-+int
-+cut_tree_worker_cryptcompress(tap_t * tap, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ struct inode *object, int truncate, int *progress)
-+{
-+ lock_handle next_node_lock;
-+ coord_t left_coord;
-+ int result;
-+
-+ assert("edward-1158", tap->coord->node != NULL);
-+ assert("edward-1159", znode_is_write_locked(tap->coord->node));
-+ assert("edward-1160", znode_get_level(tap->coord->node) == LEAF_LEVEL);
-+
-+ *progress = 0;
-+ init_lh(&next_node_lock);
-+
-+ while (1) {
-+ znode *node; /* node from which items are cut */
-+ node_plugin *nplug; /* node plugin for @node */
-+
-+ node = tap->coord->node;
-+
-+ /* Move next_node_lock to the next node on the left. */
-+ result =
-+ reiser4_get_left_neighbor(&next_node_lock, node,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result != 0 && result != -E_NO_NEIGHBOR)
-+ break;
-+ /* FIXME-EDWARD: Check can we delete the node as a whole. */
-+ result = reiser4_tap_load(tap);
-+ if (result)
-+ return result;
-+
-+ /* Prepare the second (right) point for cut_node() */
-+ if (*progress)
-+ coord_init_last_unit(tap->coord, node);
-+
-+ else if (item_plugin_by_coord(tap->coord)->b.lookup == NULL)
-+ /* set rightmost unit for the items without lookup method */
-+ tap->coord->unit_pos = coord_last_unit_pos(tap->coord);
-+
-+ nplug = node->nplug;
-+
-+ assert("edward-1161", nplug);
-+ assert("edward-1162", nplug->lookup);
-+
-+ /* left_coord is leftmost unit cut from @node */
-+ result = nplug->lookup(node, from_key, FIND_EXACT, &left_coord);
-+
-+ if (IS_CBKERR(result))
-+ break;
-+
-+ if (result == CBK_COORD_NOTFOUND)
-+ adjust_left_coord(&left_coord);
-+
-+ /* adjust coordinates so that they are set to existing units */
-+ if (coord_set_to_right(&left_coord)
-+ || coord_set_to_left(tap->coord)) {
-+ result = 0;
-+ break;
-+ }
-+
-+ if (coord_compare(&left_coord, tap->coord) ==
-+ COORD_CMP_ON_RIGHT) {
-+ /* keys from @from_key to @to_key are not in the tree */
-+ result = 0;
-+ break;
-+ }
-+
-+ /* cut data from one node */
-+ *smallest_removed = *reiser4_min_key();
-+ result = kill_node_content(&left_coord,
-+ tap->coord,
-+ from_key,
-+ to_key,
-+ smallest_removed,
-+ next_node_lock.node,
-+ object, truncate);
-+#if REISER4_DEBUG
-+ /*node_check(node, ~0U); */
-+#endif
-+ reiser4_tap_relse(tap);
-+
-+ if (result)
-+ break;
-+
-+ ++(*progress);
-+
-+ /* Check whether all items with keys >= from_key were removed
-+ * from the tree. */
-+ if (keyle(smallest_removed, from_key))
-+ /* result = 0; */
-+ break;
-+
-+ if (next_node_lock.node == NULL)
-+ break;
-+
-+ result = reiser4_tap_move(tap, &next_node_lock);
-+ done_lh(&next_node_lock);
-+ if (result)
-+ break;
-+
-+ /* Break long cut_tree operation (deletion of a large file) if
-+ * atom requires commit. */
-+ if (*progress > CRC_CUT_TREE_MIN_ITERATIONS
-+ && current_atom_should_commit()) {
-+ result = -E_REPEAT;
-+ break;
-+ }
-+ }
-+ done_lh(&next_node_lock);
-+ return result;
-+}
-+
-+/* Append or expand hole in two steps (exclusive access should be aquired!)
-+ 1) write zeroes to the current real cluster,
-+ 2) expand hole via fake clusters (just increase i_size) */
-+static int
-+cryptcompress_append_hole(struct inode *inode /*contains old i_size */ ,
-+ loff_t new_size)
-+{
-+ int result = 0;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ loff_t hole_size;
-+ int nr_zeroes;
-+ reiser4_slide_t win;
-+ reiser4_cluster_t clust;
-+
-+ assert("edward-1133", inode->i_size < new_size);
-+ assert("edward-1134", reiser4_schedulable());
-+ assert("edward-1135", cryptcompress_inode_ok(inode));
-+ assert("edward-1136", current_blocksize == PAGE_CACHE_SIZE);
-+ assert("edward-1333", off_to_cloff(inode->i_size, inode) != 0);
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (result)
-+ goto out;
-+ if (off_to_cloff(inode->i_size, inode) == 0)
-+ goto fake_append;
-+ hole_size = new_size - inode->i_size;
-+ nr_zeroes =
-+ inode_cluster_size(inode) - off_to_cloff(inode->i_size, inode);
-+ if (hole_size < nr_zeroes)
-+ nr_zeroes = hole_size;
-+ set_window(&clust, &win, inode, inode->i_size,
-+ inode->i_size + nr_zeroes);
-+ win.stat = HOLE_WINDOW;
-+
-+ assert("edward-1137",
-+ clust.index == off_to_clust(inode->i_size, inode));
-+
-+ result = prepare_cluster(inode, 0, 0, &clust, PCL_APPEND);
-+
-+ assert("edward-1271", !result || result == -ENOSPC);
-+ if (result)
-+ goto out;
-+ assert("edward-1139",
-+ clust.dstat == PREP_DISK_CLUSTER ||
-+ clust.dstat == UNPR_DISK_CLUSTER);
-+
-+ assert("edward-1431", hole_size >= nr_zeroes);
-+ if (hole_size == nr_zeroes)
-+ /* nothing to append anymore */
-+ goto out;
-+ fake_append:
-+ INODE_SET_FIELD(inode, i_size, new_size);
-+ out:
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+static int
-+pages_truncate_ok(struct inode *inode, loff_t old_size, pgoff_t start)
-+{
-+ struct pagevec pvec;
-+ int i;
-+ int count;
-+ int rest;
-+
-+ rest = count_to_nrpages(old_size) - start;
-+
-+ pagevec_init(&pvec, 0);
-+ count = min_count(pagevec_space(&pvec), rest);
-+
-+ while (rest) {
-+ count = min_count(pagevec_space(&pvec), rest);
-+ pvec.nr = find_get_pages(inode->i_mapping, start,
-+ count, pvec.pages);
-+ for (i = 0; i < pagevec_count(&pvec); i++) {
-+ if (PageUptodate(pvec.pages[i])) {
-+ warning("edward-1205",
-+ "truncated page of index %lu is uptodate",
-+ pvec.pages[i]->index);
-+ return 0;
-+ }
-+ }
-+ start += count;
-+ rest -= count;
-+ pagevec_release(&pvec);
-+ }
-+ return 1;
-+}
-+
-+static int body_truncate_ok(struct inode *inode, cloff_t aidx)
-+{
-+ int result;
-+ cloff_t raidx;
-+
-+ result = find_fake_appended(inode, &raidx);
-+ return !result && (aidx == raidx);
-+}
-+#endif
-+
-+static int
-+update_cryptcompress_size(struct inode *inode, reiser4_key * key, int update_sd)
-+{
-+ return (get_key_offset(key) & ((loff_t) (inode_cluster_size(inode)) - 1)
-+ ? 0 : reiser4_update_file_size(inode, key, update_sd));
-+}
-+
-+/* prune cryptcompress file in two steps (exclusive access should be acquired!)
-+ 1) cut all disk clusters but the last one partially truncated,
-+ 2) set zeroes and capture last partially truncated page cluster if the last
-+ one exists, otherwise truncate via prune fake cluster (just decrease i_size)
-+*/
-+static int
-+prune_cryptcompress(struct inode *inode, loff_t new_size, int update_sd,
-+ cloff_t aidx)
-+{
-+ int result = 0;
-+ unsigned nr_zeroes;
-+ loff_t to_prune;
-+ loff_t old_size;
-+ cloff_t ridx;
-+
-+ hint_t *hint;
-+ lock_handle *lh;
-+ reiser4_slide_t win;
-+ reiser4_cluster_t clust;
-+
-+ assert("edward-1140", inode->i_size >= new_size);
-+ assert("edward-1141", reiser4_schedulable());
-+ assert("edward-1142", cryptcompress_inode_ok(inode));
-+ assert("edward-1143", current_blocksize == PAGE_CACHE_SIZE);
-+
-+ old_size = inode->i_size;
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+
-+ /* rightmost completely truncated cluster */
-+ ridx = count_to_nrclust(new_size, inode);
-+
-+ assert("edward-1174", ridx <= aidx);
-+ old_size = inode->i_size;
-+ if (ridx != aidx) {
-+ result = cut_file_items(inode,
-+ clust_to_off(ridx, inode),
-+ update_sd,
-+ clust_to_off(aidx, inode),
-+ update_cryptcompress_size);
-+ if (result)
-+ goto out;
-+ }
-+ if (!off_to_cloff(new_size, inode)) {
-+ /* no partially truncated clusters */
-+ assert("edward-1145", inode->i_size == new_size);
-+ goto finish;
-+ }
-+ assert("edward-1146", new_size < inode->i_size);
-+
-+ to_prune = inode->i_size - new_size;
-+
-+ /* partial truncate of leftmost cluster,
-+ first check if it is fake */
-+ result = find_real_disk_cluster(inode, &aidx, ridx);
-+ if (result)
-+ goto out;
-+ if (!aidx)
-+ /* yup, this is fake one */
-+ goto finish;
-+
-+ assert("edward-1148", aidx == ridx);
-+
-+ /* do partial truncate of the leftmost page cluster,
-+ then try to capture this one */
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (result)
-+ goto out;
-+ nr_zeroes = (off_to_pgoff(new_size) ?
-+ PAGE_CACHE_SIZE - off_to_pgoff(new_size) : 0);
-+ set_window(&clust, &win, inode, new_size, new_size + nr_zeroes);
-+ win.stat = HOLE_WINDOW;
-+
-+ assert("edward-1149", clust.index == ridx - 1);
-+
-+ result = prepare_cluster(inode, 0, 0, &clust, PCL_TRUNCATE);
-+ if (result)
-+ goto out;
-+ assert("edward-1151",
-+ clust.dstat == PREP_DISK_CLUSTER ||
-+ clust.dstat == UNPR_DISK_CLUSTER);
-+
-+ assert("edward-1191", inode->i_size == new_size);
-+ assert("edward-1206", body_truncate_ok(inode, ridx));
-+ finish:
-+ /* drop all the pages that don't have jnodes (i.e. pages
-+ which can not be truncated by cut_file_items() because
-+ of holes represented by fake disk clusters) including
-+ the pages of partially truncated cluster which was
-+ released by prepare_cluster() */
-+ truncate_inode_pages(inode->i_mapping, new_size);
-+ INODE_SET_FIELD(inode, i_size, new_size);
-+ out:
-+ assert("edward-1334", !result || result == -ENOSPC);
-+ assert("edward-1209",
-+ pages_truncate_ok(inode, old_size, count_to_nrpages(new_size)));
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+/* Prepare cryptcompress file for truncate:
-+ prune or append rightmost fake logical clusters (if any)
-+*/
-+static int
-+start_truncate_fake(struct inode *inode, cloff_t aidx, loff_t new_size,
-+ int update_sd)
-+{
-+ int result = 0;
-+ int bytes;
-+
-+ if (new_size > inode->i_size) {
-+ /* append */
-+ if (inode->i_size < clust_to_off(aidx, inode))
-+ /* no fake bytes */
-+ return 0;
-+ bytes = new_size - inode->i_size;
-+ INODE_SET_FIELD(inode, i_size, inode->i_size + bytes);
-+ } else {
-+ /* prune */
-+ if (inode->i_size <= clust_to_off(aidx, inode))
-+ /* no fake bytes */
-+ return 0;
-+ bytes =
-+ inode->i_size - max_count(new_size,
-+ clust_to_off(aidx, inode));
-+ if (!bytes)
-+ return 0;
-+ INODE_SET_FIELD(inode, i_size, inode->i_size - bytes);
-+ /* In the case of fake prune we need to drop page cluster.
-+ There are only 2 cases for partially truncated page:
-+ 1. If is is dirty, therefore it is anonymous
-+ (was dirtied via mmap), and will be captured
-+ later via ->capture().
-+ 2. If is clean, therefore it is filled by zeroes.
-+ In both cases we don't need to make it dirty and
-+ capture here.
-+ */
-+ truncate_inode_pages(inode->i_mapping, inode->i_size);
-+ }
-+ if (update_sd)
-+ result = update_sd_cryptcompress(inode);
-+ return result;
-+}
-+
-+/* This is called in setattr_cryptcompress when it is used to truncate,
-+ and in delete_cryptcompress */
-+static int cryptcompress_truncate(struct inode *inode, /* old size */
-+ loff_t new_size, /* new size */
-+ int update_sd)
-+{
-+ int result;
-+ cloff_t aidx;
-+
-+ result = find_fake_appended(inode, &aidx);
-+ if (result)
-+ return result;
-+ assert("edward-1208",
-+ ergo(aidx > 0, inode->i_size > clust_to_off(aidx - 1, inode)));
-+
-+ result = start_truncate_fake(inode, aidx, new_size, update_sd);
-+ if (result)
-+ return result;
-+ if (inode->i_size == new_size)
-+ /* nothing to truncate anymore */
-+ return 0;
-+ result = (inode->i_size < new_size ?
-+ cryptcompress_append_hole(inode, new_size) :
-+ prune_cryptcompress(inode, new_size, update_sd, aidx));
-+ if (!result && update_sd)
-+ result = update_sd_cryptcompress(inode);
-+ return result;
-+}
-+
-+static void clear_moved_tag_cluster(struct address_space * mapping,
-+ reiser4_cluster_t * clust)
-+{
-+ int i;
-+ void * ret;
-+ read_lock_irq(&mapping->tree_lock);
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ assert("edward-1438", clust->pages[i] != NULL);
-+ ret = radix_tree_tag_clear(&mapping->page_tree,
-+ clust->pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ assert("edward-1439", ret == clust->pages[i]);
-+ }
-+ read_unlock_irq(&mapping->tree_lock);
-+}
-+
-+/* Capture an anonymous pager cluster. (Page cluser is
-+ anonymous if it contains at least one anonymous page */
-+static int
-+capture_page_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+ int result;
-+
-+ assert("edward-1073", clust != NULL);
-+ assert("edward-1074", inode != NULL);
-+ assert("edward-1075", clust->dstat == INVAL_DISK_CLUSTER);
-+
-+ result = prepare_cluster(inode, 0, 0, clust, PCL_APPEND);
-+ if (result)
-+ return result;
-+ set_cluster_pages_dirty(clust);
-+ clear_moved_tag_cluster(inode->i_mapping, clust);
-+
-+ result = try_capture_cluster(clust, inode);
-+ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
-+ if (unlikely(result)) {
-+ /* set cleared tag back, so it will be
-+ possible to capture it again later */
-+ read_lock_irq(&inode->i_mapping->tree_lock);
-+ radix_tree_tag_set(&inode->i_mapping->page_tree,
-+ clust_to_pg(clust->index, inode),
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ read_unlock_irq(&inode->i_mapping->tree_lock);
-+
-+ reiser4_release_cluster_pages_and_jnode(clust);
-+ }
-+ return result;
-+}
-+
-+#define MAX_CLUSTERS_TO_CAPTURE(inode) (1024 >> cluster_nrpages_shift(inode))
-+
-+/* read lock should be acquired */
-+static int
-+capture_anonymous_clusters(struct address_space *mapping, pgoff_t * index,
-+ int to_capture)
-+{
-+ int result = 0;
-+ int found;
-+ struct page *page = NULL;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ reiser4_cluster_t clust;
-+
-+ assert("edward-1127", mapping != NULL);
-+ assert("edward-1128", mapping->host != NULL);
-+ assert("edward-1440", mapping->host->i_mapping == mapping);
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ cluster_init_read(&clust, NULL);
-+ clust.hint = hint;
-+
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(mapping->host));
-+ if (result)
-+ goto out;
-+
-+ while (to_capture > 0) {
-+ found =
-+ find_get_pages_tag(mapping, index,
-+ PAGECACHE_TAG_REISER4_MOVED, 1, &page);
-+ if (!found) {
-+ *index = (pgoff_t) - 1;
-+ break;
-+ }
-+ assert("edward-1109", page != NULL);
-+
-+ move_cluster_forward(&clust, mapping->host, page->index);
-+ result = capture_page_cluster(&clust, mapping->host);
-+ page_cache_release(page);
-+ if (result)
-+ break;
-+ to_capture -= clust.nr_pages;
-+ }
-+ if (result) {
-+ warning("edward-1077",
-+ "Cannot capture anon pages: result=%i (captured=%d)\n",
-+ result,
-+ ((__u32) MAX_CLUSTERS_TO_CAPTURE(mapping->host)) -
-+ to_capture);
-+ } else {
-+ /* something had to be found */
-+ assert("edward-1078",
-+ to_capture <= MAX_CLUSTERS_TO_CAPTURE(mapping->host));
-+ if (to_capture <= 0)
-+ /* there may be left more pages */
-+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-+ }
-+ out:
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+/* Check mapping for existence of not captured dirty pages.
-+ This returns !0 if either page tree contains pages tagged
-+ PAGECACHE_TAG_REISER4_MOVED */
-+static int cryptcompress_inode_has_anon_pages(struct inode *inode)
-+{
-+ return mapping_tagged(inode->i_mapping, PAGECACHE_TAG_REISER4_MOVED);
-+}
-+
-+/* this is implementation of vfs's writepages method of struct
-+ address_space_operations */
-+int
-+writepages_cryptcompress(struct address_space *mapping,
-+ struct writeback_control *wbc)
-+{
-+ int result;
-+ int to_capture;
-+ pgoff_t nrpages;
-+ pgoff_t index = 0;
-+ cryptcompress_info_t *info;
-+ struct inode *inode;
-+
-+ inode = mapping->host;
-+ if (!cryptcompress_inode_has_anon_pages(inode)) {
-+ result = 0;
-+ goto end;
-+ }
-+
-+ info = cryptcompress_inode_data(inode);
-+ nrpages = count_to_nrpages(i_size_read(inode));
-+
-+ if (wbc->sync_mode != WB_SYNC_ALL)
-+ to_capture =
-+ min_count(wbc->nr_to_write, MAX_CLUSTERS_TO_CAPTURE(inode));
-+ else
-+ to_capture = MAX_CLUSTERS_TO_CAPTURE(inode);
-+ do {
-+ reiser4_context *ctx;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ result = PTR_ERR(ctx);
-+ break;
-+ }
-+ ctx->nobalance = 1;
-+
-+ assert("edward-1079",
-+ lock_stack_isclean(get_current_lock_stack()));
-+
-+ LOCK_CNT_INC(inode_sem_r);
-+
-+ result =
-+ capture_anonymous_clusters(inode->i_mapping, &index,
-+ to_capture);
-+
-+ if (result != 0 || wbc->sync_mode != WB_SYNC_ALL) {
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ result = txnmgr_force_commit_all(inode->i_sb, 0);
-+ reiser4_exit_context(ctx);
-+ } while (result == 0 && index < nrpages);
-+
-+ end:
-+ if (is_in_reiser4_context()) {
-+ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
-+ /* there are already pages to flush, flush them out, do
-+ not delay until end of reiser4_sync_inodes */
-+ reiser4_writeout(inode->i_sb, wbc);
-+ get_current_context()->nr_captured = 0;
-+ }
-+ }
-+ return result;
-+}
-+
-+/* plugin->u.file.mmap */
-+int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
-+{
-+ int result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+
-+ inode = file->f_dentry->d_inode;
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ /*
-+ * generic_file_mmap will do update_atime. Grab space for stat data
-+ * update.
-+ */
-+ result = reiser4_grab_space_force
-+ (inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+ if (result) {
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ result = generic_file_mmap(file, vma);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* plugin->u.file.release */
-+/* plugin->u.file.get_block */
-+
-+/* this is implementation of delete method of file plugin for
-+ cryptcompress objects */
-+int delete_object_cryptcompress(struct inode *inode)
-+{
-+ int result;
-+
-+ assert("edward-429", inode->i_nlink == 0);
-+
-+ reiser4_txn_restart_current();
-+
-+ result = cryptcompress_truncate(inode, 0, 0);
-+ if (result) {
-+ warning("edward-430",
-+ "cannot truncate cryptcompress file %lli: %i",
-+ (unsigned long long)get_inode_oid(inode),
-+ result);
-+ }
-+ truncate_inode_pages(inode->i_mapping, 0);
-+ /* and remove stat data */
-+ return reiser4_delete_object_common(inode);
-+}
-+
-+/* plugin->u.file.setattr method
-+ This implements actual truncate (see comments in reiser4/page_cache.c) */
-+int setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
-+{
-+ int result;
-+ struct inode *inode;
-+
-+ inode = dentry->d_inode;
-+ if (attr->ia_valid & ATTR_SIZE) {
-+ if (inode->i_size != attr->ia_size) {
-+ reiser4_context *ctx;
-+ loff_t old_size;
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ inode_check_scale(inode, inode->i_size, attr->ia_size);
-+
-+ old_size = inode->i_size;
-+
-+ result =
-+ cryptcompress_truncate(inode, attr->ia_size,
-+ 1 /* update stat data */ );
-+ if (result) {
-+ warning("edward-1192",
-+ "truncate_cryptcompress failed: oid %lli, "
-+ "old size %lld, new size %lld, retval %d",
-+ (unsigned long long)
-+ get_inode_oid(inode), old_size,
-+ attr->ia_size, result);
-+ }
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ } else
-+ result = 0;
-+ } else
-+ result = reiser4_setattr_common(dentry, attr);
-+ return result;
-+}
-+
-+/* sendfile_cryptcompress - sendfile of struct file_operations */
-+ssize_t
-+sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ reiser4_context *ctx;
-+ ssize_t result;
-+ struct inode *inode;
-+ cryptcompress_info_t *info;
-+
-+ inode = file->f_dentry->d_inode;
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ /*
-+ * generic_file_sndfile may want to call update_atime. Grab space for
-+ * stat data update
-+ */
-+ result = reiser4_grab_space(estimate_update_common(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ goto exit;
-+ info = cryptcompress_inode_data(inode);
-+
-+ result = generic_file_sendfile(file, ppos, count, actor, target);
-+ exit:
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/*
-+ * release_cryptcompress - release of struct file_operations
-+ * @inode: inode of released file
-+ * @file: file to release
-+ */
-+int release_cryptcompress(struct inode *inode, struct file *file)
-+{
-+ reiser4_context *ctx = reiser4_init_context(inode->i_sb);
-+
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ reiser4_free_file_fsdata(file);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+#if 0
-+int prepare_write_cryptcompress(struct file *file, struct page *page,
-+ unsigned from, unsigned to)
-+{
-+ return prepare_write_common(file, page, from, to);
-+}
-+#endif /* 0 */
-+
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/cryptcompress.h linux-2.6.20/fs/reiser4/plugin/file/cryptcompress.h
---- linux-2.6.20.orig/fs/reiser4/plugin/file/cryptcompress.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/cryptcompress.h 2007-05-06 14:50:43.774999471 +0400
-@@ -0,0 +1,554 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* See http://www.namesys.com/cryptcompress_design.html */
-+
-+#if !defined( __FS_REISER4_CRYPTCOMPRESS_H__ )
-+#define __FS_REISER4_CRYPTCOMPRESS_H__
-+
-+#include "../../page_cache.h"
-+#include "../compress/compress.h"
-+#include "../crypto/cipher.h"
-+
-+#include <linux/pagemap.h>
-+
-+#define MIN_CLUSTER_SHIFT PAGE_CACHE_SHIFT
-+#define MAX_CLUSTER_SHIFT 16
-+#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_CACHE_SHIFT)
-+#define DC_CHECKSUM_SIZE 4
-+
-+#define MIN_LATTICE_FACTOR 1
-+#define MAX_LATTICE_FACTOR 32
-+
-+/* this mask contains all non-standard plugins that might
-+ be present in reiser4-specific part of inode managed by
-+ cryptcompress file plugin */
-+#define cryptcompress_mask \
-+ ((1 << PSET_FILE) | \
-+ (1 << PSET_CLUSTER) | \
-+ (1 << PSET_CIPHER) | \
-+ (1 << PSET_DIGEST) | \
-+ (1 << PSET_COMPRESSION) | \
-+ (1 << PSET_COMPRESSION_MODE))
-+
-+static inline loff_t min_count(loff_t a, loff_t b)
-+{
-+ return (a < b ? a : b);
-+}
-+
-+static inline loff_t max_count(loff_t a, loff_t b)
-+{
-+ return (a > b ? a : b);
-+}
-+
-+#if REISER4_DEBUG
-+static inline int cluster_shift_ok(int shift)
-+{
-+ return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT);
-+}
-+#endif
-+
-+typedef struct tfm_stream {
-+ __u8 *data;
-+ size_t size;
-+} tfm_stream_t;
-+
-+typedef enum {
-+ INPUT_STREAM,
-+ OUTPUT_STREAM,
-+ LAST_STREAM
-+} tfm_stream_id;
-+
-+typedef tfm_stream_t *tfm_unit[LAST_STREAM];
-+
-+static inline __u8 *ts_data(tfm_stream_t * stm)
-+{
-+ assert("edward-928", stm != NULL);
-+ return stm->data;
-+}
-+
-+static inline size_t ts_size(tfm_stream_t * stm)
-+{
-+ assert("edward-929", stm != NULL);
-+ return stm->size;
-+}
-+
-+static inline void set_ts_size(tfm_stream_t * stm, size_t size)
-+{
-+ assert("edward-930", stm != NULL);
-+
-+ stm->size = size;
-+}
-+
-+static inline int alloc_ts(tfm_stream_t ** stm)
-+{
-+ assert("edward-931", stm);
-+ assert("edward-932", *stm == NULL);
-+
-+ *stm = kmalloc(sizeof **stm, reiser4_ctx_gfp_mask_get());
-+ if (*stm == NULL)
-+ return -ENOMEM;
-+ memset(*stm, 0, sizeof **stm);
-+ return 0;
-+}
-+
-+static inline void free_ts(tfm_stream_t * stm)
-+{
-+ assert("edward-933", !ts_data(stm));
-+ assert("edward-934", !ts_size(stm));
-+
-+ kfree(stm);
-+}
-+
-+static inline int alloc_ts_data(tfm_stream_t * stm, size_t size)
-+{
-+ assert("edward-935", !ts_data(stm));
-+ assert("edward-936", !ts_size(stm));
-+ assert("edward-937", size != 0);
-+
-+ stm->data = reiser4_vmalloc(size);
-+ if (!stm->data)
-+ return -ENOMEM;
-+ set_ts_size(stm, size);
-+ return 0;
-+}
-+
-+static inline void free_ts_data(tfm_stream_t * stm)
-+{
-+ assert("edward-938", equi(ts_data(stm), ts_size(stm)));
-+
-+ if (ts_data(stm))
-+ vfree(ts_data(stm));
-+ memset(stm, 0, sizeof *stm);
-+}
-+
-+/* Write modes for item conversion in flush convert phase */
-+typedef enum {
-+ CRC_APPEND_ITEM = 1,
-+ CRC_OVERWRITE_ITEM = 2,
-+ CRC_CUT_ITEM = 3
-+} cryptcompress_write_mode_t;
-+
-+typedef enum {
-+ PCL_UNKNOWN = 0, /* invalid option */
-+ PCL_APPEND = 1, /* append and/or overwrite */
-+ PCL_TRUNCATE = 2 /* truncate */
-+} page_cluster_op;
-+
-+/* Reiser4 file write/read transforms page cluster into disk cluster (and back)
-+ using crypto/compression transforms implemented by reiser4 transform plugins.
-+ Before each transform we allocate a pair of streams (tfm_unit) and assemble
-+ page cluster into the input one. After transform we split output stream into
-+ a set of items (disk cluster).
-+*/
-+typedef struct tfm_cluster {
-+ coa_set coa;
-+ tfm_unit tun;
-+ tfm_action act;
-+ int uptodate;
-+ int lsize; /* size of the logical cluster */
-+ int len; /* length of the transform stream */
-+} tfm_cluster_t;
-+
-+static inline coa_t get_coa(tfm_cluster_t * tc, reiser4_compression_id id, tfm_action act)
-+{
-+ return tc->coa[id][act];
-+}
-+
-+static inline void
-+set_coa(tfm_cluster_t * tc, reiser4_compression_id id, tfm_action act, coa_t coa)
-+{
-+ tc->coa[id][act] = coa;
-+}
-+
-+static inline int
-+alloc_coa(tfm_cluster_t * tc, compression_plugin * cplug)
-+{
-+ coa_t coa;
-+
-+ coa = cplug->alloc(tc->act);
-+ if (IS_ERR(coa))
-+ return PTR_ERR(coa);
-+ set_coa(tc, cplug->h.id, tc->act, coa);
-+ return 0;
-+}
-+
-+static inline int
-+grab_coa(tfm_cluster_t * tc, compression_plugin * cplug)
-+{
-+ return (cplug->alloc && !get_coa(tc, cplug->h.id, tc->act) ?
-+ alloc_coa(tc, cplug) : 0);
-+}
-+
-+static inline void free_coa_set(tfm_cluster_t * tc)
-+{
-+ tfm_action j;
-+ reiser4_compression_id i;
-+ compression_plugin *cplug;
-+
-+ assert("edward-810", tc != NULL);
-+
-+ for (j = 0; j < TFMA_LAST; j++)
-+ for (i = 0; i < LAST_COMPRESSION_ID; i++) {
-+ if (!get_coa(tc, i, j))
-+ continue;
-+ cplug = compression_plugin_by_id(i);
-+ assert("edward-812", cplug->free != NULL);
-+ cplug->free(get_coa(tc, i, j), j);
-+ set_coa(tc, i, j, 0);
-+ }
-+ return;
-+}
-+
-+static inline tfm_stream_t *tfm_stream(tfm_cluster_t * tc, tfm_stream_id id)
-+{
-+ return tc->tun[id];
-+}
-+
-+static inline void
-+set_tfm_stream(tfm_cluster_t * tc, tfm_stream_id id, tfm_stream_t * ts)
-+{
-+ tc->tun[id] = ts;
-+}
-+
-+static inline __u8 *tfm_stream_data(tfm_cluster_t * tc, tfm_stream_id id)
-+{
-+ return ts_data(tfm_stream(tc, id));
-+}
-+
-+static inline void
-+set_tfm_stream_data(tfm_cluster_t * tc, tfm_stream_id id, __u8 * data)
-+{
-+ tfm_stream(tc, id)->data = data;
-+}
-+
-+static inline size_t tfm_stream_size(tfm_cluster_t * tc, tfm_stream_id id)
-+{
-+ return ts_size(tfm_stream(tc, id));
-+}
-+
-+static inline void
-+set_tfm_stream_size(tfm_cluster_t * tc, tfm_stream_id id, size_t size)
-+{
-+ tfm_stream(tc, id)->size = size;
-+}
-+
-+static inline int
-+alloc_tfm_stream(tfm_cluster_t * tc, size_t size, tfm_stream_id id)
-+{
-+ assert("edward-939", tc != NULL);
-+ assert("edward-940", !tfm_stream(tc, id));
-+
-+ tc->tun[id] = kmalloc(sizeof(tfm_stream_t), reiser4_ctx_gfp_mask_get());
-+ if (!tc->tun[id])
-+ return -ENOMEM;
-+ memset(tfm_stream(tc, id), 0, sizeof(tfm_stream_t));
-+ return alloc_ts_data(tfm_stream(tc, id), size);
-+}
-+
-+static inline int
-+realloc_tfm_stream(tfm_cluster_t * tc, size_t size, tfm_stream_id id)
-+{
-+ assert("edward-941", tfm_stream_size(tc, id) < size);
-+ free_ts_data(tfm_stream(tc, id));
-+ return alloc_ts_data(tfm_stream(tc, id), size);
-+}
-+
-+static inline void free_tfm_stream(tfm_cluster_t * tc, tfm_stream_id id)
-+{
-+ free_ts_data(tfm_stream(tc, id));
-+ free_ts(tfm_stream(tc, id));
-+ set_tfm_stream(tc, id, 0);
-+}
-+
-+static inline unsigned coa_overrun(compression_plugin * cplug, int ilen)
-+{
-+ return (cplug->overrun != NULL ? cplug->overrun(ilen) : 0);
-+}
-+
-+static inline void free_tfm_unit(tfm_cluster_t * tc)
-+{
-+ tfm_stream_id id;
-+ for (id = 0; id < LAST_STREAM; id++) {
-+ if (!tfm_stream(tc, id))
-+ continue;
-+ free_tfm_stream(tc, id);
-+ }
-+}
-+
-+static inline void put_tfm_cluster(tfm_cluster_t * tc)
-+{
-+ assert("edward-942", tc != NULL);
-+ free_coa_set(tc);
-+ free_tfm_unit(tc);
-+}
-+
-+static inline int tfm_cluster_is_uptodate(tfm_cluster_t * tc)
-+{
-+ assert("edward-943", tc != NULL);
-+ assert("edward-944", tc->uptodate == 0 || tc->uptodate == 1);
-+ return (tc->uptodate == 1);
-+}
-+
-+static inline void tfm_cluster_set_uptodate(tfm_cluster_t * tc)
-+{
-+ assert("edward-945", tc != NULL);
-+ assert("edward-946", tc->uptodate == 0 || tc->uptodate == 1);
-+ tc->uptodate = 1;
-+ return;
-+}
-+
-+static inline void tfm_cluster_clr_uptodate(tfm_cluster_t * tc)
-+{
-+ assert("edward-947", tc != NULL);
-+ assert("edward-948", tc->uptodate == 0 || tc->uptodate == 1);
-+ tc->uptodate = 0;
-+ return;
-+}
-+
-+static inline int tfm_stream_is_set(tfm_cluster_t * tc, tfm_stream_id id)
-+{
-+ return (tfm_stream(tc, id) &&
-+ tfm_stream_data(tc, id) && tfm_stream_size(tc, id));
-+}
-+
-+static inline int tfm_cluster_is_set(tfm_cluster_t * tc)
-+{
-+ int i;
-+ for (i = 0; i < LAST_STREAM; i++)
-+ if (!tfm_stream_is_set(tc, i))
-+ return 0;
-+ return 1;
-+}
-+
-+static inline void alternate_streams(tfm_cluster_t * tc)
-+{
-+ tfm_stream_t *tmp = tfm_stream(tc, INPUT_STREAM);
-+
-+ set_tfm_stream(tc, INPUT_STREAM, tfm_stream(tc, OUTPUT_STREAM));
-+ set_tfm_stream(tc, OUTPUT_STREAM, tmp);
-+}
-+
-+/* a kind of data that we can write to the window */
-+typedef enum {
-+ DATA_WINDOW, /* the data we copy form user space */
-+ HOLE_WINDOW /* zeroes if we write hole */
-+} window_stat;
-+
-+/* Sliding window of cluster size which should be set to the approprite position
-+ (defined by cluster index) in a file before page cluster modification by
-+ file_write. Then we translate file size, offset to write from, number of
-+ bytes to write, etc.. to the following configuration needed to estimate
-+ number of pages to read before write, etc...
-+*/
-+typedef struct reiser4_slide {
-+ unsigned off; /* offset we start to write/truncate from */
-+ unsigned count; /* number of bytes (zeroes) to write/truncate */
-+ unsigned delta; /* number of bytes to append to the hole */
-+ window_stat stat; /* a kind of data to write to the window */
-+} reiser4_slide_t;
-+
-+/* The following is a set of possible disk cluster states */
-+typedef enum {
-+ INVAL_DISK_CLUSTER, /* unknown state */
-+ PREP_DISK_CLUSTER, /* disk cluster got converted by flush
-+ at least 1 time */
-+ UNPR_DISK_CLUSTER, /* disk cluster just created and should be
-+ converted by flush */
-+ FAKE_DISK_CLUSTER /* disk cluster doesn't exist neither in memory
-+ nor on disk */
-+} disk_cluster_stat;
-+
-+/*
-+ While implementing all transforms (from page to disk cluster, and back)
-+ reiser4 cluster manager fills the following structure incapsulating pointers
-+ to all the clusters for the same index including the sliding window above
-+*/
-+typedef struct reiser4_cluster {
-+ tfm_cluster_t tc; /* transform cluster */
-+ int nr_pages; /* number of pages */
-+ struct page **pages; /* page cluster */
-+ page_cluster_op op; /* page cluster operation */
-+ struct file *file;
-+ hint_t *hint; /* disk cluster item for traversal */
-+ disk_cluster_stat dstat; /* state of the current disk cluster */
-+ cloff_t index; /* offset in the units of cluster size */
-+ int index_valid; /* to validate the index above, if needed */
-+ reiser4_slide_t *win; /* sliding window of cluster size */
-+ int reserved; /* this indicates that space for disk
-+ cluster modification is reserved */
-+#if REISER4_DEBUG
-+ reiser4_context *ctx;
-+ int reserved_prepped;
-+ int reserved_unprepped;
-+#endif
-+
-+} reiser4_cluster_t;
-+
-+static inline __u8 * tfm_input_data (reiser4_cluster_t * clust)
-+{
-+ return tfm_stream_data(&clust->tc, INPUT_STREAM);
-+}
-+
-+static inline __u8 * tfm_output_data (reiser4_cluster_t * clust)
-+{
-+ return tfm_stream_data(&clust->tc, OUTPUT_STREAM);
-+}
-+
-+static inline int reset_cluster_pgset(reiser4_cluster_t * clust, int nrpages)
-+{
-+ assert("edward-1057", clust->pages != NULL);
-+ memset(clust->pages, 0, sizeof(*clust->pages) * nrpages);
-+ return 0;
-+}
-+
-+static inline int alloc_cluster_pgset(reiser4_cluster_t * clust, int nrpages)
-+{
-+ assert("edward-949", clust != NULL);
-+ assert("edward-1362", clust->pages == NULL);
-+ assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES);
-+
-+ clust->pages =
-+ kmalloc(sizeof(*clust->pages) * nrpages,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages)
-+ return RETERR(-ENOMEM);
-+ reset_cluster_pgset(clust, nrpages);
-+ return 0;
-+}
-+
-+static inline void free_cluster_pgset(reiser4_cluster_t * clust)
-+{
-+ assert("edward-951", clust->pages != NULL);
-+ kfree(clust->pages);
-+ clust->pages = NULL;
-+}
-+
-+static inline void put_cluster_handle(reiser4_cluster_t * clust)
-+{
-+ assert("edward-435", clust != NULL);
-+
-+ put_tfm_cluster(&clust->tc);
-+ if (clust->pages)
-+ free_cluster_pgset(clust);
-+ memset(clust, 0, sizeof *clust);
-+}
-+
-+static inline void inc_keyload_count(crypto_stat_t * data)
-+{
-+ assert("edward-1410", data != NULL);
-+ data->keyload_count++;
-+}
-+
-+static inline void dec_keyload_count(crypto_stat_t * data)
-+{
-+ assert("edward-1411", data != NULL);
-+ assert("edward-1412", data->keyload_count > 0);
-+ data->keyload_count--;
-+}
-+
-+/* cryptcompress specific part of reiser4_inode */
-+typedef struct cryptcompress_info {
-+ crypto_stat_t *crypt;
-+ /* the following 2 fields are controlled by compression mode plugin */
-+ int compress_toggle; /* current status of compressibility */
-+ int lattice_factor; /* factor of dynamic lattice. FIXME: Have a
-+ compression_toggle to keep the factor */
-+#if REISER4_DEBUG
-+ int pgcount; /* number of captured pages */
-+#endif
-+} cryptcompress_info_t;
-+
-+static inline void set_compression_toggle (cryptcompress_info_t * info, int val)
-+{
-+ info->compress_toggle = val;
-+}
-+
-+static inline int get_compression_toggle (cryptcompress_info_t * info)
-+{
-+ return info->compress_toggle;
-+}
-+
-+static inline int compression_is_on(cryptcompress_info_t * info)
-+{
-+ return get_compression_toggle(info) == 1;
-+}
-+
-+static inline void turn_on_compression(cryptcompress_info_t * info)
-+{
-+ set_compression_toggle(info, 1);
-+}
-+
-+static inline void turn_off_compression(cryptcompress_info_t * info)
-+{
-+ set_compression_toggle(info, 0);
-+}
-+
-+static inline void set_lattice_factor(cryptcompress_info_t * info, int val)
-+{
-+ info->lattice_factor = val;
-+}
-+
-+static inline int get_lattice_factor(cryptcompress_info_t * info)
-+{
-+ return info->lattice_factor;
-+}
-+
-+cryptcompress_info_t *cryptcompress_inode_data(const struct inode *);
-+int equal_to_rdk(znode *, const reiser4_key *);
-+int goto_right_neighbor(coord_t *, lock_handle *);
-+int cryptcompress_inode_ok(struct inode *inode);
-+int coord_is_unprepped_ctail(const coord_t * coord);
-+extern int ctail_read_disk_cluster (reiser4_cluster_t *, struct inode *,
-+ znode_lock_mode mode);
-+extern int do_readpage_ctail(struct inode *, reiser4_cluster_t *,
-+ struct page * page, znode_lock_mode mode);
-+extern int ctail_insert_unprepped_cluster(reiser4_cluster_t * clust,
-+ struct inode * inode);
-+extern int readpages_cryptcompress(struct file*, struct address_space*,
-+ struct list_head*, unsigned);
-+int bind_cryptcompress(struct inode *child, struct inode *parent);
-+void destroy_inode_cryptcompress(struct inode * inode);
-+int grab_cluster_pages(struct inode *inode, reiser4_cluster_t * clust);
-+int write_conversion_hook(struct file *file, struct inode * inode, loff_t pos,
-+ reiser4_cluster_t * clust, int * progress);
-+crypto_stat_t * inode_crypto_stat (struct inode * inode);
-+void inherit_crypto_stat_common(struct inode * parent, struct inode * object,
-+ int (*can_inherit)(struct inode * child,
-+ struct inode * parent));
-+void reiser4_attach_crypto_stat(struct inode * inode, crypto_stat_t * info);
-+void change_crypto_stat(struct inode * inode, crypto_stat_t * new);
-+crypto_stat_t * reiser4_alloc_crypto_stat (struct inode * inode);
-+
-+static inline struct crypto_blkcipher * info_get_cipher(crypto_stat_t * info)
-+{
-+ return info->cipher;
-+}
-+
-+static inline void info_set_cipher(crypto_stat_t * info,
-+ struct crypto_blkcipher * tfm)
-+{
-+ info->cipher = tfm;
-+}
-+
-+static inline struct crypto_hash * info_get_digest(crypto_stat_t * info)
-+{
-+ return info->digest;
-+}
-+
-+static inline void info_set_digest(crypto_stat_t * info,
-+ struct crypto_hash * tfm)
-+{
-+ info->digest = tfm;
-+}
-+
-+#endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/file.c linux-2.6.20/fs/reiser4/plugin/file/file.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/file.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/file.c 2007-05-06 14:50:43.779000721 +0400
-@@ -0,0 +1,2821 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * this file contains implementations of inode/file/address_space/file plugin
-+ * operations specific for "unix file plugin" (plugin id is
-+ * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
-+ * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have
-+ * no items but stat data)
-+ */
-+
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../tree_walk.h"
-+#include "../../carry.h"
-+#include "../../page_cache.h"
-+#include "../../ioctl.h"
-+#include "../object.h"
-+#include "../../safe_link.h"
-+
-+#include <linux/writeback.h>
-+#include <linux/pagevec.h>
-+#include <linux/syscalls.h>
-+
-+
-+static int unpack(struct file *file, struct inode *inode, int forever);
-+static void drop_access(unix_file_info_t *);
-+static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
-+ znode_lock_mode lock_mode);
-+
-+/* Get exclusive access and make sure that file is not partially
-+ * converted (It may happen that another process is doing tail
-+ * conversion. If so, wait until it completes)
-+ */
-+static inline void get_exclusive_access_careful(unix_file_info_t * uf_info,
-+ struct inode *inode)
-+{
-+ do {
-+ get_exclusive_access(uf_info);
-+ if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
-+ break;
-+ drop_exclusive_access(uf_info);
-+ schedule();
-+ } while (1);
-+}
-+
-+/* get unix file plugin specific portion of inode */
-+unix_file_info_t *unix_file_inode_data(const struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
-+}
-+
-+/**
-+ * equal_to_rdk - compare key and znode's right delimiting key
-+ * @node: node whose right delimiting key to compare with @key
-+ * @key: key to compare with @node's right delimiting key
-+ *
-+ * Returns true if @key is equal to right delimiting key of @node.
-+ */
-+int equal_to_rdk(znode *node, const reiser4_key *key)
-+{
-+ int result;
-+
-+ read_lock_dk(znode_get_tree(node));
-+ result = keyeq(key, znode_get_rd_key(node));
-+ read_unlock_dk(znode_get_tree(node));
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * equal_to_ldk - compare key and znode's left delimiting key
-+ * @node: node whose left delimiting key to compare with @key
-+ * @key: key to compare with @node's left delimiting key
-+ *
-+ * Returns true if @key is equal to left delimiting key of @node.
-+ */
-+int equal_to_ldk(znode *node, const reiser4_key *key)
-+{
-+ int result;
-+
-+ read_lock_dk(znode_get_tree(node));
-+ result = keyeq(key, znode_get_ld_key(node));
-+ read_unlock_dk(znode_get_tree(node));
-+ return result;
-+}
-+
-+/**
-+ * check_coord - check whether coord corresponds to key
-+ * @coord: coord to check
-+ * @key: key @coord has to correspond to
-+ *
-+ * Returns true if @coord is set as if it was set as result of lookup with @key
-+ * in coord->node.
-+ */
-+static int check_coord(const coord_t *coord, const reiser4_key *key)
-+{
-+ coord_t twin;
-+
-+ node_plugin_by_node(coord->node)->lookup(coord->node, key,
-+ FIND_MAX_NOT_MORE_THAN, &twin);
-+ return coords_equal(coord, &twin);
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/**
-+ * init_uf_coord - initialize extended coord
-+ * @uf_coord:
-+ * @lh:
-+ *
-+ *
-+ */
-+void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
-+{
-+ coord_init_zero(&uf_coord->coord);
-+ coord_clear_iplug(&uf_coord->coord);
-+ uf_coord->lh = lh;
-+ init_lh(lh);
-+ memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
-+ uf_coord->valid = 0;
-+}
-+
-+static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
-+{
-+ assert("vs-1333", uf_coord->valid == 0);
-+
-+ if (coord_is_between_items(&uf_coord->coord))
-+ return;
-+
-+ assert("vs-1348",
-+ item_plugin_by_coord(&uf_coord->coord)->s.file.
-+ init_coord_extension);
-+
-+ item_body_by_coord(&uf_coord->coord);
-+ item_plugin_by_coord(&uf_coord->coord)->s.file.
-+ init_coord_extension(uf_coord, offset);
-+}
-+
-+/**
-+ * goto_right_neighbor - lock right neighbor, drop current node lock
-+ * @coord:
-+ * @lh:
-+ *
-+ * Obtain lock on right neighbor and drop lock on current node.
-+ */
-+int goto_right_neighbor(coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ lock_handle lh_right;
-+
-+ assert("vs-1100", znode_is_locked(coord->node));
-+
-+ init_lh(&lh_right);
-+ result = reiser4_get_right_neighbor(&lh_right, coord->node,
-+ znode_is_wlocked(coord->node) ?
-+ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result) {
-+ done_lh(&lh_right);
-+ return result;
-+ }
-+
-+ /*
-+ * we hold two longterm locks on neighboring nodes. Unlock left of
-+ * them
-+ */
-+ done_lh(lh);
-+
-+ coord_init_first_unit_nocheck(coord, lh_right.node);
-+ move_lh(lh, &lh_right);
-+
-+ return 0;
-+
-+}
-+
-+/**
-+ * set_file_state
-+ * @uf_info:
-+ * @cbk_result:
-+ * @level:
-+ *
-+ * This is to be used by find_file_item and in find_file_state to
-+ * determine real state of file
-+ */
-+static void set_file_state(unix_file_info_t *uf_info, int cbk_result,
-+ tree_level level)
-+{
-+ if (cbk_errored(cbk_result))
-+ /* error happened in find_file_item */
-+ return;
-+
-+ assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
-+
-+ if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+ /*
-+ * container is unknown, therefore conversion can not be in
-+ * progress
-+ */
-+ assert("",
-+ !reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
-+ REISER4_PART_IN_CONV));
-+ if (cbk_result == CBK_COORD_NOTFOUND)
-+ uf_info->container = UF_CONTAINER_EMPTY;
-+ else if (level == LEAF_LEVEL)
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ else
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ } else {
-+ /*
-+ * file state is known, check whether it is set correctly if
-+ * file is not being tail converted
-+ */
-+ if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
-+ REISER4_PART_IN_CONV)) {
-+ assert("vs-1162",
-+ ergo(level == LEAF_LEVEL &&
-+ cbk_result == CBK_COORD_FOUND,
-+ uf_info->container == UF_CONTAINER_TAILS));
-+ assert("vs-1165",
-+ ergo(level == TWIG_LEVEL &&
-+ cbk_result == CBK_COORD_FOUND,
-+ uf_info->container == UF_CONTAINER_EXTENTS));
-+ }
-+ }
-+}
-+
-+int find_file_item_nohint(coord_t *coord, lock_handle *lh,
-+ const reiser4_key *key, znode_lock_mode lock_mode,
-+ struct inode *inode)
-+{
-+ return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
-+ FIND_MAX_NOT_MORE_THAN,
-+ TWIG_LEVEL, LEAF_LEVEL,
-+ (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
-+ (CBK_UNIQUE | CBK_FOR_INSERT),
-+ NULL /* ra_info */ );
-+}
-+
-+/**
-+ * find_file_item - look for file item in the tree
-+ * @hint: provides coordinate, lock handle, seal
-+ * @key: key for search
-+ * @mode: mode of lock to put on returned node
-+ * @ra_info:
-+ * @inode:
-+ *
-+ * This finds position in the tree corresponding to @key. It first tries to use
-+ * @hint's seal if it is set.
-+ */
-+int find_file_item(hint_t *hint, const reiser4_key *key,
-+ znode_lock_mode lock_mode,
-+ struct inode *inode)
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle *lh;
-+
-+ assert("nikita-3030", reiser4_schedulable());
-+ assert("vs-1707", hint != NULL);
-+ assert("vs-47", inode != NULL);
-+
-+ coord = &hint->ext_coord.coord;
-+ lh = hint->ext_coord.lh;
-+ init_lh(lh);
-+
-+ result = hint_validate(hint, key, 1 /* check key */, lock_mode);
-+ if (!result) {
-+ if (coord->between == AFTER_UNIT &&
-+ equal_to_rdk(coord->node, key)) {
-+ result = goto_right_neighbor(coord, lh);
-+ if (result == -E_NO_NEIGHBOR)
-+ return RETERR(-EIO);
-+ if (result)
-+ return result;
-+ assert("vs-1152", equal_to_ldk(coord->node, key));
-+ /*
-+ * we moved to different node. Invalidate coord
-+ * extension, zload is necessary to init it again
-+ */
-+ hint->ext_coord.valid = 0;
-+ }
-+
-+ set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
-+ znode_get_level(coord->node));
-+
-+ return CBK_COORD_FOUND;
-+ }
-+
-+ coord_init_zero(coord);
-+ result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
-+ set_file_state(unix_file_inode_data(inode), result,
-+ znode_get_level(coord->node));
-+
-+ /* FIXME: we might already have coord extension initialized */
-+ hint->ext_coord.valid = 0;
-+ return result;
-+}
-+
-+/* plugin->u.file.write_flowom = NULL
-+ plugin->u.file.read_flow = NULL */
-+
-+void hint_init_zero(hint_t * hint)
-+{
-+ memset(hint, 0, sizeof(*hint));
-+ init_lh(&hint->lh);
-+ hint->ext_coord.lh = &hint->lh;
-+}
-+
-+static int find_file_state(struct inode *inode, unix_file_info_t *uf_info)
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ assert("vs-1628", ea_obtained(uf_info));
-+
-+ if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+ key_by_inode_and_offset_common(inode, 0, &key);
-+ init_lh(&lh);
-+ result = find_file_item_nohint(&coord, &lh, &key,
-+ ZNODE_READ_LOCK, inode);
-+ set_file_state(uf_info, result, znode_get_level(coord.node));
-+ done_lh(&lh);
-+ if (!cbk_errored(result))
-+ result = 0;
-+ } else
-+ result = 0;
-+ assert("vs-1074",
-+ ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
-+ reiser4_txn_restart_current();
-+ return result;
-+}
-+
-+/* estimate and reserve space needed to truncate page which gets partially truncated: one block for page itself, stat
-+ data update (estimate_one_insert_into_item) and one item insertion (estimate_one_insert_into_item) which may happen
-+ if page corresponds to hole extent and unallocated one will have to be created */
-+static int reserve_partial_page(reiser4_tree * tree)
-+{
-+ grab_space_enable();
-+ return reiser4_grab_reserved(reiser4_get_current_sb(),
-+ 1 +
-+ 2 * estimate_one_insert_into_item(tree),
-+ BA_CAN_COMMIT);
-+}
-+
-+/* estimate and reserve space needed to cut one item and update one stat data */
-+static int reserve_cut_iteration(reiser4_tree * tree)
-+{
-+ __u64 estimate = estimate_one_item_removal(tree)
-+ + estimate_one_insert_into_item(tree);
-+
-+ assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
-+
-+ grab_space_enable();
-+ /* We need to double our estimate now that we can delete more than one
-+ node. */
-+ return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
-+ BA_CAN_COMMIT);
-+}
-+
-+int reiser4_update_file_size(struct inode *inode, reiser4_key * key,
-+ int update_sd)
-+{
-+ int result = 0;
-+
-+ INODE_SET_FIELD(inode, i_size, get_key_offset(key));
-+ if (update_sd) {
-+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-+ result = reiser4_update_sd(inode);
-+ }
-+ return result;
-+}
-+
-+/* cut file items one by one starting from the last one until new file size (inode->i_size) is reached. Reserve space
-+ and update file stat data on every single cut from the tree */
-+int
-+cut_file_items(struct inode *inode, loff_t new_size, int update_sd,
-+ loff_t cur_size, int (*update_actor) (struct inode *,
-+ reiser4_key *, int))
-+{
-+ reiser4_key from_key, to_key;
-+ reiser4_key smallest_removed;
-+ file_plugin *fplug = inode_file_plugin(inode);
-+ int result;
-+ int progress = 0;
-+
-+ assert("vs-1248",
-+ fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
-+ fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+
-+ fplug->key_by_inode(inode, new_size, &from_key);
-+ to_key = from_key;
-+ set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
-+ /* this loop normally runs just once */
-+ while (1) {
-+ result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
-+ if (result)
-+ break;
-+
-+ result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
-+ &smallest_removed, inode, 1,
-+ &progress);
-+ if (result == -E_REPEAT) {
-+ /* -E_REPEAT is a signal to interrupt a long file truncation process */
-+ if (progress) {
-+ result =
-+ update_actor(inode, &smallest_removed,
-+ update_sd);
-+ if (result)
-+ break;
-+ }
-+
-+ /* the below does up(sbinfo->delete_mutex). Do not get folled */
-+ reiser4_release_reserved(inode->i_sb);
-+
-+ /* reiser4_cut_tree_object() was interrupted probably because
-+ * current atom requires commit, we have to release
-+ * transaction handle to allow atom commit. */
-+ reiser4_txn_restart_current();
-+ continue;
-+ }
-+ if (result
-+ && !(result == CBK_COORD_NOTFOUND && new_size == 0
-+ && inode->i_size == 0))
-+ break;
-+
-+ set_key_offset(&smallest_removed, new_size);
-+ /* Final sd update after the file gets its correct size */
-+ result = update_actor(inode, &smallest_removed, update_sd);
-+ break;
-+ }
-+
-+ /* the below does up(sbinfo->delete_mutex). Do not get folled */
-+ reiser4_release_reserved(inode->i_sb);
-+
-+ return result;
-+}
-+
-+int find_or_create_extent(struct page *page);
-+
-+/* part of truncate_file_body: it is called when truncate is used to make file
-+ shorter */
-+static int shorten_file(struct inode *inode, loff_t new_size)
-+{
-+ int result;
-+ struct page *page;
-+ int padd_from;
-+ unsigned long index;
-+ char *kaddr;
-+ unix_file_info_t *uf_info;
-+
-+ /*
-+ * all items of ordinary reiser4 file are grouped together. That is why
-+ * we can use reiser4_cut_tree. Plan B files (for instance) can not be
-+ * truncated that simply
-+ */
-+ result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
-+ get_key_offset(reiser4_max_key()),
-+ reiser4_update_file_size);
-+ if (result)
-+ return result;
-+
-+ uf_info = unix_file_inode_data(inode);
-+ assert("vs-1105", new_size == inode->i_size);
-+ if (new_size == 0) {
-+ uf_info->container = UF_CONTAINER_EMPTY;
-+ return 0;
-+ }
-+
-+ result = find_file_state(inode, uf_info);
-+ if (result)
-+ return result;
-+ if (uf_info->container == UF_CONTAINER_TAILS)
-+ /*
-+ * No need to worry about zeroing last page after new file
-+ * end
-+ */
-+ return 0;
-+
-+ padd_from = inode->i_size & (PAGE_CACHE_SIZE - 1);
-+ if (!padd_from)
-+ /* file is truncated to page boundary */
-+ return 0;
-+
-+ result = reserve_partial_page(reiser4_tree_by_inode(inode));
-+ if (result) {
-+ reiser4_release_reserved(inode->i_sb);
-+ return result;
-+ }
-+
-+ /* last page is partially truncated - zero its content */
-+ index = (inode->i_size >> PAGE_CACHE_SHIFT);
-+ page = read_mapping_page(inode->i_mapping, index, NULL);
-+ if (IS_ERR(page)) {
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ if (likely(PTR_ERR(page) == -EINVAL)) {
-+ /* looks like file is built of tail items */
-+ return 0;
-+ }
-+ return PTR_ERR(page);
-+ }
-+ wait_on_page_locked(page);
-+ if (!PageUptodate(page)) {
-+ page_cache_release(page);
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ return RETERR(-EIO);
-+ }
-+
-+ /*
-+ * if page correspons to hole extent unit - unallocated one will be
-+ * created here. This is not necessary
-+ */
-+ result = find_or_create_extent(page);
-+
-+ /*
-+ * FIXME: cut_file_items has already updated inode. Probably it would
-+ * be better to update it here when file is really truncated
-+ */
-+ if (result) {
-+ page_cache_release(page);
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ return result;
-+ }
-+
-+ lock_page(page);
-+ assert("vs-1066", PageLocked(page));
-+ kaddr = kmap_atomic(page, KM_USER0);
-+ memset(kaddr + padd_from, 0, PAGE_CACHE_SIZE - padd_from);
-+ flush_dcache_page(page);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ unlock_page(page);
-+ page_cache_release(page);
-+ /* the below does up(sbinfo->delete_mutex). Do not get confused */
-+ reiser4_release_reserved(inode->i_sb);
-+ return 0;
-+}
-+
-+/**
-+ * should_have_notail
-+ * @uf_info:
-+ * @new_size:
-+ *
-+ * Calls formatting plugin to see whether file of size @new_size has to be
-+ * stored in unformatted nodes or in tail items. 0 is returned for later case.
-+ */
-+static int should_have_notail(const unix_file_info_t *uf_info, loff_t new_size)
-+{
-+ if (!uf_info->tplug)
-+ return 1;
-+ return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
-+ new_size);
-+
-+}
-+
-+/**
-+ * truncate_file_body - change length of file
-+ * @inode: inode of file
-+ * @new_size: new file length
-+ *
-+ * Adjusts items file @inode is built of to match @new_size. It may either cut
-+ * items or add them to represent a hole at the end of file. The caller has to
-+ * obtain exclusive access to the file.
-+ */
-+static int truncate_file_body(struct inode *inode, loff_t new_size)
-+{
-+ int result;
-+
-+ if (inode->i_size < new_size) {
-+ /* expanding truncate */
-+ struct dentry dentry;
-+ struct file file;
-+ unix_file_info_t *uf_info;
-+
-+ dentry.d_inode = inode;
-+ file.f_dentry = &dentry;
-+ file.private_data = NULL;
-+ file.f_pos = new_size;
-+ file.private_data = NULL;
-+ uf_info = unix_file_inode_data(inode);
-+ result = find_file_state(inode, uf_info);
-+ if (result)
-+ return result;
-+
-+ if (should_have_notail(uf_info, new_size)) {
-+ /*
-+ * file of size @new_size has to be built of
-+ * extents. If it is built of tails - convert to
-+ * extents
-+ */
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ /*
-+ * if file is being convered by another process
-+ * - wait until it completes
-+ */
-+ while (1) {
-+ if (reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV)) {
-+ drop_exclusive_access(uf_info);
-+ schedule();
-+ get_exclusive_access(uf_info);
-+ continue;
-+ }
-+ break;
-+ }
-+
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ result = tail2extent(uf_info);
-+ if (result)
-+ return result;
-+ }
-+ }
-+ result = reiser4_write_extent(&file, NULL, 0,
-+ &new_size);
-+ if (result)
-+ return result;
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ } else {
-+ if (uf_info->container == UF_CONTAINER_EXTENTS) {
-+ result = reiser4_write_extent(&file, NULL, 0,
-+ &new_size);
-+ if (result)
-+ return result;
-+ } else {
-+ result = reiser4_write_tail(&file, NULL, 0,
-+ &new_size);
-+ if (result)
-+ return result;
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ }
-+ }
-+ BUG_ON(result > 0);
-+ INODE_SET_FIELD(inode, i_size, new_size);
-+ file_update_time(&file);
-+ result = reiser4_update_sd(inode);
-+ BUG_ON(result != 0);
-+ reiser4_free_file_fsdata(&file);
-+ } else
-+ result = shorten_file(inode, new_size);
-+ return result;
-+}
-+
-+/* plugin->u.write_sd_by_inode = write_sd_by_inode_common */
-+
-+/**
-+ * load_file_hint - copy hint from struct file to local variable
-+ * @file: file to get hint from
-+ * @hint: structure to fill
-+ *
-+ * Reiser4 specific portion of struct file may contain information (hint)
-+ * stored on exiting from previous read or write. That information includes
-+ * seal of znode and coord within that znode where previous read or write
-+ * stopped. This function copies that information to @hint if it was stored or
-+ * initializes @hint by 0s otherwise.
-+ */
-+int load_file_hint(struct file *file, hint_t *hint)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ if (file) {
-+ fsdata = reiser4_get_file_fsdata(file);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+
-+ spin_lock_inode(file->f_dentry->d_inode);
-+ if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
-+ *hint = fsdata->reg.hint;
-+ init_lh(&hint->lh);
-+ hint->ext_coord.lh = &hint->lh;
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+ /*
-+ * force re-validation of the coord on the first
-+ * iteration of the read/write loop.
-+ */
-+ hint->ext_coord.valid = 0;
-+ assert("nikita-19892", coords_equal(&hint->seal.coord1,
-+ &hint->ext_coord.
-+ coord));
-+ return 0;
-+ }
-+ memset(&fsdata->reg.hint, 0, sizeof(hint_t));
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+ }
-+ hint_init_zero(hint);
-+ return 0;
-+}
-+
-+/**
-+ * save_file_hint - copy hint to reiser4 private struct file's part
-+ * @file: file to save hint in
-+ * @hint: hint to save
-+ *
-+ * This copies @hint to reiser4 private part of struct file. It can help
-+ * speedup future accesses to the file.
-+ */
-+void save_file_hint(struct file *file, const hint_t *hint)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ assert("edward-1337", hint != NULL);
-+
-+ if (!file || !reiser4_seal_is_set(&hint->seal))
-+ return;
-+ fsdata = reiser4_get_file_fsdata(file);
-+ assert("vs-965", !IS_ERR(fsdata));
-+ assert("nikita-19891",
-+ coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
-+ assert("vs-30", hint->lh.owner == NULL);
-+ spin_lock_inode(file->f_dentry->d_inode);
-+ fsdata->reg.hint = *hint;
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+ return;
-+}
-+
-+void reiser4_unset_hint(hint_t * hint)
-+{
-+ assert("vs-1315", hint);
-+ hint->ext_coord.valid = 0;
-+ reiser4_seal_done(&hint->seal);
-+ done_lh(&hint->lh);
-+}
-+
-+/* coord must be set properly. So, that reiser4_set_hint
-+ has nothing to do */
-+void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
-+ znode_lock_mode mode)
-+{
-+ ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
-+ assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
-+
-+ reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
-+ hint->offset = get_key_offset(key);
-+ hint->mode = mode;
-+ done_lh(&hint->lh);
-+}
-+
-+int hint_is_set(const hint_t * hint)
-+{
-+ return reiser4_seal_is_set(&hint->seal);
-+}
-+
-+#if REISER4_DEBUG
-+static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
-+{
-+ return (get_key_locality(k1) == get_key_locality(k2) &&
-+ get_key_type(k1) == get_key_type(k2) &&
-+ get_key_band(k1) == get_key_band(k2) &&
-+ get_key_ordering(k1) == get_key_ordering(k2) &&
-+ get_key_objectid(k1) == get_key_objectid(k2));
-+}
-+#endif
-+
-+static int
-+hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
-+ znode_lock_mode lock_mode)
-+{
-+ if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
-+ /* hint either not set or set by different operation */
-+ return RETERR(-E_REPEAT);
-+
-+ assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
-+
-+ if (check_key && get_key_offset(key) != hint->offset)
-+ /* hint is set for different key */
-+ return RETERR(-E_REPEAT);
-+
-+ assert("vs-31", hint->ext_coord.lh == &hint->lh);
-+ return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
-+ hint->ext_coord.lh, lock_mode,
-+ ZNODE_LOCK_LOPRI);
-+}
-+
-+/**
-+ * find_or_create_extent -
-+ * @page:
-+ *
-+ *
-+ */
-+/* look for place at twig level for extent corresponding to page, call extent's writepage method to create
-+ unallocated extent if it does not exist yet, initialize jnode, capture page */
-+int find_or_create_extent(struct page *page)
-+{
-+ int result;
-+ struct inode *inode;
-+ int plugged_hole;
-+
-+ jnode *node;
-+
-+ assert("vs-1065", page->mapping && page->mapping->host);
-+ inode = page->mapping->host;
-+
-+ lock_page(page);
-+ node = jnode_of_page(page);
-+ if (IS_ERR(node)) {
-+ unlock_page(page);
-+ return PTR_ERR(node);
-+ }
-+ JF_SET(node, JNODE_WRITE_PREPARED);
-+ unlock_page(page);
-+
-+ if (node->blocknr == 0) {
-+ plugged_hole = 0;
-+ result = reiser4_update_extent(inode, node, page_offset(page),
-+ &plugged_hole);
-+ if (result) {
-+ JF_CLR(node, JNODE_WRITE_PREPARED);
-+ jput(node);
-+ warning("", "reiser4_update_extent failed: %d", result);
-+ return result;
-+ }
-+ if (plugged_hole)
-+ reiser4_update_sd(inode);
-+ } else {
-+ spin_lock_jnode(node);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+
-+ BUG_ON(node->atom == NULL);
-+ JF_CLR(node, JNODE_WRITE_PREPARED);
-+ jput(node);
-+
-+ if (get_current_context()->entd) {
-+ entd_context *ent = get_entd_context(node->tree->super);
-+
-+ if (ent->cur_request->page == page)
-+ ent->cur_request->node = node;
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * has_anonymous_pages - test whether any page of @inode is tagged MOVED
-+ * @inode: inode whose mapping is examined
-+ *
-+ * Takes the mapping's tree_lock for reading and asks the page radix tree
-+ * whether any entry carries PAGECACHE_TAG_REISER4_MOVED. The answer of
-+ * radix_tree_tagged() is returned unchanged.
-+ */
-+static int has_anonymous_pages(struct inode *inode)
-+{
-+	struct address_space *mapping = inode->i_mapping;
-+	int tagged;
-+
-+	read_lock_irq(&mapping->tree_lock);
-+	tagged = radix_tree_tagged(&mapping->page_tree,
-+				   PAGECACHE_TAG_REISER4_MOVED);
-+	read_unlock_irq(&mapping->tree_lock);
-+
-+	return tagged;
-+}
-+
-+/**
-+ * capture_page_and_create_extent - reserve space, then capture @page
-+ * @page: page to be captured
-+ *
-+ * Reserves room for two item insertions (one for a possibly missing extent,
-+ * one for the stat data update that extent creation causes) and hands the
-+ * page to find_or_create_extent(). If either step fails the page is flagged
-+ * with SetPageError() and the error is returned; 0 is returned on success.
-+ */
-+static int capture_page_and_create_extent(struct page *page)
-+{
-+	struct inode *inode;
-+	int ret;
-+
-+	assert("vs-1084", page->mapping && page->mapping->host);
-+	inode = page->mapping->host;
-+	assert("vs-1139",
-+	       unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
-+	/* the page must lie inside the file */
-+	assert("vs-1393",
-+	       inode->i_size > page_offset(page));
-+
-+	grab_space_enable();
-+	ret = reiser4_grab_space(2 * estimate_one_insert_into_item
-+				 (reiser4_tree_by_inode(inode)),
-+				 BA_CAN_COMMIT);
-+	if (unlikely(ret != 0))
-+		goto failed;
-+
-+	ret = find_or_create_extent(page);
-+	if (ret == 0)
-+		return 0;
-+
-+failed:
-+	SetPageError(page);
-+	return ret;
-+}
-+
-+/*
-+ * commit_write method of struct address_space_operations for the unix file
-+ * plugin.
-+ *
-+ * Marks @page up to date and then, with the page temporarily unlocked but
-+ * pinned by an extra reference, has it captured by
-+ * capture_page_and_create_extent(). The page is locked again before
-+ * returning. @from and @to are not used. Returns the capture result, or
-+ * PTR_ERR() when no reiser4 context could be set up.
-+ */
-+int
-+commit_write_unix_file(struct file *file, struct page *page,
-+		       unsigned from, unsigned to)
-+{
-+	struct inode *inode;
-+	reiser4_context *ctx;
-+	int ret;
-+
-+	assert("umka-3101", file != NULL);
-+	assert("umka-3102", page != NULL);
-+	assert("umka-3093", PageLocked(page));
-+
-+	SetPageUptodate(page);
-+
-+	inode = page->mapping->host;
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	/* keep a reference while the page lock is dropped for the capture */
-+	page_cache_get(page);
-+	unlock_page(page);
-+	ret = capture_page_and_create_extent(page);
-+	lock_page(page);
-+	page_cache_release(page);
-+
-+	/* don't commit transaction under inode semaphore */
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return ret;
-+}
-+
-+/*
-+ * Support for "anonymous" pages and jnodes.
-+ *
-+ * When file is write-accessed through mmap pages can be dirtied from the user
-+ * level. In this case kernel is not notified until one of following happens:
-+ *
-+ * (1) msync()
-+ *
-+ * (2) truncate() (either explicit or through unlink)
-+ *
-+ * (3) VM scanner starts reclaiming mapped pages, dirtying them before
-+ * starting write-back.
-+ *
-+ * As a result of (3) ->writepage may be called on a dirty page without
-+ * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
-+ * (iozone) generate huge number of anonymous pages. Emergency flush handles
-+ * this situation by creating jnode for anonymous page, starting IO on the
-+ * page, and marking jnode with JNODE_KEEPME bit so that it's not thrown out of
-+ * memory. Such jnode is also called anonymous.
-+ *
-+ * reiser4_sync_sb() method tries to insert anonymous pages and jnodes into
-+ * tree. This is done by capture_anonymous_*() functions below.
-+ */
-+
-+/**
-+ * capture_anonymous_page - involve page into transaction
-+ * @page: page to deal with
-+ *
-+ * Pages under writeback are left alone and 0 is returned. Any other page is
-+ * passed to capture_page_and_create_extent(): 1 is returned when that
-+ * succeeds, otherwise a warning is printed and its error code is returned.
-+ */
-+static int capture_anonymous_page(struct page *page)
-+{
-+	int err;
-+
-+	if (PageWriteback(page))
-+		/* FIXME: do nothing? */
-+		return 0;
-+
-+	err = capture_page_and_create_extent(page);
-+	if (err != 0) {
-+		warning("nikita-3329",
-+			"Cannot capture anon page: %i", err);
-+		return err;
-+	}
-+	return 1;
-+}
-+
-+/**
-+ * capture_anonymous_pages - find and capture pages dirtied via mmap
-+ * @mapping: address space where to look for pages
-+ * @index: in: index to start the lookup from; out: index to continue from
-+ * @to_capture: maximum number of pages to capture
-+ *
-+ * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
-+ * captures (involves into atom) them and returns the number of captured
-+ * pages. At most min(pagevec_space(), @to_capture) pages are looked up per
-+ * call.
-+ *
-+ * If no tagged page is found, *@index is set to (pgoff_t)-1 and 0 is
-+ * returned. If capturing a page fails, the MOVED tag is restored on that page
-+ * and on all following pages of the batch and the negative error is returned.
-+ */
-+static int
-+capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
-+ unsigned int to_capture)
-+{
-+ int result;
-+ struct pagevec pvec;
-+ unsigned int i, count;
-+ int nr; /* number of pages captured so far */
-+
-+ pagevec_init(&pvec, 0);
-+ count = min(pagevec_space(&pvec), to_capture);
-+ nr = 0;
-+
-+ /* find pages tagged MOVED */
-+ write_lock_irq(&mapping->tree_lock);
-+ pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree,
-+ (void **)pvec.pages, *index, count,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ if (pagevec_count(&pvec) == 0) {
-+ /*
-+ * there are no pages tagged MOVED in mapping->page_tree
-+ * starting from *index
-+ */
-+ write_unlock_irq(&mapping->tree_lock);
-+ *index = (pgoff_t)-1;
-+ return 0;
-+ }
-+
-+ /*
-+ * clear MOVED tag for all found pages; a reference is taken on each
-+ * page here and dropped by pagevec_release() below
-+ */
-+ for (i = 0; i < pagevec_count(&pvec); i++) {
-+ void *p;
-+
-+ page_cache_get(pvec.pages[i]);
-+ p = radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ assert("vs-49", p == pvec.pages[i]);
-+ }
-+ write_unlock_irq(&mapping->tree_lock);
-+
-+
-+ /* i == pagevec_count() here: continue after the last page found */
-+ *index = pvec.pages[i - 1]->index + 1;
-+
-+ for (i = 0; i < pagevec_count(&pvec); i++) {
-+ /*
-+ * tag PAGECACHE_TAG_REISER4_MOVED will be cleared by
-+ * reiser4_set_page_dirty_internal which is called when jnode is
-+ * captured
-+ *
-+ * NOTE(review): the tag was already cleared in the loop above;
-+ * confirm whether this remark is still accurate.
-+ */
-+ result = capture_anonymous_page(pvec.pages[i]);
-+ if (result == 1)
-+ nr++;
-+ else {
-+ if (result < 0) {
-+ warning("vs-1454",
-+ "failed to capture page: "
-+ "result=%d, captured=%d)\n",
-+ result, i);
-+
-+ /*
-+ * set MOVED tag to all pages which left not
-+ * captured
-+ */
-+ write_lock_irq(&mapping->tree_lock);
-+ for (; i < pagevec_count(&pvec); i ++) {
-+ radix_tree_tag_set(&mapping->page_tree,
-+ pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ }
-+ write_unlock_irq(&mapping->tree_lock);
-+
-+ pagevec_release(&pvec);
-+ return result;
-+ } else {
-+ /*
-+ * result == 0. capture_anonymous_page returns
-+ * 0 for Writeback-ed page. Set MOVED tag on
-+ * that page
-+ */
-+ write_lock_irq(&mapping->tree_lock);
-+ radix_tree_tag_set(&mapping->page_tree,
-+ pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ write_unlock_irq(&mapping->tree_lock);
-+ /*
-+ * move *index back to this skipped page (or just
-+ * past its predecessor in the batch)
-+ */
-+ if (i == 0)
-+ *index = pvec.pages[0]->index;
-+ else
-+ *index = pvec.pages[i - 1]->index + 1;
-+ }
-+ }
-+ }
-+ pagevec_release(&pvec);
-+ return nr;
-+}
-+
-+/**
-+ * capture_anonymous_jnodes - placeholder for capturing anonymous jnodes
-+ * @mapping: address space (not used)
-+ * @from: start index; set to @to on return
-+ * @to: end index
-+ * @to_capture: maximum number of jnodes to capture (not used)
-+ *
-+ * In this version the function captures nothing: it only advances *@from to
-+ * @to, so the caller sees the whole range as processed, and returns 0 as the
-+ * number of captured jnodes.
-+ */
-+static int
-+capture_anonymous_jnodes(struct address_space *mapping,
-+ pgoff_t *from, pgoff_t to, int to_capture)
-+{
-+ *from = to;
-+ return 0;
-+}
-+
-+/*
-+ * Commit atom of the jnode of a page.
-+ *
-+ * Looks up the page's jnode under the page lock, fetches its atom under the
-+ * jnode spin lock (NULL if the page has no jnode) and passes it to
-+ * reiser4_sync_atom(). The whole sequence is retried for as long as
-+ * reiser4_sync_atom() answers -E_REPEAT; its final result is returned.
-+ */
-+static int sync_page(struct page *page)
-+{
-+	int result;
-+
-+	do {
-+		txn_atom *atom = NULL;
-+		jnode *node;
-+
-+		lock_page(page);
-+		node = jprivate(page);
-+		if (node != NULL) {
-+			spin_lock_jnode(node);
-+			atom = jnode_get_atom(node);
-+			spin_unlock_jnode(node);
-+		}
-+		unlock_page(page);
-+
-+		result = reiser4_sync_atom(atom);
-+	} while (result == -E_REPEAT);
-+	/*
-+	 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
-+	 * handle the case where more pages get added to the atom while we are
-+	 * syncing it?
-+	 */
-+	assert("nikita-3485", ergo(result == 0,
-+				   get_current_context()->trans->atom == NULL));
-+	return result;
-+}
-+
-+/*
-+ * Commit atoms of all pages of @inode.
-+ *
-+ * Walks the mapping's page tree one page at a time in index order and calls
-+ * sync_page() on each page. The tree lock is dropped around sync_page(); the
-+ * page is pinned by a reference meanwhile. Stops at the first non-zero
-+ * result of sync_page() and returns it, or returns 0 when the tree is
-+ * exhausted.
-+ */
-+static int sync_page_list(struct inode *inode)
-+{
-+	struct address_space *mapping = inode->i_mapping;
-+	unsigned long next = 0;	/* start index for radix_tree_gang_lookup */
-+	unsigned int nr_found;	/* return value for radix_tree_gang_lookup */
-+	int result = 0;
-+
-+	read_lock_irq(&mapping->tree_lock);
-+	do {
-+		struct page *page;
-+
-+		nr_found = radix_tree_gang_lookup(&mapping->page_tree,
-+						  (void **)&page, next, 1);
-+		assert("", nr_found < 2);
-+		if (nr_found == 0)
-+			break;
-+
-+		/*
-+		 * page may not leave radix tree because it is protected from
-+		 * truncating by inode->i_mutex locked by sys_fsync
-+		 */
-+		page_cache_get(page);
-+		read_unlock_irq(&mapping->tree_lock);
-+
-+		next = page->index + 1;
-+		result = sync_page(page);
-+
-+		page_cache_release(page);
-+		read_lock_irq(&mapping->tree_lock);
-+	} while (result == 0);
-+	read_unlock_irq(&mapping->tree_lock);
-+
-+	return result;
-+}
-+
-+/*
-+ * Commit the atoms a unix file is involved in.
-+ *
-+ * Determines under exclusive access what items the file is made of and then
-+ * commits accordingly: per-page atoms for an extent file, all atoms of the
-+ * super block for a tail file, nothing for an empty file. An unknown
-+ * container yields -EIO. Returns 0 or the error of the failing step.
-+ */
-+static int commit_file_atoms(struct inode *inode)
-+{
-+	unix_file_info_t *info = unix_file_inode_data(inode);
-+	int ret;
-+
-+	/* find what items file is made from */
-+	get_exclusive_access(info);
-+	ret = find_file_state(inode, info);
-+	drop_exclusive_access(info);
-+	if (ret != 0)
-+		return ret;
-+
-+	/* file state cannot change because we are under ->i_mutex */
-+	switch (info->container) {
-+	case UF_CONTAINER_EMPTY:
-+		ret = 0;
-+		break;
-+	case UF_CONTAINER_TAILS:
-+		/*
-+		 * NOTE-NIKITA probably we can be smarter for tails. For now
-+		 * just commit all existing atoms.
-+		 */
-+		ret = txnmgr_force_commit_all(inode->i_sb, 0);
-+		break;
-+	case UF_CONTAINER_EXTENTS:
-+		/* find_file_state might open join an atom */
-+		reiser4_txn_restart_current();
-+		/*
-+		 * when we are called by
-+		 * filemap_fdatawrite->
-+		 *    do_writepages()->
-+		 *       reiser4_writepages()
-+		 *
-+		 * inode->i_mapping->dirty_pages are spliced into
-+		 * ->io_pages, leaving ->dirty_pages dirty.
-+		 *
-+		 * When we are called from
-+		 * reiser4_fsync()->sync_unix_file(), we have to
-+		 * commit atoms of all pages on the ->dirty_list.
-+		 *
-+		 * So for simplicity we just commit ->io_pages and
-+		 * ->dirty_pages.
-+		 */
-+		ret = sync_page_list(inode);
-+		break;
-+	case UF_CONTAINER_UNKNOWN:
-+	default:
-+		ret = -EIO;
-+		break;
-+	}
-+
-+	/*
-+	 * commit current transaction: there can be captured nodes from
-+	 * find_file_state() and finish_conversion().
-+	 */
-+	reiser4_txn_restart_current();
-+	return ret;
-+}
-+
-+/**
-+ * writepages_unix_file - writepages of struct address_space_operations
-+ * @mapping: address space whose host inode is processed
-+ * @wbc: writeback control; ->range_start and ->sync_mode are read,
-+ * ->nr_to_write is read and decremented by the number of captured pages
-+ *
-+ * This captures anonymous pages and anonymous jnodes. Anonymous pages are
-+ * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
-+ * created by reiser4_writepage.
-+ *
-+ * Pages are captured in bursts of at most CAPTURE_APAGE_BURST, each burst in
-+ * its own reiser4 context and under nonexclusive access to the file. Unless
-+ * @wbc->sync_mode is WB_SYNC_ALL the function returns after the first burst;
-+ * otherwise atoms of the file are committed after every burst and bursts are
-+ * repeated until the end of the file is passed.
-+ *
-+ * Returns 0 on success or a negative error code (-EBUSY when called by entd
-+ * and nonexclusive access could not be obtained without blocking).
-+ */
-+int writepages_unix_file(struct address_space *mapping,
-+ struct writeback_control *wbc)
-+{
-+ int result;
-+ unix_file_info_t *uf_info;
-+ pgoff_t pindex, jindex, nr_pages;
-+ long to_capture;
-+ struct inode *inode;
-+
-+ inode = mapping->host;
-+ if (!has_anonymous_pages(inode)) {
-+ /* nothing tagged MOVED: nothing to capture */
-+ result = 0;
-+ goto end;
-+ }
-+ /* pindex tracks pages, jindex tracks jnodes; both start at range_start */
-+ jindex = pindex = wbc->range_start >> PAGE_CACHE_SHIFT;
-+ result = 0;
-+ /* file size in pages, rounded up */
-+ nr_pages =
-+ (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-+ uf_info = unix_file_inode_data(inode);
-+
-+ do {
-+ reiser4_context *ctx;
-+
-+ if (wbc->sync_mode != WB_SYNC_ALL)
-+ to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
-+ else
-+ to_capture = CAPTURE_APAGE_BURST;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ result = PTR_ERR(ctx);
-+ break;
-+ }
-+ /* avoid recursive calls to ->sync_inodes */
-+ ctx->nobalance = 1;
-+ assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
-+ assert("", LOCK_CNT_NIL(inode_sem_w));
-+ assert("", LOCK_CNT_NIL(inode_sem_r));
-+
-+ reiser4_txn_restart_current();
-+
-+ /* we have to get nonexclusive access to the file */
-+ if (get_current_context()->entd) {
-+ /*
-+ * use nonblocking version of nonexclusive_access to
-+ * avoid deadlock which might look like the following:
-+ * process P1 holds NEA on file F1 and called entd to
-+ * reclaim some memory. Entd works for P1 and is going
-+ * to capture pages of file F2. To do that entd has to
-+ * get NEA to F2. F2 is held by process P2 which also
-+ * called entd. But entd is serving P1 at the moment
-+ * and P2 has to wait. Process P3 trying to get EA to
-+ * file F2. Existence of pending EA request to file F2
-+ * makes impossible for entd to get NEA to file
-+ * F2. Neither of these process can continue. Using
-+ * nonblocking version of getting NEA is supposed to
-+ * avoid this deadlock.
-+ */
-+ if (try_to_get_nonexclusive_access(uf_info) == 0) {
-+ result = RETERR(-EBUSY);
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ } else
-+ get_nonexclusive_access(uf_info);
-+
-+ while (to_capture > 0) {
-+ pgoff_t start;
-+
-+ assert("vs-1727", jindex <= pindex);
-+ if (pindex == jindex) {
-+ start = pindex;
-+ result =
-+ capture_anonymous_pages(inode->i_mapping,
-+ &pindex,
-+ to_capture);
-+ if (result <= 0)
-+ break;
-+ to_capture -= result;
-+ wbc->nr_to_write -= result;
-+ /*
-+ * captured pages were contiguous from start: no
-+ * gap between jindex and pindex to look at
-+ */
-+ if (start + result == pindex) {
-+ jindex = pindex;
-+ continue;
-+ }
-+ if (to_capture <= 0)
-+ break;
-+ }
-+ /* deal with anonymous jnodes between jindex and pindex */
-+ result =
-+ capture_anonymous_jnodes(inode->i_mapping, &jindex,
-+ pindex, to_capture);
-+ if (result < 0)
-+ break;
-+ to_capture -= result;
-+ get_current_context()->nr_captured += result;
-+
-+ if (jindex == (pgoff_t) - 1) {
-+ assert("vs-1728", pindex == (pgoff_t) - 1);
-+ break;
-+ }
-+ }
-+ if (to_capture <= 0)
-+ /* there may be left more pages */
-+ __mark_inode_dirty(inode, I_DIRTY_PAGES);
-+
-+ drop_nonexclusive_access(uf_info);
-+ if (result < 0) {
-+ /* error happened */
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ if (wbc->sync_mode != WB_SYNC_ALL) {
-+ /* one burst is enough when not syncing everything */
-+ reiser4_exit_context(ctx);
-+ return 0;
-+ }
-+ result = commit_file_atoms(inode);
-+ reiser4_exit_context(ctx);
-+ /* stop once both indexes have passed the end of the file */
-+ if (pindex >= nr_pages && jindex == pindex)
-+ break;
-+ } while (1);
-+
-+ end:
-+ /* only when running inside a context set up by a caller */
-+ if (is_in_reiser4_context()) {
-+ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
-+ /*
-+ * there are already pages to flush, flush them out, do
-+ * not delay until end of reiser4_sync_inodes
-+ */
-+ reiser4_writeout(inode->i_sb, wbc);
-+ get_current_context()->nr_captured = 0;
-+ }
-+ }
-+ return result;
-+}
-+
-+/*
-+ * ->sync() method for unix file.
-+ *
-+ * Reserves the blocks estimated by estimate_update_common() for @dentry's
-+ * inode, writes the inode's stat data with write_sd_by_inode_common() and
-+ * then forces commit of the atom of the current transaction handle.
-+ *
-+ * @file and @datasync are not used. Returns 0 on success, PTR_ERR() if the
-+ * reiser4 context cannot be initialized, or -ENOSPC if the space reservation
-+ * fails.
-+ */
-+int sync_unix_file(struct file *file, struct dentry *dentry, int datasync)
-+{
-+	struct inode *inode = dentry->d_inode;
-+	reiser4_block_nr reserve;
-+	reiser4_context *ctx;
-+	txn_atom *atom;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	reserve = estimate_update_common(inode);
-+	if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+		reiser4_exit_context(ctx);
-+		return RETERR(-ENOSPC);
-+	}
-+	write_sd_by_inode_common(inode);
-+
-+	/* take atom and transaction handle locks, then force the commit */
-+	atom = get_current_atom_locked();
-+	spin_lock_txnh(ctx->trans);
-+	force_commit_atom(ctx->trans);
-+
-+	reiser4_exit_context(ctx);
-+	return 0;
-+}
-+
-+/**
-+ * readpage_unix_file - readpage of struct address_space_operations
-+ * @file: file the page belongs to; its hint is loaded and saved back
-+ * @page: locked, not up-to-date page to fill
-+ *
-+ * Compose a key for the first byte of @page and search for the item that
-+ * covers it. If the item is found its readpage method is called. The page
-+ * lock is dropped during the tree search, so the page may get truncated or
-+ * become up to date meanwhile; both cases are handled after relocking.
-+ *
-+ * On every error return the page has been unlocked. Returns 0 on success,
-+ * -EINVAL if the page is outside the file or was truncated, -ENOMEM, -EIO on
-+ * suspected corruption, or the error of the failing helper.
-+ */
-+int readpage_unix_file(struct file *file, struct page *page)
-+{
-+	reiser4_context *ctx;
-+	struct inode *inode;
-+	item_plugin *iplug;
-+	lock_handle *lh;
-+	coord_t *coord;
-+	hint_t *hint;
-+	reiser4_key key;
-+	int result;
-+
-+	assert("vs-1062", PageLocked(page));
-+	assert("vs-976", !PageUptodate(page));
-+	assert("vs-1061", page->mapping && page->mapping->host);
-+
-+	inode = page->mapping->host;
-+	if (inode->i_size <= page_offset(page)) {
-+		/* page is out of file already */
-+		unlock_page(page);
-+		return -EINVAL;
-+	}
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx)) {
-+		unlock_page(page);
-+		return PTR_ERR(ctx);
-+	}
-+
-+	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+	if (hint == NULL) {
-+		unlock_page(page);
-+		reiser4_exit_context(ctx);
-+		return RETERR(-ENOMEM);
-+	}
-+
-+	result = load_file_hint(file, hint);
-+	if (result) {
-+		kfree(hint);
-+		unlock_page(page);
-+		reiser4_exit_context(ctx);
-+		return result;
-+	}
-+	lh = &hint->lh;
-+
-+	/* get key of first byte of the page */
-+	key_by_inode_and_offset_common(inode, page_offset(page), &key);
-+
-+	/* look for file metadata corresponding to first byte of page */
-+	page_cache_get(page);
-+	unlock_page(page);
-+	result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
-+	lock_page(page);
-+	page_cache_release(page);
-+
-+	if (page->mapping == NULL) {
-+		/*
-+		 * readpage allows truncate to run concurrently. Page was
-+		 * truncated while it was not locked
-+		 */
-+		result = -EINVAL;
-+		goto bail;
-+	}
-+
-+	if (result != CBK_COORD_FOUND)
-+		goto bail;
-+	if (hint->ext_coord.coord.between != AT_UNIT) {
-+		/* file is truncated */
-+		result = -EINVAL;
-+		goto bail;
-+	}
-+
-+	/*
-+	 * item corresponding to page is found. It can not be removed because
-+	 * znode lock is held
-+	 */
-+	if (PageUptodate(page)) {
-+		result = 0;
-+		goto bail;
-+	}
-+
-+	coord = &hint->ext_coord.coord;
-+	result = zload(coord->node);
-+	if (result)
-+		goto bail;
-+
-+	validate_extended_coord(&hint->ext_coord, page_offset(page));
-+
-+	if (!coord_is_existing_unit(coord)) {
-+		/* this indicates corruption */
-+		warning("vs-280",
-+			"Looking for page %lu of file %llu (size %lli). "
-+			"No file items found (%d). File is corrupted?\n",
-+			page->index, (unsigned long long)get_inode_oid(inode),
-+			inode->i_size, result);
-+		zrelse(coord->node);
-+		result = RETERR(-EIO);
-+		goto bail;
-+	}
-+
-+	/*
-+	 * get plugin of found item or use plugin if extent if there are no
-+	 * one
-+	 */
-+	iplug = item_plugin_by_coord(coord);
-+	if (iplug->s.file.readpage)
-+		result = iplug->s.file.readpage(coord, page);
-+	else
-+		result = RETERR(-EINVAL);
-+
-+	if (result == 0)
-+		set_key_offset(&key,
-+			       (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT);
-+	else
-+		unlock_page(page);
-+	/* FIXME should call reiser4_set_hint() on success */
-+	reiser4_unset_hint(hint);
-+
-+	assert("vs-979",
-+	       ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
-+	assert("vs-9791", ergo(result != 0, !PageLocked(page)));
-+
-+	zrelse(coord->node);
-+	done_lh(lh);
-+
-+	save_file_hint(file, hint);
-+	kfree(hint);
-+
-+	/*
-+	 * FIXME: explain why it is needed. HINT: page allocation in write can
-+	 * not be done when atom is not NULL because reiser4_writepage can not
-+	 * kick entd and have to eflush
-+	 */
-+	reiser4_txn_restart(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+
-+bail:
-+	/* common exit for paths that leave with the page still locked */
-+	done_lh(lh);
-+	kfree(hint);
-+	unlock_page(page);
-+	reiser4_txn_restart(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * State handed by readpages_unix_file() to uf_readpages_filler() through
-+ * read_cache_pages(); it lets the filler keep the lock and the coord between
-+ * pages.
-+ */
-+struct uf_readpages_context {
-+ lock_handle lh; /* lock handle filled by coord_by_key(), dropped with done_lh() */
-+ coord_t coord; /* coord found by coord_by_key() for the current extent */
-+};
-+
-+/* A callback function for readpages_unix_file/read_cache_pages.
-+ * If the coord does not point to an existing extent item (e.g. the file is
-+ * built of tails), -EIO is returned.
-+ *
-+ * @data -- a pointer to struct uf_readpages_context object,
-+ * to save the twig lock and the coord between
-+ * read_cache_page iterations.
-+ * @page -- page to start read.
-+ *
-+ * The page is unlocked on every path that returns an error and when the
-+ * page is already up to date. Returns 0 or a negative error code.
-+ */
-+static int uf_readpages_filler(void * data, struct page * page)
-+{
-+ struct uf_readpages_context *rc = data;
-+ jnode * node;
-+ int ret = 0;
-+ reiser4_extent *ext;
-+ __u64 ext_index;
-+ int cbk_done = 0; /* set once a tree search was done for this page */
-+ struct address_space * mapping = page->mapping;
-+
-+ if (PageUptodate(page)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ if (rc->lh.node == 0) {
-+ /* no twig lock - have to do tree search. */
-+ reiser4_key key;
-+ repeat:
-+ /* the page lock is not held across the tree search */
-+ unlock_page(page);
-+ key_by_inode_and_offset_common(
-+ mapping->host, page_offset(page), &key);
-+ ret = coord_by_key(
-+ &get_super_private(mapping->host->i_sb)->tree,
-+ &key, &rc->coord, &rc->lh,
-+ ZNODE_READ_LOCK, FIND_EXACT,
-+ TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
-+ if (ret)
-+ return ret;
-+ lock_page(page);
-+ cbk_done = 1;
-+ }
-+ ret = zload(rc->coord.node);
-+ if (ret) {
-+ unlock_page(page);
-+ return ret;
-+ }
-+ if (!coord_is_existing_item(&rc->coord) ||
-+ !item_is_extent(&rc->coord)) {
-+ zrelse(rc->coord.node);
-+ unlock_page(page);
-+ return RETERR(-EIO);
-+ }
-+ ext = extent_by_coord(&rc->coord);
-+ ext_index = extent_unit_index(&rc->coord);
-+ if (page->index < ext_index ||
-+ page->index >= ext_index + extent_get_width(ext)) {
-+ /* the page index doesn't belong to the extent unit
-+ which the coord points to - release the lock and
-+ repeat with tree search. */
-+ zrelse(rc->coord.node);
-+ done_lh(&rc->lh);
-+ /* we can be here after a CBK call only in case of
-+ corruption of the tree or the tree lookup algorithm bug. */
-+ if (unlikely(cbk_done)) {
-+ unlock_page(page);
-+ return RETERR(-EIO);
-+ }
-+ goto repeat;
-+ }
-+ node = jnode_of_page(page);
-+ if (unlikely(IS_ERR(node))) {
-+ zrelse(rc->coord.node);
-+ unlock_page(page);
-+ return PTR_ERR(node);
-+ }
-+ /* read the page at its offset within the extent unit */
-+ ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
-+ jput(node);
-+ zrelse(rc->coord.node);
-+ if (ret)
-+ unlock_page(page);
-+ return ret;
-+}
-+
-+/**
-+ * readpages_unix_file - called by the readahead code, starts reading for each
-+ * page of given list of pages
-+ * @file: file being read (not used here)
-+ * @mapping: address space the pages belong to
-+ * @pages: list of pages to read
-+ * @nr_pages: number of pages on @pages (not used here)
-+ *
-+ * Runs read_cache_pages() with uf_readpages_filler() inside a reiser4
-+ * context. If the context cannot be created the page list is released with
-+ * put_pages_list() and the error is returned; otherwise the result of
-+ * read_cache_pages() is returned.
-+ */
-+int readpages_unix_file(
-+	struct file *file, struct address_space *mapping,
-+	struct list_head *pages, unsigned nr_pages)
-+{
-+	struct uf_readpages_context filler_state;
-+	reiser4_context *ctx;
-+	int err;
-+
-+	ctx = reiser4_init_context(mapping->host->i_sb);
-+	if (IS_ERR(ctx)) {
-+		put_pages_list(pages);
-+		return PTR_ERR(ctx);
-+	}
-+
-+	init_lh(&filler_state.lh);
-+	err = read_cache_pages(mapping, pages, uf_readpages_filler,
-+			       &filler_state);
-+	done_lh(&filler_state.lh);
-+
-+	context_set_commit_async(ctx);
-+	/* close the transaction to protect further page allocation from deadlocks */
-+	reiser4_txn_restart(ctx);
-+	reiser4_exit_context(ctx);
-+	return err;
-+}
-+
-+/*
-+ * Number of blocks to reserve for a read: only the stat data item gets
-+ * updated, so this is the estimate of a stat data update. @count is ignored.
-+ */
-+static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
-+						loff_t count UNUSED_ARG)
-+{
-+	reiser4_block_nr blocks;
-+
-+	assert("vs-1249",
-+	       inode_file_plugin(inode)->estimate.update ==
-+	       estimate_update_common);
-+
-+	blocks = estimate_update_common(inode);
-+	return blocks;
-+}
-+
-+/*
-+ * Read up to @count bytes at offset *@off of @file into user buffer @buf,
-+ * item by item: each iteration finds the item for the current flow key and
-+ * calls that item's read method.
-+ *
-+ * This is called with access to the file obtained, so the file's container
-+ * can not change.
-+ *
-+ * Returns the number of bytes consumed from the flow if that is non-zero,
-+ * otherwise the last result code.
-+ */
-+static ssize_t read_file(hint_t *hint, struct file *file,	/* file to read from to */
-+			 char __user *buf,	/* address of user-space buffer */
-+			 size_t count,	/* number of bytes to read */
-+			 loff_t *off)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	/*
-+	 * seal and coord sealed with it come from reiser4 private data of
-+	 * struct file. The coord tells where the last read of this file
-+	 * finished, the seal helps to determine if that location is still
-+	 * valid.
-+	 */
-+	coord_t *coord = &hint->ext_coord.coord;
-+	flow_t flow;
-+	znode *node;
-+	size_t done;
-+	int result;
-+
-+	/* build flow */
-+	assert("vs-1250",
-+	       inode_file_plugin(inode)->flow_by_inode ==
-+	       flow_by_inode_unix_file);
-+	result = flow_by_inode_unix_file(inode, buf, 1 /* user space */ ,
-+					 count, *off, READ_OP, &flow);
-+	if (unlikely(result))
-+		return result;
-+
-+	while (flow.length && result == 0) {
-+		result = find_file_item(hint, &flow.key, ZNODE_READ_LOCK,
-+					inode);
-+		if (cbk_errored(result))
-+			/* error happened */
-+			break;
-+
-+		if (coord->between != AT_UNIT) {
-+			/* there were no items corresponding to given offset */
-+			done_lh(hint->ext_coord.lh);
-+			break;
-+		}
-+
-+		node = coord->node;
-+		result = zload(node);
-+		if (unlikely(result)) {
-+			done_lh(hint->ext_coord.lh);
-+			break;
-+		}
-+
-+		if (hint->ext_coord.valid == 0)
-+			validate_extended_coord(&hint->ext_coord,
-+						get_key_offset(&flow.key));
-+
-+		assert("vs-4", hint->ext_coord.valid == 1);
-+		assert("vs-33", hint->ext_coord.lh == &hint->lh);
-+
-+		/* call item's read method */
-+		result = item_plugin_by_coord(coord)->s.file.read(file, &flow,
-+								  hint);
-+		zrelse(node);
-+		done_lh(hint->ext_coord.lh);
-+	}
-+
-+	done = count - flow.length;
-+	return done ? done : result;
-+}
-+
-+static ssize_t read_unix_file_container_tails(struct file*, char __user*, size_t, loff_t*);
-+
-+/**
-+ * read_unix_file - read of struct file_operations
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to read
-+ * @off: position in file to read from
-+ *
-+ * This is implementation of vfs's read method of struct file_operations for
-+ * unix file plugin. If the file's container is not known yet it is
-+ * determined first under exclusive access; otherwise nonexclusive access is
-+ * taken. Extent files are read with do_sync_read(), tail files and files in
-+ * the middle of a conversion with read_unix_file_container_tails(); an
-+ * empty file reads 0 bytes.
-+ */
-+ssize_t read_unix_file(struct file *file, char __user *buf, size_t read_amount,
-+		       loff_t *off)
-+{
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	struct inode *inode;
-+	ssize_t result;
-+
-+	if (unlikely(read_amount == 0))
-+		return 0;
-+
-+	assert("umka-072", file != NULL);
-+	assert("umka-074", off != NULL);
-+	inode = file->f_dentry->d_inode;
-+	assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	if (uf_info->container != UF_CONTAINER_UNKNOWN) {
-+		get_nonexclusive_access(uf_info);
-+		result = 0;
-+	} else {
-+		get_exclusive_access(uf_info);
-+		result = find_file_state(inode, uf_info);
-+	}
-+
-+	if (likely(result == 0))
-+		result = reiser4_grab_space_force(
-+			unix_file_estimate_read(inode, read_amount),
-+			BA_CAN_COMMIT);
-+
-+	if (likely(result == 0)) {
-+		if (uf_info->container == UF_CONTAINER_EXTENTS) {
-+			result = do_sync_read(file, buf, read_amount, off);
-+		} else if (uf_info->container == UF_CONTAINER_TAILS ||
-+			   reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV) ||
-+			   reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+			result = read_unix_file_container_tails(file, buf,
-+								read_amount,
-+								off);
-+		} else {
-+			assert("zam-1085",
-+			       uf_info->container == UF_CONTAINER_EMPTY);
-+			result = 0;
-+		}
-+	}
-+
-+	/* common exit: whichever access was taken above is dropped here */
-+	drop_access(uf_info);
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * Read from a file that is (or may partly be) built of tail items.
-+ *
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to read
-+ * @off: position in file to read from; advanced by the number of bytes read
-+ *
-+ * Reads in chunks of at most PAGE_CACHE_SIZE bytes via read_file(), faulting
-+ * in the destination user pages before each chunk and restarting the
-+ * transaction between chunks. The read is clipped at the current file size.
-+ *
-+ * Returns the number of bytes read, or an error code if nothing was read.
-+ */
-+static ssize_t read_unix_file_container_tails(
-+	struct file *file, char __user *buf, size_t read_amount, loff_t *off)
-+{
-+	int result;
-+	struct inode *inode;
-+	hint_t *hint;
-+	unix_file_info_t *uf_info;
-+	size_t count, left, chunk;
-+	/*
-+	 * must be signed: read_file() reports errors as negative values, and
-+	 * with an unsigned type the "read < 0" test below could never fire
-+	 */
-+	ssize_t read;
-+	loff_t size;
-+
-+	assert("umka-072", file != NULL);
-+	assert("umka-074", off != NULL);
-+	inode = file->f_dentry->d_inode;
-+	assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+	if (hint == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	result = load_file_hint(file, hint);
-+	if (result) {
-+		kfree(hint);
-+		return result;
-+	}
-+
-+	left = read_amount;
-+	count = 0;
-+	uf_info = unix_file_inode_data(inode);
-+	while (left > 0) {
-+		reiser4_txn_restart_current();
-+		size = i_size_read(inode);
-+		if (*off >= size)
-+			/* position to read from is past the end of file */
-+			break;
-+		if (*off + left > size)
-+			left = size - *off;
-+		chunk = left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left;
-+
-+		/* faultin user page */
-+		result = fault_in_pages_writeable(buf, chunk);
-+		if (result) {
-+			/*
-+			 * leave through the common exit so that the lock
-+			 * handle is released and the hint is freed
-+			 */
-+			result = RETERR(-EFAULT);
-+			break;
-+		}
-+
-+		read = read_file(hint, file, buf, chunk, off);
-+		if (read < 0) {
-+			result = read;
-+			break;
-+		}
-+		left -= read;
-+		buf += read;
-+
-+		/* update position in a file */
-+		*off += read;
-+		/* total number of read bytes */
-+		count += read;
-+	}
-+	done_lh(&hint->lh);
-+	save_file_hint(file, hint);
-+	kfree(hint);
-+	if (count)
-+		file_accessed(file);
-+	/* return number of read bytes or error code if nothing is read */
-+	return count ? count : result;
-+}
-+
-+/*
-+ * Throw out all pages of @inode's mapping and unpack the file.
-+ *
-+ * Every page from index 0 up to the page count derived from i_size is
-+ * invalidated, so that no second copy of tail data stays in the page cache;
-+ * then unpack() is called with its last argument 0 ("not forever"). The
-+ * result of unpack() is returned.
-+ *
-+ * Within this file the function is called from mmap_unix_file() for files
-+ * built of tails.
-+ */
-+static int check_pages_unix_file(struct file *file, struct inode *inode)
-+{
-+	loff_t nr_pages;
-+
-+	/* file size in pages, rounded up */
-+	nr_pages = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-+	reiser4_invalidate_pages(inode->i_mapping, 0, nr_pages, 0);
-+
-+	return unpack(file, inode, 0 /* not forever */ );
-+}
-+
-+/**
-+ * mmap_unix_file - mmap of struct file_operations
-+ * @file: file to mmap
-+ * @vma: memory area the file is mapped into; ->vm_flags is examined
-+ *
-+ * This is implementation of vfs's mmap method of struct file_operations for
-+ * unix file plugin. Under exclusive access to the file: if the inode is not
-+ * read-only and the mapping may be written or is shared, a file built of
-+ * tails is passed to check_pages_unix_file(). Space for a stat data update
-+ * is then reserved and generic_file_mmap() is called. On success the inode
-+ * gets the REISER4_HAS_MMAP flag.
-+ *
-+ * Returns 0 or the error code of the step that failed.
-+ */
-+int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	unix_file_info_t *uf_info;
-+	reiser4_block_nr needed;
-+	reiser4_context *ctx;
-+	int result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access_careful(uf_info, inode);
-+
-+	if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
-+		/*
-+		 * we need file built of extent items. If it is still built of
-+		 * tail items we have to convert it. Find what items the file
-+		 * is built of
-+		 */
-+		result = find_file_state(inode, uf_info);
-+		if (result != 0)
-+			goto out;
-+
-+		assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
-+				   uf_info->container == UF_CONTAINER_EXTENTS ||
-+				   uf_info->container == UF_CONTAINER_EMPTY));
-+		if (uf_info->container == UF_CONTAINER_TAILS) {
-+			/*
-+			 * invalidate all pages and convert file from tails to
-+			 * extents
-+			 */
-+			result = check_pages_unix_file(file, inode);
-+			if (result)
-+				goto out;
-+		}
-+	}
-+
-+	/*
-+	 * generic_file_mmap will do update_atime. Grab space for stat data
-+	 * update.
-+	 */
-+	needed = inode_file_plugin(inode)->estimate.update(inode);
-+	result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
-+	if (result)
-+		goto out;
-+
-+	result = generic_file_mmap(file, vma);
-+	if (result == 0)
-+		/* mark file as having mapping. */
-+		reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
-+
-+out:
-+	drop_exclusive_access(uf_info);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/**
-+ * find_first_item - identify the item holding the first byte of a file
-+ * @inode: inode of the file
-+ *
-+ * Finds file item which is responsible for first byte in the file.
-+ *
-+ * Returns EXTENT_POINTER_ID or FORMATTING_ID when such an item is found.
-+ * Returns -EIO when the search stops at a position that is not a unit, or
-+ * when the item found has any other id. Otherwise the result of the search
-+ * or of zload() is returned unchanged.
-+ */
-+static int find_first_item(struct inode *inode)
-+{
-+	reiser4_key key;
-+	lock_handle lh;
-+	coord_t coord;
-+	int ret;
-+
-+	coord_init_zero(&coord);
-+	init_lh(&lh);
-+	inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
-+
-+	ret = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
-+				    inode);
-+	if (ret != CBK_COORD_FOUND)
-+		goto out;
-+
-+	if (coord.between != AT_UNIT) {
-+		ret = RETERR(-EIO);
-+		goto out;
-+	}
-+
-+	ret = zload(coord.node);
-+	if (ret != 0)
-+		goto out;
-+
-+	ret = item_id_by_coord(&coord);
-+	zrelse(coord.node);
-+	if (ret != EXTENT_POINTER_ID && ret != FORMATTING_ID)
-+		ret = RETERR(-EIO);
-+
-+out:
-+	done_lh(&lh);
-+	return ret;
-+}
-+
-+/**
-+ * open_unix_file - finish an interrupted tail conversion when a file is opened
-+ * @inode: inode of the file being opened
-+ * @file: file being opened (not referenced by this function)
-+ *
-+ * Does nothing when IS_RDONLY(@inode) is true or when the inode does not carry
-+ * the REISER4_PART_MIXED flag. Otherwise exclusive access is taken, the flag
-+ * is checked again, and the conversion that was left incomplete is completed;
-+ * its direction is chosen by the type of the first item of the file.
-+ *
-+ * Returns 0 if there was nothing to do, -EIO if the first item is neither an
-+ * extent nor a formatting item, or the result of the conversion.
-+ */
-+int open_unix_file(struct inode *inode, struct file *file)
-+{
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	int first_item;
-+	int result;
-+
-+	if (IS_RDONLY(inode) ||
-+	    !reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
-+		return 0;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access_careful(uf_info, inode);
-+
-+	/* stays 0 if another process completed the conversion meanwhile */
-+	result = 0;
-+	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+		/*
-+		 * file left in semi converted state after unclean shutdown or
-+		 * another thread is doing conversion and dropped exclusive
-+		 * access while doing balance dirty pages. Complete the
-+		 * conversion
-+		 */
-+		first_item = find_first_item(inode);
-+		if (first_item == EXTENT_POINTER_ID) {
-+			/*
-+			 * first item is extent, therefore there was
-+			 * incomplete tail2extent conversion. Complete it
-+			 */
-+			result = tail2extent(unix_file_inode_data(inode));
-+		} else if (first_item == FORMATTING_ID) {
-+			/*
-+			 * first item is formatting item, therefore there was
-+			 * incomplete extent2tail conversion. Complete it
-+			 */
-+			result = extent2tail(unix_file_inode_data(inode));
-+		} else {
-+			result = -EIO;
-+		}
-+
-+		assert("vs-1712",
-+		       ergo(result == 0,
-+			    (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
-+			     !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
-+	}
-+
-+	drop_exclusive_access(uf_info);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+#define NEITHER_OBTAINED 0 /* write_unix_file: no access to the file is held */
-+#define EA_OBTAINED 1 /* write_unix_file: exclusive access was obtained */
-+#define NEA_OBTAINED 2 /* write_unix_file: nonexclusive access was obtained */
-+
-+/*
-+ * Release the access held on @uf_info, picking the exclusive or nonexclusive
-+ * variant according to @uf_info->exclusive_use.
-+ */
-+static void drop_access(unix_file_info_t *uf_info)
-+{
-+	if (!uf_info->exclusive_use) {
-+		drop_nonexclusive_access(uf_info);
-+		return;
-+	}
-+	drop_exclusive_access(uf_info);
-+}
-+
-+#define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
-+ __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__) /* printk prefixed with file, line and function name */
-+
-+/**
-+ * write_unix_file - write of struct file_operations
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @pos: position in file to write to; advanced by the number of bytes written
-+ *
-+ * This is implementation of vfs's write method of struct file_operations for
-+ * unix file plugin.
-+ *
-+ * The data is written in chunks of at most PAGE_CACHE_SIZE *
-+ * WRITE_GRANULARITY bytes. For every chunk the function obtains exclusive or
-+ * nonexclusive access to the file, picks the item write method (extent or
-+ * tail, converting tails to extents first when should_have_notail() requires
-+ * it), writes, updates the stat data and drops the access again. Every exit
-+ * from the loop leaves with no access held.
-+ *
-+ * Returns the number of bytes written or, if nothing was written, the error
-+ * code. A failure to update the stat data is returned as an error at once.
-+ */
-+ssize_t write_unix_file(struct file *file, const char __user *buf,
-+			size_t count, loff_t *pos)
-+{
-+	int result;
-+	reiser4_context *ctx;
-+	struct inode *inode;
-+	unix_file_info_t *uf_info;
-+	ssize_t written;
-+	int try_free_space;
-+	int to_write = PAGE_CACHE_SIZE * WRITE_GRANULARITY;
-+	size_t left;
-+	ssize_t (*write_op)(struct file *, const char __user *, size_t,
-+			    loff_t *pos);
-+	int ea;
-+	loff_t new_size;
-+
-+	inode = file->f_dentry->d_inode;
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	mutex_lock(&inode->i_mutex);
-+
-+	assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+	assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
-+
-+	/* check amount of bytes to write and writing position */
-+	result = generic_write_checks(file, pos, &count, 0);
-+	if (result) {
-+		mutex_unlock(&inode->i_mutex);
-+		context_set_commit_async(ctx);
-+		reiser4_exit_context(ctx);
-+		return result;
-+	}
-+
-+	result = remove_suid(file->f_dentry);
-+	if (result) {
-+		mutex_unlock(&inode->i_mutex);
-+		context_set_commit_async(ctx);
-+		reiser4_exit_context(ctx);
-+		return result;
-+	}
-+
-+	uf_info = unix_file_inode_data(inode);
-+
-+	current->backing_dev_info = inode->i_mapping->backing_dev_info;
-+	written = 0;
-+	/*
-+	 * initialised to 0, so the -ENOSPC retry in the loop below is
-+	 * currently never taken
-+	 */
-+	try_free_space = 0;
-+	left = count;
-+	ea = NEITHER_OBTAINED;
-+
-+	/* file size the write will result in, used to choose tails/extents */
-+	new_size = i_size_read(inode);
-+	if (*pos + count > new_size)
-+		new_size = *pos + count;
-+
-+	while (left) {
-+		if (left < to_write)
-+			to_write = left;
-+
-+		if (uf_info->container == UF_CONTAINER_EMPTY) {
-+			get_exclusive_access(uf_info);
-+			ea = EA_OBTAINED;
-+			if (uf_info->container != UF_CONTAINER_EMPTY) {
-+				/* file is made not empty by another process */
-+				drop_exclusive_access(uf_info);
-+				ea = NEITHER_OBTAINED;
-+				continue;
-+			}
-+		} else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+			/*
-+			 * get exclusive access directly just to not have to
-+			 * re-obtain it if file will appear empty
-+			 */
-+			get_exclusive_access(uf_info);
-+			ea = EA_OBTAINED;
-+			result = find_file_state(inode, uf_info);
-+			if (result) {
-+				drop_exclusive_access(uf_info);
-+				ea = NEITHER_OBTAINED;
-+				break;
-+			}
-+		} else {
-+			get_nonexclusive_access(uf_info);
-+			ea = NEA_OBTAINED;
-+		}
-+
-+		/* either EA or NEA is obtained. Choose item write method */
-+		if (uf_info->container == UF_CONTAINER_EXTENTS) {
-+			/* file is built of extent items */
-+			write_op = reiser4_write_extent;
-+		} else if (uf_info->container == UF_CONTAINER_EMPTY) {
-+			/* file is empty */
-+			if (should_have_notail(uf_info, new_size))
-+				write_op = reiser4_write_extent;
-+			else
-+				write_op = reiser4_write_tail;
-+		} else {
-+			/* file is built of tail items */
-+			if (should_have_notail(uf_info, new_size)) {
-+				if (ea == NEA_OBTAINED) {
-+					drop_nonexclusive_access(uf_info);
-+					get_exclusive_access(uf_info);
-+					ea = EA_OBTAINED;
-+				}
-+				if (uf_info->container == UF_CONTAINER_TAILS) {
-+					/*
-+					 * if file is being converted by
-+					 * another process - wait until it
-+					 * completes
-+					 */
-+					while (1) {
-+						if (reiser4_inode_get_flag(inode,
-+									   REISER4_PART_IN_CONV)) {
-+							drop_exclusive_access(uf_info);
-+							schedule();
-+							get_exclusive_access(uf_info);
-+							continue;
-+						}
-+						break;
-+					}
-+					if (uf_info->container == UF_CONTAINER_TAILS) {
-+						result = tail2extent(uf_info);
-+						if (result) {
-+							/*
-+							 * exclusive access is
-+							 * held here: release
-+							 * it before leaving
-+							 * the loop, nothing
-+							 * after the loop
-+							 * drops it
-+							 */
-+							drop_exclusive_access(uf_info);
-+							ea = NEITHER_OBTAINED;
-+							break;
-+						}
-+					}
-+				}
-+				drop_exclusive_access(uf_info);
-+				ea = NEITHER_OBTAINED;
-+				continue;
-+			}
-+			write_op = reiser4_write_tail;
-+		}
-+
-+		written = write_op(file, buf, to_write, pos);
-+		if (written == -ENOSPC && try_free_space) {
-+			drop_access(uf_info);
-+			txnmgr_force_commit_all(inode->i_sb, 0);
-+			try_free_space = 0;
-+			continue;
-+		}
-+		if (written < 0) {
-+			drop_access(uf_info);
-+			result = written;
-+			break;
-+		}
-+		/* something is written. */
-+		if (uf_info->container == UF_CONTAINER_EMPTY) {
-+			assert("", ea == EA_OBTAINED);
-+			uf_info->container =
-+				(write_op == reiser4_write_extent) ?
-+				UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
-+		} else {
-+			assert("", ergo(uf_info->container == UF_CONTAINER_EXTENTS,
-+					write_op == reiser4_write_extent));
-+			assert("", ergo(uf_info->container == UF_CONTAINER_TAILS,
-+					write_op == reiser4_write_tail));
-+		}
-+		if (*pos + written > inode->i_size)
-+			INODE_SET_FIELD(inode, i_size, *pos + written);
-+		file_update_time(file);
-+		result = reiser4_update_sd(inode);
-+		if (result) {
-+			mutex_unlock(&inode->i_mutex);
-+			current->backing_dev_info = NULL;
-+			drop_access(uf_info);
-+			context_set_commit_async(ctx);
-+			reiser4_exit_context(ctx);
-+			return result;
-+		}
-+		drop_access(uf_info);
-+		ea = NEITHER_OBTAINED;
-+		reiser4_txn_restart(ctx);
-+		/* journal_info is cleared around balance_dirty_pages and restored after */
-+		current->journal_info = NULL;
-+		/*
-+		 * tell VM how many pages were dirtied. Maybe number of pages
-+		 * which were dirty already should not be counted
-+		 */
-+		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-+						   (written + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE);
-+		current->journal_info = ctx;
-+
-+		left -= written;
-+		buf += written;
-+		*pos += written;
-+	}
-+
-+	mutex_unlock(&inode->i_mutex);
-+
-+	if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
-+		reiser4_txn_restart_current();
-+		grab_space_enable();
-+		result = sync_unix_file(file, file->f_dentry,
-+					0 /* data and stat data */ );
-+		if (result)
-+			warning("reiser4-7", "failed to sync file %llu",
-+				(unsigned long long)get_inode_oid(inode));
-+	}
-+
-+	current->backing_dev_info = NULL;
-+
-+	reiser4_exit_context(ctx);
-+
-+	/*
-+	 * return number of written bytes or error code if nothing is
-+	 * written. Note, that it does not work correctly in case when
-+	 * sync_unix_file returns error
-+	 */
-+	return (count - left) ? (count - left) : result;
-+}
-+
-+/**
-+ * release_unix_file - release of struct file_operations
-+ * @inode: inode of released file
-+ * @file: file to release
-+ *
-+ * Implementation of release method of struct file_operations for unix file
-+ * plugin. Unless called from within a reiser4 context, the file is converted
-+ * from extent items to tail items when all of the following hold: the
-+ * reference count of the dentry is 1, the file is built of extents,
-+ * should_have_notail() is false for the current size, and rofs_inode() is
-+ * false. A failed conversion is only reported with a warning; its error code
-+ * is still returned. Frees reiser4 specific file data in every case.
-+ */
-+int release_unix_file(struct inode *inode, struct file *file)
-+{
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	int in_reiser4;
-+	int result = 0;
-+
-+	/* sampled before reiser4_init_context() is called below */
-+	in_reiser4 = is_in_reiser4_context();
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	if (in_reiser4 != 0) {
-+		/*
-+		   we are within reiser4 context already. How latter is
-+		   possible? Simple:
-+
-+		   (gdb) bt
-+		   #0  get_exclusive_access ()
-+		   #2  0xc01e56d3 in release_unix_file ()
-+		   #3  0xc01c3643 in reiser4_release ()
-+		   #4  0xc014cae0 in __fput ()
-+		   #5  0xc013ffc3 in remove_vm_struct ()
-+		   #6  0xc0141786 in exit_mmap ()
-+		   #7  0xc0118480 in mmput ()
-+		   #8  0xc0133205 in oom_kill ()
-+		   #9  0xc01332d1 in out_of_memory ()
-+		   #10 0xc013bc1d in try_to_free_pages ()
-+		   #11 0xc013427b in __alloc_pages ()
-+		   #12 0xc013f058 in do_anonymous_page ()
-+		   #13 0xc013f19d in do_no_page ()
-+		   #14 0xc013f60e in handle_mm_fault ()
-+		   #15 0xc01131e5 in do_page_fault ()
-+		   #16 0xc0104935 in error_code ()
-+		   #17 0xc025c0c6 in __copy_to_user_ll ()
-+		   #18 0xc01d496f in reiser4_read_tail ()
-+		   #19 0xc01e4def in read_unix_file ()
-+		   #20 0xc01c3504 in reiser4_read ()
-+		   #21 0xc014bd4f in vfs_read ()
-+		   #22 0xc014bf66 in sys_read ()
-+		 */
-+		warning("vs-44", "out of memory?");
-+	} else {
-+		uf_info = unix_file_inode_data(inode);
-+
-+		get_exclusive_access_careful(uf_info, inode);
-+		if (atomic_read(&file->f_dentry->d_count) == 1 &&
-+		    uf_info->container == UF_CONTAINER_EXTENTS &&
-+		    !should_have_notail(uf_info, inode->i_size) &&
-+		    !rofs_inode(inode)) {
-+			result = extent2tail(uf_info);
-+			if (result != 0)
-+				warning("nikita-3233",
-+					"Failed (%d) to convert in %s (%llu)",
-+					result, __FUNCTION__,
-+					(unsigned long long)
-+					get_inode_oid(inode));
-+		}
-+		drop_exclusive_access(uf_info);
-+	}
-+
-+	reiser4_free_file_fsdata(file);
-+
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * Install the formatting plugin with id NEVER_TAILS_FORMATTING_ID into the
-+ * PSET_FORMATTING slot of the plugin set of @inode. The value returned by
-+ * force_plugin_pset() is not checked.
-+ */
-+static void set_file_notail(struct inode *inode)
-+{
-+	formatting_plugin *tplug;
-+
-+	tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
-+	force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
-+}
-+
-+/*
-+ * if file is built of tails - convert it to extents
-+ *
-+ * @filp: not referenced by this function
-+ * @inode: inode of the file to unpack
-+ * @forever: if not zero, also switch the inode to the never-tails formatting
-+ * plugin and store that in the stat data
-+ *
-+ * Must be called with exclusive access obtained (asserted). The access may
-+ * be dropped and re-obtained while waiting for a conversion done by another
-+ * process. Returns 0 on success or the first error encountered.
-+ */
-+static int unpack(struct file *filp, struct inode *inode, int forever)
-+{
-+	int result = 0;
-+	unix_file_info_t *uf_info;
-+
-+	uf_info = unix_file_inode_data(inode);
-+	assert("vs-1628", ea_obtained(uf_info));
-+
-+	result = find_file_state(inode, uf_info);
-+	if (result)
-+		return result;
-+	assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
-+
-+	if (uf_info->container == UF_CONTAINER_TAILS) {
-+		/*
-+		 * if file is being converted by another process - wait until
-+		 * it completes
-+		 */
-+		while (1) {
-+			if (reiser4_inode_get_flag(inode,
-+						   REISER4_PART_IN_CONV)) {
-+				drop_exclusive_access(uf_info);
-+				schedule();
-+				get_exclusive_access(uf_info);
-+				continue;
-+			}
-+			break;
-+		}
-+		/* re-check: the other process may have converted the file */
-+		if (uf_info->container == UF_CONTAINER_TAILS) {
-+			result = tail2extent(uf_info);
-+			if (result)
-+				return result;
-+		}
-+	}
-+	if (forever) {
-+		/* save new formatting plugin in stat data */
-+		__u64 tograb;
-+
-+		set_file_notail(inode);
-+
-+		grab_space_enable();
-+		tograb = inode_file_plugin(inode)->estimate.update(inode);
-+		result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
-+		/*
-+		 * update the stat data only if space for it was reserved;
-+		 * otherwise report the reservation failure to the caller
-+		 */
-+		if (result == 0)
-+			result = reiser4_update_sd(inode);
-+	}
-+
-+	return result;
-+}
-+
-+/*
-+ * Implementation of vfs' ioctl method of struct file_operations for unix
-+ * file plugin. The only command handled is REISER4_IOC_UNPACK, which runs
-+ * unpack() "forever" under exclusive access; any other command yields
-+ * -ENOSYS.
-+ */
-+int
-+ioctl_unix_file(struct inode *inode, struct file *filp,
-+		unsigned int cmd, unsigned long arg UNUSED_ARG)
-+{
-+	reiser4_context *ctx;
-+	int result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	if (cmd == REISER4_IOC_UNPACK) {
-+		unix_file_info_t *uf_info = unix_file_inode_data(inode);
-+
-+		get_exclusive_access(uf_info);
-+		result = unpack(filp, inode, 1 /* forever */ );
-+		drop_exclusive_access(uf_info);
-+	} else {
-+		result = RETERR(-ENOSYS);
-+	}
-+
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * Implementation of vfs' bmap method of struct address_space_operations for
-+ * unix file plugin: looks up the item covering logical block @lblock of the
-+ * file behind @mapping and asks the item plugin's get_block method for the
-+ * block number. Error codes are returned through the sector_t result as well;
-+ * -EINVAL is used when the item plugin has no get_block method.
-+ */
-+sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
-+{
-+	struct inode *inode = mapping->host;
-+	reiser4_context *ctx;
-+	item_plugin *iplug;
-+	lock_handle lh;
-+	reiser4_key key;
-+	coord_t coord;
-+	sector_t block;
-+	sector_t result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+	key_by_inode_and_offset_common(inode,
-+				       (loff_t) lblock * current_blocksize,
-+				       &key);
-+
-+	init_lh(&lh);
-+	result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
-+				       inode);
-+	if (cbk_errored(result))
-+		goto out;
-+
-+	result = zload(coord.node);
-+	if (result)
-+		goto out;
-+
-+	iplug = item_plugin_by_coord(&coord);
-+	if (!iplug->s.file.get_block) {
-+		result = RETERR(-EINVAL);
-+	} else {
-+		result = iplug->s.file.get_block(&coord, lblock, &block);
-+		if (result == 0)
-+			result = block;
-+	}
-+	zrelse(coord.node);
-+
-+out:
-+	done_lh(&lh);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/**
-+ * flow_by_inode_unix_file - initialize structure flow
-+ * @inode: inode of file for which read or write is about to be done
-+ * @buf: buffer to perform read to or write from
-+ * @user: flag showing whether @buf is user space or kernel space
-+ * @size: size of buffer @buf
-+ * @off: start offset for read or write
-+ * @op: READ or WRITE
-+ * @flow: flow to fill in
-+ *
-+ * Initializes fields of @flow: size of data, data pointer, user flag, i/o
-+ * mode (read or write) and key. Returns what
-+ * key_by_inode_and_offset_common() returns for @off.
-+ */
-+int flow_by_inode_unix_file(struct inode *inode,
-+			    const char __user *buf, int user,
-+			    loff_t size, loff_t off,
-+			    rw_op op, flow_t *flow)
-+{
-+	assert("nikita-1100", inode != NULL);
-+	assert("nikita-1931", inode_file_plugin(inode) != NULL);
-+	assert("nikita-1932",
-+	       inode_file_plugin(inode)->key_by_inode ==
-+	       key_by_inode_and_offset_common);
-+
-+	flow->op = op;
-+	flow->user = user;
-+	flow->length = size;
-+	/* store the pointer value itself, whatever the type of flow->data */
-+	memcpy(&flow->data, &buf, sizeof(buf));
-+
-+	/* calculate key of the i/o position and store it in flow->key */
-+	return key_by_inode_and_offset_common(inode, off, &flow->key);
-+}
-+
-+/* plugin->u.file.set_plug_in_sd = NULL
-+ plugin->u.file.set_plug_in_inode = NULL
-+ plugin->u.file.create_blank_sd = NULL */
-+/* plugin->u.file.delete */
-+/*
-+ plugin->u.file.add_link = reiser4_add_link_common
-+ plugin->u.file.rem_link = NULL */
-+
-+/* plugin->u.file.owns_item
-+   owns_item_common() plus a check that the item plugin belongs to the
-+   UNIX_FILE_METADATA_ITEM_TYPE group, with an assertion on the item id.
-+   Returns 1 if @coord is owned by @inode, 0 otherwise. */
-+/* Audited by: green(2002.06.15) */
-+int
-+owns_item_unix_file(const struct inode *inode /* object to check against */ ,
-+		    const coord_t * coord /* coord to check */ )
-+{
-+	if (!owns_item_common(inode, coord) ||
-+	    !plugin_of_group(item_plugin_by_coord(coord),
-+			     UNIX_FILE_METADATA_ITEM_TYPE))
-+		return 0;
-+
-+	assert("vs-547",
-+	       item_id_by_coord(coord) == EXTENT_POINTER_ID ||
-+	       item_id_by_coord(coord) == FORMATTING_ID);
-+	return 1;
-+}
-+
-+/*
-+ * Truncate the body of @inode to @attr->ia_size. A SAFE_TRUNCATE safe link
-+ * is added before the truncate and deleted after it, whether or not the
-+ * truncate succeeded. Returns the result of the truncate part; a failure to
-+ * delete the safe link is only reported with a warning.
-+ */
-+static int setattr_truncate(struct inode *inode, struct iattr *attr)
-+{
-+	reiser4_tree *tree;
-+	loff_t old_size;
-+	int del_result;
-+	int result;
-+
-+	inode_check_scale(inode, inode->i_size, attr->ia_size);
-+
-+	old_size = inode->i_size;
-+	tree = reiser4_tree_by_inode(inode);
-+
-+	/* reserve, add safe link, truncate - each step only if the last one succeeded */
-+	result = safe_link_grab(tree, BA_CAN_COMMIT);
-+	if (result == 0) {
-+		result = safe_link_add(inode, SAFE_TRUNCATE);
-+		if (result == 0)
-+			result = truncate_file_body(inode, attr->ia_size);
-+	}
-+	if (result)
-+		warning("vs-1588", "truncate_file failed: oid %lli, "
-+			"old size %lld, new size %lld, retval %d",
-+			(unsigned long long)get_inode_oid(inode),
-+			old_size, attr->ia_size, result);
-+
-+	del_result = safe_link_grab(tree, BA_CAN_COMMIT);
-+	if (del_result == 0)
-+		del_result = safe_link_del(tree, get_inode_oid(inode),
-+					   SAFE_TRUNCATE);
-+	if (del_result != 0)
-+		warning("nikita-3417", "Cannot kill safelink %lli: %i",
-+			(unsigned long long)get_inode_oid(inode), del_result);
-+
-+	safe_link_release(tree);
-+	return result;
-+}
-+
-+/* plugin->u.file.setattr method */
-+/* Without ATTR_SIZE in @attr->ia_valid this is reiser4_setattr_common().
-+   With it, the file is truncated by setattr_truncate() under exclusive
-+   access to avoid races; truncate does the space reservation itself. */
-+int setattr_unix_file(struct dentry *dentry,	/* Object to change attributes */
-+		      struct iattr *attr /* change description */ )
-+{
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	struct inode *inode;
-+	int result;
-+
-+	if (!(attr->ia_valid & ATTR_SIZE))
-+		return reiser4_setattr_common(dentry, attr);
-+
-+	inode = dentry->d_inode;
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access_careful(uf_info, inode);
-+	result = setattr_truncate(inode, attr);
-+	drop_exclusive_access(uf_info);
-+
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/* plugin->u.file.init_inode_data */
-+/* Set up the unix-file specific part of @inode: the container is EMPTY for
-+   a file being created and UNKNOWN otherwise, the latch is initialized, the
-+   formatting plugin is cached and no exclusive use is recorded. Ends with
-+   init_inode_ordering(). */
-+void
-+init_inode_data_unix_file(struct inode *inode,
-+			  reiser4_object_create_data * crd, int create)
-+{
-+	unix_file_info_t *uf_info = unix_file_inode_data(inode);
-+
-+	if (create)
-+		uf_info->container = UF_CONTAINER_EMPTY;
-+	else
-+		uf_info->container = UF_CONTAINER_UNKNOWN;
-+
-+	init_rwsem(&uf_info->latch);
-+	uf_info->tplug = inode_formatting_plugin(inode);
-+	uf_info->exclusive_use = 0;
-+
-+#if REISER4_DEBUG
-+	uf_info->ea_owner = NULL;
-+	atomic_set(&uf_info->nr_neas, 0);
-+#endif
-+	init_inode_ordering(inode, crd, create);
-+}
-+
-+/**
-+ * delete_object_unix_file - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * Truncates file to length 0, removes stat data and safe link. Returns 0 at
-+ * once if the inode has the REISER4_NO_SD flag. A failure to truncate is only
-+ * reported with a warning; the return value is that of
-+ * reiser4_delete_object_common().
-+ */
-+int delete_object_unix_file(struct inode *inode)
-+{
-+	unix_file_info_t *uf_info;
-+	int result;
-+
-+	if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
-+		return 0;
-+
-+	/* truncate file body first */
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access(uf_info);
-+	result = truncate_file_body(inode, 0 /* size */ );
-+	drop_exclusive_access(uf_info);
-+
-+	if (result)
-+		/* cast as at the other call sites: %llu needs unsigned long long */
-+		warning("", "failed to truncate file (%llu) on removal: %d",
-+			(unsigned long long)get_inode_oid(inode), result);
-+
-+	/* remove stat data and safe link */
-+	return reiser4_delete_object_common(inode);
-+}
-+
-+/**
-+ * sendfile_unix_file - sendfile of struct file_operations
-+ * @file: file to be sent
-+ * @ppos: position to start from
-+ * @count: number of bytes to send
-+ * @actor: function to copy data
-+ * @target: where to copy read data
-+ *
-+ * Reads @count bytes from @file and calls @actor for every page read. This is
-+ * needed for loop back devices support. Returns the result of
-+ * generic_file_sendfile(), or the error of the space reservation that
-+ * precedes it.
-+ */
-+ssize_t
-+sendfile_unix_file(struct file *file, loff_t *ppos, size_t count,
-+		   read_actor_t actor, void *target)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	ssize_t result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	/*
-+	 * generic_file_sendfile may want to call update_atime. Grab space for
-+	 * stat data update
-+	 */
-+	result = reiser4_grab_space(estimate_update_common(inode),
-+				    BA_CAN_COMMIT);
-+	if (result == 0) {
-+		/* the flag is set under the inode mutex */
-+		mutex_lock(&inode->i_mutex);
-+		reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
-+		mutex_unlock(&inode->i_mutex);
-+
-+		uf_info = unix_file_inode_data(inode);
-+		get_nonexclusive_access(uf_info);
-+		result = generic_file_sendfile(file, ppos, count, actor,
-+					       target);
-+		drop_nonexclusive_access(uf_info);
-+	}
-+
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * prepare_write for unix files: under exclusive access, determine what the
-+ * file is built of and call do_prepare_write() unless it is built of tail
-+ * items, in which case -EINVAL is returned.
-+ */
-+int
-+prepare_write_unix_file(struct file *file, struct page *page,
-+			unsigned from, unsigned to)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	unix_file_info_t *uf_info;
-+	reiser4_context *ctx;
-+	int ret;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access(uf_info);
-+
-+	ret = find_file_state(inode, uf_info);
-+	if (ret == 0) {
-+		if (uf_info->container != UF_CONTAINER_TAILS)
-+			ret = do_prepare_write(file, page, from, to);
-+		else
-+			ret = -EINVAL;
-+	}
-+
-+	drop_exclusive_access(uf_info);
-+
-+	/* don't commit transaction under inode semaphore */
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return ret;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/file_conversion.c linux-2.6.20/fs/reiser4/plugin/file/file_conversion.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/file_conversion.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/file_conversion.c 2007-05-06 14:50:43.783001971 +0400
-@@ -0,0 +1,594 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser,
-+ licensing governed by reiser4/README */
-+
-+/* This file contains hooks that converts (*) cryptcompress files to unix-files,
-+ and a set of protected (**) methods of a cryptcompress file plugin to perform
-+ such conversion.
-+
-+(*)
-+ The conversion is performed for incompressible files to reduce cpu and memory
-+ usage. If the first logical cluster (64K by default) of a file is
-+ incompressible, then we decide that the whole file is incompressible.
-+ The conversion can be enabled via installing a special compression mode
-+ plugin (CONVX_COMPRESSION_MODE_ID, see plugin/compress/compress_mode.c for
-+ details).
-+
-+(**)
-+ The protection means serialization of critical sections (readers and writers
-+ of @pset->file)
-+*/
-+
-+#include "../../inode.h"
-+#include "../cluster.h"
-+#include "file.h"
-+
-+#define conversion_enabled(inode) \
-+ (inode_compression_mode_plugin(inode) == \
-+ compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID))
-+
-+
-+/* Located sections (readers and writers of @pset->file) are not
-+ permanently critical: cryptcompress file can be converted only
-+ if the conversion is enabled (see the macro above, which is true
-+ when the compression mode plugin of the inode is the one with id
-+ CONVX_COMPRESSION_MODE_ID). And we don't convert unix files at all.
-+ The following helper macro is a sanity check to decide if we
-+ need to protect a located section: it is true only for an inode
-+ whose file plugin is the cryptcompress one and for which the
-+ conversion is enabled.
-+*/
-+#define should_protect(inode) \
-+ (inode_file_plugin(inode) == \
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) && \
-+ conversion_enabled(inode))
-+
-+/* All protected methods have prefix "prot" in their names.
-+ It is convenient to construct them by usual (unprotected) ones
-+ using the following common macros:
-+*/
-+
-+/* Macro for passive protection.
-+ method_cryptcompress contains only readers.
-+
-+ PROT_PASSIVE expands to a statement expression of type @type whose value
-+ is the result of method_unix_file or method_cryptcompress, called with
-+ the parenthesized argument list @args. Which of the two is called is
-+ decided by the current file plugin of the inode. A variable named inode
-+ must be visible where the macro is expanded.
-+
-+ If should_protect(inode) is true, conv_sem of the inode is taken for
-+ read; should_protect() is then checked again and the semaphore is
-+ released at once if protection is not needed anymore. After the call
-+ the semaphore is released if should_protect(inode) is (still) true.
-+
-+ PROT_PASSIVE_VOID below does the same for methods that return nothing. */
-+#define PROT_PASSIVE(type, method, args) \
-+({ \
-+ type _result; \
-+ struct rw_semaphore * guard = \
-+ &reiser4_inode_data(inode)->conv_sem; \
-+ \
-+ if (should_protect(inode)) { \
-+ down_read(guard); \
-+ if (!should_protect(inode)) \
-+ up_read(guard); \
-+ } \
-+ if (inode_file_plugin(inode) == \
-+ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \
-+ _result = method ## _unix_file args; \
-+ else \
-+ _result = method ## _cryptcompress args; \
-+ if (should_protect(inode)) \
-+ up_read(guard); \
-+ _result; \
-+})
-+
-+#define PROT_PASSIVE_VOID(method, args) \
-+({ \
-+ struct rw_semaphore * guard = \
-+ &reiser4_inode_data(inode)->conv_sem; \
-+ \
-+ if (should_protect(inode)) { \
-+ down_read(guard); \
-+ if (!should_protect(inode)) \
-+ up_read(guard); \
-+ } \
-+ if (inode_file_plugin(inode) == \
-+ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \
-+ method ## _unix_file args; \
-+ else \
-+ method ## _cryptcompress args; \
-+ if (should_protect(inode)) \
-+ up_read(guard); \
-+})
-+
-+/* Macro for active protection.
-+ active_expr contains readers and writers; after its
-+ evaluation conversion should be disabled.
-+
-+ Expands to a statement expression of type @type. A variable named inode
-+ must be visible where the macro is expanded. A reiser4 context is set up
-+ first; NOTE: if that fails, the macro executes a return statement, i.e.
-+ the function that uses the macro returns PTR_ERR(ctx).
-+
-+ If should_protect(inode) is true, conv_sem of the inode is taken for
-+ write and, if should_protect(inode) still holds, @active_expr is
-+ evaluated into the result. Only when the result is 0 (which is also its
-+ initial value) is method_unix_file or method_cryptcompress called with
-+ the parenthesized argument list @args, according to the current file
-+ plugin of the inode. The context is exited before the result is
-+ yielded. */
-+#define PROT_ACTIVE(type, method, args, active_expr) \
-+({ \
-+ type _result = 0; \
-+ struct rw_semaphore * guard = \
-+ &reiser4_inode_data(inode)->conv_sem; \
-+ reiser4_context * ctx = reiser4_init_context(inode->i_sb); \
-+ if (IS_ERR(ctx)) \
-+ return PTR_ERR(ctx); \
-+ \
-+ if (should_protect(inode)) { \
-+ down_write(guard); \
-+ if (should_protect(inode)) \
-+ _result = active_expr; \
-+ up_write(guard); \
-+ } \
-+ if (_result == 0) { \
-+ if (inode_file_plugin(inode) == \
-+ file_plugin_by_id(UNIX_FILE_PLUGIN_ID)) \
-+ _result = method ## _unix_file args; \
-+ else \
-+ _result = method ## _cryptcompress args; \
-+ } \
-+ reiser4_exit_context(ctx); \
-+ _result; \
-+})
-+
-+/* Pass management to the unix-file plugin with "notail" policy.
-+
-+ Puts the unix-file plugin into the PSET_FILE slot and the never-tails
-+ formatting plugin into the PSET_FORMATTING slot of the plugin set of
-+ @inode, clears the cryptcompress bits of plugin_mask and the PLUGIN_STAT
-+ bit of extmask, clears REISER4_SDLEN_KNOWN, initializes the unix-file
-+ specific part of the inode and switches the operation tables of @inode,
-+ of its mapping and of @file to those of the unix-file plugin.
-+
-+ Returns 0, or the error of aset_set_unsafe(); NOTE(review): if the second
-+ aset_set_unsafe() fails, the PSET_FILE slot has already been changed -
-+ confirm that callers can cope with that. */
-+static int __cryptcompress2unixfile(struct file *file, struct inode * inode)
-+{
-+ int result;
-+ reiser4_inode *info;
-+ unix_file_info_t * uf;
-+ info = reiser4_inode_data(inode);
-+
-+ result = aset_set_unsafe(&info->pset,
-+ PSET_FILE,
-+ (reiser4_plugin *)
-+ file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
-+ if (result)
-+ return result;
-+ result = aset_set_unsafe(&info->pset,
-+ PSET_FORMATTING,
-+ (reiser4_plugin *)
-+ formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID));
-+ if (result)
-+ return result;
-+ /* get rid of non-standard plugins */
-+ info->plugin_mask &= ~cryptcompress_mask;
-+ /* get rid of plugin stat-data extension */
-+ info->extmask &= ~(1 << PLUGIN_STAT);
-+
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+
-+ /* FIXME use init_inode_data_unix_file() instead,
-+ but avoid init_inode_ordering() */
-+ /* Init unix-file specific part of inode */
-+ uf = unix_file_inode_data(inode);
-+ uf->container = UF_CONTAINER_UNKNOWN;
-+ init_rwsem(&uf->latch);
-+ uf->tplug = inode_formatting_plugin(inode);
-+ uf->exclusive_use = 0;
-+#if REISER4_DEBUG
-+ uf->ea_owner = NULL;
-+ atomic_set(&uf->nr_neas, 0);
-+#endif
-+ inode->i_op =
-+ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->inode_ops;
-+ inode->i_fop =
-+ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->file_ops;
-+ inode->i_mapping->a_ops =
-+ &file_plugin_by_id(UNIX_FILE_PLUGIN_ID)->as_ops;
-+ file->f_op = inode->i_fop; /* the already opened file follows the inode */
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debugging check used after the conversion was disabled: true (1) only if
-+ * the LIGHT_WEIGHT_STAT, UNIX_STAT, LARGE_TIMES_STAT and PLUGIN_STAT bits
-+ * are all set in extmask and the PSET_COMPRESSION_MODE bit is set in
-+ * plugin_mask of @inode.
-+ */
-+static int disabled_conversion_inode_ok(struct inode * inode)
-+{
-+	reiser4_inode *info = reiser4_inode_data(inode);
-+	__u64 extmask = info->extmask;
-+	__u16 plugin_mask = info->plugin_mask;
-+
-+	if (!(extmask & (1 << LIGHT_WEIGHT_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << UNIX_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << LARGE_TIMES_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << PLUGIN_STAT)))
-+		return 0;
-+	return (plugin_mask & (1 << PSET_COMPRESSION_MODE)) != 0;
-+}
-+#endif
-+
-+/* Assign another mode that will control compression at flush time only:
-+   the compression mode plugin with id LATTD_COMPRESSION_MODE_ID is forced
-+   into the PSET_COMPRESSION_MODE slot of @inode. As the name says, stat
-+   data is not updated here. Returns the result of force_plugin_pset(). */
-+static int disable_conversion_no_update_sd(struct inode * inode)
-+{
-+	reiser4_plugin *mode;
-+	int ret;
-+
-+	mode = (reiser4_plugin *)compression_mode_plugin_by_id
-+		(LATTD_COMPRESSION_MODE_ID);
-+	ret = force_plugin_pset(inode, PSET_COMPRESSION_MODE, mode);
-+	assert("edward-1500",
-+	       ergo(!ret, disabled_conversion_inode_ok(inode)));
-+	return ret;
-+}
-+
-+/* Disable future attempts to check/convert. This function is called by
-+ conversion hooks. It only forwards to disable_conversion_no_update_sd()
-+ and returns its result. */
-+static int disable_conversion(struct inode * inode)
-+{
-+ return disable_conversion_no_update_sd(inode);
-+}
-+
-+/*
-+ * Decide, by the position a write starts at, what to do about the
-+ * compressibility check. If @pos is beyond the file size, the conversion is
-+ * disabled and the result of disable_conversion() is returned. Otherwise 0 is
-+ * returned, and *@check_compress is set to 1 when the file size equals the
-+ * cluster size. @clust is not referenced here.
-+ */
-+static int check_position(struct inode * inode,
-+			  loff_t pos /* initial position in the file */,
-+			  reiser4_cluster_t * clust,
-+			  int * check_compress)
-+{
-+	assert("edward-1505", conversion_enabled(inode));
-+	/*
-+	 * if file size is more than cluster size, then compressible status
-+	 * must be figured out already (i.e. compression was disabled, or file
-+	 * plugin was converted to unix_file)
-+	 */
-+	assert("edward-1506", inode->i_size <= inode_cluster_size(inode));
-+
-+	if (pos <= inode->i_size) {
-+		if (inode->i_size == inode_cluster_size(inode))
-+			*check_compress = 1;
-+		return 0;
-+	}
-+	/* first logical cluster will contain a (partial) hole */
-+	return disable_conversion(inode);
-+}
-+
-+/*
-+ * Switch @clust from the logical cluster of index 1 to the one of index 0
-+ * for the compressibility check: @hint is zeroed and installed as the hint
-+ * of the cluster, the index is decremented and nr_pages is recalculated.
-+ * finish_check_compressibility() reverts the hint and the index.
-+ */
-+static void start_check_compressibility(struct inode * inode,
-+					reiser4_cluster_t * clust,
-+					hint_t * hint)
-+{
-+	assert("edward-1507", clust->index == 1);
-+	assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc));
-+	assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ);
-+
-+	hint_init_zero(hint);
-+	clust->hint = hint;
-+	clust->index -= 1;
-+	clust->nr_pages = count_to_nrpages(fsize_to_count(clust, inode));
-+
-+	/* first logical cluster (of index #0) must be complete */
-+	assert("edward-1510",
-+	       fsize_to_count(clust, inode) == inode_cluster_size(inode));
-+}
-+
-+/*
-+ * Counterpart of start_check_compressibility(): unset the hint currently
-+ * installed in @clust, install @hint instead and increment the cluster index.
-+ * @inode is not referenced here.
-+ */
-+static void finish_check_compressibility(struct inode * inode,
-+					 reiser4_cluster_t * clust,
-+					 hint_t * hint)
-+{
-+	hint_t * used_hint = clust->hint;
-+
-+	reiser4_unset_hint(used_hint);
-+	clust->hint = hint;
-+	clust->index += 1;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debugging check of the coord kept in @hint: it must be at a ctail item
-+ * which is not unprepped, and the key offset of the item plus its number of
-+ * units must equal the disk cluster size recorded in the hint.
-+ */
-+static int prepped_dclust_ok(hint_t * hint)
-+{
-+	coord_t * coord = &hint->ext_coord.coord;
-+	reiser4_key key;
-+
-+	item_key_by_coord(coord, &key);
-+	if (item_id_by_coord(coord) != CTAIL_ID)
-+		return 0;
-+	if (coord_is_unprepped_ctail(coord))
-+		return 0;
-+	return (get_key_offset(&key) + nr_units_ctail(coord) ==
-+		dclust_get_extension_dsize(hint));
-+}
-+#endif
-+
-+/* half of @size; the argument is parenthesized so any expression can be passed */
-+#define fifty_persent(size) ((size) >> 1)
-+/* evaluation of data compressibility: data is considered compressible if
-+   the output size @osize is less than half of the input size @isize */
-+#define data_is_compressible(osize, isize) \
-+	((osize) < fifty_persent(isize))
-+
-+/* This is called only once per file life.
-+   Read first logical cluster (of index #0) and estimate its compressibility.
-+   Save estimation result in @compressible.
-+
-+   While the check runs, @clust is switched to cluster #0 and uses a hint
-+   that lives on the stack of this function. The original hint and index
-+   are put back on every return path, so that @clust never refers to the
-+   stack hint after the function has returned.
-+
-+   Returns 0 and stores the estimation in @compressible, or returns an error
-+   code, in which case @compressible is not touched. */
-+static int read_check_compressibility(struct inode * inode,
-+				      reiser4_cluster_t * clust,
-+				      int * compressible)
-+{
-+	int i;
-+	int result;
-+	__u32 dst_len;
-+	hint_t tmp_hint;
-+	hint_t * cur_hint = clust->hint;
-+
-+	start_check_compressibility(inode, clust, &tmp_hint);
-+
-+	result = grab_cluster_pages(inode, clust);
-+	if (result)
-+		goto restore;
-+	/* Read page cluster here */
-+	for (i = 0; i < clust->nr_pages; i++) {
-+		struct page *page = clust->pages[i];
-+		lock_page(page);
-+		result = do_readpage_ctail(inode, clust, page,
-+					   ZNODE_READ_LOCK);
-+		unlock_page(page);
-+		if (result)
-+			goto error;
-+	}
-+	tfm_cluster_clr_uptodate(&clust->tc);
-+
-+	cluster_set_tfm_act(&clust->tc, TFMA_WRITE);
-+
-+	if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
-+		/* length of compressed data is known, no need to compress */
-+		assert("edward-1511",
-+		       znode_is_write_locked(tmp_hint.ext_coord.coord.node));
-+		assert("edward-1512",
-+		       WITH_DATA(tmp_hint.ext_coord.coord.node,
-+				 prepped_dclust_ok(&tmp_hint)));
-+		dst_len = dclust_get_extension_dsize(&tmp_hint);
-+	}
-+	else {
-+		tfm_cluster_t * tc = &clust->tc;
-+		compression_plugin * cplug = inode_compression_plugin(inode);
-+		result = grab_tfm_stream(inode, tc, INPUT_STREAM);
-+		if (result)
-+			goto error;
-+		/* copy the pages of the cluster into the input stream */
-+		for (i = 0; i < clust->nr_pages; i++) {
-+			char *data;
-+			lock_page(clust->pages[i]);
-+			BUG_ON(!PageUptodate(clust->pages[i]));
-+			data = kmap(clust->pages[i]);
-+			memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
-+			       data, PAGE_CACHE_SIZE);
-+			kunmap(clust->pages[i]);
-+			unlock_page(clust->pages[i]);
-+		}
-+		result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+		if (result)
-+			goto error;
-+		result = grab_coa(tc, cplug);
-+		if (result)
-+			goto error;
-+		tc->len = tc->lsize = fsize_to_count(clust, inode);
-+		assert("edward-1513", tc->len == inode_cluster_size(inode));
-+		dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
-+		cplug->compress(get_coa(tc, cplug->h.id, tc->act),
-+				tfm_input_data(clust), tc->len,
-+				tfm_output_data(clust), &dst_len);
-+		assert("edward-1514",
-+		       dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
-+	}
-+	finish_check_compressibility(inode, clust, cur_hint);
-+	*compressible = data_is_compressible(dst_len,
-+					     inode_cluster_size(inode));
-+	return 0;
-+ error:
-+	reiser4_release_cluster_pages(clust);
-+ restore:
-+	/*
-+	 * put the caller's hint and the cluster index back: tmp_hint is on
-+	 * our stack and must not stay installed in @clust after return
-+	 */
-+	finish_check_compressibility(inode, clust, cur_hint);
-+	return result;
-+}
-+
-+/* Remove from the tree the whole key range of the disk cluster of index @idx */
-+static int cut_disk_cluster(struct inode * inode, cloff_t idx)
-+{
-+	reiser4_key first, last;
-+	assert("edward-1515", inode_file_plugin(inode) ==
-+	       file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+	key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &first);
-+	last = first;
-+	set_key_offset(&last, get_key_offset(&first) +
-+		       inode_cluster_size(inode) - 1);
-+	return reiser4_cut_tree(reiser4_tree_by_inode(inode), &first, &last,
-+				inode, 0);
-+}
-+
-+static int reserve_cryptcompress2unixfile(struct inode *inode)
-+{
-+ reiser4_block_nr unformatted_nodes;
-+ reiser4_tree *tree;
-+
-+ tree = reiser4_tree_by_inode(inode);
-+
-+ /* number of unformatted nodes which will be created */
-+ unformatted_nodes = cluster_nrpages(inode); /* N */
-+
-+ /*
-+ * space required for conversion of one cluster (cryptcompress -> unix file):
-+ *
-+ * 1. kill ctail items
-+ *
-+ * 2. insert N unformatted nodes
-+ *
-+ * 3. insert N (worst-case single-block
-+ * extents) extent units.
-+ *
-+ * 4. drilling to the leaf level by coord_by_key()
-+ *
-+ * 5. possible update of stat-data
-+ *
-+ */
-+ grab_space_enable();
-+ return reiser4_grab_space
-+ (2 * tree->height +
-+ unformatted_nodes +
-+ unformatted_nodes * estimate_one_insert_into_item(tree) +
-+ 1 + estimate_one_insert_item(tree) +
-+ inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT); /* result of reiser4_grab_space() is returned unchanged */
-+}
-+
-+/* clear flag that indicated conversion and update stat-data with new
-+ (unix-file - specific) info; failures are only logged, 0 is always returned */
-+static int complete_file_conversion(struct inode *inode)
-+{
-+ int result;
-+
-+ grab_space_enable();
-+ result =
-+ reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+ if (result == 0) {
-+ /* the flag is cleared only if space for the stat-data update was grabbed */
-+ reiser4_inode_clr_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
-+ result = reiser4_update_sd(inode);
-+ }
-+ if (result)
-+ warning("edward-1452",
-+ "Converting %llu to unix-file: update sd failed (%i)",
-+ (unsigned long long)get_inode_oid(inode), result);
-+ return 0; /* NOTE(review): @result is dropped here - confirm this is intended */
-+}
-+
-+
-+/* do conversion: cut disk cluster #0, then re-insert its pages as extents */
-+static int cryptcompress2unixfile(struct file *file, struct inode * inode,
-+ reiser4_cluster_t * clust)
-+{
-+ int i;
-+ int result = 0;
-+ cryptcompress_info_t *cr_info;
-+ unix_file_info_t *uf_info;
-+
-+ assert("edward-1516", clust->pages[0]->index == 0);
-+ assert("edward-1517", clust->hint != NULL);
-+
-+ /* release all cryptcompress-specific resources */
-+ cr_info = cryptcompress_inode_data(inode); /* NOTE(review): cr_info is not used below */
-+ result = reserve_cryptcompress2unixfile(inode);
-+ if (result)
-+ goto out;
-+ /* NOTE(review): the flag stays set if a later step fails - confirm */
-+ reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
-+ reiser4_unset_hint(clust->hint);
-+ result = cut_disk_cluster(inode, 0);
-+ if (result)
-+ goto out;
-+ /* captured jnode of cluster and associated resources (pages,
-+ reserved disk space) were released by ->kill_hook() method
-+ of the item plugin */
-+
-+ result = __cryptcompress2unixfile(file, inode);
-+ if (result)
-+ goto out;
-+ /* At this point file is managed by unix file plugin */
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ assert("edward-1518",
-+ ergo(jprivate(clust->pages[0]),
-+ !jnode_is_cluster_page(jprivate(clust->pages[0]))));
-+ for(i = 0; i < clust->nr_pages; i++) {
-+ assert("edward-1519", clust->pages[i]);
-+ assert("edward-1520", PageUptodate(clust->pages[i]));
-+
-+ result = find_or_create_extent(clust->pages[i]);
-+ if (result)
-+ break;
-+ }
-+ if (!result) {
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ complete_file_conversion(inode); /* return value is ignored here */
-+ }
-+ out:
-+ all_grabbed2free();
-+ if (result)
-+ warning("edward-1453", "Failed to convert file %llu: %i",
-+ (unsigned long long)get_inode_oid(inode), result);
-+ return result;
-+}
-+
-+/* Check, then convert the file (*progress = 1 on success) or disable conversion */
-+int write_conversion_hook(struct file *file, struct inode * inode, loff_t pos,
-+			  reiser4_cluster_t * clust, int * progress)
-+{
-+	int ret;
-+	int need_check = 0;
-+	int is_compressible = 0;
-+
-+	if (!conversion_enabled(inode))
-+		return 0;
-+	ret = check_position(inode, pos, clust, &need_check);
-+	if (ret != 0 || need_check == 0)
-+		return ret;
-+	ret = read_check_compressibility(inode, clust, &is_compressible);
-+	if (ret != 0)
-+		return ret;
-+
-+	/* from here on the page cluster is grabbed and uptodate */
-+	if (is_compressible)
-+		ret = disable_conversion(inode);
-+	else {
-+		ret = cryptcompress2unixfile(file, inode, clust);
-+		if (ret == 0)
-+			*progress = 1;
-+	}
-+
-+	reiser4_release_cluster_pages(clust);
-+	return ret;
-+}
-+
-+static int setattr_conversion_hook(struct inode * inode, struct iattr *attr)
-+{	/* a size change disables conversion; any other attribute change is a no-op */
-+	return (attr->ia_valid & ATTR_SIZE) == 0 ? 0 : disable_conversion(inode);
-+}
-+
-+/* Protected methods of cryptcompress file plugin constructed
-+ by the macros above */
-+
-+/* Wrappers with active protection for:
-+ . write_cryptcompress;
-+ . setattr_cryptcompress;
-+*/
-+
-+ssize_t prot_write_cryptcompress(struct file *file, const char __user *buf,
-+				 size_t count, loff_t *off)
-+{
-+	int prot = 0, conv = 0;	/* conv != 0: continue with write_unix_file() */
-+	ssize_t written_cr, written_uf = 0;
-+	struct inode * inode = file->f_dentry->d_inode;
-+	struct rw_semaphore * guard = &reiser4_inode_data(inode)->conv_sem;
-+
-+	if (should_protect(inode)) {
-+		prot = 1;
-+		down_write(guard);
-+	}
-+	written_cr = write_cryptcompress(file, buf, count, off, &conv);
-+	if (prot)
-+		up_write(guard);
-+	if (written_cr < 0)
-+		return written_cr;
-+	if (conv)	/* the rest of the buffer goes through the unix-file plugin */
-+		written_uf = write_unix_file(file, buf + written_cr,
-+					     count - written_cr, off);
-+	/* a short count hides the error only if something was actually written */
-+	if (written_uf < 0)
-+		return written_cr ? written_cr : written_uf;
-+	return written_cr + written_uf;
-+}
-+
-+int prot_setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
-+{
-+ /* NOTE(review): @inode is presumably referenced inside PROT_ACTIVE - confirm */
-+ struct inode * inode = dentry->d_inode;
-+ return PROT_ACTIVE(int, setattr, (dentry, attr),
-+ setattr_conversion_hook(inode, attr)); /* hook disables conversion on ATTR_SIZE */
-+}
-+
-+/* Wrappers with passive protection for:
-+ . read_cryptcompress;
-+ . mmap_cryptcompress;
-+ . release_cryptcompress;
-+ . sendfile_cryptcompress;
-+ . delete_object_cryptcompress.
-+*/
-+ssize_t prot_read_cryptcompress(struct file * file, char __user * buf,
-+ size_t size, loff_t * off)
-+{
-+ /* NOTE(review): @inode is presumably referenced inside PROT_PASSIVE - confirm */
-+ struct inode * inode = file->f_dentry->d_inode;
-+ return PROT_PASSIVE(ssize_t, read, (file, buf, size, off));
-+}
-+
-+int prot_mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
-+{
-+ struct inode *inode = file->f_dentry->d_inode; /* see note in prot_read_cryptcompress() */
-+ return PROT_PASSIVE(int, mmap, (file, vma));
-+}
-+
-+int prot_release_cryptcompress(struct inode *inode, struct file *file)
-+{
-+ return PROT_PASSIVE(int, release, (inode, file)); /* here @inode is a parameter */
-+}
-+
-+ssize_t prot_sendfile_cryptcompress(struct file *file, loff_t *ppos,
-+ size_t count, read_actor_t actor,
-+ void *target)
-+{
-+ struct inode * inode = file->f_dentry->d_inode; /* see note in prot_read_cryptcompress() */
-+ return PROT_PASSIVE(ssize_t, sendfile,
-+ (file, ppos, count, actor, target));
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/file.h linux-2.6.20/fs/reiser4/plugin/file/file.h
---- linux-2.6.20.orig/fs/reiser4/plugin/file/file.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/file.h 2007-05-06 14:50:43.783001971 +0400
-@@ -0,0 +1,272 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* this file contains declarations of methods implementing
-+ file plugins (UNIX_FILE_PLUGIN_ID, CRYPTCOMPRESS_FILE_PLUGIN_ID
-+ and SYMLINK_FILE_PLUGIN_ID) */
-+
-+#if !defined( __REISER4_FILE_H__ )
-+#define __REISER4_FILE_H__
-+
-+/* declarations of functions implementing UNIX_FILE_PLUGIN_ID file plugin */
-+
-+/* inode operations */
-+int setattr_unix_file(struct dentry *, struct iattr *);
-+
-+/* file operations */
-+ssize_t read_unix_file(struct file *, char __user *buf, size_t read_amount,
-+ loff_t *off);
-+ssize_t write_unix_file(struct file *, const char __user *buf, size_t write_amount,
-+ loff_t * off);
-+int ioctl_unix_file(struct inode *, struct file *, unsigned int cmd,
-+ unsigned long arg);
-+int mmap_unix_file(struct file *, struct vm_area_struct *);
-+int open_unix_file(struct inode *, struct file *);
-+int release_unix_file(struct inode *, struct file *);
-+int sync_unix_file(struct file *, struct dentry *, int datasync);
-+ssize_t sendfile_unix_file(struct file *, loff_t *ppos, size_t count,
-+ read_actor_t, void *target);
-+
-+/* address space operations */
-+int readpage_unix_file(struct file *, struct page *);
-+int readpages_unix_file(struct file*, struct address_space*, struct list_head*, unsigned);
-+int writepages_unix_file(struct address_space *, struct writeback_control *);
-+int prepare_write_unix_file(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+int commit_write_unix_file(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+sector_t bmap_unix_file(struct address_space *, sector_t lblock);
-+
-+/* file plugin operations */
-+int flow_by_inode_unix_file(struct inode *, const char __user *buf,
-+ int user, loff_t, loff_t, rw_op, flow_t *);
-+int owns_item_unix_file(const struct inode *, const coord_t *);
-+void init_inode_data_unix_file(struct inode *, reiser4_object_create_data *,
-+ int create);
-+int delete_object_unix_file(struct inode *);
-+
-+/*
-+ * All writes into a unix file are performed by the item write method. The write
-+ * method of the unix file plugin only decides which item plugin (extent or tail)
-+ * to call and in which mode (one from the enum below).
-+ */
-+typedef enum {
-+ FIRST_ITEM = 1,
-+ APPEND_ITEM = 2,
-+ OVERWRITE_ITEM = 3
-+} write_mode_t;
-+
-+/* unix file may be in one of the following states (kind of items building the file) */
-+typedef enum {
-+ UF_CONTAINER_UNKNOWN = 0,
-+ UF_CONTAINER_TAILS = 1,
-+ UF_CONTAINER_EXTENTS = 2,
-+ UF_CONTAINER_EMPTY = 3
-+} file_container_t;
-+
-+struct formatting_plugin;
-+struct inode;
-+
-+/* unix file plugin specific part of reiser4 inode */
-+typedef struct unix_file_info {
-+ /*
-+ * this read-write lock protects file containerization change. Accesses
-+ * which do not change file containerization (see file_container_t)
-+ * (read, readpage, writepage, write (until tail conversion is
-+ * involved)) take read-lock. Accesses which modify file
-+ * containerization (truncate, conversion from tail to extent and back)
-+ * take write-lock.
-+ */
-+ struct rw_semaphore latch;
-+ /* this enum specifies which items are used to build the file */
-+ file_container_t container;
-+ /*
-+ * plugin which controls when file is to be converted to extents and
-+ * back to tail
-+ */
-+ struct formatting_plugin *tplug;
-+ /* if this is set, file is in exclusive use */
-+ int exclusive_use;
-+#if REISER4_DEBUG
-+ /* pointer to task struct of thread owning exclusive access to file */
-+ void *ea_owner;
-+ atomic_t nr_neas; /* NOTE(review): presumably count of non-exclusive accesses - confirm */
-+ void *last_reader; /* NOTE(review): presumably last task that took read access - confirm */
-+#endif
-+} unix_file_info_t;
-+
-+struct unix_file_info *unix_file_inode_data(const struct inode *inode);
-+void get_exclusive_access(unix_file_info_t *);
-+void drop_exclusive_access(unix_file_info_t *);
-+void get_nonexclusive_access(unix_file_info_t *);
-+void drop_nonexclusive_access(unix_file_info_t *);
-+int try_to_get_nonexclusive_access(unix_file_info_t *);
-+int find_file_item(hint_t *, const reiser4_key *, znode_lock_mode,
-+ struct inode *);
-+int find_file_item_nohint(coord_t *, lock_handle *,
-+ const reiser4_key *, znode_lock_mode,
-+ struct inode *);
-+
-+int load_file_hint(struct file *, hint_t *);
-+void save_file_hint(struct file *, const hint_t *);
-+
-+#include "../item/extent.h"
-+#include "../item/tail.h"
-+#include "../item/ctail.h"
-+
-+struct uf_coord {
-+ coord_t coord;
-+ lock_handle *lh;
-+ int valid; /* set to 1 / 0 by hint_set_valid() / hint_clr_valid() */
-+ union {
-+ extent_coord_extension_t extent;
-+ tail_coord_extension_t tail;
-+ ctail_coord_extension_t ctail;
-+ } extension; /* one member per item type: extent, tail or ctail */
-+};
-+
-+#include "../../forward.h"
-+#include "../../seal.h"
-+#include "../../lock.h"
-+
-+/*
-+ * This structure is used to speed up file operations (reads and writes). A
-+ * hint is a suggestion about where a key resolved to last time. A seal
-+ * indicates whether a node has been modified since a hint was last recorded.
-+ * You check the seal, and if the seal is still valid, you can use the hint
-+ * without traversing the tree again.
-+ */
-+struct hint {
-+ seal_t seal; /* a seal over last file item accessed */
-+ uf_coord_t ext_coord; /* its ->valid flag is what hint_is_valid() reports */
-+ loff_t offset;
-+ znode_lock_mode mode;
-+ lock_handle lh;
-+};
-+
-+static inline int hint_is_valid(hint_t * hint) /* nonzero if ext_coord is marked valid */
-+{
-+ return hint->ext_coord.valid;
-+}
-+
-+static inline void hint_set_valid(hint_t * hint) /* mark ext_coord of @hint valid */
-+{
-+ hint->ext_coord.valid = 1;
-+}
-+
-+static inline void hint_clr_valid(hint_t * hint) /* mark ext_coord of @hint invalid */
-+{
-+ hint->ext_coord.valid = 0;
-+}
-+
-+int load_file_hint(struct file *, hint_t *);
-+void save_file_hint(struct file *, const hint_t *);
-+void hint_init_zero(hint_t *);
-+void reiser4_set_hint(hint_t *, const reiser4_key *, znode_lock_mode);
-+int hint_is_set(const hint_t *);
-+void reiser4_unset_hint(hint_t *);
-+
-+int reiser4_update_file_size(struct inode *, reiser4_key *, int update_sd);
-+int cut_file_items(struct inode *, loff_t new_size, int update_sd,
-+ loff_t cur_size, int (*update_actor) (struct inode *,
-+ reiser4_key *, int));
-+#if REISER4_DEBUG
-+
-+/* return 1 if exclusive access is obtained, 0 - otherwise */
-+static inline int ea_obtained(unix_file_info_t * uf_info)
-+{
-+	/* a successful read-trylock means the latch is not write-locked */
-+	if (down_read_trylock(&uf_info->latch)) {
-+		up_read(&uf_info->latch);
-+		return 0;
-+	}
-+	return 1;
-+}
-+
-+#endif
-+
-+/* declarations of functions implementing SYMLINK_FILE_PLUGIN_ID file plugin */
-+int reiser4_create_symlink(struct inode *symlink, struct inode *dir,
-+ reiser4_object_create_data *);
-+void destroy_inode_symlink(struct inode *);
-+
-+/* declarations of functions implementing CRYPTCOMPRESS_FILE_PLUGIN_ID
-+ file plugin */
-+
-+/* inode operations */
-+int setattr_cryptcompress(struct dentry *, struct iattr *);
-+int prot_setattr_cryptcompress(struct dentry *, struct iattr *);
-+
-+/* file operations */
-+ssize_t read_cryptcompress(struct file *, char __user *buf, size_t read_amount,
-+ loff_t * off);
-+ssize_t prot_read_cryptcompress(struct file *, char __user *buf,
-+ size_t read_amount, loff_t * off);
-+
-+ssize_t write_cryptcompress(struct file *, const char __user *buf, size_t write_amount,
-+ loff_t * off, int * conv);
-+ssize_t prot_write_cryptcompress(struct file *, const char __user *buf, size_t write_amount,
-+ loff_t * off);
-+int mmap_cryptcompress(struct file *, struct vm_area_struct *);
-+int prot_mmap_cryptcompress(struct file *, struct vm_area_struct *);
-+ssize_t sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count,
-+ read_actor_t actor, void *target);
-+ssize_t prot_sendfile_cryptcompress(struct file *file, loff_t *ppos, size_t count,
-+ read_actor_t actor, void *target);
-+
-+int release_cryptcompress(struct inode *, struct file *);
-+int prot_release_cryptcompress(struct inode *, struct file *);
-+
-+/* address space operations */
-+extern int readpage_cryptcompress(struct file *, struct page *);
-+extern int writepages_cryptcompress(struct address_space *,
-+ struct writeback_control *);
-+/* file plugin operations */
-+int flow_by_inode_cryptcompress(struct inode *, const char __user *buf,
-+ int user, loff_t, loff_t, rw_op, flow_t *);
-+int key_by_inode_cryptcompress(struct inode *, loff_t off, reiser4_key *);
-+int create_cryptcompress(struct inode *, struct inode *,
-+ reiser4_object_create_data *);
-+int delete_object_cryptcompress(struct inode *);
-+void init_inode_data_cryptcompress(struct inode *, reiser4_object_create_data *,
-+ int create);
-+int cut_tree_worker_cryptcompress(tap_t *, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ struct inode *object, int truncate,
-+ int *progress);
-+void destroy_inode_cryptcompress(struct inode *);
-+int open_object_cryptcompress(struct inode * inode, struct file * file);
-+
-+extern reiser4_plugin_ops cryptcompress_plugin_ops;
-+
-+#define WRITE_GRANULARITY 32
-+
-+int tail2extent(unix_file_info_t *);
-+int extent2tail(unix_file_info_t *);
-+
-+int goto_right_neighbor(coord_t *, lock_handle *);
-+int find_or_create_extent(struct page *);
-+int equal_to_ldk(znode *, const reiser4_key *);
-+
-+void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh);
-+
-+static inline int cbk_errored(int cbk_result)
-+{
-+	/* every result other than "found" / "not found" counts as an error */
-+	return !(cbk_result == CBK_COORD_NOTFOUND || cbk_result == CBK_COORD_FOUND);
-+}
-+
-+/* __REISER4_FILE_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/invert.c linux-2.6.20/fs/reiser4/plugin/file/invert.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/invert.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/invert.c 2007-05-06 14:50:43.783001971 +0400
-@@ -0,0 +1,493 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Suppose you want to conveniently read and write a large variety of small files within a single emacs
-+ buffer, without having a separate buffer for each 8 byte or so file. Inverts are the way to do that. An invert
-+ provides you with the contents of a set of subfiles plus its own contents. It is a file which inherits other files
-+ when you read it, and allows you to write to it and through it to the files that it inherits from. In order for it
-+ to know which subfiles each part of your write should go into, there must be delimiters indicating that. It tries to
-+ make that easy for you by providing those delimiters in what you read from it.
-+
-+ When you read it, an invert performs an inverted assignment. Instead of taking an assignment command and writing a
-+ bunch of files, it takes a bunch of files and composes an assignment command for you to read from it that if executed
-+ would create those files. But which files? Well, that must be specified in the body of the invert using a special
-+ syntax, and that specification is called the invert of the assignment.
-+
-+ When written to, an invert performs the assignment command that is written
-+ to it, and modifies its own body to contain the invert of that
-+ assignment.
-+
-+ In other words, writing to an invert file what you have read from it
-+ is the identity operation.
-+
-+ Malformed assignments cause write errors. Partial writes are not
-+ supported in v4.0, but will be.
-+
-+ Example:
-+
-+ If an invert contains:
-+
-+ /filenameA/<>+"(some text stored in the invert)+/filenameB/<>
-+
-+======================
-+Each element in this definition should be an invert, and all files
-+should be called recursively - too. This is bad. If one of the
-+included files in not a regular or invert file, then we can't read
-+main file.
-+
-+I think to make it is possible easier:
-+
-+internal structure of invert file should be like symlink file. But
-+read and write method should be explicitly indicated in i/o operation..
-+
-+By default we read and write (if possible) as symlink and if we
-+specify ..invert at reading time then we can also specify it at write time.
-+
-+example:
-+/my_invert_file/..invert<- ( (/filenameA<-"(The contents of filenameA))+"(some text stored in the invert)+(/filenameB<-"(The contents of filenameB) ) )
-+will create /my_invert_file as invert, and will creat /filenameA and /filenameB with specified body.
-+
-+read of /my_invert_file/..invert will be
-+/filenameA<-"(The contents of filenameA)+"(some text stored in the invert)+/filenameB<-"(The contents of filenameB)
-+
-+but read of /my_invert_file/ will be
-+The contents of filenameAsome text stored in the invertThe contents of filenameB
-+
-+we also can creat this file as
-+/my_invert_file/<-/filenameA+"(some text stored in the invert)+/filenameB
-+will create /my_invert_file , and use existing files /filenameA and /filenameB.
-+
-+and when we will read it will be as previously invert file.
-+
-+This is correct?
-+
-+ vv
-+DEMIDOV-FIXME-HANS:
-+
-+Maybe you are right, but then you must disable writes to /my_invert_file/ and only allow writes to /my_invert_file/..invert
-+
-+Do you agree? Discuss it on reiserfs-list....
-+
-+-Hans
-+=======================
-+
-+ Then a read will return:
-+
-+ /filenameA<-"(The contents of filenameA)+"(some text stored in the invert)+/filenameB<-"(The contents of filenameB)
-+
-+ and a write of the line above to the invert will set the contents of
-+ the invert and filenameA and filenameB to their original values.
-+
-+ Note that the contents of an invert have no influence on the effect
-+ of a write unless the write is a partial write (and a write of a
-+ shorter file without using truncate first is a partial write).
-+
-+ truncate() has no effect on filenameA and filenameB, it merely
-+ resets the value of the invert.
-+
-+ Writes to subfiles via the invert are implemented by preceding them
-+ with truncates.
-+
-+ Parse failures cause write failures.
-+
-+ Questions to ponder: should the invert be acted on prior to file
-+ close when writing to an open filedescriptor?
-+
-+ Example:
-+
-+ If an invert contains:
-+
-+ "(This text and a pair of quotes are all that is here.)
-+
-+Then a read will return:
-+
-+ "(This text and a pair of quotes are all that is here.)
-+
-+*/
-+
-+/* OPEN method places a struct file in memory associated with invert body
-+ and returns something like file descriptor to the user for the future access
-+ to the invert file.
-+ During opening we parse the body of invert and get a list of the 'entries'
-+ (that describe all its subfiles) and place a pointer to the first struct in
-+ reiserfs-specific part of invert inode (arbitrary decision).
-+
-+ Each subfile is described by the struct inv_entry that has a pointer @sd to
-+ in-core based stat-data and a pointer to struct file @f (if we find that the
-+ subfile uses more than one unformatted node (arbitrary decision), we load
-+ struct file in memory, otherwise we load base stat-data (and maybe 1-2 bytes
-+ of some other information we need)).
-+
-+ Since READ and WRITE methods for inverts were formulated in assignment
-+ language, they don't contain arguments 'size' and 'offset' that make sense
-+ only in ordinary read/write methods.
-+
-+ READ method is a combination of two methods:
-+ 1) ordinary read method (with offset=0, length = @f->...->i_size) for entries
-+ with @f != 0, this method uses pointer to struct file as an argument
-+ 2) read method for inode-less files with @sd != 0, this method uses
-+ in-core based stat-data instead of struct file as an argument.
-+ in the first case we don't use pagecache, just copy data that we got after
-+ cbk() into userspace.
-+
-+ WRITE method for invert files is more complex.
-+ Besides the WRITE-interface declared in the assignment language above we need
-+ to have an opportunity to edit unwrapped body of invert file with some
-+ text editor, it means we need GENERIC WRITE METHOD for invert file:
-+
-+ my_invert_file/..invert <- "string"
-+
-+ this method parses "string" and looks for correct subfile signatures, also
-+ the parsing process splits this "string" into the set of flows in accordance
-+ with the set of subfiles specified by this signature.
-+ The found list of signatures #S is compared with the opened one #I of invert
-+ file. If it doesn't have this one (#I==0, it will be so for instance if we
-+ have just created this invert file) the write method assigns found signature
-+ (#I=#S;) to the invert file. Then if #I==#S, generic write method splits
-+ itself into several write methods for ordinary or light-weight files, or calls itself
-+ recursively for invert files with corresponding flows.
-+ I am not sure, but the list of signatures looks like what mr.Demidov means
-+ by 'delimiters'.
-+
-+ The cases when #S<#I (#I<#S) (in the sense of set-theory) are also available
-+ and cause delete (create new) subfiles (arbitrary decision - it may looks
-+ too complex, but this interface will be the completest). The order of entries
-+ of list #S (#I) and inherited order on #I (#S) must coincide.
-+ The other parsing results give malformed signature that aborts READ method
-+ and releases all resources.
-+
-+ Format of subfile (entry) signature:
-+
-+ "START_MAGIC"<>(TYPE="...",LOOKUP_ARG="...")SUBFILE_BODY"END_MAGIC"
-+
-+ Legend:
-+
-+ START_MAGIC - keyword indicates the start of subfile signature;
-+
-+ <> indicates the start of 'subfile metadata', that is the pair
-+ (TYPE="...",LOOKUP_ARG="...") in parenthesis separated by comma.
-+
-+ TYPE - the string "type" indicates the start of one of the three words:
-+ - ORDINARY_FILE,
-+ - LIGHT_WEIGHT_FILE,
-+ - INVERT_FILE;
-+
-+ LOOKUP_ARG - lookup argument depends on previous type:
-+ */
-+
-+ /************************************************************/
-+ /* TYPE * LOOKUP ARGUMENT */
-+ /************************************************************/
-+ /* LIGHT_WEIGHT_FILE * stat-data key */
-+ /************************************************************/
-+ /* ORDINARY_FILE * filename */
-+ /************************************************************/
-+ /* INVERT_FILE * filename */
-+ /************************************************************/
-+
-+ /* where:
-+ *stat-data key - the string contains stat data key of this subfile, it will be
-+ passed to fast-access lookup method for light-weight files;
-+ *filename - pathname of this subfile, it will be passed to VFS lookup methods
-+ for ordinary and invert files;
-+
-+ SUBFILE_BODY - data of this subfile (it will go to the flow)
-+ END_MAGIC - the keyword indicates the end of subfile signature.
-+
-+ The other symbols inside the signature are interpreted as 'unformatted content',
-+ which is available with VFS's read_link() (arbitrary decision).
-+
-+ NOTE: Parse method for a body of invert file uses mentioned signatures _without_
-+ subfile bodies.
-+
-+ Now the only unclear thing is WRITE in regular light-weight subfile A that we
-+ can describe only in assignment language:
-+
-+ A <- "some_string"
-+
-+ I guess we don't want to change stat-data and body items of file A
-+ if this file exists, and size(A) != size("some_string") because this operation is
-+ expensive, so we only do the partial write if size(A) > size("some_string")
-+ and do truncate of the "some_string", and then do A <- "truncated string", if
-+ size(A) < size("some_string"). This decision is also arbitrary..
-+ */
-+
-+/* here is infrastructure for formatted flows */
-+
-+#define SUBFILE_HEADER_MAGIC 0x19196605
-+#define FLOW_HEADER_MAGIC 0x01194304
-+
-+#include "../plugin.h"
-+#include "../../debug.h"
-+#include "../../forward.h"
-+#include "../object.h"
-+#include "../item/item.h"
-+#include "../item/static_stat.h"
-+#include "../../dformat.h"
-+#include "../znode.h"
-+#include "../inode.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct file */
-+#include <linux/list.h> /* for struct list_head */
-+
-+typedef enum {
-+ LIGHT_WEIGHT_FILE, /* looked up by stat-data key in get_inv_entry() */
-+ ORDINARY_FILE, /* opened by filename via filp_open() in get_inv_entry() */
-+ INVERT_FILE /* get_inv_entry() takes the same filename path as for ORDINARY_FILE */
-+} inv_entry_type;
-+
-+typedef struct flow_header {
-+	d32 fl_magic;	/* flow magic */
-+	d16 fl_nr;	/* number of subfiles in the flow */
-+} flow_header;	/* the typedef name was missing, yet the accessors take flow_header * */
-+
-+typedef struct subfile_header {
-+	d32 sh_magic;	/* subfile magic */
-+	d16 sh_type;	/* type of subfile: light-weight, ordinary, invert */
-+	d16 sh_arg_len;	/* length of lookup argument (filename, key) */
-+	d32 sh_body_len;	/* length of subfile body */
-+} subfile_header;	/* the typedef name was missing, yet the accessors take subfile_header * */
-+
-+/* functions to get/set fields of flow header */
-+
-+static void fl_set_magic(flow_header * fh, __u32 value)	/* store @value into fl_magic */
-+{
-+	cputod32(value, &fh->fl_magic);
-+}
-+
-+static __u32 fl_get_magic(flow_header * fh)	/* read fl_magic in cpu byte order */
-+{
-+	return d32tocpu(&fh->fl_magic);
-+}
-+static void fl_set_number(flow_header * fh, __u16 value)	/* store number of subfiles */
-+{
-+	cputod16(value, &fh->fl_nr);
-+}
-+static unsigned fl_get_number(flow_header * fh)	/* read number of subfiles */
-+{
-+	return d16tocpu(&fh->fl_nr);
-+}
-+
-+/* functions to get/set fields of subfile header */
-+
-+static void sh_set_magic(subfile_header * sh, __u32 value) /* store @value into sh_magic */
-+{
-+ cputod32(value, &sh->sh_magic);
-+}
-+
-+static __u32 sh_get_magic(subfile_header * sh) /* read sh_magic via d32tocpu() */
-+{
-+ return d32tocpu(&sh->sh_magic);
-+}
-+static void sh_set_type(subfile_header * sh, __u16 value)	/* store subfile type */
-+{
-+	cputod16(value, &sh->sh_type);	/* was written into sh_magic */
-+}
-+static unsigned sh_get_type(subfile_header * sh)	/* read subfile type */
-+{
-+	return d16tocpu(&sh->sh_type);	/* was read from sh_magic */
-+}
-+static void sh_set_arg_len(subfile_header * sh, __u16 value) /* store length of lookup argument */
-+{
-+ cputod16(value, &sh->sh_arg_len);
-+}
-+static unsigned sh_get_arg_len(subfile_header * sh) /* read length of lookup argument */
-+{
-+ return d16tocpu(&sh->sh_arg_len);
-+}
-+static void sh_set_body_len(subfile_header * sh, __u32 value) /* store length of subfile body */
-+{
-+ cputod32(value, &sh->sh_body_len);
-+}
-+
-+static __u32 sh_get_body_len(subfile_header * sh) /* read length of subfile body */
-+{
-+ return d32tocpu(&sh->sh_body_len);
-+}
-+
-+/* in-core minimal stat-data, light-weight analog of inode; filled by sd_base_load() */
-+
-+struct incore_sd_base {
-+ umode_t isd_mode; /* from on-disk mode (16 bit) */
-+ nlink_t isd_nlink; /* from on-disk nlink (32 bit) */
-+ loff_t isd_size; /* from on-disk size (64 bit) */
-+ char *isd_data; /* 'subflow' to write */
-+};
-+
-+/* open invert create a list of invert entries,
-+   every entry is represented by structure inv_entry */
-+
-+struct inv_entry {
-+	struct list_head ie_list;	/* embedded list link (was a dangling pointer) */
-+	struct file *ie_file;	/* this is NULL if the file doesn't
-+				   have unformatted nodes */
-+	struct incore_sd_base *ie_sd;	/* inode-less analog of struct file */
-+};
-+
-+/* allocate and init invert entry; returns ERR_PTR(-ENOMEM) on failure */
-+
-+static struct inv_entry *allocate_inv_entry(void)
-+{
-+	struct inv_entry *inv_entry;
-+
-+	inv_entry = reiser4_kmalloc(sizeof(struct inv_entry), GFP_KERNEL);
-+	if (!inv_entry)
-+		return ERR_PTR(RETERR(-ENOMEM));
-+	inv_entry->ie_file = NULL;
-+	inv_entry->ie_sd = NULL;
-+	INIT_LIST_HEAD(&inv_entry->ie_list);
-+	return inv_entry;
-+}
-+
-+/* unlink @ientry from its list, free its in-core stat-data, close its file
-+   (if any) and free the entry itself; returns the result of filp_close() */
-+static int put_inv_entry(struct inv_entry *ientry)
-+{
-+	int result = 0;
-+
-+	assert("edward-96", ientry != NULL);
-+
-+	list_del(&ientry->ie_list);
-+	kfree(ientry->ie_sd);	/* kfree(NULL) is a no-op */
-+	if (ientry->ie_file != NULL)
-+		result = filp_close(ientry->ie_file, NULL);
-+	/* free the entry last: its fields were still needed above */
-+	kfree(ientry);
-+	return result;
-+}
-+
-+/* allocate in-core stat-data for @inv_entry; returns 0 or -ENOMEM */
-+static int allocate_incore_sd_base(struct inv_entry *inv_entry)
-+{
-+	assert("edward-98", inv_entry != NULL);
-+	/* asserts compared with '=' and named a non-existent field ie_inode */
-+	assert("edward-99", inv_entry->ie_file == NULL);
-+	assert("edward-100", inv_entry->ie_sd == NULL);
-+	inv_entry->ie_sd = reiser4_kmalloc(sizeof(struct incore_sd_base), GFP_KERNEL);
-+	if (!inv_entry->ie_sd)
-+		return RETERR(-ENOMEM);
-+	return 0;
-+}
-+
-+/* this can be installed as ->init_inv_entry () method of
-+   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+   Copies data from on-disk stat-data format into light-weight analog of inode.
-+   Doesn't handle stat-data extensions. @inv_entry->ie_sd must be allocated. */
-+
-+static void sd_base_load(struct inv_entry *inv_entry, char *sd)
-+{
-+	reiser4_stat_data_base *sd_base;
-+
-+	assert("edward-101", inv_entry != NULL);
-+	assert("edward-101", inv_entry->ie_sd != NULL);
-+	assert("edward-102", sd != NULL);
-+
-+	sd_base = (reiser4_stat_data_base *) sd;
-+	inv_entry->ie_sd->isd_mode = d16tocpu(&sd_base->mode);
-+	inv_entry->ie_sd->isd_nlink = d32tocpu(&sd_base->nlink);
-+	inv_entry->ie_sd->isd_size = d64tocpu(&sd_base->size);
-+	inv_entry->ie_sd->isd_data = NULL;	/* no 'subflow' attached yet */
-+}
-+
-+/* initialise incore stat-data */
-+
-+static void init_incore_sd_base(struct inv_entry *inv_entry, coord_t * coord)
-+{
-+ reiser4_plugin *plugin = item_plugin_by_coord(coord);
-+ void *body = item_body_by_coord(coord);
-+
-+ assert("edward-103", inv_entry != NULL);
-+ assert("edward-104", plugin != NULL);
-+ assert("edward-105", body != NULL);
-+
-+ sd_base_load(inv_entry, body);
-+}
-+
-+/* takes a key or filename and allocates new invert_entry,
-+ init and adds it into the list,
-+ we use lookup_sd_by_key() for light-weight files and VFS lookup by filename */
-+
-+int get_inv_entry(struct inode *invert_inode, /* inode of invert's body */
-+ inv_entry_type type, /* LIGHT-WEIGHT or ORDINARY */
-+ const reiser4_key * key, /* key of invert entry stat-data */
-+ char *filename, /* filename of the file to be opened */
-+ int flags, int mode)
-+{
-+ int result;
-+ struct inv_entry *ientry;
-+
-+ assert("edward-107", invert_inode != NULL);
-+
-+ ientry = allocate_inv_entry();
-+ if (IS_ERR(ientry))
-+ return (PTR_ERR(ientry));
-+
-+ if (type == LIGHT_WEIGHT_FILE) {
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ assert("edward-108", key != NULL);
-+
-+ init_coord(&coord);
-+ init_lh(&lh);
-+ result =
-+ lookup_sd_by_key(reiser4_tree_by_inode(invert_inode),
-+ ZNODE_READ_LOCK, &coord, &lh, key);
-+ if (result == 0)
-+ init_incore_sd_base(ientry, coord);
-+
-+ done_lh(&lh);
-+ done_coord(&coord);
-+ return (result);
-+ } else {
-+ struct file *file = filp_open(filename, flags, mode);
-+ /* FIXME_EDWARD here we need to check if we
-+ did't follow to any mount point */
-+
-+ assert("edward-108", filename != NULL);
-+
-+ if (IS_ERR(file))
-+ return (PTR_ERR(file));
-+ ientry->ie_file = file;
-+ return 0;
-+ }
-+}
-+
-+/* takes inode of invert, reads the body of this invert, parses it,
-+ opens all invert entries and return pointer on the first inv_entry */
-+
-+struct inv_entry *open_invert(struct file *invert_file)
-+{
-+
-+}
-+
-+ssize_t subfile_read(struct *invert_entry, flow * f)
-+{
-+
-+}
-+
-+ssize_t subfile_write(struct *invert_entry, flow * f)
-+{
-+
-+}
-+
-+ssize_t invert_read(struct *file, flow * f)
-+{
-+
-+}
-+
-+ssize_t invert_write(struct *file, flow * f)
-+{
-+
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/Makefile linux-2.6.20/fs/reiser4/plugin/file/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/file/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/Makefile 2007-05-06 14:50:43.783001971 +0400
-@@ -0,0 +1,7 @@
-+obj-$(CONFIG_REISER4_FS) += file_plugins.o
-+
-+file_plugins-objs := \
-+ file.o \
-+ tail_conversion.o \
-+ symlink.o \
-+ cryptcompress.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/symfile.c linux-2.6.20/fs/reiser4/plugin/file/symfile.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/symfile.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/symfile.c 2007-05-06 14:50:43.787003221 +0400
-@@ -0,0 +1,87 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Symfiles are a generalization of Unix symlinks.
-+
-+ A symfile when read behaves as though you took its contents and
-+ substituted them into the reiser4 naming system as the right hand side
-+ of an assignment, and then read that which you had assigned to it.
-+
-+ A key issue for symfiles is how to implement writes through to
-+ subfiles. In general, one must have some method of determining what
-+ of that which is written to the symfile is written to what subfile.
-+ This can be done by use of custom plugin methods written by users, or
-+ by using a few general methods we provide for those willing to endure
-+ the insertion of delimiters into what is read.
-+
-+ Writing to symfiles without delimiters to denote what is written to
-+ what subfile is not supported by any plugins we provide in this
-+ release. Our most sophisticated support for writes is that embodied
-+ by the invert plugin (see invert.c).
-+
-+ A read only version of the /etc/passwd file might be
-+ constructed as a symfile whose contents are as follows:
-+
-+ /etc/passwd/userlines/*
-+
-+ or
-+
-+ /etc/passwd/userlines/demidov+/etc/passwd/userlines/edward+/etc/passwd/userlines/reiser+/etc/passwd/userlines/root
-+
-+ or
-+
-+ /etc/passwd/userlines/(demidov+edward+reiser+root)
-+
-+ A symfile with contents
-+
-+ /filenameA+"(some text stored in the uninvertable symfile)+/filenameB
-+
-+ will return when read
-+
-+ The contents of filenameAsome text stored in the uninvertable symfileThe contents of filenameB
-+
-+ and write of what has been read will not be possible to implement as
-+ an identity operation because there are no delimiters denoting the
-+ boundaries of what is to be written to what subfile.
-+
-+ Note that one could make this a read/write symfile if one specified
-+ delimiters, and the write method understood those delimiters delimited
-+ what was written to subfiles.
-+
-+ So, specifying the symfile in a manner that allows writes:
-+
-+ /etc/passwd/userlines/demidov+"(
-+ )+/etc/passwd/userlines/edward+"(
-+ )+/etc/passwd/userlines/reiser+"(
-+ )+/etc/passwd/userlines/root+"(
-+ )
-+
-+ or
-+
-+ /etc/passwd/userlines/(demidov+"(
-+ )+edward+"(
-+ )+reiser+"(
-+ )+root+"(
-+ ))
-+
-+ and the file demidov might be specified as:
-+
-+ /etc/passwd/userlines/demidov/username+"(:)+/etc/passwd/userlines/demidov/password+"(:)+/etc/passwd/userlines/demidov/userid+"(:)+/etc/passwd/userlines/demidov/groupid+"(:)+/etc/passwd/userlines/demidov/gecos+"(:)+/etc/passwd/userlines/demidov/home+"(:)+/etc/passwd/userlines/demidov/shell
-+
-+ or
-+
-+ /etc/passwd/userlines/demidov/(username+"(:)+password+"(:)+userid+"(:)+groupid+"(:)+gecos+"(:)+home+"(:)+shell)
-+
-+ Notice that if the file demidov has a carriage return in it, the
-+ parsing fails, but then if you put carriage returns in the wrong place
-+ in a normal /etc/passwd file it breaks things also.
-+
-+ Note that it is forbidden to have no text between two interpolations
-+ if one wants to be able to define what parts of a write go to what
-+ subfiles referenced in an interpolation.
-+
-+ If one wants to be able to add new lines by writing to the file, one
-+ must either write a custom plugin for /etc/passwd that knows how to
-+ name an added line, or one must use an invert, or one must use a more
-+ sophisticated symfile syntax that we are not planning to write for
-+ version 4.0.
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/symlink.c linux-2.6.20/fs/reiser4/plugin/file/symlink.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/symlink.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/symlink.c 2007-05-06 14:50:43.787003221 +0400
-@@ -0,0 +1,95 @@
-+/* Copyright 2002, 2003, 2005 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../inode.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+
-+/* file plugin methods specific for symlink files
-+ (SYMLINK_FILE_PLUGIN_ID) */
-+
-+/* this is implementation of create_object method of file plugin for
-+ SYMLINK_FILE_PLUGIN_ID
-+ */
-+
-+/**
-+ * reiser4_create_symlink - create_object of file plugin for SYMLINK_FILE_PLUGIN_ID
-+ * @symlink: inode of symlink object
-+ * @dir: inode of parent directory
-+ * @info: parameters of new object
-+ *
-+ * Inserts stat data with symlink extension where into the tree.
-+ */
-+int reiser4_create_symlink(struct inode *symlink,
-+ struct inode *dir UNUSED_ARG,
-+ reiser4_object_create_data *data /* info passed to us
-+ * this is filled by
-+ * reiser4() syscall
-+ * in particular */)
-+{
-+ int result;
-+
-+ assert("nikita-680", symlink != NULL);
-+ assert("nikita-681", S_ISLNK(symlink->i_mode));
-+ assert("nikita-685", reiser4_inode_get_flag(symlink, REISER4_NO_SD));
-+ assert("nikita-682", dir != NULL);
-+ assert("nikita-684", data != NULL);
-+ assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);
-+
-+ /*
-+ * stat data of symlink has symlink extension in which we store
-+ * symlink content, that is, path symlink is pointing to.
-+ */
-+ reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);
-+
-+ assert("vs-838", symlink->i_private == NULL);
-+ symlink->i_private = (void *)data->name;
-+
-+ assert("vs-843", symlink->i_size == 0);
-+ INODE_SET_FIELD(symlink, i_size, strlen(data->name));
-+
-+ /* insert stat data appended with data->name */
-+ result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
-+ if (result) {
-+ /* FIXME-VS: Make sure that symlink->i_private is not attached
-+ to kmalloced data */
-+ INODE_SET_FIELD(symlink, i_size, 0);
-+ } else {
-+ assert("vs-849", symlink->i_private
-+ && reiser4_inode_get_flag(symlink,
-+ REISER4_GENERIC_PTR_USED));
-+ assert("vs-850",
-+ !memcmp((char *)symlink->i_private, data->name,
-+ (size_t) symlink->i_size + 1));
-+ }
-+ return result;
-+}
-+
-+/* this is implementation of destroy_inode method of file plugin for
-+ SYMLINK_FILE_PLUGIN_ID
-+ */
-+void destroy_inode_symlink(struct inode *inode)
-+{
-+ assert("edward-799",
-+ inode_file_plugin(inode) ==
-+ file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID));
-+ assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode));
-+ assert("edward-801", reiser4_inode_get_flag(inode,
-+ REISER4_GENERIC_PTR_USED));
-+ assert("vs-839", S_ISLNK(inode->i_mode));
-+
-+ kfree(inode->i_private);
-+ inode->i_private = NULL;
-+ reiser4_inode_clr_flag(inode, REISER4_GENERIC_PTR_USED);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file/tail_conversion.c linux-2.6.20/fs/reiser4/plugin/file/tail_conversion.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file/tail_conversion.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file/tail_conversion.c 2007-05-06 14:50:43.787003221 +0400
-@@ -0,0 +1,729 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+#include "../../safe_link.h"
-+#include "../../vfs_ops.h"
-+
-+#include <linux/writeback.h>
-+
-+/* this file contains:
-+ tail2extent and extent2tail */
-+
-+/* exclusive access to a file is acquired when file state changes: tail2extent, empty2tail, extent2tail, etc */
-+void get_exclusive_access(unix_file_info_t * uf_info)
-+{
-+ assert("nikita-3028", reiser4_schedulable());
-+ assert("nikita-3047", LOCK_CNT_NIL(inode_sem_w));
-+ assert("nikita-3048", LOCK_CNT_NIL(inode_sem_r));
-+ /*
-+ * "deadlock avoidance": sometimes we commit a transaction under
-+ * rw-semaphore on a file. Such commit can deadlock with another
-+ * thread that captured some block (hence preventing atom from being
-+ * committed) and waits on rw-semaphore.
-+ */
-+ reiser4_txn_restart_current();
-+ LOCK_CNT_INC(inode_sem_w);
-+ down_write(&uf_info->latch);
-+ uf_info->exclusive_use = 1;
-+ assert("vs-1713", uf_info->ea_owner == NULL);
-+ assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0);
-+ ON_DEBUG(uf_info->ea_owner = current);
-+}
-+
-+void drop_exclusive_access(unix_file_info_t * uf_info)
-+{
-+ assert("vs-1714", uf_info->ea_owner == current);
-+ assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0);
-+ ON_DEBUG(uf_info->ea_owner = NULL);
-+ uf_info->exclusive_use = 0;
-+ up_write(&uf_info->latch);
-+ assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r));
-+ assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w));
-+ LOCK_CNT_DEC(inode_sem_w);
-+ reiser4_txn_restart_current();
-+}
-+
-+/**
-+ * nea_grabbed - do something when file semaphore is down_read-ed
-+ * @uf_info:
-+ *
-+ * This is called when nonexclisive access is obtained on file. All it does is
-+ * for debugging purposes.
-+ */
-+static void nea_grabbed(unix_file_info_t *uf_info)
-+{
-+#if REISER4_DEBUG
-+ LOCK_CNT_INC(inode_sem_r);
-+ assert("vs-1716", uf_info->ea_owner == NULL);
-+ atomic_inc(&uf_info->nr_neas);
-+ uf_info->last_reader = current;
-+#endif
-+}
-+
-+/**
-+ * get_nonexclusive_access - get nonexclusive access to a file
-+ * @uf_info: unix file specific part of inode to obtain access to
-+ *
-+ * Nonexclusive access is obtained on a file before read, write, readpage.
-+ */
-+void get_nonexclusive_access(unix_file_info_t *uf_info)
-+{
-+ assert("nikita-3029", reiser4_schedulable());
-+ assert("nikita-3361", get_current_context()->trans->atom == NULL);
-+
-+ down_read(&uf_info->latch);
-+ nea_grabbed(uf_info);
-+}
-+
-+/**
-+ * try_to_get_nonexclusive_access - try to get nonexclusive access to a file
-+ * @uf_info: unix file specific part of inode to obtain access to
-+ *
-+ * Non-blocking version of nonexclusive access obtaining.
-+ */
-+int try_to_get_nonexclusive_access(unix_file_info_t *uf_info)
-+{
-+ int result;
-+
-+ result = down_read_trylock(&uf_info->latch);
-+ if (result)
-+ nea_grabbed(uf_info);
-+ return result;
-+}
-+
-+void drop_nonexclusive_access(unix_file_info_t * uf_info)
-+{
-+ assert("vs-1718", uf_info->ea_owner == NULL);
-+ assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0);
-+ ON_DEBUG(atomic_dec(&uf_info->nr_neas));
-+
-+ up_read(&uf_info->latch);
-+
-+ LOCK_CNT_DEC(inode_sem_r);
-+ reiser4_txn_restart_current();
-+}
-+
-+/* part of tail2extent. Cut all items covering @count bytes starting from
-+ @offset */
-+/* Audited by: green(2002.06.15) */
-+static int cut_formatting_items(struct inode *inode, loff_t offset, int count)
-+{
-+ reiser4_key from, to;
-+
-+ /* AUDIT: How about putting an assertion here, what would check
-+ all provided range is covered by tail items only? */
-+ /* key of first byte in the range to be cut */
-+ inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
-+
-+ /* key of last byte in that range */
-+ to = from;
-+ set_key_offset(&to, (__u64) (offset + count - 1));
-+
-+ /* cut everything between those keys */
-+ return reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to,
-+ inode, 0);
-+}
-+
-+static void release_all_pages(struct page **pages, unsigned nr_pages)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < nr_pages; i++) {
-+ if (pages[i] == NULL) {
-+ unsigned j;
-+ for (j = i + 1; j < nr_pages; j++)
-+ assert("vs-1620", pages[j] == NULL);
-+ break;
-+ }
-+ page_cache_release(pages[i]);
-+ pages[i] = NULL;
-+ }
-+}
-+
-+/* part of tail2extent. replace tail items with extent one. Content of tail
-+ items (@count bytes) being cut are copied already into
-+ pages. extent_writepage method is called to create extents corresponding to
-+ those pages */
-+static int replace(struct inode *inode, struct page **pages, unsigned nr_pages, int count)
-+{
-+ int result;
-+ unsigned i;
-+ STORE_COUNTERS;
-+
-+ if (nr_pages == 0)
-+ return 0;
-+
-+ assert("vs-596", pages[0]);
-+
-+ /* cut copied items */
-+ result = cut_formatting_items(inode, page_offset(pages[0]), count);
-+ if (result)
-+ return result;
-+
-+ CHECK_COUNTERS;
-+
-+ /* put into tree replacement for just removed items: extent item, namely */
-+ for (i = 0; i < nr_pages; i++) {
-+ result = add_to_page_cache_lru(pages[i], inode->i_mapping,
-+ pages[i]->index,
-+ mapping_gfp_mask(inode->
-+ i_mapping));
-+ if (result)
-+ break;
-+ unlock_page(pages[i]);
-+ result = find_or_create_extent(pages[i]);
-+ if (result)
-+ break;
-+ SetPageUptodate(pages[i]);
-+ }
-+ return result;
-+}
-+
-+#define TAIL2EXTENT_PAGE_NUM 3 /* number of pages to fill before cutting tail
-+ * items */
-+
-+static int reserve_tail2extent_iteration(struct inode *inode)
-+{
-+ reiser4_block_nr unformatted_nodes;
-+ reiser4_tree *tree;
-+
-+ tree = reiser4_tree_by_inode(inode);
-+
-+ /* number of unformatted nodes which will be created */
-+ unformatted_nodes = TAIL2EXTENT_PAGE_NUM;
-+
-+ /*
-+ * space required for one iteration of extent->tail conversion:
-+ *
-+ * 1. kill N tail items
-+ *
-+ * 2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes
-+ *
-+ * 3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block
-+ * extents) extent units.
-+ *
-+ * 4. drilling to the leaf level by coord_by_key()
-+ *
-+ * 5. possible update of stat-data
-+ *
-+ */
-+ grab_space_enable();
-+ return reiser4_grab_space
-+ (2 * tree->height +
-+ TAIL2EXTENT_PAGE_NUM +
-+ TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) +
-+ 1 + estimate_one_insert_item(tree) +
-+ inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
-+}
-+
-+/* clear stat data's flag indicating that conversion is being converted */
-+static int complete_conversion(struct inode *inode)
-+{
-+ int result;
-+
-+ grab_space_enable();
-+ result =
-+ reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+ if (result == 0) {
-+ reiser4_inode_clr_flag(inode, REISER4_PART_MIXED);
-+ result = reiser4_update_sd(inode);
-+ }
-+ if (result)
-+ warning("vs-1696", "Failed to clear converting bit of %llu: %i",
-+ (unsigned long long)get_inode_oid(inode), result);
-+ return 0;
-+}
-+
-+/**
-+ * find_start
-+ * @inode:
-+ * @id:
-+ * @offset:
-+ *
-+ * this is used by tail2extent and extent2tail to detect where previous
-+ * uncompleted conversion stopped
-+ */
-+static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset)
-+{
-+ int result;
-+ lock_handle lh;
-+ coord_t coord;
-+ unix_file_info_t *ufo;
-+ int found;
-+ reiser4_key key;
-+
-+ ufo = unix_file_inode_data(inode);
-+ init_lh(&lh);
-+ result = 0;
-+ found = 0;
-+ inode_file_plugin(inode)->key_by_inode(inode, *offset, &key);
-+ do {
-+ init_lh(&lh);
-+ result = find_file_item_nohint(&coord, &lh, &key,
-+ ZNODE_READ_LOCK, inode);
-+
-+ if (result == CBK_COORD_FOUND) {
-+ if (coord.between == AT_UNIT) {
-+ /*coord_clear_iplug(&coord); */
-+ result = zload(coord.node);
-+ if (result == 0) {
-+ if (item_id_by_coord(&coord) == id)
-+ found = 1;
-+ else
-+ item_plugin_by_coord(&coord)->s.
-+ file.append_key(&coord,
-+ &key);
-+ zrelse(coord.node);
-+ }
-+ } else
-+ result = RETERR(-ENOENT);
-+ }
-+ done_lh(&lh);
-+ } while (result == 0 && !found);
-+ *offset = get_key_offset(&key);
-+ return result;
-+}
-+
-+/**
-+ * tail2extent
-+ * @uf_info:
-+ *
-+ *
-+ */
-+int tail2extent(unix_file_info_t *uf_info)
-+{
-+ int result;
-+ reiser4_key key; /* key of next byte to be moved to page */
-+ char *p_data; /* data of page */
-+ unsigned page_off = 0, /* offset within the page where to copy data */
-+ count; /* number of bytes of item which can be
-+ * copied to page */
-+ struct page *pages[TAIL2EXTENT_PAGE_NUM];
-+ struct page *page;
-+ int done; /* set to 1 when all file is read */
-+ char *item;
-+ int i;
-+ struct inode *inode;
-+ int first_iteration;
-+ int bytes;
-+ __u64 offset;
-+
-+ assert("nikita-3362", ea_obtained(uf_info));
-+ inode = unix_file_info_to_inode(uf_info);
-+ assert("nikita-3412", !IS_RDONLY(inode));
-+ assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS);
-+ assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
-+
-+ offset = 0;
-+ first_iteration = 1;
-+ result = 0;
-+ if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+ /*
-+ * file is marked on disk as there was a conversion which did
-+ * not complete due to either crash or some error. Find which
-+ * offset tail conversion stopped at
-+ */
-+ result = find_start(inode, FORMATTING_ID, &offset);
-+ if (result == -ENOENT) {
-+ /* no tail items found, everything is converted */
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ complete_conversion(inode);
-+ return 0;
-+ } else if (result != 0)
-+ /* some other error */
-+ return result;
-+ first_iteration = 0;
-+ }
-+
-+ reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
-+
-+ /* get key of first byte of a file */
-+ inode_file_plugin(inode)->key_by_inode(inode, offset, &key);
-+
-+ done = 0;
-+ while (done == 0) {
-+ memset(pages, 0, sizeof(pages));
-+ result = reserve_tail2extent_iteration(inode);
-+ if (result != 0)
-+ goto out;
-+ if (first_iteration) {
-+ reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+ reiser4_update_sd(inode);
-+ first_iteration = 0;
-+ }
-+ bytes = 0;
-+ for (i = 0; i < sizeof_array(pages) && done == 0; i++) {
-+ assert("vs-598",
-+ (get_key_offset(&key) & ~PAGE_CACHE_MASK) == 0);
-+ page = alloc_page(reiser4_ctx_gfp_mask_get());
-+ if (!page) {
-+ result = RETERR(-ENOMEM);
-+ goto error;
-+ }
-+
-+ page->index =
-+ (unsigned long)(get_key_offset(&key) >>
-+ PAGE_CACHE_SHIFT);
-+ /*
-+ * usually when one is going to longterm lock znode (as
-+ * find_file_item does, for instance) he must not hold
-+ * locked pages. However, there is an exception for
-+ * case tail2extent. Pages appearing here are not
-+ * reachable to everyone else, they are clean, they do
-+ * not have jnodes attached so keeping them locked do
-+ * not risk deadlock appearance
-+ */
-+ assert("vs-983", !PagePrivate(page));
-+ reiser4_invalidate_pages(inode->i_mapping, page->index,
-+ 1, 0);
-+
-+ for (page_off = 0; page_off < PAGE_CACHE_SIZE;) {
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ /* get next item */
-+ /* FIXME: we might want to readahead here */
-+ init_lh(&lh);
-+ result =
-+ find_file_item_nohint(&coord, &lh, &key,
-+ ZNODE_READ_LOCK,
-+ inode);
-+ if (result != CBK_COORD_FOUND) {
-+ /*
-+ * error happened of not items of file
-+ * were found
-+ */
-+ done_lh(&lh);
-+ page_cache_release(page);
-+ goto error;
-+ }
-+
-+ if (coord.between == AFTER_UNIT) {
-+ /*
-+ * end of file is reached. Padd page
-+ * with zeros
-+ */
-+ done_lh(&lh);
-+ done = 1;
-+ p_data = kmap_atomic(page, KM_USER0);
-+ memset(p_data + page_off, 0,
-+ PAGE_CACHE_SIZE - page_off);
-+ kunmap_atomic(p_data, KM_USER0);
-+ break;
-+ }
-+
-+ result = zload(coord.node);
-+ if (result) {
-+ page_cache_release(page);
-+ done_lh(&lh);
-+ goto error;
-+ }
-+ assert("vs-856", coord.between == AT_UNIT);
-+ item = ((char *)item_body_by_coord(&coord)) +
-+ coord.unit_pos;
-+
-+ /* how many bytes to copy */
-+ count =
-+ item_length_by_coord(&coord) -
-+ coord.unit_pos;
-+ /* limit length of copy to end of page */
-+ if (count > PAGE_CACHE_SIZE - page_off)
-+ count = PAGE_CACHE_SIZE - page_off;
-+
-+ /*
-+ * copy item (as much as will fit starting from
-+ * the beginning of the item) into the page
-+ */
-+ p_data = kmap_atomic(page, KM_USER0);
-+ memcpy(p_data + page_off, item, count);
-+ kunmap_atomic(p_data, KM_USER0);
-+
-+ page_off += count;
-+ bytes += count;
-+ set_key_offset(&key,
-+ get_key_offset(&key) + count);
-+
-+ zrelse(coord.node);
-+ done_lh(&lh);
-+ } /* end of loop which fills one page by content of
-+ * formatting items */
-+
-+ if (page_off) {
-+ /* something was copied into page */
-+ pages[i] = page;
-+ } else {
-+ page_cache_release(page);
-+ assert("vs-1648", done == 1);
-+ break;
-+ }
-+ } /* end of loop through pages of one conversion iteration */
-+
-+ if (i > 0) {
-+ result = replace(inode, pages, i, bytes);
-+ release_all_pages(pages, sizeof_array(pages));
-+ if (result)
-+ goto error;
-+ /*
-+ * We have to drop exclusive access to avoid deadlock
-+ * which may happen because called by reiser4_writepages
-+ * capture_unix_file requires to get non-exclusive
-+ * access to a file. It is safe to drop EA in the middle
-+ * of tail2extent conversion because write_unix_file,
-+ * setattr_unix_file(truncate), mmap_unix_file,
-+ * release_unix_file(extent2tail) checks if conversion
-+ * is not in progress (see comments before
-+ * get_exclusive_access_careful().
-+ * Other processes that acquire non-exclusive access
-+ * (read_unix_file, reiser4_writepages, etc) should work
-+ * on partially converted files.
-+ */
-+ drop_exclusive_access(uf_info);
-+ /* throttle the conversion */
-+ reiser4_throttle_write(inode);
-+ get_exclusive_access(uf_info);
-+
-+ /*
-+ * nobody is allowed to complete conversion but a
-+ * process which started it
-+ */
-+ assert("", reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED));
-+ }
-+ }
-+
-+ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+
-+ if (result == 0) {
-+ /* file is converted to extent items */
-+ assert("vs-1697", reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED));
-+
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ complete_conversion(inode);
-+ } else {
-+ /*
-+ * conversion is not complete. Inode was already marked as
-+ * REISER4_PART_CONV and stat-data were updated at the first
-+ * iteration of the loop above.
-+ */
-+ error:
-+ release_all_pages(pages, sizeof_array(pages));
-+ warning("nikita-2282", "Partial conversion of %llu: %i",
-+ (unsigned long long)get_inode_oid(inode), result);
-+ }
-+
-+ out:
-+ return result;
-+}
-+
-+static int reserve_extent2tail_iteration(struct inode *inode)
-+{
-+ reiser4_tree *tree;
-+
-+ tree = reiser4_tree_by_inode(inode);
-+ /*
-+ * reserve blocks for (in this order):
-+ *
-+ * 1. removal of extent item
-+ *
-+ * 2. insertion of tail by insert_flow()
-+ *
-+ * 3. drilling to the leaf level by coord_by_key()
-+ *
-+ * 4. possible update of stat-data
-+ */
-+ grab_space_enable();
-+ return reiser4_grab_space
-+ (estimate_one_item_removal(tree) +
-+ estimate_insert_flow(tree->height) +
-+ 1 + estimate_one_insert_item(tree) +
-+ inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
-+}
-+
-+/* for every page of file: read page, cut part of extent pointing to this page,
-+ put data of page tree by tail item */
-+int extent2tail(unix_file_info_t *uf_info)
-+{
-+ int result;
-+ struct inode *inode;
-+ struct page *page;
-+ unsigned long num_pages, i;
-+ unsigned long start_page;
-+ reiser4_key from;
-+ reiser4_key to;
-+ unsigned count;
-+ __u64 offset;
-+
-+ assert("nikita-3362", ea_obtained(uf_info));
-+ inode = unix_file_info_to_inode(uf_info);
-+ assert("nikita-3412", !IS_RDONLY(inode));
-+ assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS);
-+ assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
-+
-+ offset = 0;
-+ if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+ /*
-+ * file is marked on disk as there was a conversion which did
-+ * not complete due to either crash or some error. Find which
-+ * offset tail conversion stopped at
-+ */
-+ result = find_start(inode, EXTENT_POINTER_ID, &offset);
-+ if (result == -ENOENT) {
-+ /* no extent found, everything is converted */
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ complete_conversion(inode);
-+ return 0;
-+ } else if (result != 0)
-+ /* some other error */
-+ return result;
-+ }
-+
-+ reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
-+
-+ /* number of pages in the file */
-+ num_pages =
-+ (inode->i_size + - offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-+ start_page = offset >> PAGE_CACHE_SHIFT;
-+
-+ inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
-+ to = from;
-+
-+ result = 0;
-+ for (i = 0; i < num_pages; i++) {
-+ __u64 start_byte;
-+
-+ result = reserve_extent2tail_iteration(inode);
-+ if (result != 0)
-+ break;
-+ if (i == 0 && offset == 0) {
-+ reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+ reiser4_update_sd(inode);
-+ }
-+
-+ page = read_mapping_page(inode->i_mapping,
-+ (unsigned)(i + start_page), NULL);
-+ if (IS_ERR(page)) {
-+ result = PTR_ERR(page);
-+ break;
-+ }
-+
-+ wait_on_page_locked(page);
-+
-+ if (!PageUptodate(page)) {
-+ page_cache_release(page);
-+ result = RETERR(-EIO);
-+ break;
-+ }
-+
-+ /* cut part of file we have read */
-+ start_byte = (__u64) (i << PAGE_CACHE_SHIFT);
-+ set_key_offset(&from, start_byte);
-+ set_key_offset(&to, start_byte + PAGE_CACHE_SIZE - 1);
-+ /*
-+ * reiser4_cut_tree_object() returns -E_REPEAT to allow atom
-+ * commits during over-long truncates. But
-+ * extent->tail conversion should be performed in one
-+ * transaction.
-+ */
-+ result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from,
-+ &to, inode, 0);
-+
-+ if (result) {
-+ page_cache_release(page);
-+ break;
-+ }
-+
-+ /* put page data into tree via tail_write */
-+ count = PAGE_CACHE_SIZE;
-+ if ((i == (num_pages - 1)) &&
-+ (inode->i_size & ~PAGE_CACHE_MASK))
-+ /* last page can be incompleted */
-+ count = (inode->i_size & ~PAGE_CACHE_MASK);
-+ while (count) {
-+ struct dentry dentry;
-+ struct file file;
-+ loff_t pos;
-+
-+ dentry.d_inode = inode;
-+ file.f_dentry = &dentry;
-+ file.private_data = NULL;
-+ file.f_pos = start_byte;
-+ file.private_data = NULL;
-+ pos = start_byte;
-+ result = reiser4_write_tail(&file,
-+ (char __user *)kmap(page),
-+ count, &pos);
-+ reiser4_free_file_fsdata(&file);
-+ if (result <= 0) {
-+ warning("", "reiser4_write_tail failed");
-+ page_cache_release(page);
-+ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+ return result;
-+ }
-+ count -= result;
-+ }
-+
-+ /* release page */
-+ lock_page(page);
-+ /* page is already detached from jnode and mapping. */
-+ assert("vs-1086", page->mapping == NULL);
-+ assert("nikita-2690",
-+ (!PagePrivate(page) && jprivate(page) == 0));
-+ /* waiting for writeback completion with page lock held is
-+ * perfectly valid. */
-+ wait_on_page_writeback(page);
-+ reiser4_drop_page(page);
-+ /* release reference taken by read_cache_page() above */
-+ page_cache_release(page);
-+
-+ drop_exclusive_access(uf_info);
-+ /* throttle the conversion */
-+ reiser4_throttle_write(inode);
-+ get_exclusive_access(uf_info);
-+ /*
-+ * nobody is allowed to complete conversion but a process which
-+ * started it
-+ */
-+ assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED));
-+ }
-+
-+ reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+
-+ if (i == num_pages) {
-+ /* file is converted to formatted items */
-+ assert("vs-1698", reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED));
-+ assert("vs-1260",
-+ inode_has_no_jnodes(reiser4_inode_data(inode)));
-+
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ complete_conversion(inode);
-+ return 0;
-+ }
-+ /*
-+ * conversion is not complete. Inode was already marked as
-+ * REISER4_PART_MIXED and stat-data were updated at the first *
-+ * iteration of the loop above.
-+ */
-+ warning("nikita-2282",
-+ "Partial conversion of %llu: %lu of %lu: %i",
-+ (unsigned long long)get_inode_oid(inode), i,
-+ num_pages, result);
-+
-+ return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file_ops.c linux-2.6.20/fs/reiser4/plugin/file_ops.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file_ops.c 2007-05-06 14:50:43.787003221 +0400
-@@ -0,0 +1,168 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for some of methods of
-+ struct file_operations and of struct address_space_operations
-+*/
-+
-+#include "../inode.h"
-+#include "object.h"
-+
-+/* file operations */
-+
-+/* implementation of vfs's llseek method of struct file_operations for
-+ typical directory can be found in readdir_common.c
-+*/
-+loff_t reiser4_llseek_dir_common(struct file *, loff_t, int origin);
-+
-+/* implementation of vfs's readdir method of struct file_operations for
-+ typical directory can be found in readdir_common.c
-+*/
-+int reiser4_readdir_common(struct file *, void *dirent, filldir_t);
-+
-+/**
-+ * reiser4_release_dir_common - release of struct file_operations
-+ * @inode: inode of released file
-+ * @file: file to release
-+ *
-+ * Implementation of release method of struct file_operations for typical
-+ * directory. All it does is freeing of reiser4 specific file data.
-+*/
-+int reiser4_release_dir_common(struct inode *inode, struct file *file)
-+{
-+ reiser4_context *ctx;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ reiser4_free_file_fsdata(file);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+/* this is common implementation of vfs's fsync method of struct
-+ file_operations
-+*/
-+int reiser4_sync_common(struct file *file, struct dentry *dentry, int datasync)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ result = txnmgr_force_commit_all(dentry->d_inode->i_sb, 0);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* this is common implementation of vfs's sendfile method of struct
-+ file_operations
-+
-+ Reads @count bytes from @file and calls @actor for every page read. This is
-+ needed for loop back devices support.
-+*/
-+#if 0
-+ssize_t
-+sendfile_common(struct file *file, loff_t *ppos, size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ reiser4_context *ctx;
-+ ssize_t result;
-+
-+ ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ result = generic_file_sendfile(file, ppos, count, actor, target);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+#endif /* 0 */
-+
-+/* address space operations */
-+
-+/* this is common implementation of vfs's prepare_write method of struct
-+ address_space_operations
-+*/
-+int
-+prepare_write_common(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+
-+ ctx = reiser4_init_context(page->mapping->host->i_sb);
-+ result = do_prepare_write(file, page, from, to);
-+
-+ /* don't commit transaction under inode semaphore */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return result;
-+}
-+
-+/* this is helper for prepare_write_common and prepare_write_unix_file
-+ */
-+int
-+do_prepare_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ int result;
-+ file_plugin *fplug;
-+ struct inode *inode;
-+
-+ assert("umka-3099", file != NULL);
-+ assert("umka-3100", page != NULL);
-+ assert("umka-3095", PageLocked(page));
-+
-+ if (to - from == PAGE_CACHE_SIZE || PageUptodate(page))
-+ return 0;
-+
-+ inode = page->mapping->host;
-+ fplug = inode_file_plugin(inode);
-+
-+ if (page->mapping->a_ops->readpage == NULL)
-+ return RETERR(-EINVAL);
-+
-+ result = page->mapping->a_ops->readpage(file, page);
-+ if (result != 0) {
-+ SetPageError(page);
-+ ClearPageUptodate(page);
-+ /* All reiser4 readpage() implementations should return the
-+ * page locked in case of error. */
-+ assert("nikita-3472", PageLocked(page));
-+ } else {
-+ /*
-+ * ->readpage() either:
-+ *
-+ * 1. starts IO against @page. @page is locked for IO in
-+ * this case.
-+ *
-+ * 2. doesn't start IO. @page is unlocked.
-+ *
-+ * In either case, page should be locked.
-+ */
-+ lock_page(page);
-+ /*
-+ * IO (if any) is completed at this point. Check for IO
-+ * errors.
-+ */
-+ if (!PageUptodate(page))
-+ result = RETERR(-EIO);
-+ }
-+ assert("umka-3098", PageLocked(page));
-+ return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file_ops_readdir.c linux-2.6.20/fs/reiser4/plugin/file_ops_readdir.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file_ops_readdir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file_ops_readdir.c 2007-05-06 14:50:43.791004471 +0400
-@@ -0,0 +1,657 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../inode.h"
-+
-+/* return true, iff @coord points to the valid directory item that is part of
-+ * @inode directory. */
-+static int is_valid_dir_coord(struct inode *inode, coord_t * coord)
-+{
-+ return plugin_of_group(item_plugin_by_coord(coord),
-+ DIR_ENTRY_ITEM_TYPE) &&
-+ inode_file_plugin(inode)->owns_item(inode, coord);
-+}
-+
-+/* compare two logical positions within the same directory */
-+static cmp_t dir_pos_cmp(const dir_pos * p1, const dir_pos * p2)
-+{
-+ cmp_t result;
-+
-+ assert("nikita-2534", p1 != NULL);
-+ assert("nikita-2535", p2 != NULL);
-+
-+ result = de_id_cmp(&p1->dir_entry_key, &p2->dir_entry_key);
-+ if (result == EQUAL_TO) {
-+ int diff;
-+
-+ diff = p1->pos - p2->pos;
-+ result =
-+ (diff < 0) ? LESS_THAN : (diff ? GREATER_THAN : EQUAL_TO);
-+ }
-+ return result;
-+}
-+
-+/* see comment before reiser4_readdir_common() for overview of why "adjustment" is
-+ * necessary. */
-+static void
-+adjust_dir_pos(struct file *dir,
-+ readdir_pos * readdir_spot, const dir_pos * mod_point, int adj)
-+{
-+ dir_pos *pos;
-+
-+ /*
-+ * new directory entry was added (adj == +1) or removed (adj == -1) at
-+ * the @mod_point. Directory file descriptor @dir is doing readdir and
-+ * is currently positioned at @readdir_spot. Latter has to be updated
-+ * to maintain stable readdir.
-+ */
-+ /* directory is positioned to the beginning. */
-+ if (readdir_spot->entry_no == 0)
-+ return;
-+
-+ pos = &readdir_spot->position;
-+ switch (dir_pos_cmp(mod_point, pos)) {
-+ case LESS_THAN:
-+ /* @mod_pos is _before_ @readdir_spot, that is, entry was
-+ * added/removed on the left (in key order) of current
-+ * position. */
-+ /* logical number of directory entry readdir is "looking" at
-+ * changes */
-+ readdir_spot->entry_no += adj;
-+ assert("nikita-2577",
-+ ergo(dir != NULL, reiser4_get_dir_fpos(dir) + adj >= 0));
-+ if (de_id_cmp(&pos->dir_entry_key,
-+ &mod_point->dir_entry_key) == EQUAL_TO) {
-+ assert("nikita-2575", mod_point->pos < pos->pos);
-+ /*
-+ * if entry added/removed has the same key as current
-+ * for readdir, update counter of duplicate keys in
-+ * @readdir_spot.
-+ */
-+ pos->pos += adj;
-+ }
-+ break;
-+ case GREATER_THAN:
-+ /* directory is modified after @pos: nothing to do. */
-+ break;
-+ case EQUAL_TO:
-+ /* cannot insert an entry readdir is looking at, because it
-+ already exists. */
-+ assert("nikita-2576", adj < 0);
-+ /* directory entry to which @pos points to is being
-+ removed.
-+
-+ NOTE-NIKITA: Right thing to do is to update @pos to point
-+ to the next entry. This is complex (we are under spin-lock
-+ for one thing). Just rewind it to the beginning. Next
-+ readdir will have to scan the beginning of
-+ directory. Proper solution is to use semaphore in
-+ spin lock's stead and use rewind_right() here.
-+
-+ NOTE-NIKITA: now, semaphore is used, so...
-+ */
-+ memset(readdir_spot, 0, sizeof *readdir_spot);
-+ }
-+}
-+
-+/* scan all file-descriptors for this directory and adjust their
-+ positions respectively. Should be used by implementations of
-+ add_entry and rem_entry of dir plugin */
-+void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
-+ int offset, int adj)
-+{
-+ reiser4_file_fsdata *scan;
-+ dir_pos mod_point;
-+
-+ assert("nikita-2536", dir != NULL);
-+ assert("nikita-2538", de != NULL);
-+ assert("nikita-2539", adj != 0);
-+
-+ build_de_id(dir, &de->d_name, &mod_point.dir_entry_key);
-+ mod_point.pos = offset;
-+
-+ spin_lock_inode(dir);
-+
-+ /*
-+ * new entry was added/removed in directory @dir. Scan all file
-+ * descriptors for @dir that are currently involved into @readdir and
-+ * update them.
-+ */
-+
-+ list_for_each_entry(scan, get_readdir_list(dir), dir.linkage)
-+ adjust_dir_pos(scan->back, &scan->dir.readdir, &mod_point, adj);
-+
-+ spin_unlock_inode(dir);
-+}
-+
-+/*
-+ * traverse tree to start/continue readdir from the readdir position @pos.
-+ */
-+static int dir_go_to(struct file *dir, readdir_pos * pos, tap_t * tap)
-+{
-+ reiser4_key key;
-+ int result;
-+ struct inode *inode;
-+
-+ assert("nikita-2554", pos != NULL);
-+
-+ inode = dir->f_dentry->d_inode;
-+ result = inode_dir_plugin(inode)->build_readdir_key(dir, &key);
-+ if (result != 0)
-+ return result;
-+ result = reiser4_object_lookup(inode,
-+ &key,
-+ tap->coord,
-+ tap->lh,
-+ tap->mode,
-+ FIND_EXACT,
-+ LEAF_LEVEL, LEAF_LEVEL,
-+ 0, &tap->ra_info);
-+ if (result == CBK_COORD_FOUND)
-+ result = rewind_right(tap, (int)pos->position.pos);
-+ else {
-+ tap->coord->node = NULL;
-+ done_lh(tap->lh);
-+ result = RETERR(-EIO);
-+ }
-+ return result;
-+}
-+
-+/*
-+ * handling of non-unique keys: calculate at what ordinal position within
-+ * sequence of directory items with identical keys @pos is.
-+ */
-+static int set_pos(struct inode *inode, readdir_pos * pos, tap_t * tap)
-+{
-+ int result;
-+ coord_t coord;
-+ lock_handle lh;
-+ tap_t scan;
-+ de_id *did;
-+ reiser4_key de_key;
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ reiser4_tap_init(&scan, &coord, &lh, ZNODE_READ_LOCK);
-+ reiser4_tap_copy(&scan, tap);
-+ reiser4_tap_load(&scan);
-+ pos->position.pos = 0;
-+
-+ did = &pos->position.dir_entry_key;
-+
-+ if (is_valid_dir_coord(inode, scan.coord)) {
-+
-+ build_de_id_by_key(unit_key_by_coord(scan.coord, &de_key), did);
-+
-+ while (1) {
-+
-+ result = go_prev_unit(&scan);
-+ if (result != 0)
-+ break;
-+
-+ if (!is_valid_dir_coord(inode, scan.coord)) {
-+ result = -EINVAL;
-+ break;
-+ }
-+
-+ /* get key of directory entry */
-+ unit_key_by_coord(scan.coord, &de_key);
-+ if (de_id_key_cmp(did, &de_key) != EQUAL_TO) {
-+ /* duplicate-sequence is over */
-+ break;
-+ }
-+ pos->position.pos++;
-+ }
-+ } else
-+ result = RETERR(-ENOENT);
-+ reiser4_tap_relse(&scan);
-+ reiser4_tap_done(&scan);
-+ return result;
-+}
-+
-+/*
-+ * "rewind" directory to @offset, i.e., set @pos and @tap correspondingly.
-+ */
-+static int dir_rewind(struct file *dir, readdir_pos * pos, tap_t * tap)
-+{
-+ __u64 destination;
-+ __s64 shift;
-+ int result;
-+ struct inode *inode;
-+ loff_t dirpos;
-+
-+ assert("nikita-2553", dir != NULL);
-+ assert("nikita-2548", pos != NULL);
-+ assert("nikita-2551", tap->coord != NULL);
-+ assert("nikita-2552", tap->lh != NULL);
-+
-+ dirpos = reiser4_get_dir_fpos(dir);
-+ shift = dirpos - pos->fpos;
-+ /* this is logical directory entry within @dir which we are rewinding
-+ * to */
-+ destination = pos->entry_no + shift;
-+
-+ inode = dir->f_dentry->d_inode;
-+ if (dirpos < 0)
-+ return RETERR(-EINVAL);
-+ else if (destination == 0ll || dirpos == 0) {
-+ /* rewind to the beginning of directory */
-+ memset(pos, 0, sizeof *pos);
-+ return dir_go_to(dir, pos, tap);
-+ } else if (destination >= inode->i_size)
-+ return RETERR(-ENOENT);
-+
-+ if (shift < 0) {
-+ /* I am afraid of negative numbers */
-+ shift = -shift;
-+ /* rewinding to the left */
-+ if (shift <= (int)pos->position.pos) {
-+ /* destination is within sequence of entries with
-+ duplicate keys. */
-+ result = dir_go_to(dir, pos, tap);
-+ } else {
-+ shift -= pos->position.pos;
-+ while (1) {
-+ /* repetitions: deadlock is possible when
-+ going to the left. */
-+ result = dir_go_to(dir, pos, tap);
-+ if (result == 0) {
-+ result = rewind_left(tap, shift);
-+ if (result == -E_DEADLOCK) {
-+ reiser4_tap_done(tap);
-+ continue;
-+ }
-+ }
-+ break;
-+ }
-+ }
-+ } else {
-+ /* rewinding to the right */
-+ result = dir_go_to(dir, pos, tap);
-+ if (result == 0)
-+ result = rewind_right(tap, shift);
-+ }
-+ if (result == 0) {
-+ result = set_pos(inode, pos, tap);
-+ if (result == 0) {
-+ /* update pos->position.pos */
-+ pos->entry_no = destination;
-+ pos->fpos = dirpos;
-+ }
-+ }
-+ return result;
-+}
-+
-+/*
-+ * Function that is called by common_readdir() on each directory entry while
-+ * doing readdir. ->filldir callback may block, so we had to release long term
-+ * lock while calling it. To avoid repeating tree traversal, seal is used. If
-+ * seal is broken, we return -E_REPEAT. Node is unlocked in this case.
-+ *
-+ * Whether node is unlocked in case of any other error is undefined. It is
-+ * guaranteed to be still locked if success (0) is returned.
-+ *
-+ * When ->filldir() wants no more, feed_entry() returns 1, and node is
-+ * unlocked.
-+ */
-+static int
-+feed_entry(struct file *f,
-+ readdir_pos * pos, tap_t * tap, filldir_t filldir, void *dirent)
-+{
-+ item_plugin *iplug;
-+ char *name;
-+ reiser4_key sd_key;
-+ int result;
-+ char buf[DE_NAME_BUF_LEN];
-+ char name_buf[32];
-+ char *local_name;
-+ unsigned file_type;
-+ seal_t seal;
-+ coord_t *coord;
-+ reiser4_key entry_key;
-+
-+ coord = tap->coord;
-+ iplug = item_plugin_by_coord(coord);
-+
-+ /* pointer to name within the node */
-+ name = iplug->s.dir.extract_name(coord, buf);
-+ assert("nikita-1371", name != NULL);
-+
-+ /* key of object the entry points to */
-+ if (iplug->s.dir.extract_key(coord, &sd_key) != 0)
-+ return RETERR(-EIO);
-+
-+ /* we must release longterm znode lock before calling filldir to avoid
-+ deadlock which may happen if filldir causes page fault. So, copy
-+ name to intermediate buffer */
-+ if (strlen(name) + 1 > sizeof(name_buf)) {
-+ local_name = kmalloc(strlen(name) + 1,
-+ reiser4_ctx_gfp_mask_get());
-+ if (local_name == NULL)
-+ return RETERR(-ENOMEM);
-+ } else
-+ local_name = name_buf;
-+
-+ strcpy(local_name, name);
-+ file_type = iplug->s.dir.extract_file_type(coord);
-+
-+ unit_key_by_coord(coord, &entry_key);
-+ reiser4_seal_init(&seal, coord, &entry_key);
-+
-+ longterm_unlock_znode(tap->lh);
-+
-+ /*
-+ * send information about directory entry to the ->filldir() filler
-+ * supplied to us by caller (VFS).
-+ *
-+ * ->filldir is entitled to do weird things. For example, ->filldir
-+ * supplied by knfsd re-enters file system. Make sure no locks are
-+ * held.
-+ */
-+ assert("nikita-3436", lock_stack_isclean(get_current_lock_stack()));
-+
-+ reiser4_txn_restart_current();
-+ result = filldir(dirent, name, (int)strlen(name),
-+ /* offset of this entry */
-+ f->f_pos,
-+ /* inode number of object bounden by this entry */
-+ oid_to_uino(get_key_objectid(&sd_key)), file_type);
-+ if (local_name != name_buf)
-+ kfree(local_name);
-+ if (result < 0)
-+ /* ->filldir() is satisfied. (no space in buffer, IOW) */
-+ result = 1;
-+ else
-+ result = reiser4_seal_validate(&seal, coord, &entry_key,
-+ tap->lh, tap->mode,
-+ ZNODE_LOCK_HIPRI);
-+ return result;
-+}
-+
-+static void move_entry(readdir_pos * pos, coord_t * coord)
-+{
-+ reiser4_key de_key;
-+ de_id *did;
-+
-+ /* update @pos */
-+ ++pos->entry_no;
-+ did = &pos->position.dir_entry_key;
-+
-+ /* get key of directory entry */
-+ unit_key_by_coord(coord, &de_key);
-+
-+ if (de_id_key_cmp(did, &de_key) == EQUAL_TO)
-+ /* we are within sequence of directory entries
-+ with duplicate keys. */
-+ ++pos->position.pos;
-+ else {
-+ pos->position.pos = 0;
-+ build_de_id_by_key(&de_key, did);
-+ }
-+ ++pos->fpos;
-+}
-+
-+/*
-+ * STATELESS READDIR
-+ *
-+ * readdir support in reiser4 relies on ability to update readdir_pos embedded
-+ * into reiser4_file_fsdata on each directory modification (name insertion and
-+ * removal), see reiser4_readdir_common() function below. This obviously doesn't
-+ * work when reiser4 is accessed over NFS, because NFS doesn't keep any state
-+ * across client READDIR requests for the same directory.
-+ *
-+ * To address this we maintain a "pool" of detached reiser4_file_fsdata
-+ * (d_cursor). Whenever NFS readdir request comes, we detect this, and try to
-+ * find detached reiser4_file_fsdata corresponding to previous readdir
-+ * request. In other words, additional state is maintained on the
-+ * server. (This is somewhat contrary to the design goals of NFS protocol.)
-+ *
-+ * To efficiently detect when our ->readdir() method is called by NFS server,
-+ * dentry is marked as "stateless" in reiser4_decode_fh() (this is checked by
-+ * file_is_stateless() function).
-+ *
-+ * To find out d_cursor in the pool, we encode client id (cid) in the highest
-+ * bits of NFS readdir cookie: when first readdir request comes to the given
-+ * directory from the given client, cookie is set to 0. This situation is
-+ * detected, global cid_counter is incremented, and stored in highest bits of
-+ * all direntry offsets returned to the client, including last one. As the
-+ * only valid readdir cookie is one obtained as direntry->offset, we are
-+ * guaranteed that next readdir request (continuing current one) will have
-+ * current cid in the highest bits of starting readdir cookie. All d_cursors
-+ * are hashed into per-super-block hash table by (oid, cid) key.
-+ *
-+ * In addition d_cursors are placed into per-super-block radix tree where they
-+ * are keyed by oid alone. This is necessary to efficiently remove them during
-+ * rmdir.
-+ *
-+ * At last, currently unused d_cursors are linked into special list. This list
-+ * is used d_cursor_shrink to reclaim d_cursors on memory pressure.
-+ *
-+ */
-+
-+/*
-+ * prepare for readdir.
-+ */
-+static int dir_readdir_init(struct file *f, tap_t * tap, readdir_pos ** pos)
-+{
-+ struct inode *inode;
-+ reiser4_file_fsdata *fsdata;
-+ int result;
-+
-+ assert("nikita-1359", f != NULL);
-+ inode = f->f_dentry->d_inode;
-+ assert("nikita-1360", inode != NULL);
-+
-+ if (!S_ISDIR(inode->i_mode))
-+ return RETERR(-ENOTDIR);
-+
-+ /* try to find detached readdir state */
-+ result = reiser4_attach_fsdata(f, inode);
-+ if (result != 0)
-+ return result;
-+
-+ fsdata = reiser4_get_file_fsdata(f);
-+ assert("nikita-2571", fsdata != NULL);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+
-+ /* add file descriptor to the readdir list hanging of directory
-+ * inode. This list is used to scan "readdirs-in-progress" while
-+ * inserting or removing names in the directory. */
-+ spin_lock_inode(inode);
-+ if (list_empty_careful(&fsdata->dir.linkage))
-+ list_add(&fsdata->dir.linkage, get_readdir_list(inode));
-+ *pos = &fsdata->dir.readdir;
-+ spin_unlock_inode(inode);
-+
-+ /* move @tap to the current position */
-+ return dir_rewind(f, *pos, tap);
-+}
-+
-+/* this is implementation of vfs's llseek method of struct file_operations for
-+ typical directory
-+ See comment before reiser4_readdir_common() for explanation.
-+*/
-+loff_t reiser4_llseek_dir_common(struct file * file, loff_t off, int origin)
-+{
-+ reiser4_context *ctx;
-+ loff_t result;
-+ struct inode *inode;
-+
-+ inode = file->f_dentry->d_inode;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ mutex_lock(&inode->i_mutex);
-+
-+ /* update ->f_pos */
-+ result = default_llseek(file, off, origin);
-+ if (result >= 0) {
-+ int ff;
-+ coord_t coord;
-+ lock_handle lh;
-+ tap_t tap;
-+ readdir_pos *pos;
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+ ff = dir_readdir_init(file, &tap, &pos);
-+ reiser4_detach_fsdata(file);
-+ if (ff != 0)
-+ result = (loff_t) ff;
-+ reiser4_tap_done(&tap);
-+ }
-+ reiser4_detach_fsdata(file);
-+ mutex_unlock(&inode->i_mutex);
-+
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* this is common implementation of vfs's readdir method of struct
-+ file_operations
-+
-+ readdir problems:
-+
-+ readdir(2)/getdents(2) interface is based on implicit assumption that
-+ readdir can be restarted from any particular point by supplying file system
-+ with off_t-full of data. That is, file system fills ->d_off field in struct
-+ dirent and later user passes ->d_off to the seekdir(3), which is, actually,
-+ implemented by glibc as lseek(2) on directory.
-+
-+ Reiser4 cannot restart readdir from 64 bits of data, because two last
-+ components of the key of directory entry are unknown, which given 128 bits:
-+ locality and type fields in the key of directory entry are always known, to
-+ start readdir() from given point objectid and offset fields have to be
-+ filled.
-+
-+ Traditional UNIX API for scanning through directory
-+ (readdir/seekdir/telldir/opendir/closedir/rewindir/getdents) is based on the
-+ assumption that directory is structured very much like regular file, in
-+ particular, it is implied that each name within given directory (directory
-+ entry) can be uniquely identified by scalar offset and that such offset is
-+ stable across the life-time of the name is identifies.
-+
-+ This is manifestly not so for reiser4. In reiser4 the only stable unique
-+ identifies for the directory entry is its key that doesn't fit into
-+ seekdir/telldir API.
-+
-+ solution:
-+
-+ Within each file descriptor participating in readdir-ing of directory
-+ plugin/dir/dir.h:readdir_pos is maintained. This structure keeps track of
-+ the "current" directory entry that file descriptor looks at. It contains a
-+ key of directory entry (plus some additional info to deal with non-unique
-+ keys that we wouldn't dwell onto here) and a logical position of this
-+ directory entry starting from the beginning of the directory, that is
-+ ordinal number of this entry in the readdir order.
-+
-+ Obviously this logical position is not stable in the face of directory
-+ modifications. To work around this, on each addition or removal of directory
-+ entry all file descriptors for directory inode are scanned and their
-+ readdir_pos are updated accordingly (adjust_dir_pos()).
-+*/
-+int reiser4_readdir_common(struct file *f /* directory file being read */,
-+ void *dirent /* opaque data passed to us by VFS */,
-+ filldir_t filld /* filler function passed to us
-+ * by VFS */)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *inode;
-+ coord_t coord;
-+ lock_handle lh;
-+ tap_t tap;
-+ readdir_pos *pos;
-+
-+ assert("nikita-1359", f != NULL);
-+ inode = f->f_dentry->d_inode;
-+ assert("nikita-1360", inode != NULL);
-+
-+ if (!S_ISDIR(inode->i_mode))
-+ return RETERR(-ENOTDIR);
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+ reiser4_readdir_readahead_init(inode, &tap);
-+
-+ repeat:
-+ result = dir_readdir_init(f, &tap, &pos);
-+ if (result == 0) {
-+ result = reiser4_tap_load(&tap);
-+ /* scan entries one by one feeding them to @filld */
-+ while (result == 0) {
-+ coord_t *coord;
-+
-+ coord = tap.coord;
-+ assert("nikita-2572", coord_is_existing_unit(coord));
-+ assert("nikita-3227", is_valid_dir_coord(inode, coord));
-+
-+ result = feed_entry(f, pos, &tap, filld, dirent);
-+ if (result > 0) {
-+ break;
-+ } else if (result == 0) {
-+ ++f->f_pos;
-+ result = go_next_unit(&tap);
-+ if (result == -E_NO_NEIGHBOR ||
-+ result == -ENOENT) {
-+ result = 0;
-+ break;
-+ } else if (result == 0) {
-+ if (is_valid_dir_coord(inode, coord))
-+ move_entry(pos, coord);
-+ else
-+ break;
-+ }
-+ } else if (result == -E_REPEAT) {
-+ /* feed_entry() had to restart. */
-+ ++f->f_pos;
-+ reiser4_tap_relse(&tap);
-+ goto repeat;
-+ } else
-+ warning("vs-1617",
-+ "reiser4_readdir_common: unexpected error %d",
-+ result);
-+ }
-+ reiser4_tap_relse(&tap);
-+
-+ if (result >= 0)
-+ f->f_version = inode->i_version;
-+ } else if (result == -E_NO_NEIGHBOR || result == -ENOENT)
-+ result = 0;
-+ reiser4_tap_done(&tap);
-+ reiser4_detach_fsdata(f);
-+
-+ /* try to update directory's atime */
-+ if (reiser4_grab_space_force(inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT) != 0)
-+ warning("", "failed to update atime on readdir: %llu",
-+ get_inode_oid(inode));
-+ else
-+ file_accessed(f);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return (result <= 0) ? result : 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/file_plugin_common.c linux-2.6.20/fs/reiser4/plugin/file_plugin_common.c
---- linux-2.6.20.orig/fs/reiser4/plugin/file_plugin_common.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/file_plugin_common.c 2007-05-06 14:50:43.791004471 +0400
-@@ -0,0 +1,1007 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for most of methods of
-+ file plugin
-+*/
-+
-+#include "../inode.h"
-+#include "object.h"
-+#include "../safe_link.h"
-+
-+#include <linux/quotaops.h>
-+
-+static int insert_new_sd(struct inode *inode);
-+static int update_sd(struct inode *inode);
-+
-+/* this is common implementation of write_sd_by_inode method of file plugin
-+ either insert stat data or update it
-+ */
-+int write_sd_by_inode_common(struct inode *inode /* object to save */ )
-+{
-+ int result;
-+
-+ assert("nikita-730", inode != NULL);
-+
-+ if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
-+ /* object doesn't have stat-data yet */
-+ result = insert_new_sd(inode);
-+ else
-+ result = update_sd(inode);
-+ if (result != 0 && result != -ENAMETOOLONG && result != -ENOMEM)
-+ /* Don't issue warnings about "name is too long" */
-+ warning("nikita-2221", "Failed to save sd for %llu: %i",
-+ (unsigned long long)get_inode_oid(inode), result);
-+ return result;
-+}
-+
-+/* this is common implementation of key_by_inode method of file plugin
-+ */
-+int
-+key_by_inode_and_offset_common(struct inode *inode, loff_t off,
-+ reiser4_key * key)
-+{
-+ reiser4_key_init(key);
-+ set_key_locality(key, reiser4_inode_data(inode)->locality_id);
-+ set_key_ordering(key, get_inode_ordering(inode));
-+ set_key_objectid(key, get_inode_oid(inode)); /*FIXME: inode->i_ino */
-+ set_key_type(key, KEY_BODY_MINOR);
-+ set_key_offset(key, (__u64) off);
-+ return 0;
-+}
-+
-+/* this is common implementation of set_plug_in_inode method of file plugin
-+ */
-+int set_plug_in_inode_common(struct inode *object /* inode to set plugin on */ ,
-+ struct inode *parent /* parent object */ ,
-+ reiser4_object_create_data * data /* creational
-+ * data */ )
-+{
-+ __u64 mask;
-+
-+ object->i_mode = data->mode;
-+ /* this should be plugin decision */
-+ object->i_uid = current->fsuid;
-+ object->i_mtime = object->i_atime = object->i_ctime = CURRENT_TIME;
-+
-+ /* support for BSD style group-id assignment. See mount's manual page
-+ description of bsdgroups ext2 mount options for more details */
-+ if (reiser4_is_set(object->i_sb, REISER4_BSD_GID))
-+ object->i_gid = parent->i_gid;
-+ else if (parent->i_mode & S_ISGID) {
-+ /* parent directory has sguid bit */
-+ object->i_gid = parent->i_gid;
-+ if (S_ISDIR(object->i_mode))
-+ /* sguid is inherited by sub-directories */
-+ object->i_mode |= S_ISGID;
-+ } else
-+ object->i_gid = current->fsgid;
-+
-+ /* this object doesn't have stat-data yet */
-+ reiser4_inode_set_flag(object, REISER4_NO_SD);
-+#if 0
-+ /* this is now called after all inode plugins are initialized:
-+ do_create_vfs_child after adjust_to_parent */
-+ /* setup inode and file-operations for this inode */
-+ setup_inode_ops(object, data);
-+#endif
-+ object->i_nlink = 0;
-+ reiser4_seal_init(&reiser4_inode_data(object)->sd_seal, NULL, NULL);
-+ mask = (1 << UNIX_STAT) | (1 << LIGHT_WEIGHT_STAT);
-+ if (!reiser4_is_set(object->i_sb, REISER4_32_BIT_TIMES))
-+ mask |= (1 << LARGE_TIMES_STAT);
-+
-+ reiser4_inode_data(object)->extmask = mask;
-+ return 0;
-+}
-+
-+/* this is common implementation of adjust_to_parent method of file plugin for
-+ regular files
-+ */
-+int adjust_to_parent_common(struct inode *object /* new object */ ,
-+ struct inode *parent /* parent directory */ ,
-+ struct inode *root /* root directory */ )
-+{
-+ assert("nikita-2165", object != NULL);
-+ if (parent == NULL)
-+ parent = root;
-+ assert("nikita-2069", parent != NULL);
-+
-+ /*
-+ * inherit missing plugins from parent
-+ */
-+
-+ grab_plugin_pset(object, parent, PSET_FILE);
-+ grab_plugin_pset(object, parent, PSET_SD);
-+ grab_plugin_pset(object, parent, PSET_FORMATTING);
-+ grab_plugin_pset(object, parent, PSET_PERM);
-+ return 0;
-+}
-+
-+/* this is common implementation of adjust_to_parent method of file plugin for
-+ typical directories
-+ */
-+int adjust_to_parent_common_dir(struct inode *object /* new object */ ,
-+ struct inode *parent /* parent directory */ ,
-+ struct inode *root /* root directory */ )
-+{
-+ int result = 0;
-+ pset_member memb;
-+
-+ assert("nikita-2166", object != NULL);
-+ if (parent == NULL)
-+ parent = root;
-+ assert("nikita-2167", parent != NULL);
-+
-+ /*
-+ * inherit missing plugins from parent
-+ */
-+ for (memb = 0; memb < PSET_LAST; ++memb) {
-+ result = grab_plugin_pset(object, parent, memb);
-+ if (result != 0)
-+ break;
-+ }
-+ return result;
-+}
-+
-+int adjust_to_parent_cryptcompress(struct inode *object /* new object */ ,
-+ struct inode *parent /* parent directory */,
-+ struct inode *root /* root directory */)
-+{
-+ int result;
-+ result = adjust_to_parent_common(object, parent, root);
-+ if (result)
-+ return result;
-+ assert("edward-1416", parent != NULL);
-+
-+ grab_plugin_pset(object, parent, PSET_CLUSTER);
-+ grab_plugin_pset(object, parent, PSET_CIPHER);
-+ grab_plugin_pset(object, parent, PSET_DIGEST);
-+ grab_plugin_pset(object, parent, PSET_COMPRESSION);
-+ grab_plugin_pset(object, parent, PSET_COMPRESSION_MODE);
-+
-+ return 0;
-+}
-+
-+/* this is common implementation of create_object method of file plugin
-+ */
-+int reiser4_create_object_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data * data)
-+{
-+ reiser4_block_nr reserve;
-+ assert("nikita-744", object != NULL);
-+ assert("nikita-745", parent != NULL);
-+ assert("nikita-747", data != NULL);
-+ assert("nikita-748", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+ reserve = estimate_create_common(object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+ return write_sd_by_inode_common(object);
-+}
-+
-+static int common_object_delete_no_reserve(struct inode *inode);
-+
-+/**
-+ * reiser4_delete_object_common - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * This is common implementation of delete_object method of file_plugin. It
-+ * applies to object its deletion consists of removing two items - stat data
-+ * and safe-link.
-+ */
-+int reiser4_delete_object_common(struct inode *inode)
-+{
-+ int result;
-+
-+ assert("nikita-1477", inode != NULL);
-+ /* FIXME: if file body deletion failed (i/o error, for instance),
-+ inode->i_size can be != 0 here */
-+ assert("nikita-3420", inode->i_size == 0 || S_ISLNK(inode->i_mode));
-+ assert("nikita-3421", inode->i_nlink == 0);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
-+ reiser4_block_nr reserve;
-+
-+ /* grab space which is needed to remove 2 items from the tree:
-+ stat data and safe-link */
-+ reserve = 2 *
-+ estimate_one_item_removal(reiser4_tree_by_inode(inode));
-+ if (reiser4_grab_space_force(reserve,
-+ BA_RESERVED | BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+ result = common_object_delete_no_reserve(inode);
-+ } else
-+ result = 0;
-+ return result;
-+}
-+
-+/**
-+ * reiser4_delete_dir_common - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * This is common implementation of delete_object method of file_plugin for
-+ * typical directory. It calls done method of dir_plugin to remove "." and
-+ * removes stat data and safe-link.
-+ */
-+int reiser4_delete_dir_common(struct inode *inode)
-+{
-+ int result;
-+ dir_plugin *dplug;
-+
-+ assert("", (get_current_context() &&
-+ get_current_context()->trans->atom == NULL));
-+
-+ dplug = inode_dir_plugin(inode);
-+ assert("vs-1101", dplug && dplug->done);
-+
-+ /* kill cursors which might be attached to inode */
-+ reiser4_kill_cursors(inode);
-+
-+ /* grab space enough for removing two items */
-+ if (reiser4_grab_space
-+ (2 * estimate_one_item_removal(reiser4_tree_by_inode(inode)),
-+ BA_RESERVED | BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ result = dplug->done(inode);
-+ if (!result)
-+ result = common_object_delete_no_reserve(inode);
-+ return result;
-+}
-+
-+/* this is common implementation of add_link method of file plugin
-+ */
-+int reiser4_add_link_common(struct inode *object, struct inode *parent)
-+{
-+ /*
-+ * increment ->i_nlink and update ->i_ctime
-+ */
-+
-+ INODE_INC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* this is common implementation of rem_link method of file plugin
-+ */
-+int reiser4_rem_link_common(struct inode *object, struct inode *parent)
-+{
-+ assert("nikita-2021", object != NULL);
-+ assert("nikita-2163", object->i_nlink > 0);
-+
-+ /*
-+ * decrement ->i_nlink and update ->i_ctime
-+ */
-+
-+ INODE_DEC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* this is common implementation of rem_link method of file plugin for typical
-+ directory
-+*/
-+int rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG)
-+{
-+ assert("nikita-20211", object != NULL);
-+ assert("nikita-21631", object->i_nlink > 0);
-+
-+ /*
-+ * decrement ->i_nlink and update ->i_ctime
-+ */
-+ INODE_DEC_FIELD(object, i_nlink);
-+ if (object->i_nlink == 1)
-+ INODE_DEC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* this is common implementation of owns_item method of file plugin
-+ compare objectids of keys in inode and coord */
-+int owns_item_common(const struct inode *inode, /* object to check
-+ * against */
-+ const coord_t * coord /* coord to check */ )
-+{
-+ reiser4_key item_key;
-+ reiser4_key file_key;
-+
-+ assert("nikita-760", inode != NULL);
-+ assert("nikita-761", coord != NULL);
-+
-+ return coord_is_existing_item(coord) &&
-+ (get_key_objectid(build_sd_key(inode, &file_key)) ==
-+ get_key_objectid(item_key_by_coord(coord, &item_key)));
-+}
-+
-+/* this is common implementation of owns_item method of file plugin
-+ for typical directory
-+*/
-+int owns_item_common_dir(const struct inode *inode, /* object to check against */
-+ const coord_t * coord /* coord of item to check */ )
-+{
-+ reiser4_key item_key;
-+
-+ assert("nikita-1335", inode != NULL);
-+ assert("nikita-1334", coord != NULL);
-+
-+ if (plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE))
-+ return get_key_locality(item_key_by_coord(coord, &item_key)) ==
-+ get_inode_oid(inode);
-+ else
-+ return owns_item_common(inode, coord);
-+}
-+
-+/* this is common implementation of can_add_link method of file plugin
-+ checks whether yet another hard links to this object can be added
-+*/
-+int can_add_link_common(const struct inode *object /* object to check */ )
-+{
-+ assert("nikita-732", object != NULL);
-+
-+ /* inode->i_nlink is unsigned int, so just check for integer
-+ overflow */
-+ return object->i_nlink + 1 != 0;
-+}
-+
-+/* this is common implementation of can_rem_link method of file plugin for
-+ typical directory
-+*/
-+int can_rem_link_common_dir(const struct inode *inode)
-+{
-+ /* is_dir_empty() returns 0 is dir is empty */
-+ return !is_dir_empty(inode);
-+}
-+
-+/* this is common implementation of detach method of file plugin for typical
-+ directory
-+*/
-+int reiser4_detach_common_dir(struct inode *child, struct inode *parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(child);
-+ assert("nikita-2883", dplug != NULL);
-+ assert("nikita-2884", dplug->detach != NULL);
-+ return dplug->detach(child, parent);
-+}
-+
-+/* this is common implementation of bind method of file plugin for typical
-+ directory
-+*/
-+int reiser4_bind_common_dir(struct inode *child, struct inode *parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(child);
-+ assert("nikita-2646", dplug != NULL);
-+ return dplug->attach(child, parent);
-+}
-+
-+static int process_truncate(struct inode *, __u64 size);
-+
-+/* this is common implementation of safelink method of file plugin
-+ */
-+int safelink_common(struct inode *object, reiser4_safe_link_t link, __u64 value)
-+{
-+ int result;
-+
-+ assert("vs-1705", get_current_context()->trans->atom == NULL);
-+ if (link == SAFE_UNLINK)
-+ /* nothing to do. iput() in the caller (process_safelink) will
-+ * finish with file */
-+ result = 0;
-+ else if (link == SAFE_TRUNCATE)
-+ result = process_truncate(object, value);
-+ else {
-+ warning("nikita-3438", "Unrecognized safe-link type: %i", link);
-+ result = RETERR(-EIO);
-+ }
-+ return result;
-+}
-+
-+/* this is common implementation of estimate.create method of file plugin
-+ can be used when object creation involves insertion of one item (usually stat
-+ data) into tree
-+*/
-+reiser4_block_nr estimate_create_common(const struct inode * object)
-+{
-+ return estimate_one_insert_item(reiser4_tree_by_inode(object));
-+}
-+
-+/* this is common implementation of estimate.create method of file plugin for
-+ typical directory
-+ can be used when directory creation involves insertion of two items (usually
-+ stat data and item containing "." and "..") into tree
-+*/
-+reiser4_block_nr estimate_create_common_dir(const struct inode * object)
-+{
-+ return 2 * estimate_one_insert_item(reiser4_tree_by_inode(object));
-+}
-+
-+/* this is common implementation of estimate.update method of file plugin
-+ can be used when stat data update does not do more than inserting a unit
-+ into a stat data item which is probably true for most cases
-+*/
-+reiser4_block_nr estimate_update_common(const struct inode * inode)
-+{
-+ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
-+}
-+
-+/* this is common implementation of estimate.unlink method of file plugin
-+ */
-+reiser4_block_nr
-+estimate_unlink_common(const struct inode * object UNUSED_ARG,
-+ const struct inode * parent UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* this is common implementation of estimate.unlink method of file plugin for
-+ typical directory
-+*/
-+reiser4_block_nr
-+estimate_unlink_common_dir(const struct inode * object,
-+ const struct inode * parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(object);
-+ assert("nikita-2888", dplug != NULL);
-+ assert("nikita-2887", dplug->estimate.unlink != NULL);
-+ return dplug->estimate.unlink(object, parent);
-+}
-+
-+char *wire_write_common(struct inode *inode, char *start)
-+{
-+ return build_inode_onwire(inode, start);
-+}
-+
-+char *wire_read_common(char *addr, reiser4_object_on_wire * obj)
-+{
-+ return extract_obj_key_id_from_onwire(addr, &obj->u.std.key_id);
-+}
-+
-+struct dentry *wire_get_common(struct super_block *sb,
-+ reiser4_object_on_wire * obj)
-+{
-+ struct inode *inode;
-+ struct dentry *dentry;
-+ reiser4_key key;
-+
-+ extract_key_from_id(&obj->u.std.key_id, &key);
-+ inode = reiser4_iget(sb, &key, 1);
-+ if (!IS_ERR(inode)) {
-+ reiser4_iget_complete(inode);
-+ dentry = d_alloc_anon(inode);
-+ if (dentry == NULL) {
-+ iput(inode);
-+ dentry = ERR_PTR(-ENOMEM);
-+ } else
-+ dentry->d_op = &get_super_private(sb)->ops.dentry;
-+ } else if (PTR_ERR(inode) == -ENOENT)
-+ /*
-+ * inode wasn't found at the key encoded in the file
-+ * handle. Hence, file handle is stale.
-+ */
-+ dentry = ERR_PTR(RETERR(-ESTALE));
-+ else
-+ dentry = (void *)inode;
-+ return dentry;
-+}
-+
-+int wire_size_common(struct inode *inode)
-+{
-+ return inode_onwire_size(inode);
-+}
-+
-+void wire_done_common(reiser4_object_on_wire * obj)
-+{
-+ /* nothing to do */
-+}
-+
-+/* helper function to print errors */
-+static void key_warning(const reiser4_key * key /* key to print */ ,
-+ const struct inode *inode,
-+ int code /* error code to print */ )
-+{
-+ assert("nikita-716", key != NULL);
-+
-+ if (code != -ENOMEM) {
-+ warning("nikita-717", "Error for inode %llu (%i)",
-+ (unsigned long long)get_key_objectid(key), code);
-+ reiser4_print_key("for key", key);
-+ }
-+}
-+
-+/* NIKITA-FIXME-HANS: perhaps this function belongs in another file? */
-+#if REISER4_DEBUG
-+static void
-+check_inode_seal(const struct inode *inode,
-+ const coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key unit_key;
-+
-+ unit_key_by_coord(coord, &unit_key);
-+ assert("nikita-2752",
-+ WITH_DATA_RET(coord->node, 1, keyeq(key, &unit_key)));
-+ assert("nikita-2753", get_inode_oid(inode) == get_key_objectid(key));
-+}
-+
-+static void check_sd_coord(coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key ukey;
-+
-+ coord_clear_iplug(coord);
-+ if (zload(coord->node))
-+ return;
-+
-+ if (!coord_is_existing_unit(coord) ||
-+ !item_plugin_by_coord(coord) ||
-+ !keyeq(unit_key_by_coord(coord, &ukey), key) ||
-+ (znode_get_level(coord->node) != LEAF_LEVEL) ||
-+ !item_is_statdata(coord)) {
-+ warning("nikita-1901", "Conspicuous seal");
-+ reiser4_print_key("key", key);
-+ print_coord("coord", coord, 1);
-+ impossible("nikita-2877", "no way");
-+ }
-+ zrelse(coord->node);
-+}
-+
-+#else
-+#define check_inode_seal(inode, coord, key) noop
-+#define check_sd_coord(coord, key) noop
-+#endif
-+
-+/* insert new stat-data into tree. Called with inode state
-+ locked. Return inode state locked. */
-+static int insert_new_sd(struct inode *inode /* inode to create sd for */ )
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t coord;
-+ reiser4_item_data data;
-+ char *area;
-+ reiser4_inode *ref;
-+ lock_handle lh;
-+ oid_t oid;
-+
-+ assert("nikita-723", inode != NULL);
-+ assert("nikita-3406", reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ ref = reiser4_inode_data(inode);
-+ spin_lock_inode(inode);
-+
-+ if (ref->plugin_mask != 0)
-+ /* inode has non-standard plugins */
-+ inode_set_extension(inode, PLUGIN_STAT);
-+ /*
-+ * prepare specification of new item to be inserted
-+ */
-+
-+ data.iplug = inode_sd_plugin(inode);
-+ data.length = data.iplug->s.sd.save_len(inode);
-+ spin_unlock_inode(inode);
-+
-+ data.data = NULL;
-+ data.user = 0;
-+/* could be optimized for case where there is only one node format in
-+ * use in the filesystem, probably there are lots of such
-+ * places we could optimize for only one node layout.... -Hans */
-+ if (data.length > reiser4_tree_by_inode(inode)->nplug->max_item_size()){
-+ /* This is silly check, but we don't know actual node where
-+ insertion will go into. */
-+ return RETERR(-ENAMETOOLONG);
-+ }
-+ oid = oid_allocate(inode->i_sb);
-+/* NIKITA-FIXME-HANS: what is your opinion on whether this error check should be encapsulated into oid_allocate? */
-+ if (oid == ABSOLUTE_MAX_OID)
-+ return RETERR(-EOVERFLOW);
-+
-+ set_inode_oid(inode, oid);
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+
-+ result = insert_by_key(reiser4_tree_by_inode(inode),
-+ build_sd_key(inode, &key), &data, &coord, &lh,
-+ /* stat data lives on a leaf level */
-+ LEAF_LEVEL, CBK_UNIQUE);
-+
-+ /* we don't want to re-check that somebody didn't insert
-+ stat-data while we were doing io, because if it did,
-+ insert_by_key() returned error. */
-+ /* but what _is_ possible is that plugin for inode's stat-data,
-+ list of non-standard plugins or their state would change
-+ during io, so that stat-data wouldn't fit into sd. To avoid
-+ this race we keep inode_state lock. This lock has to be
-+ taken each time you access inode in a way that would cause
-+ changes in sd size: changing plugins etc.
-+ */
-+
-+ if (result == IBK_INSERT_OK) {
-+ coord_clear_iplug(&coord);
-+ result = zload(coord.node);
-+ if (result == 0) {
-+ /* have we really inserted stat data? */
-+ assert("nikita-725", item_is_statdata(&coord));
-+
-+ /* inode was just created. It is inserted into hash
-+ table, but no directory entry was yet inserted into
-+ parent. So, inode is inaccessible through
-+ ->lookup(). All places that directly grab inode
-+ from hash-table (like old knfsd), should check
-+ IMMUTABLE flag that is set by common_create_child.
-+ */
-+ assert("nikita-3240", data.iplug != NULL);
-+ assert("nikita-3241", data.iplug->s.sd.save != NULL);
-+ area = item_body_by_coord(&coord);
-+ result = data.iplug->s.sd.save(inode, &area);
-+ znode_make_dirty(coord.node);
-+ if (result == 0) {
-+ /* object has stat-data now */
-+ reiser4_inode_clr_flag(inode, REISER4_NO_SD);
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+ /* initialise stat-data seal */
-+ reiser4_seal_init(&ref->sd_seal, &coord, &key);
-+ ref->sd_coord = coord;
-+ check_inode_seal(inode, &coord, &key);
-+ } else if (result != -ENOMEM)
-+ /*
-+ * convert any other error code to -EIO to
-+ * avoid confusing user level with unexpected
-+ * errors.
-+ */
-+ result = RETERR(-EIO);
-+ zrelse(coord.node);
-+ }
-+ }
-+ done_lh(&lh);
-+
-+ if (result != 0)
-+ key_warning(&key, inode, result);
-+ else
-+ oid_count_allocated();
-+
-+ return result;
-+}
-+
-+/* find sd of inode in a tree, deal with errors */
-+int lookup_sd(struct inode *inode /* inode to look sd for */ ,
-+ znode_lock_mode lock_mode /* lock mode */ ,
-+ coord_t * coord /* resulting coord */ ,
-+ lock_handle * lh /* resulting lock handle */ ,
-+ const reiser4_key * key /* resulting key */ ,
-+ int silent)
-+{
-+ int result;
-+ __u32 flags;
-+
-+ assert("nikita-1692", inode != NULL);
-+ assert("nikita-1693", coord != NULL);
-+ assert("nikita-1694", key != NULL);
-+
-+ /* look for the object's stat data in a tree.
-+ This returns in "node" pointer to a locked znode and in "pos"
-+ position of an item found in node. Both are only valid if
-+ coord_found is returned. */
-+ flags = (lock_mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
-+ flags |= CBK_UNIQUE;
-+ /*
-+ * traverse tree to find stat data. We cannot use vroot here, because
-+ * it only covers _body_ of the file, and stat data don't belong
-+ * there.
-+ */
-+ result = coord_by_key(reiser4_tree_by_inode(inode),
-+ key,
-+ coord,
-+ lh,
-+ lock_mode,
-+ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, flags, NULL);
-+ if (REISER4_DEBUG && result == 0)
-+ check_sd_coord(coord, key);
-+
-+ if (result != 0 && !silent)
-+ key_warning(key, inode, result);
-+ return result;
-+}
-+
-+static int
-+locate_inode_sd(struct inode *inode,
-+ reiser4_key * key, coord_t * coord, lock_handle * lh)
-+{
-+ reiser4_inode *state;
-+ seal_t seal;
-+ int result;
-+
-+ assert("nikita-3483", inode != NULL);
-+
-+ state = reiser4_inode_data(inode);
-+ spin_lock_inode(inode);
-+ *coord = state->sd_coord;
-+ coord_clear_iplug(coord);
-+ seal = state->sd_seal;
-+ spin_unlock_inode(inode);
-+
-+ build_sd_key(inode, key);
-+ if (reiser4_seal_is_set(&seal)) {
-+ /* first, try to use seal */
-+ result = reiser4_seal_validate(&seal,
-+ coord,
-+ key,
-+ lh, ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (result == 0)
-+ check_sd_coord(coord, key);
-+ } else
-+ result = -E_REPEAT;
-+
-+ if (result != 0) {
-+ coord_init_zero(coord);
-+ result = lookup_sd(inode, ZNODE_WRITE_LOCK, coord, lh, key, 0);
-+ }
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
-+{
-+ return (get_key_locality(k1) == get_key_locality(k2) &&
-+ get_key_type(k1) == get_key_type(k2) &&
-+ get_key_band(k1) == get_key_band(k2) &&
-+ get_key_ordering(k1) == get_key_ordering(k2) &&
-+ get_key_objectid(k1) == get_key_objectid(k2));
-+}
-+
-+#include "../tree_walk.h"
-+
-+/* make some checks before and after stat-data resize operation */
-+static int check_sd_resize(struct inode * inode, coord_t * coord,
-+ int length, int progress /* 1 means after resize */)
-+{
-+ int ret = 0;
-+ lock_handle left_lock;
-+ coord_t left_coord;
-+ reiser4_key left_key;
-+ reiser4_key key;
-+
-+ if (inode_file_plugin(inode) !=
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
-+ return 0;
-+ if (!length)
-+ return 0;
-+ if (coord->item_pos != 0)
-+ return 0;
-+
-+ init_lh(&left_lock);
-+ ret = reiser4_get_left_neighbor(&left_lock,
-+ coord->node,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
-+ ret == -ENOENT || ret == -EINVAL
-+ || ret == -E_DEADLOCK) {
-+ ret = 0;
-+ goto exit;
-+ }
-+ ret = zload(left_lock.node);
-+ if (ret)
-+ goto exit;
-+ coord_init_last_unit(&left_coord, left_lock.node);
-+ item_key_by_coord(&left_coord, &left_key);
-+ item_key_by_coord(coord, &key);
-+
-+ if (all_but_offset_key_eq(&key, &left_key))
-+ /* corruption occured */
-+ ret = 1;
-+ zrelse(left_lock.node);
-+ exit:
-+ done_lh(&left_lock);
-+ return ret;
-+}
-+#endif
-+
-+/* update stat-data at @coord */
-+static int
-+update_sd_at(struct inode *inode, coord_t * coord, reiser4_key * key,
-+ lock_handle * lh)
-+{
-+ int result;
-+ reiser4_item_data data;
-+ char *area;
-+ reiser4_inode *state;
-+ znode *loaded;
-+
-+ state = reiser4_inode_data(inode);
-+
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (result != 0)
-+ return result;
-+ loaded = coord->node;
-+
-+ spin_lock_inode(inode);
-+ assert("nikita-728", inode_sd_plugin(inode) != NULL);
-+ data.iplug = inode_sd_plugin(inode);
-+
-+ /* if inode has non-standard plugins, add appropriate stat data
-+ * extension */
-+ if (state->extmask & (1 << PLUGIN_STAT)) {
-+ if (state->plugin_mask == 0)
-+ inode_clr_extension(inode, PLUGIN_STAT);
-+ } else if (state->plugin_mask != 0)
-+ inode_set_extension(inode, PLUGIN_STAT);
-+
-+ if (state->extmask & (1 << HEIR_STAT)) {
-+ if (state->heir_mask == 0)
-+ inode_clr_extension(inode, HEIR_STAT);
-+ } else if (state->heir_mask != 0)
-+ inode_set_extension(inode, HEIR_STAT);
-+
-+ /* data.length is how much space to add to (or remove
-+ from if negative) sd */
-+ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
-+ /* recalculate stat-data length */
-+ data.length =
-+ data.iplug->s.sd.save_len(inode) -
-+ item_length_by_coord(coord);
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+ } else
-+ data.length = 0;
-+ spin_unlock_inode(inode);
-+
-+ /* if on-disk stat data is of different length than required
-+ for this inode, resize it */
-+
-+ if (data.length != 0) {
-+ data.data = NULL;
-+ data.user = 0;
-+
-+ assert("edward-1441",
-+ !check_sd_resize(inode, coord,
-+ data.length, 0/* before resize */));
-+
-+ /* insertion code requires that insertion point (coord) was
-+ * between units. */
-+ coord->between = AFTER_UNIT;
-+ result = reiser4_resize_item(coord, &data, key, lh,
-+ COPI_DONT_SHIFT_LEFT);
-+ if (result != 0) {
-+ key_warning(key, inode, result);
-+ zrelse(loaded);
-+ return result;
-+ }
-+ if (loaded != coord->node) {
-+ /* reiser4_resize_item moved coord to another node.
-+ Zload it */
-+ zrelse(loaded);
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (result != 0)
-+ return result;
-+ loaded = coord->node;
-+ }
-+ assert("edward-1442",
-+ !check_sd_resize(inode, coord,
-+ data.length, 1/* after resize */));
-+ }
-+ area = item_body_by_coord(coord);
-+ spin_lock_inode(inode);
-+ result = data.iplug->s.sd.save(inode, &area);
-+ znode_make_dirty(coord->node);
-+
-+ /* re-initialise stat-data seal */
-+
-+ /*
-+ * coord.between was possibly skewed from AT_UNIT when stat-data size
-+ * was changed and new extensions were pasted into item.
-+ */
-+ coord->between = AT_UNIT;
-+ reiser4_seal_init(&state->sd_seal, coord, key);
-+ state->sd_coord = *coord;
-+ spin_unlock_inode(inode);
-+ check_inode_seal(inode, coord, key);
-+ zrelse(loaded);
-+ return result;
-+}
-+
-+/* Update existing stat-data in a tree. Called with inode state locked. Return
-+ inode state locked. */
-+static int update_sd(struct inode *inode /* inode to update sd for */ )
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ assert("nikita-726", inode != NULL);
-+
-+ /* no stat-data, nothing to update?! */
-+ assert("nikita-3482", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ init_lh(&lh);
-+
-+ result = locate_inode_sd(inode, &key, &coord, &lh);
-+ if (result == 0)
-+ result = update_sd_at(inode, &coord, &key, &lh);
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+/* helper for reiser4_delete_object_common and reiser4_delete_dir_common.
-+ Remove object stat data. Space for that must be reserved by caller before
-+*/
-+static int
-+common_object_delete_no_reserve(struct inode *inode /* object to remove */ )
-+{
-+ int result;
-+
-+ assert("nikita-1477", inode != NULL);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
-+ reiser4_key sd_key;
-+
-+ DQUOT_FREE_INODE(inode);
-+ DQUOT_DROP(inode);
-+
-+ build_sd_key(inode, &sd_key);
-+ result =
-+ reiser4_cut_tree(reiser4_tree_by_inode(inode),
-+ &sd_key, &sd_key, NULL, 0);
-+ if (result == 0) {
-+ reiser4_inode_set_flag(inode, REISER4_NO_SD);
-+ result = oid_release(inode->i_sb, get_inode_oid(inode));
-+ if (result == 0) {
-+ oid_count_released();
-+
-+ result = safe_link_del(reiser4_tree_by_inode(inode),
-+ get_inode_oid(inode),
-+ SAFE_UNLINK);
-+ }
-+ }
-+ } else
-+ result = 0;
-+ return result;
-+}
-+
-+/* helper for safelink_common */
-+static int process_truncate(struct inode *inode, __u64 size)
-+{
-+ int result;
-+ struct iattr attr;
-+ file_plugin *fplug;
-+ reiser4_context *ctx;
-+ struct dentry dentry;
-+
-+ assert("vs-21", is_in_reiser4_context());
-+ ctx = reiser4_init_context(inode->i_sb);
-+ assert("vs-22", !IS_ERR(ctx));
-+
-+ attr.ia_size = size;
-+ attr.ia_valid = ATTR_SIZE | ATTR_CTIME;
-+ fplug = inode_file_plugin(inode);
-+
-+ mutex_lock(&inode->i_mutex);
-+ assert("vs-1704", get_current_context()->trans->atom == NULL);
-+ dentry.d_inode = inode;
-+ result = inode->i_op->setattr(&dentry, &attr);
-+ mutex_unlock(&inode->i_mutex);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return result;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/hash.c linux-2.6.20/fs/reiser4/plugin/hash.c
---- linux-2.6.20.orig/fs/reiser4/plugin/hash.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/hash.c 2007-05-06 14:50:43.791004471 +0400
-@@ -0,0 +1,353 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Hash functions */
-+
-+#include "../debug.h"
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../super.h"
-+#include "../inode.h"
-+
-+#include <linux/types.h>
-+
-+/* old rupasov (yura) hash */
-+static __u64 hash_rupasov(const unsigned char *name /* name to hash */ ,
-+ int len /* @name's length */ )
-+{
-+ int i;
-+ int j;
-+ int pow;
-+ __u64 a;
-+ __u64 c;
-+
-+ assert("nikita-672", name != NULL);
-+ assert("nikita-673", len >= 0);
-+
-+ for (pow = 1, i = 1; i < len; ++i)
-+ pow = pow * 10;
-+
-+ if (len == 1)
-+ a = name[0] - 48;
-+ else
-+ a = (name[0] - 48) * pow;
-+
-+ for (i = 1; i < len; ++i) {
-+ c = name[i] - 48;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+ for (; i < 40; ++i) {
-+ c = '0' - 48;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+
-+ for (; i < 256; ++i) {
-+ c = i;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+
-+ a = a << 7;
-+ return a;
-+}
-+
-+/* r5 hash */
-+static __u64 hash_r5(const unsigned char *name /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ __u64 a = 0;
-+
-+ assert("nikita-674", name != NULL);
-+ assert("nikita-675", len >= 0);
-+
-+ while (*name) {
-+ a += *name << 4;
-+ a += *name >> 4;
-+ a *= 11;
-+ name++;
-+ }
-+ return a;
-+}
-+
-+/* Keyed 32-bit hash function using TEA in a Davis-Meyer function
-+ H0 = Key
-+ Hi = E Mi(Hi-1) + Hi-1
-+
-+ (see Applied Cryptography, 2nd edition, p448).
-+
-+ Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
-+
-+ Jeremy has agreed to the contents of reiserfs/README. -Hans
-+
-+ This code was blindly upgraded to __u64 by s/__u32/__u64/g.
-+*/
-+static __u64 hash_tea(const unsigned char *name /* name to hash */ ,
-+ int len /* @name's length */ )
-+{
-+ __u64 k[] = { 0x9464a485u, 0x542e1a94u, 0x3e846bffu, 0xb75bcfc3u };
-+
-+ __u64 h0 = k[0], h1 = k[1];
-+ __u64 a, b, c, d;
-+ __u64 pad;
-+ int i;
-+
-+ assert("nikita-676", name != NULL);
-+ assert("nikita-677", len >= 0);
-+
-+#define DELTA 0x9E3779B9u
-+#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
-+#define PARTROUNDS 6 /* 6 gets complete mixing */
-+
-+/* a, b, c, d - data; h0, h1 - accumulated hash */
-+#define TEACORE(rounds) \
-+ do { \
-+ __u64 sum = 0; \
-+ int n = rounds; \
-+ __u64 b0, b1; \
-+ \
-+ b0 = h0; \
-+ b1 = h1; \
-+ \
-+ do \
-+ { \
-+ sum += DELTA; \
-+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \
-+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \
-+ } while(--n); \
-+ \
-+ h0 += b0; \
-+ h1 += b1; \
-+ } while(0)
-+
-+ pad = (__u64) len | ((__u64) len << 8);
-+ pad |= pad << 16;
-+
-+ while (len >= 16) {
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
-+ 16 | (__u64) name[11] << 24;
-+ d = (__u64) name[12] | (__u64) name[13] << 8 | (__u64) name[14]
-+ << 16 | (__u64) name[15] << 24;
-+
-+ TEACORE(PARTROUNDS);
-+
-+ len -= 16;
-+ name += 16;
-+ }
-+
-+ if (len >= 12) {
-+ //assert(len < 16);
-+ if (len >= 16)
-+ *(int *)0 = 0;
-+
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
-+ 16 | (__u64) name[11] << 24;
-+
-+ d = pad;
-+ for (i = 12; i < len; i++) {
-+ d <<= 8;
-+ d |= name[i];
-+ }
-+ } else if (len >= 8) {
-+ //assert(len < 12);
-+ if (len >= 12)
-+ *(int *)0 = 0;
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+
-+ c = d = pad;
-+ for (i = 8; i < len; i++) {
-+ c <<= 8;
-+ c |= name[i];
-+ }
-+ } else if (len >= 4) {
-+ //assert(len < 8);
-+ if (len >= 8)
-+ *(int *)0 = 0;
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+
-+ b = c = d = pad;
-+ for (i = 4; i < len; i++) {
-+ b <<= 8;
-+ b |= name[i];
-+ }
-+ } else {
-+ //assert(len < 4);
-+ if (len >= 4)
-+ *(int *)0 = 0;
-+ a = b = c = d = pad;
-+ for (i = 0; i < len; i++) {
-+ a <<= 8;
-+ a |= name[i];
-+ }
-+ }
-+
-+ TEACORE(FULLROUNDS);
-+
-+/* return 0;*/
-+ return h0 ^ h1;
-+
-+}
-+
-+/* classical 64 bit Fowler/Noll/Vo-1 (FNV-1) hash.
-+
-+ See http://www.isthe.com/chongo/tech/comp/fnv/ for details.
-+
-+ Excerpts:
-+
-+ FNV hashes are designed to be fast while maintaining a low collision
-+ rate.
-+
-+ [This version also seems to preserve lexicographical order locally.]
-+
-+ FNV hash algorithms and source code have been released into the public
-+ domain.
-+
-+*/
-+static __u64 hash_fnv1(const unsigned char *name /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ unsigned long long a = 0xcbf29ce484222325ull;
-+ const unsigned long long fnv_64_prime = 0x100000001b3ull;
-+
-+ assert("nikita-678", name != NULL);
-+ assert("nikita-679", len >= 0);
-+
-+ /* FNV-1 hash each octet in the buffer */
-+ for (; *name; ++name) {
-+ /* multiply by the 32 bit FNV magic prime mod 2^64 */
-+ a *= fnv_64_prime;
-+ /* xor the bottom with the current octet */
-+ a ^= (unsigned long long)(*name);
-+ }
-+ /* return our new hash value */
-+ return a;
-+}
-+
-+/* degenerate hash function used to simplify testing of non-unique key
-+ handling */
-+static __u64 hash_deg(const unsigned char *name UNUSED_ARG /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ return 0xc0c0c0c010101010ull;
-+}
-+
-+static int change_hash(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ int result;
-+
-+ assert("nikita-3503", inode != NULL);
-+ assert("nikita-3504", plugin != NULL);
-+
-+ assert("nikita-3505", is_reiser4_inode(inode));
-+ assert("nikita-3507", plugin->h.type_id == REISER4_HASH_PLUGIN_TYPE);
-+
-+ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+ return RETERR(-EINVAL);
-+
-+ result = 0;
-+ if (inode_hash_plugin(inode) == NULL ||
-+ inode_hash_plugin(inode)->h.id != plugin->h.id) {
-+ if (is_dir_empty(inode) == 0)
-+ result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_HASH, plugin);
-+ else
-+ result = RETERR(-ENOTEMPTY);
-+
-+ }
-+ return result;
-+}
-+
-+static reiser4_plugin_ops hash_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_hash
-+};
-+
-+/* hash plugins */
-+hash_plugin hash_plugins[LAST_HASH_ID] = {
-+ [RUPASOV_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = RUPASOV_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "rupasov",
-+ .desc = "Original Yura's hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_rupasov
-+ },
-+ [R5_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = R5_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "r5",
-+ .desc = "r5 hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_r5
-+ },
-+ [TEA_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = TEA_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "tea",
-+ .desc = "tea hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_tea
-+ },
-+ [FNV1_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = FNV1_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "fnv1",
-+ .desc = "fnv1 hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_fnv1
-+ },
-+ [DEGENERATE_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = DEGENERATE_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "degenerate hash",
-+ .desc = "Degenerate hash: only for testing",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_deg
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/inode_ops.c linux-2.6.20/fs/reiser4/plugin/inode_ops.c
---- linux-2.6.20.orig/fs/reiser4/plugin/inode_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/inode_ops.c 2007-05-06 14:50:43.795005721 +0400
-@@ -0,0 +1,897 @@
-+/*
-+ * Copyright 2005 by Hans Reiser, licensing governed by reiser4/README
-+ */
-+
-+/*
-+ * this file contains typical implementations for most of methods of struct
-+ * inode_operations
-+ */
-+
-+#include "../inode.h"
-+#include "../safe_link.h"
-+
-+#include <linux/quotaops.h>
-+#include <linux/namei.h>
-+
-+static int create_vfs_object(struct inode *parent, struct dentry *dentry,
-+ reiser4_object_create_data *data);
-+
-+/**
-+ * reiser4_create_common - create of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of new object to create
-+ * @mode: the permissions to use
-+ * @nameidata:
-+ *
-+ * This is common implementation of vfs's create method of struct
-+ * inode_operations.
-+ * Creates regular file using file plugin from parent directory plugin set.
-+ */
-+int reiser4_create_common(struct inode *parent, struct dentry *dentry,
-+ int mode, struct nameidata *nameidata)
-+{
-+ reiser4_object_create_data data;
-+ file_plugin *fplug;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = S_IFREG | mode;
-+ fplug = child_create_plugin(parent) ? : inode_create_plugin(parent);
-+ if (!plugin_of_group(fplug, REISER4_REGULAR_FILE)) {
-+ warning("vpf-1900", "'%s' is not a regular file plugin.",
-+ fplug->h.label);
-+ return RETERR(-EIO);
-+ }
-+ data.id = fplug->h.id;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+int reiser4_lookup_name(struct inode *dir, struct dentry *, reiser4_key *);
-+void check_light_weight(struct inode *inode, struct inode *parent);
-+
-+/**
-+ * reiser4_lookup_common - lookup of inode operations
-+ * @parent: inode of directory to lookup into
-+ * @dentry: name to look for
-+ * @nameidata:
-+ *
-+ * This is common implementation of vfs's lookup method of struct
-+ * inode_operations.
-+ */
-+struct dentry *reiser4_lookup_common(struct inode *parent,
-+ struct dentry *dentry,
-+ struct nameidata *nameidata)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct dentry *new;
-+ struct inode *inode;
-+ reiser4_dir_entry_desc entry;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return (struct dentry *)ctx;
-+
-+ /* set up operations on dentry. */
-+ dentry->d_op = &get_super_private(parent->i_sb)->ops.dentry;
-+
-+ result = reiser4_lookup_name(parent, dentry, &entry.key);
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ if (result == -ENOENT) {
-+ /* object not found */
-+ if (!IS_DEADDIR(parent))
-+ d_add(dentry, NULL);
-+ return NULL;
-+ }
-+ return ERR_PTR(result);
-+ }
-+
-+ inode = reiser4_iget(parent->i_sb, &entry.key, 0);
-+ if (IS_ERR(inode)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return ERR_PTR(PTR_ERR(inode));
-+ }
-+
-+ /* success */
-+ check_light_weight(inode, parent);
-+ new = d_splice_alias(inode, dentry);
-+ reiser4_iget_complete(inode);
-+
-+ /* prevent balance_dirty_pages() from being called: we don't want to
-+ * do this under directory i_mutex. */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return new;
-+}
-+
-+static reiser4_block_nr common_estimate_link(struct inode *parent,
-+ struct inode *object);
-+int reiser4_update_dir(struct inode *);
-+
-+/**
-+ * reiser4_link_common - link of inode operations
-+ * @existing: dentry of object which is to get new name
-+ * @parent: directory where new name is to be created
-+ * @newname: new name
-+ *
-+ * This is common implementation of vfs's link method of struct
-+ * inode_operations.
-+ */
-+int reiser4_link_common(struct dentry *existing, struct inode *parent,
-+ struct dentry *newname)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *object;
-+ dir_plugin *parent_dplug;
-+ reiser4_dir_entry_desc entry;
-+ reiser4_object_create_data data;
-+ reiser4_block_nr reserve;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ assert("nikita-1431", existing != NULL);
-+ assert("nikita-1432", parent != NULL);
-+ assert("nikita-1433", newname != NULL);
-+
-+ object = existing->d_inode;
-+ assert("nikita-1434", object != NULL);
-+
-+ /* check for race with create_object() */
-+ if (reiser4_inode_get_flag(object, REISER4_IMMUTABLE)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ parent_dplug = inode_dir_plugin(parent);
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = object;
-+
-+ data.mode = object->i_mode;
-+ data.id = inode_file_plugin(object)->h.id;
-+
-+ reserve = common_estimate_link(parent, existing->d_inode);
-+ if ((__s64) reserve < 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return reserve;
-+ }
-+
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOSPC);
-+ }
-+
-+ /*
-+ * Subtle race handling: sys_link() doesn't take i_mutex on @parent. It
-+ * means that link(2) can race against unlink(2) or rename(2), and
-+ * inode is dead (->i_nlink == 0) when reiser4_link() is entered.
-+ *
-+ * For such inode we have to undo special processing done in
-+ * reiser4_unlink() viz. creation of safe-link.
-+ */
-+ if (unlikely(object->i_nlink == 0)) {
-+ result = safe_link_del(reiser4_tree_by_inode(object),
-+ get_inode_oid(object), SAFE_UNLINK);
-+ if (result != 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ }
-+
-+ /* increment nlink of @existing and update its stat data */
-+ result = reiser4_add_nlink(object, parent, 1);
-+ if (result == 0) {
-+ /* add entry to the parent */
-+ result =
-+ parent_dplug->add_entry(parent, newname, &data, &entry);
-+ if (result != 0) {
-+ /* failed to add entry to the parent, decrement nlink
-+ of @existing */
-+ reiser4_del_nlink(object, parent, 1);
-+ /*
-+ * now, if that failed, we have a file with too big
-+ * nlink---space leak, much better than directory
-+ * entry pointing to nowhere
-+ */
-+ }
-+ }
-+ if (result == 0) {
-+ atomic_inc(&object->i_count);
-+ /*
-+ * Upon successful completion, link() shall mark for update
-+ * the st_ctime field of the file. Also, the st_ctime and
-+ * st_mtime fields of the directory that contains the new
-+ * entry shall be marked for update. --SUS
-+ */
-+ result = reiser4_update_dir(parent);
-+ }
-+ if (result == 0)
-+ d_instantiate(newname, existing->d_inode);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+static int unlink_check_and_grab(struct inode *parent, struct dentry *victim);
-+
-+/**
-+ * reiser4_unlink_common - unlink of inode operations
-+ * @parent: inode of directory to remove name from
-+ * @victim: name to be removed
-+ *
-+ * This is common implementation of vfs's unlink method of struct
-+ * inode_operations.
-+ */
-+int reiser4_unlink_common(struct inode *parent, struct dentry *victim)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *object;
-+ file_plugin *fplug;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ object = victim->d_inode;
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-2882", fplug->detach != NULL);
-+
-+ result = unlink_check_and_grab(parent, victim);
-+ if (result != 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ result = fplug->detach(object, parent);
-+ if (result == 0) {
-+ dir_plugin *parent_dplug;
-+ reiser4_dir_entry_desc entry;
-+
-+ parent_dplug = inode_dir_plugin(parent);
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* first, delete directory entry */
-+ result = parent_dplug->rem_entry(parent, victim, &entry);
-+ if (result == 0) {
-+ /*
-+ * if name was removed successfully, we _have_ to
-+ * return 0 from this function, because upper level
-+ * caller (vfs_{rmdir,unlink}) expect this.
-+ *
-+ * now that directory entry is removed, update
-+ * stat-data
-+ */
-+ reiser4_del_nlink(object, parent, 1);
-+ /*
-+ * Upon successful completion, unlink() shall mark for
-+ * update the st_ctime and st_mtime fields of the
-+ * parent directory. Also, if the file's link count is
-+ * not 0, the st_ctime field of the file shall be
-+ * marked for update. --SUS
-+ */
-+ reiser4_update_dir(parent);
-+ /* add safe-link for this file */
-+ if (object->i_nlink == 0)
-+ safe_link_add(object, SAFE_UNLINK);
-+ }
-+ }
-+
-+ if (unlikely(result != 0)) {
-+ if (result != -ENOMEM)
-+ warning("nikita-3398", "Cannot unlink %llu (%i)",
-+ (unsigned long long)get_inode_oid(object),
-+ result);
-+ /* if operation failed commit pending inode modifications to
-+ * the stat-data */
-+ reiser4_update_sd(object);
-+ reiser4_update_sd(parent);
-+ }
-+
-+ reiser4_release_reserved(object->i_sb);
-+
-+ /* @object's i_ctime was updated by ->rem_link() method(). */
-+
-+ /* @victim can be already removed from the disk by this time. Inode is
-+ then marked so that iput() wouldn't try to remove stat data. But
-+ inode itself is still there.
-+ */
-+
-+ /*
-+ * we cannot release directory semaphore here, because name has
-+ * already been deleted, but dentry (@victim) still exists. Prevent
-+ * balance_dirty_pages() from being called on exiting this context: we
-+ * don't want to do this under directory i_mutex.
-+ */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_symlink_common - symlink of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @linkname: string symlink is to contain
-+ *
-+ * This is common implementation of vfs's symlink method of struct
-+ * inode_operations.
-+ * Creates object using file plugin SYMLINK_FILE_PLUGIN_ID.
-+ */
-+int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
-+ const char *linkname)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.name = linkname;
-+ data.id = SYMLINK_FILE_PLUGIN_ID;
-+ data.mode = S_IFLNK | S_IRWXUGO;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/**
-+ * reiser4_mkdir_common - mkdir of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @mode: the permissions to use
-+ *
-+ * This is common implementation of vfs's mkdir method of struct
-+ * inode_operations.
-+ * Creates object using file plugin DIRECTORY_FILE_PLUGIN_ID.
-+ */
-+int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = S_IFDIR | mode;
-+ data.id = DIRECTORY_FILE_PLUGIN_ID;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/**
-+ * reiser4_mknod_common - mknod of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @mode: the permissions to use and file type
-+ * @rdev: minor and major of new device file
-+ *
-+ * This is common implementation of vfs's mknod method of struct
-+ * inode_operations.
-+ * Creates object using file plugin SPECIAL_FILE_PLUGIN_ID.
-+ */
-+int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
-+ int mode, dev_t rdev)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = mode;
-+ data.rdev = rdev;
-+ data.id = SPECIAL_FILE_PLUGIN_ID;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/*
-+ * implementation of vfs's rename method of struct inode_operations for typical
-+ * directory is in inode_ops_rename.c
-+ */
-+
-+/**
-+ * reiser4_follow_link_common - follow_link of inode operations
-+ * @dentry: dentry of symlink
-+ * @data:
-+ *
-+ * This is common implementation of vfs's followlink method of struct
-+ * inode_operations.
-+ * Assumes that inode's i_private points to the content of symbolic link.
-+ */
-+void *reiser4_follow_link_common(struct dentry *dentry, struct nameidata *nd)
-+{
-+ assert("vs-851", S_ISLNK(dentry->d_inode->i_mode));
-+
-+ if (!dentry->d_inode->i_private
-+ || !reiser4_inode_get_flag(dentry->d_inode,
-+ REISER4_GENERIC_PTR_USED))
-+ return ERR_PTR(RETERR(-EINVAL));
-+ nd_set_link(nd, dentry->d_inode->i_private);
-+ return NULL;
-+}
-+
-+/**
-+ * reiser4_permission_common - permission of inode operations
-+ * @inode: inode to check permissions for
-+ * @mask: mode bits to check permissions for
-+ * @nameidata:
-+ *
-+ * Uses generic function to check for rwx permissions.
-+ */
-+int reiser4_permission_common(struct inode *inode, int mask,
-+ struct nameidata *nameidata)
-+{
-+ return generic_permission(inode, mask, NULL);
-+}
-+
-+static int setattr_reserve(reiser4_tree *);
-+
-+/* this is common implementation of vfs's setattr method of struct
-+ inode_operations
-+*/
-+int reiser4_setattr_common(struct dentry *dentry, struct iattr *attr)
-+{
-+ reiser4_context *ctx;
-+ struct inode *inode;
-+ int result;
-+
-+ inode = dentry->d_inode;
-+ result = inode_change_ok(inode, attr);
-+ if (result)
-+ return result;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ assert("nikita-3119", !(attr->ia_valid & ATTR_SIZE));
-+
-+ /*
-+ * grab disk space and call standard inode_setattr().
-+ */
-+ result = setattr_reserve(reiser4_tree_by_inode(inode));
-+ if (!result) {
-+ if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid)
-+ || (attr->ia_valid & ATTR_GID
-+ && attr->ia_gid != inode->i_gid)) {
-+ result = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ }
-+ result = inode_setattr(inode, attr);
-+ if (!result)
-+ reiser4_update_sd(inode);
-+ }
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* this is common implementation of vfs's getattr method of struct
-+ inode_operations
-+*/
-+int reiser4_getattr_common(struct vfsmount *mnt UNUSED_ARG,
-+ struct dentry *dentry, struct kstat *stat)
-+{
-+ struct inode *obj;
-+
-+ assert("nikita-2298", dentry != NULL);
-+ assert("nikita-2299", stat != NULL);
-+ assert("nikita-2300", dentry->d_inode != NULL);
-+
-+ obj = dentry->d_inode;
-+
-+ stat->dev = obj->i_sb->s_dev;
-+ stat->ino = oid_to_uino(get_inode_oid(obj));
-+ stat->mode = obj->i_mode;
-+ /* don't confuse userland with huge nlink. This is not entirely
-+ * correct, because nlink_t is not necessary 16 bit signed. */
-+ stat->nlink = min(obj->i_nlink, (typeof(obj->i_nlink)) 0x7fff);
-+ stat->uid = obj->i_uid;
-+ stat->gid = obj->i_gid;
-+ stat->rdev = obj->i_rdev;
-+ stat->atime = obj->i_atime;
-+ stat->mtime = obj->i_mtime;
-+ stat->ctime = obj->i_ctime;
-+ stat->size = obj->i_size;
-+ stat->blocks =
-+ (inode_get_bytes(obj) + VFS_BLKSIZE - 1) >> VFS_BLKSIZE_BITS;
-+ /* "preferred" blocksize for efficient file system I/O */
-+ stat->blksize = get_super_private(obj->i_sb)->optimal_io_size;
-+
-+ return 0;
-+}
-+
-+/* Estimate the maximum amount of nodes which might be allocated or changed on
-+ typical new object creation. Typical creation consists of calling create
-+ method of file plugin, adding directory entry to parent and update parent
-+ directory's stat data.
-+*/
-+static reiser4_block_nr estimate_create_vfs_object(struct inode *parent, /* parent object */
-+ struct inode *object
-+ /* object */ )
-+{
-+ assert("vpf-309", parent != NULL);
-+ assert("vpf-307", object != NULL);
-+
-+ return
-+ /* object creation estimation */
-+ inode_file_plugin(object)->estimate.create(object) +
-+ /* stat data of parent directory estimation */
-+ inode_file_plugin(parent)->estimate.update(parent) +
-+ /* adding entry estimation */
-+ inode_dir_plugin(parent)->estimate.add_entry(parent) +
-+ /* to undo in the case of failure */
-+ inode_dir_plugin(parent)->estimate.rem_entry(parent);
-+}
-+
-+/* Create child in directory.
-+
-+ . get object's plugin
-+ . get fresh inode
-+ . initialize inode
-+ . add object's stat-data
-+ . initialize object's directory
-+ . add entry to the parent
-+ . instantiate dentry
-+
-+*/
-+static int do_create_vfs_child(reiser4_object_create_data * data, /* parameters of new
-+ object */
-+ struct inode **retobj)
-+{
-+ int result;
-+
-+ struct dentry *dentry; /* parent object */
-+ struct inode *parent; /* new name */
-+
-+ dir_plugin *par_dir; /* directory plugin on the parent */
-+ dir_plugin *obj_dir; /* directory plugin on the new object */
-+ file_plugin *obj_plug; /* object plugin on the new object */
-+ struct inode *object; /* new object */
-+ reiser4_block_nr reserve;
-+
-+ reiser4_dir_entry_desc entry; /* new directory entry */
-+
-+ assert("nikita-1420", data != NULL);
-+ parent = data->parent;
-+ dentry = data->dentry;
-+
-+ assert("nikita-1418", parent != NULL);
-+ assert("nikita-1419", dentry != NULL);
-+
-+ /* check, that name is acceptable for parent */
-+ par_dir = inode_dir_plugin(parent);
-+ if (par_dir->is_name_acceptable &&
-+ !par_dir->is_name_acceptable(parent,
-+ dentry->d_name.name,
-+ (int)dentry->d_name.len))
-+ return RETERR(-ENAMETOOLONG);
-+
-+ result = 0;
-+ obj_plug = file_plugin_by_id((int)data->id);
-+ if (obj_plug == NULL) {
-+ warning("nikita-430", "Cannot find plugin %i", data->id);
-+ return RETERR(-ENOENT);
-+ }
-+ object = new_inode(parent->i_sb);
-+ if (object == NULL)
-+ return RETERR(-ENOMEM);
-+ /* we'll update i_nlink below */
-+ object->i_nlink = 0;
-+ /* new_inode() initializes i_ino to "arbitrary" value. Reset it to 0,
-+ * to simplify error handling: if some error occurs before i_ino is
-+ * initialized with oid, i_ino should already be set to some
-+ * distinguished value. */
-+ object->i_ino = 0;
-+
-+ /* So that on error iput will be called. */
-+ *retobj = object;
-+
-+ if (DQUOT_ALLOC_INODE(object)) {
-+ DQUOT_DROP(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EDQUOT);
-+ }
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = object;
-+
-+ set_plugin(&reiser4_inode_data(object)->pset, PSET_FILE,
-+ file_plugin_to_plugin(obj_plug));
-+ result = obj_plug->set_plug_in_inode(object, parent, data);
-+ if (result) {
-+ warning("nikita-431", "Cannot install plugin %i on %llx",
-+ data->id, (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ /* reget plugin after installation */
-+ obj_plug = inode_file_plugin(object);
-+
-+ if (obj_plug->create_object == NULL) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EPERM);
-+ }
-+
-+ /* if any of hash, tail, sd or permission plugins for newly created
-+ object are not set yet set them here inheriting them from parent
-+ directory
-+ */
-+ assert("nikita-2070", obj_plug->adjust_to_parent != NULL);
-+ result = obj_plug->adjust_to_parent(object,
-+ parent,
-+ object->i_sb->s_root->d_inode);
-+ if (result == 0)
-+ result = finish_pset(object);
-+ if (result != 0) {
-+ warning("nikita-432", "Cannot inherit from %llx to %llx",
-+ (unsigned long long)get_inode_oid(parent),
-+ (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ /* setup inode and file-operations for this inode */
-+ setup_inode_ops(object, data);
-+
-+ /* call file plugin's method to initialize plugin specific part of
-+ * inode */
-+ if (obj_plug->init_inode_data)
-+ obj_plug->init_inode_data(object, data, 1 /*create */ );
-+
-+ /* obtain directory plugin (if any) for new object. */
-+ obj_dir = inode_dir_plugin(object);
-+ if (obj_dir != NULL && obj_dir->init == NULL) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EPERM);
-+ }
-+
-+ reiser4_inode_data(object)->locality_id = get_inode_oid(parent);
-+
-+ reserve = estimate_create_vfs_object(parent, object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-ENOSPC);
-+ }
-+
-+ /* mark inode `immutable'. We disable changes to the file being
-+ created until valid directory entry for it is inserted. Otherwise,
-+ if file were expanded and insertion of directory entry fails, we
-+ have to remove file, but we only alloted enough space in
-+ transaction to remove _empty_ file. 3.x code used to remove stat
-+ data in different transaction thus possibly leaking disk space on
-+ crash. This all only matters if it's possible to access file
-+ without name, for example, by inode number
-+ */
-+ reiser4_inode_set_flag(object, REISER4_IMMUTABLE);
-+
-+ /* create empty object, this includes allocation of new objectid. For
-+ directories this implies creation of dot and dotdot */
-+ assert("nikita-2265", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+ /* mark inode as `loaded'. From this point onward
-+ reiser4_delete_inode() will try to remove its stat-data. */
-+ reiser4_inode_set_flag(object, REISER4_LOADED);
-+
-+ result = obj_plug->create_object(object, parent, data);
-+ if (result != 0) {
-+ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
-+ if (result != -ENAMETOOLONG && result != -ENOMEM)
-+ warning("nikita-2219",
-+ "Failed to create sd for %llu",
-+ (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ if (obj_dir != NULL)
-+ result = obj_dir->init(object, parent, data);
-+ if (result == 0) {
-+ assert("nikita-434", !reiser4_inode_get_flag(object,
-+ REISER4_NO_SD));
-+ /* insert inode into VFS hash table */
-+ insert_inode_hash(object);
-+ /* create entry */
-+ result = par_dir->add_entry(parent, dentry, data, &entry);
-+ if (result == 0) {
-+ result = reiser4_add_nlink(object, parent, 0);
-+ /* If O_CREAT is set and the file did not previously
-+ exist, upon successful completion, open() shall
-+ mark for update the st_atime, st_ctime, and
-+ st_mtime fields of the file and the st_ctime and
-+ st_mtime fields of the parent directory. --SUS
-+ */
-+ /* @object times are already updated by
-+ reiser4_add_nlink() */
-+ if (result == 0)
-+ reiser4_update_dir(parent);
-+ if (result != 0)
-+ /* cleanup failure to add nlink */
-+ par_dir->rem_entry(parent, dentry, &entry);
-+ }
-+ if (result != 0)
-+ /* cleanup failure to add entry */
-+ obj_plug->detach(object, parent);
-+ } else if (result != -ENOMEM)
-+ warning("nikita-2219", "Failed to initialize dir for %llu: %i",
-+ (unsigned long long)get_inode_oid(object), result);
-+
-+ /*
-+ * update stat-data, committing all pending modifications to the inode
-+ * fields.
-+ */
-+ reiser4_update_sd(object);
-+ if (result != 0) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ /* if everything was ok (result == 0), parent stat-data is
-+ * already updated above (update_parent_dir()) */
-+ reiser4_update_sd(parent);
-+ /* failure to create entry, remove object */
-+ obj_plug->delete_object(object);
-+ }
-+
-+ /* file has name now, clear immutable flag */
-+ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
-+
-+ /* on error, iput() will call ->delete_inode(). We should keep track
-+ of the existence of stat-data for this inode and avoid attempt to
-+ remove it in reiser4_delete_inode(). This is accomplished through
-+ REISER4_NO_SD bit in inode.u.reiser4_i.plugin.flags
-+ */
-+ return result;
-+}
-+
-+/* this is helper for common implementations of reiser4_mkdir, reiser4_create,
-+ reiser4_mknod and reiser4_symlink
-+*/
-+static int
-+create_vfs_object(struct inode *parent,
-+ struct dentry *dentry, reiser4_object_create_data * data)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *child;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ context_set_commit_async(ctx);
-+
-+ data->parent = parent;
-+ data->dentry = dentry;
-+ child = NULL;
-+ result = do_create_vfs_child(data, &child);
-+ if (unlikely(result != 0)) {
-+ if (child != NULL) {
-+ reiser4_make_bad_inode(child);
-+ iput(child);
-+ }
-+ } else
-+ d_instantiate(dentry, child);
-+
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* helper for link_common. Estimate disk space necessary to add a link
-+ from @parent to @object
-+*/
-+static reiser4_block_nr common_estimate_link(struct inode *parent, /* parent directory */
-+ struct inode *object
-+ /* object to which new link is being cerated */
-+ )
-+{
-+ reiser4_block_nr res = 0;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ assert("vpf-317", object != NULL);
-+ assert("vpf-318", parent != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ dplug = inode_dir_plugin(parent);
-+ /* VS-FIXME-HANS: why do we do fplug->estimate.update(object) twice instead of multiplying by 2? */
-+ /* reiser4_add_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* add_entry(parent) */
-+ res += dplug->estimate.add_entry(parent);
-+ /* reiser4_del_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* update_dir(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+ /* safe-link */
-+ res += estimate_one_item_removal(reiser4_tree_by_inode(object));
-+
-+ return res;
-+}
-+
-+/* Estimate disk space necessary to remove a link between @parent and
-+ @object.
-+*/
-+static reiser4_block_nr estimate_unlink(struct inode *parent, /* parent directory */
-+ struct inode *object
-+ /* object to which new link is being cerated */
-+ )
-+{
-+ reiser4_block_nr res = 0;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ assert("vpf-317", object != NULL);
-+ assert("vpf-318", parent != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ dplug = inode_dir_plugin(parent);
-+
-+ /* rem_entry(parent) */
-+ res += dplug->estimate.rem_entry(parent);
-+ /* reiser4_del_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* update_dir(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+ /* fplug->unlink */
-+ res += fplug->estimate.unlink(object, parent);
-+ /* safe-link */
-+ res += estimate_one_insert_item(reiser4_tree_by_inode(object));
-+
-+ return res;
-+}
-+
-+/* helper for reiser4_unlink_common. Estimate and grab space for unlink. */
-+static int unlink_check_and_grab(struct inode *parent, struct dentry *victim)
-+{
-+ file_plugin *fplug;
-+ struct inode *child;
-+ int result;
-+
-+ result = 0;
-+ child = victim->d_inode;
-+ fplug = inode_file_plugin(child);
-+
-+ /* check for race with create_object() */
-+ if (reiser4_inode_get_flag(child, REISER4_IMMUTABLE))
-+ return RETERR(-E_REPEAT);
-+ /* object being deleted should have stat data */
-+ assert("vs-949", !reiser4_inode_get_flag(child, REISER4_NO_SD));
-+
-+ /* ask object plugin */
-+ if (fplug->can_rem_link != NULL && !fplug->can_rem_link(child))
-+ return RETERR(-ENOTEMPTY);
-+
-+ result = (int)estimate_unlink(parent, child);
-+ if (result < 0)
-+ return result;
-+
-+ return reiser4_grab_reserved(child->i_sb, result, BA_CAN_COMMIT);
-+}
-+
-+/* helper for reiser4_setattr_common */
-+static int setattr_reserve(reiser4_tree * tree)
-+{
-+ assert("vs-1096", is_grab_enabled(get_current_context()));
-+ return reiser4_grab_space(estimate_one_insert_into_item(tree),
-+ BA_CAN_COMMIT);
-+}
-+
-+/* helper function. Standards require that for many file-system operations
-+ on success ctime and mtime of parent directory is to be updated. */
-+int reiser4_update_dir(struct inode *dir)
-+{
-+ assert("nikita-2525", dir != NULL);
-+
-+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-+ return reiser4_update_sd(dir);
-+}
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/inode_ops_rename.c linux-2.6.20/fs/reiser4/plugin/inode_ops_rename.c
---- linux-2.6.20.orig/fs/reiser4/plugin/inode_ops_rename.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/inode_ops_rename.c 2007-05-06 14:50:43.795005721 +0400
-@@ -0,0 +1,914 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../inode.h"
-+#include "../safe_link.h"
-+
-+static const char *possible_leak = "Possible disk space leak.";
-+
-+/* re-bind existing name at @from_coord in @from_dir to point to @to_inode.
-+
-+ Helper function called from hashed_rename() */
-+static int replace_name(struct inode *to_inode, /* inode where @from_coord is
-+ * to be re-targeted at */
-+ struct inode *from_dir, /* directory where @from_coord
-+ * lives */
-+ struct inode *from_inode, /* inode @from_coord
-+ * originally point to */
-+ coord_t * from_coord, /* where directory entry is in
-+ * the tree */
-+ lock_handle * from_lh /* lock handle on @from_coord */ )
-+{
-+ item_plugin *from_item;
-+ int result;
-+ znode *node;
-+
-+ coord_clear_iplug(from_coord);
-+ node = from_coord->node;
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+ from_item = item_plugin_by_coord(from_coord);
-+ if (plugin_of_group(item_plugin_by_coord(from_coord),
-+ DIR_ENTRY_ITEM_TYPE))
-+ {
-+ reiser4_key to_key;
-+
-+ build_sd_key(to_inode, &to_key);
-+
-+ /* everything is found and prepared to change directory entry
-+ at @from_coord to point to @to_inode.
-+
-+ @to_inode is just about to get new name, so bump its link
-+ counter.
-+
-+ */
-+ result = reiser4_add_nlink(to_inode, from_dir, 0);
-+ if (result != 0) {
-+ /* Don't issue warning: this may be plain -EMLINK */
-+ zrelse(node);
-+ return result;
-+ }
-+
-+ result =
-+ from_item->s.dir.update_key(from_coord, &to_key, from_lh);
-+ if (result != 0) {
-+ reiser4_del_nlink(to_inode, from_dir, 0);
-+ zrelse(node);
-+ return result;
-+ }
-+
-+ /* @from_inode just lost its name, he-he.
-+
-+ If @from_inode was directory, it contained dotdot pointing
-+ to @from_dir. @from_dir i_nlink will be decreased when
-+ iput() will be called on @from_inode.
-+
-+ If file-system is not ADG (hard-links are
-+ supported on directories), iput(from_inode) will not remove
-+ @from_inode, and thus above is incorrect, but hard-links on
-+ directories are problematic in many other respects.
-+ */
-+ result = reiser4_del_nlink(from_inode, from_dir, 0);
-+ if (result != 0) {
-+ warning("nikita-2330",
-+ "Cannot remove link from source: %i. %s",
-+ result, possible_leak);
-+ }
-+ /* Has to return success, because entry is already
-+ * modified. */
-+ result = 0;
-+
-+ /* NOTE-NIKITA consider calling plugin method in stead of
-+ accessing inode fields directly. */
-+ from_dir->i_mtime = CURRENT_TIME;
-+ } else {
-+ warning("nikita-2326", "Unexpected item type");
-+ result = RETERR(-EIO);
-+ }
-+ zrelse(node);
-+ return result;
-+}
-+
-+/* add new entry pointing to @inode into @dir at @coord, locked by @lh
-+
-+ Helper function used by hashed_rename(). */
-+static int add_name(struct inode *inode, /* inode where @coord is to be
-+ * re-targeted at */
-+ struct inode *dir, /* directory where @coord lives */
-+ struct dentry *name, /* new name */
-+ coord_t * coord, /* where directory entry is in the tree */
-+ lock_handle * lh, /* lock handle on @coord */
-+ int is_dir /* true, if @inode is directory */ )
-+{
-+ int result;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-2333", lh->node == coord->node);
-+ assert("nikita-2334", is_dir == S_ISDIR(inode->i_mode));
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = inode;
-+ /* build key of directory entry description */
-+ inode_dir_plugin(dir)->build_entry_key(dir, &name->d_name, &entry.key);
-+
-+ /* ext2 does this in different order: first inserts new entry,
-+ then increases directory nlink. We don't want do this,
-+ because reiser4_add_nlink() calls ->add_link() plugin
-+ method that can fail for whatever reason, leaving as with
-+ cleanup problems.
-+ */
-+ /* @inode is getting new name */
-+ reiser4_add_nlink(inode, dir, 0);
-+ /* create @new_name in @new_dir pointing to
-+ @old_inode */
-+ result = WITH_COORD(coord,
-+ inode_dir_item_plugin(dir)->s.dir.add_entry(dir,
-+ coord,
-+ lh,
-+ name,
-+ &entry));
-+ if (result != 0) {
-+ int result2;
-+ result2 = reiser4_del_nlink(inode, dir, 0);
-+ if (result2 != 0) {
-+ warning("nikita-2327",
-+ "Cannot drop link on %lli %i. %s",
-+ (unsigned long long)get_inode_oid(inode),
-+ result2, possible_leak);
-+ }
-+ } else
-+ INODE_INC_FIELD(dir, i_size);
-+ return result;
-+}
-+
-+/*
-+ * Estimate how many blocks a rename of @old_name (in @old_dir) to @new_name
-+ * (in @new_dir) may need. Two alternatives are priced separately - replacing
-+ * an existing target name and adding a new name - and the larger of the two
-+ * is kept. The remaining terms are added unconditionally on top of that.
-+ */
-+static reiser4_block_nr estimate_rename(struct inode *old_dir,	/* directory where @old is located */
-+					struct dentry *old_name,	/* old name */
-+					struct inode *new_dir,	/* directory where @new is located */
-+					struct dentry *new_name	/* new name */ )
-+{
-+	reiser4_block_nr replace_cost, add_cost, total;
-+	dir_plugin *old_parent_plug, *new_parent_plug;
-+	file_plugin *old_child_plug, *new_child_plug;
-+
-+	assert("vpf-311", old_dir != NULL);
-+	assert("vpf-312", new_dir != NULL);
-+	assert("vpf-313", old_name != NULL);
-+	assert("vpf-314", new_name != NULL);
-+
-+	old_parent_plug = inode_dir_plugin(old_dir);
-+	new_parent_plug = inode_dir_plugin(new_dir);
-+	old_child_plug = inode_file_plugin(old_name->d_inode);
-+	/* the target dentry may be negative, in which case it has no plugin */
-+	new_child_plug = new_name->d_inode ?
-+	    inode_file_plugin(new_name->d_inode) : NULL;
-+
-+	/* find_entry - can insert one leaf. */
-+	replace_cost = 1;
-+	add_cost = 1;
-+
-+	/* alternative 1: replace_name */
-+	/* reiser4_add_nlink() and reiser4_del_nlink() on the renamed object */
-+	replace_cost += 2 * old_child_plug->estimate.update(old_name->d_inode);
-+	/* update key */
-+	replace_cost += 1;
-+	/* reiser4_del_nlink() on the existing target, if there is one */
-+	if (new_child_plug != NULL)
-+		replace_cost +=
-+		    new_child_plug->estimate.update(new_name->d_inode);
-+
-+	/* alternative 2: add_name */
-+	/* reiser4_add_nlink() and reiser4_del_nlink() on the new parent */
-+	add_cost += 2 * inode_file_plugin(new_dir)->estimate.update(new_dir);
-+	/* NOTE(review): the original annotation attributes this term and the
-+	 * last one of this group to the old parent, yet both are taken from
-+	 * the renamed object's plugin - confirm which was intended */
-+	add_cost += old_child_plug->estimate.update(old_name->d_inode);
-+	/* add_entry() in the new parent */
-+	add_cost += new_parent_plug->estimate.add_entry(new_dir);
-+	add_cost += old_child_plug->estimate.update(old_name->d_inode);
-+
-+	/* only one of the two alternatives is executed: keep the larger */
-+	total = replace_cost;
-+	if (total < add_cost)
-+		total = add_cost;
-+
-+	/* reiser4_write_sd() of the new parent */
-+	total += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+
-+	/* reiser4_write_sd() of the existing target */
-+	if (new_child_plug != NULL)
-+		total += new_child_plug->estimate.update(new_name->d_inode);
-+
-+	/* hashed_rem_entry() in the old parent */
-+	total += old_parent_plug->estimate.rem_entry(old_dir);
-+
-+	/* reiser4_del_nlink() on the renamed object */
-+	total += old_child_plug->estimate.update(old_name->d_inode);
-+
-+	/* second replace_name: */
-+	/* reiser4_add_nlink() on the new parent */
-+	total += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+	/* update_key */
-+	total += 1;
-+	/* reiser4_del_nlink() on the new parent */
-+	total += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+	/* reiser4_del_nlink() on the old parent */
-+	total += inode_file_plugin(old_dir)->estimate.update(old_dir);
-+
-+	/* reiser4_write_sd() of the old parent */
-+	total += inode_file_plugin(old_dir)->estimate.update(old_dir);
-+
-+	/* reiser4_write_sd() of the renamed object */
-+	total += old_child_plug->estimate.update(old_name->d_inode);
-+
-+	return total;
-+}
-+
-+/*
-+ * Reserve the space estimate_rename() computes for this rename.
-+ * Returns 0 when reiser4_grab_space() succeeds, -ENOSPC (through RETERR)
-+ * when it reports any failure.
-+ */
-+static int hashed_rename_estimate_and_grab(struct inode *old_dir,	/* directory where @old is located */
-+					   struct dentry *old_name,	/* old name */
-+					   struct inode *new_dir,	/* directory where @new is located */
-+					   struct dentry *new_name	/* new name */ )
-+{
-+	if (reiser4_grab_space(estimate_rename(old_dir, old_name,
-+					       new_dir, new_name),
-+			       BA_CAN_COMMIT) != 0)
-+		return RETERR(-ENOSPC);
-+	return 0;
-+}
-+
-+/*
-+ * Decide whether @old_inode may be given a name in @new_dir, possibly
-+ * displacing @new_inode (NULL when the target name is not bound yet).
-+ * This singles out attempts to rename pseudo-files, for example.
-+ *
-+ * Returns 0 when the rename may proceed, otherwise:
-+ *   -ENOTDIR  @new_dir has no directory plugin;
-+ *   -EPERM    @new_dir has no ->create inode operation;
-+ *   -EMLINK   plugin of @old_inode refuses one more link;
-+ *   -EBUSY    plugin of @new_inode refuses to drop a link.
-+ */
-+static int can_rename(struct inode *old_dir, struct inode *old_inode,
-+		      struct inode *new_dir, struct inode *new_inode)
-+{
-+	dir_plugin *target_dplug;
-+	file_plugin *source_fplug;
-+	file_plugin *victim_fplug;
-+
-+	assert("nikita-3370", old_inode != NULL);
-+
-+	target_dplug = inode_dir_plugin(new_dir);
-+	source_fplug = inode_file_plugin(old_inode);
-+
-+	if (target_dplug == NULL)
-+		return RETERR(-ENOTDIR);
-+	if (new_dir->i_op->create == NULL)
-+		return RETERR(-EPERM);
-+	if (!source_fplug->can_add_link(old_inode))
-+		return RETERR(-EMLINK);
-+
-+	/* nothing is displaced when the target name is free */
-+	if (new_inode == NULL)
-+		return 0;
-+
-+	/* ->can_rem_link is optional: a missing method means "allowed" */
-+	victim_fplug = inode_file_plugin(new_inode);
-+	if (victim_fplug->can_rem_link != NULL &&
-+	    !victim_fplug->can_rem_link(new_inode))
-+		return RETERR(-EBUSY);
-+	return 0;
-+}
-+
-+int reiser4_find_entry(struct inode *, struct dentry *, lock_handle *,
-+ znode_lock_mode, reiser4_dir_entry_desc *);
-+int reiser4_update_dir(struct inode *);
-+
-+/* this is common implementation of vfs's rename method of struct
-+   inode_operations
-+   See comments in the body.
-+
-+   @old_dir, @old_name: directory and dentry of the existing name;
-+   @new_dir, @new_name: directory and dentry of the name to create.
-+
-+   Returns 0 on success or a negative error code. All scratch objects
-+   (three directory entry descriptors, two lock handles, a private dentry
-+   for ".." and its fsdata) are carved out of one heap allocation, which is
-+   released on every exit path through the single "out" label.
-+
-+   It is arguable that this function can be made generic so, that it
-+   will be applicable to any kind of directory plugin that deals with
-+   directories composed out of directory entries. The only obstacle
-+   here is that we don't have any data-type to represent directory
-+   entry. This should be re-considered when more than one different
-+   directory plugin will be implemented.
-+*/
-+int reiser4_rename_common(struct inode *old_dir /* directory where @old
-+						   * is located */ ,
-+			  struct dentry *old_name /* old name */ ,
-+			  struct inode *new_dir /* directory where @new
-+						   * is located */ ,
-+			  struct dentry *new_name /* new name */ )
-+{
-+	/* From `The Open Group Base Specifications Issue 6'
-+
-+	   If either the old or new argument names a symbolic link, rename()
-+	   shall operate on the symbolic link itself, and shall not resolve
-+	   the last component of the argument. If the old argument and the new
-+	   argument resolve to the same existing file, rename() shall return
-+	   successfully and perform no other action.
-+
-+	   [this is done by VFS: vfs_rename()]
-+
-+	   If the old argument points to the pathname of a file that is not a
-+	   directory, the new argument shall not point to the pathname of a
-+	   directory.
-+
-+	   [checked by VFS: vfs_rename->may_delete()]
-+
-+	   If the link named by the new argument exists, it shall
-+	   be removed and old renamed to new. In this case, a link named new
-+	   shall remain visible to other processes throughout the renaming
-+	   operation and refer either to the file referred to by new or old
-+	   before the operation began.
-+
-+	   [we should assure this]
-+
-+	   Write access permission is required for
-+	   both the directory containing old and the directory containing new.
-+
-+	   [checked by VFS: vfs_rename->may_delete(), may_create()]
-+
-+	   If the old argument points to the pathname of a directory, the new
-+	   argument shall not point to the pathname of a file that is not a
-+	   directory.
-+
-+	   [checked by VFS: vfs_rename->may_delete()]
-+
-+	   If the directory named by the new argument exists, it
-+	   shall be removed and old renamed to new. In this case, a link named
-+	   new shall exist throughout the renaming operation and shall refer
-+	   either to the directory referred to by new or old before the
-+	   operation began.
-+
-+	   [we should assure this]
-+
-+	   If new names an existing directory, it shall be
-+	   required to be an empty directory.
-+
-+	   [we should check this]
-+
-+	   If the old argument points to a pathname of a symbolic link, the
-+	   symbolic link shall be renamed. If the new argument points to a
-+	   pathname of a symbolic link, the symbolic link shall be removed.
-+
-+	   The new pathname shall not contain a path prefix that names
-+	   old. Write access permission is required for the directory
-+	   containing old and the directory containing new. If the old
-+	   argument points to the pathname of a directory, write access
-+	   permission may be required for the directory named by old, and, if
-+	   it exists, the directory named by new.
-+
-+	   [checked by VFS: vfs_rename(), vfs_rename_dir()]
-+
-+	   If the link named by the new argument exists and the file's link
-+	   count becomes 0 when it is removed and no process has the file
-+	   open, the space occupied by the file shall be freed and the file
-+	   shall no longer be accessible. If one or more processes have the
-+	   file open when the last link is removed, the link shall be removed
-+	   before rename() returns, but the removal of the file contents shall
-+	   be postponed until all references to the file are closed.
-+
-+	   [iput() handles this, but we can do this manually, a la
-+	   reiser4_unlink()]
-+
-+	   Upon successful completion, rename() shall mark for update the
-+	   st_ctime and st_mtime fields of the parent directory of each file.
-+
-+	   [N/A]
-+
-+	 */
-+	reiser4_context *ctx;
-+	int result;
-+	int is_dir;		/* is @old_name directory */
-+
-+	struct inode *old_inode;
-+	struct inode *new_inode;
-+	coord_t *new_coord;
-+
-+	reiser4_dentry_fsdata *new_fsdata;
-+	dir_plugin *dplug;
-+	file_plugin *fplug;
-+
-+	reiser4_dir_entry_desc *old_entry, *new_entry, *dotdot_entry;
-+	lock_handle *new_lh, *dotdot_lh;
-+	struct dentry *dotdot_name;
-+	reiser4_dentry_fsdata *dataonstack;
-+	/* size of the single allocation all scratch objects live in */
-+	const size_t scratch_size =
-+	    3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) +
-+	    sizeof(*dotdot_name) + sizeof(*dataonstack);
-+
-+	ctx = reiser4_init_context(old_dir->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	old_entry = kmalloc(scratch_size, reiser4_ctx_gfp_mask_get());
-+	if (old_entry == NULL) {
-+		/* kfree(NULL) at "out" is a no-op */
-+		result = RETERR(-ENOMEM);
-+		goto out;
-+	}
-+	memset(old_entry, 0, scratch_size);
-+
-+	/* layout: 3 entry descriptors, 2 lock handles, dentry, fsdata */
-+	new_entry = old_entry + 1;
-+	dotdot_entry = old_entry + 2;
-+	new_lh = (lock_handle *)(old_entry + 3);
-+	dotdot_lh = new_lh + 1;
-+	dotdot_name = (struct dentry *)(new_lh + 2);
-+	dataonstack = (reiser4_dentry_fsdata *)(dotdot_name + 1);
-+
-+	assert("nikita-2318", old_dir != NULL);
-+	assert("nikita-2319", new_dir != NULL);
-+	assert("nikita-2320", old_name != NULL);
-+	assert("nikita-2321", new_name != NULL);
-+
-+	old_inode = old_name->d_inode;
-+	new_inode = new_name->d_inode;
-+
-+	dplug = inode_dir_plugin(old_dir);
-+	/* set only after the target name was successfully replaced */
-+	fplug = NULL;
-+
-+	new_fsdata = reiser4_get_dentry_fsdata(new_name);
-+	if (IS_ERR(new_fsdata)) {
-+		result = PTR_ERR(new_fsdata);
-+		goto out;
-+	}
-+
-+	new_coord = &new_fsdata->dec.entry_coord;
-+	coord_clear_iplug(new_coord);
-+
-+	is_dir = S_ISDIR(old_inode->i_mode);
-+
-+	assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
-+
-+	/* if target is existing directory and it's not empty---return error.
-+
-+	   This check is done specifically, because is_dir_empty() requires
-+	   tree traversal and have to be done before locks are taken.
-+	 */
-+	if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) {
-+		result = RETERR(-ENOTEMPTY);
-+		goto out;
-+	}
-+
-+	result = can_rename(old_dir, old_inode, new_dir, new_inode);
-+	if (result != 0)
-+		goto out;
-+
-+	result = hashed_rename_estimate_and_grab(old_dir, old_name,
-+						 new_dir, new_name);
-+	if (result != 0)
-+		goto out;
-+
-+	init_lh(new_lh);
-+
-+	/* find entry for @new_name */
-+	result = reiser4_find_entry(new_dir, new_name, new_lh, ZNODE_WRITE_LOCK,
-+				    new_entry);
-+
-+	if (IS_CBKERR(result)) {
-+		done_lh(new_lh);
-+		goto out;
-+	}
-+
-+	reiser4_seal_done(&new_fsdata->dec.entry_seal);
-+
-+	/* add or replace name for @old_inode as @new_name */
-+	if (new_inode != NULL) {
-+		/* target (@new_name) exists. */
-+		/* Not clear what to do with objects that are
-+		   both directories and files at the same time. */
-+		if (result == CBK_COORD_FOUND) {
-+			result = replace_name(old_inode,
-+					      new_dir,
-+					      new_inode, new_coord, new_lh);
-+			if (result == 0)
-+				fplug = inode_file_plugin(new_inode);
-+		} else if (result == CBK_COORD_NOTFOUND) {
-+			/* VFS told us that @new_name is bound to existing
-+			   inode, but we failed to find directory entry. */
-+			warning("nikita-2324", "Target not found");
-+			result = RETERR(-ENOENT);
-+		}
-+	} else {
-+		/* target (@new_name) doesn't exists. */
-+		if (result == CBK_COORD_NOTFOUND)
-+			result = add_name(old_inode,
-+					  new_dir,
-+					  new_name, new_coord, new_lh, is_dir);
-+		else if (result == CBK_COORD_FOUND) {
-+			/* VFS told us that @new_name is "negative" dentry,
-+			   but we found directory entry. */
-+			warning("nikita-2331", "Target found unexpectedly");
-+			result = RETERR(-EIO);
-+		}
-+	}
-+
-+	assert("nikita-3462", ergo(result == 0,
-+				   old_inode->i_nlink >= 2 + !!is_dir));
-+
-+	/* We are done with all modifications to the @new_dir, release lock on
-+	   node. */
-+	done_lh(new_lh);
-+
-+	if (fplug != NULL) {
-+		/* detach @new_inode from name-space */
-+		result = fplug->detach(new_inode, new_dir);
-+		if (result != 0)
-+			warning("nikita-2330", "Cannot detach %lli: %i. %s",
-+				(unsigned long long)get_inode_oid(new_inode),
-+				result, possible_leak);
-+	}
-+
-+	if (new_inode != NULL)
-+		reiser4_update_sd(new_inode);
-+
-+	if (result == 0) {
-+		old_entry->obj = old_inode;
-+
-+		dplug->build_entry_key(old_dir,
-+				       &old_name->d_name, &old_entry->key);
-+
-+		/* At this stage new name was introduced for
-+		   @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
-+		   counters were updated.
-+
-+		   We want to remove @old_name now. If @old_inode wasn't
-+		   directory this is simple.
-+		 */
-+		result = dplug->rem_entry(old_dir, old_name, old_entry);
-+		if (result != 0 && result != -ENOMEM) {
-+			warning("nikita-2335",
-+				"Cannot remove old name: %i", result);
-+		} else {
-+			result = reiser4_del_nlink(old_inode, old_dir, 0);
-+			if (result != 0 && result != -ENOMEM) {
-+				warning("nikita-2337",
-+					"Cannot drop link on old: %i", result);
-+			}
-+		}
-+
-+		if (result == 0 && is_dir) {
-+			/* @old_inode is directory. We also have to update
-+			   dotdot entry. */
-+			coord_t *dotdot_coord;
-+
-+			/* These are pointers into the scratch allocation, so
-+			 * the size of the pointed-to object must be used;
-+			 * "sizeof ptr" would clear only a pointer's worth of
-+			 * bytes. */
-+			memset(dataonstack, 0, sizeof(*dataonstack));
-+			memset(dotdot_entry, 0, sizeof(*dotdot_entry));
-+			dotdot_entry->obj = old_dir;
-+			memset(dotdot_name, 0, sizeof(*dotdot_name));
-+			dotdot_name->d_name.name = "..";
-+			dotdot_name->d_name.len = 2;
-+			/*
-+			 * use privately allocated ->d_fsdata to avoid using
-+			 * reiser4_get_dentry_fsdata(). Locking is not needed,
-+			 * because dentry is private to the current thread.
-+			 */
-+			dotdot_name->d_fsdata = dataonstack;
-+			init_lh(dotdot_lh);
-+
-+			dotdot_coord = &dataonstack->dec.entry_coord;
-+			coord_clear_iplug(dotdot_coord);
-+
-+			result = reiser4_find_entry(old_inode, dotdot_name,
-+						    dotdot_lh, ZNODE_WRITE_LOCK,
-+						    dotdot_entry);
-+			if (result == 0) {
-+				/* replace_name() decreases i_nlink on
-+				 * @old_dir */
-+				result = replace_name(new_dir,
-+						      old_inode,
-+						      old_dir,
-+						      dotdot_coord, dotdot_lh);
-+			} else
-+				result = RETERR(-EIO);
-+			done_lh(dotdot_lh);
-+		}
-+	}
-+	reiser4_update_dir(new_dir);
-+	reiser4_update_dir(old_dir);
-+	reiser4_update_sd(old_inode);
-+
-+	/* add safe-link for target file (in case we removed
-+	 * last reference to the poor fellow */
-+	if (result == 0 && new_inode != NULL && new_inode->i_nlink == 0)
-+		result = safe_link_add(new_inode, SAFE_UNLINK);
-+
-+      out:
-+	/* common exit: release scratch memory and leave the context */
-+	kfree(old_entry);
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+#if 0
-+/*
-+ * Earlier variant of reiser4_rename_common() that keeps all scratch objects
-+ * on the stack. It sits inside "#if 0" and is therefore not compiled.
-+ *
-+ * Returns 0 on success or a negative error code; once the reiser4 context
-+ * has been initialized every exit goes through the "exit" label so that the
-+ * context is always left.
-+ */
-+int reiser4_rename_common(struct inode *old_dir /* directory where @old
-+						   * is located */ ,
-+			  struct dentry *old_name /* old name */ ,
-+			  struct inode *new_dir /* directory where @new
-+						   * is located */ ,
-+			  struct dentry *new_name /* new name */ )
-+{
-+	/* From `The Open Group Base Specifications Issue 6'
-+
-+	   If either the old or new argument names a symbolic link, rename()
-+	   shall operate on the symbolic link itself, and shall not resolve
-+	   the last component of the argument. If the old argument and the new
-+	   argument resolve to the same existing file, rename() shall return
-+	   successfully and perform no other action.
-+
-+	   [this is done by VFS: vfs_rename()]
-+
-+	   If the old argument points to the pathname of a file that is not a
-+	   directory, the new argument shall not point to the pathname of a
-+	   directory.
-+
-+	   [checked by VFS: vfs_rename->may_delete()]
-+
-+	   If the link named by the new argument exists, it shall
-+	   be removed and old renamed to new. In this case, a link named new
-+	   shall remain visible to other processes throughout the renaming
-+	   operation and refer either to the file referred to by new or old
-+	   before the operation began.
-+
-+	   [we should assure this]
-+
-+	   Write access permission is required for
-+	   both the directory containing old and the directory containing new.
-+
-+	   [checked by VFS: vfs_rename->may_delete(), may_create()]
-+
-+	   If the old argument points to the pathname of a directory, the new
-+	   argument shall not point to the pathname of a file that is not a
-+	   directory.
-+
-+	   [checked by VFS: vfs_rename->may_delete()]
-+
-+	   If the directory named by the new argument exists, it
-+	   shall be removed and old renamed to new. In this case, a link named
-+	   new shall exist throughout the renaming operation and shall refer
-+	   either to the directory referred to by new or old before the
-+	   operation began.
-+
-+	   [we should assure this]
-+
-+	   If new names an existing directory, it shall be
-+	   required to be an empty directory.
-+
-+	   [we should check this]
-+
-+	   If the old argument points to a pathname of a symbolic link, the
-+	   symbolic link shall be renamed. If the new argument points to a
-+	   pathname of a symbolic link, the symbolic link shall be removed.
-+
-+	   The new pathname shall not contain a path prefix that names
-+	   old. Write access permission is required for the directory
-+	   containing old and the directory containing new. If the old
-+	   argument points to the pathname of a directory, write access
-+	   permission may be required for the directory named by old, and, if
-+	   it exists, the directory named by new.
-+
-+	   [checked by VFS: vfs_rename(), vfs_rename_dir()]
-+
-+	   If the link named by the new argument exists and the file's link
-+	   count becomes 0 when it is removed and no process has the file
-+	   open, the space occupied by the file shall be freed and the file
-+	   shall no longer be accessible. If one or more processes have the
-+	   file open when the last link is removed, the link shall be removed
-+	   before rename() returns, but the removal of the file contents shall
-+	   be postponed until all references to the file are closed.
-+
-+	   [iput() handles this, but we can do this manually, a la
-+	   reiser4_unlink()]
-+
-+	   Upon successful completion, rename() shall mark for update the
-+	   st_ctime and st_mtime fields of the parent directory of each file.
-+
-+	   [N/A]
-+
-+	 */
-+	reiser4_context *ctx;
-+	int result;
-+	int is_dir;		/* is @old_name directory */
-+	struct inode *old_inode;
-+	struct inode *new_inode;
-+	reiser4_dir_entry_desc old_entry;
-+	reiser4_dir_entry_desc new_entry;
-+	coord_t *new_coord;
-+	reiser4_dentry_fsdata *new_fsdata;
-+	lock_handle new_lh;
-+	dir_plugin *dplug;
-+	file_plugin *fplug;
-+
-+	ctx = reiser4_init_context(old_dir->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	assert("nikita-2318", old_dir != NULL);
-+	assert("nikita-2319", new_dir != NULL);
-+	assert("nikita-2320", old_name != NULL);
-+	assert("nikita-2321", new_name != NULL);
-+
-+	old_inode = old_name->d_inode;
-+	new_inode = new_name->d_inode;
-+
-+	dplug = inode_dir_plugin(old_dir);
-+	/* set only after the target name was successfully replaced */
-+	fplug = NULL;
-+
-+	new_fsdata = reiser4_get_dentry_fsdata(new_name);
-+	if (IS_ERR(new_fsdata)) {
-+		result = PTR_ERR(new_fsdata);
-+		goto exit;
-+	}
-+
-+	new_coord = &new_fsdata->dec.entry_coord;
-+	coord_clear_iplug(new_coord);
-+
-+	is_dir = S_ISDIR(old_inode->i_mode);
-+
-+	assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
-+
-+	/* if target is existing directory and it's not empty---return error.
-+
-+	   This check is done specifically, because is_dir_empty() requires
-+	   tree traversal and have to be done before locks are taken.
-+	 */
-+	if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) {
-+		/* leave through "exit": a bare return here would skip
-+		 * reiser4_exit_context() for the context entered above */
-+		result = RETERR(-ENOTEMPTY);
-+		goto exit;
-+	}
-+
-+	result = can_rename(old_dir, old_inode, new_dir, new_inode);
-+	if (result != 0)
-+		goto exit;
-+
-+	result = hashed_rename_estimate_and_grab(old_dir, old_name,
-+						 new_dir, new_name);
-+	if (result != 0)
-+		goto exit;
-+
-+	init_lh(&new_lh);
-+
-+	/* find entry for @new_name */
-+	result = reiser4_find_entry(new_dir, new_name, &new_lh,
-+				    ZNODE_WRITE_LOCK, &new_entry);
-+
-+	if (IS_CBKERR(result)) {
-+		done_lh(&new_lh);
-+		goto exit;
-+	}
-+
-+	reiser4_seal_done(&new_fsdata->dec.entry_seal);
-+
-+	/* add or replace name for @old_inode as @new_name */
-+	if (new_inode != NULL) {
-+		/* target (@new_name) exists. */
-+		/* Not clear what to do with objects that are
-+		   both directories and files at the same time. */
-+		if (result == CBK_COORD_FOUND) {
-+			result = replace_name(old_inode,
-+					      new_dir,
-+					      new_inode, new_coord, &new_lh);
-+			if (result == 0)
-+				fplug = inode_file_plugin(new_inode);
-+		} else if (result == CBK_COORD_NOTFOUND) {
-+			/* VFS told us that @new_name is bound to existing
-+			   inode, but we failed to find directory entry. */
-+			warning("nikita-2324", "Target not found");
-+			result = RETERR(-ENOENT);
-+		}
-+	} else {
-+		/* target (@new_name) doesn't exists. */
-+		if (result == CBK_COORD_NOTFOUND)
-+			result = add_name(old_inode,
-+					  new_dir,
-+					  new_name, new_coord, &new_lh, is_dir);
-+		else if (result == CBK_COORD_FOUND) {
-+			/* VFS told us that @new_name is "negative" dentry,
-+			   but we found directory entry. */
-+			warning("nikita-2331", "Target found unexpectedly");
-+			result = RETERR(-EIO);
-+		}
-+	}
-+
-+	assert("nikita-3462", ergo(result == 0,
-+				   old_inode->i_nlink >= 2 + !!is_dir));
-+
-+	/* We are done with all modifications to the @new_dir, release lock on
-+	   node. */
-+	done_lh(&new_lh);
-+
-+	if (fplug != NULL) {
-+		/* detach @new_inode from name-space */
-+		result = fplug->detach(new_inode, new_dir);
-+		if (result != 0)
-+			warning("nikita-2330", "Cannot detach %lli: %i. %s",
-+				(unsigned long long)get_inode_oid(new_inode),
-+				result, possible_leak);
-+	}
-+
-+	if (new_inode != NULL)
-+		reiser4_update_sd(new_inode);
-+
-+	if (result == 0) {
-+		memset(&old_entry, 0, sizeof old_entry);
-+		old_entry.obj = old_inode;
-+
-+		dplug->build_entry_key(old_dir,
-+				       &old_name->d_name, &old_entry.key);
-+
-+		/* At this stage new name was introduced for
-+		   @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
-+		   counters were updated.
-+
-+		   We want to remove @old_name now. If @old_inode wasn't
-+		   directory this is simple.
-+		 */
-+		result = dplug->rem_entry(old_dir, old_name, &old_entry);
-+		/*result = rem_entry_hashed(old_dir, old_name, &old_entry); */
-+		if (result != 0 && result != -ENOMEM) {
-+			warning("nikita-2335",
-+				"Cannot remove old name: %i", result);
-+		} else {
-+			result = reiser4_del_nlink(old_inode, old_dir, 0);
-+			if (result != 0 && result != -ENOMEM) {
-+				warning("nikita-2337",
-+					"Cannot drop link on old: %i", result);
-+			}
-+		}
-+
-+		if (result == 0 && is_dir) {
-+			/* @old_inode is directory. We also have to update
-+			   dotdot entry. */
-+			coord_t *dotdot_coord;
-+			lock_handle dotdot_lh;
-+			struct dentry dotdot_name;
-+			reiser4_dir_entry_desc dotdot_entry;
-+			reiser4_dentry_fsdata dataonstack;
-+			reiser4_dentry_fsdata *fsdata;
-+
-+			memset(&dataonstack, 0, sizeof dataonstack);
-+			memset(&dotdot_entry, 0, sizeof dotdot_entry);
-+			dotdot_entry.obj = old_dir;
-+			memset(&dotdot_name, 0, sizeof dotdot_name);
-+			dotdot_name.d_name.name = "..";
-+			dotdot_name.d_name.len = 2;
-+			/*
-+			 * allocate ->d_fsdata on the stack to avoid using
-+			 * reiser4_get_dentry_fsdata(). Locking is not needed,
-+			 * because dentry is private to the current thread.
-+			 */
-+			dotdot_name.d_fsdata = &dataonstack;
-+			init_lh(&dotdot_lh);
-+
-+			fsdata = &dataonstack;
-+			dotdot_coord = &fsdata->dec.entry_coord;
-+			coord_clear_iplug(dotdot_coord);
-+
-+			result = reiser4_find_entry(old_inode,
-+						    &dotdot_name,
-+						    &dotdot_lh,
-+						    ZNODE_WRITE_LOCK,
-+						    &dotdot_entry);
-+			if (result == 0) {
-+				/* replace_name() decreases i_nlink on
-+				 * @old_dir */
-+				result = replace_name(new_dir,
-+						      old_inode,
-+						      old_dir,
-+						      dotdot_coord, &dotdot_lh);
-+			} else
-+				result = RETERR(-EIO);
-+			done_lh(&dotdot_lh);
-+		}
-+	}
-+	reiser4_update_dir(new_dir);
-+	reiser4_update_dir(old_dir);
-+	reiser4_update_sd(old_inode);
-+
-+	/* add safe-link for target file (in case we removed
-+	 * last reference to the poor fellow */
-+	if (result == 0 && new_inode != NULL && new_inode->i_nlink == 0)
-+		result = safe_link_add(new_inode, SAFE_UNLINK);
-+      exit:
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+#endif
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/acl.h linux-2.6.20/fs/reiser4/plugin/item/acl.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/acl.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/acl.h 2007-05-06 14:50:43.799006970 +0400
-@@ -0,0 +1,66 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
-+#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+#include <linux/fs.h>
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+/* Layout of one directory entry: the id of the object's stat-data key,
-+ immediately followed by the entry name. */
-+typedef struct directory_entry_format {
-+ /* key of object stat-data. It's not necessary to store whole
-+ key here, because it's always key of stat-data, so minor
-+ packing locality and offset can be omitted here. But this
-+ relies on particular key allocation scheme for stat-data, so,
-+ for extensibility sake, whole key can be stored here.
-+
-+ We store key as array of bytes, because we don't want 8-byte
-+ alignment of dir entries.
-+ */
-+ obj_key_id id;
-+ /* file name. Null terminated string. Declared as a zero-length
-+ trailing array: the name bytes start right after @id and are not
-+ counted in sizeof(directory_entry_format). */
-+ d8 name[0];
-+} directory_entry_format;
-+
-+void print_de(const char *prefix, coord_t * coord);
-+int extract_key_de(const coord_t * coord, reiser4_key * key);
-+int update_key_de(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh);
-+char *extract_name_de(const coord_t * coord, char *buf);
-+unsigned extract_file_type_de(const coord_t * coord);
-+int add_entry_de(struct inode *dir, coord_t * coord,
-+ lock_handle * lh, const struct dentry *name,
-+ reiser4_dir_entry_desc * entry);
-+int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
-+ lock_handle * lh, reiser4_dir_entry_desc * entry);
-+int max_name_len_de(const struct inode *dir);
-+
-+int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
-+
-+char *extract_dent_name(const coord_t * coord,
-+ directory_entry_format * dent, char *buf);
-+
-+#if REISER4_LARGE_KEY
-+#define DE_NAME_BUF_LEN (24)
-+#else
-+#define DE_NAME_BUF_LEN (16)
-+#endif
-+
-+/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/blackbox.c linux-2.6.20/fs/reiser4/plugin/item/blackbox.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/blackbox.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/blackbox.c 2007-05-06 14:50:43.799006970 +0400
-@@ -0,0 +1,142 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Black box item implementation */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../coord.h"
-+#include "../../tree.h"
-+#include "../../lock.h"
-+
-+#include "blackbox.h"
-+#include "item.h"
-+#include "../plugin.h"
-+
-+/*
-+ * Insert a black box item under @key on the leaf level of @tree. The item
-+ * body is described by @data and @length. Returns whatever insert_by_key()
-+ * returns.
-+ */
-+int
-+store_black_box(reiser4_tree * tree,
-+		const reiser4_key * key, void *data, int length)
-+{
-+	reiser4_item_data item;
-+	lock_handle lock;
-+	coord_t coord;
-+	int result;
-+
-+	/* describe the new item: everything not set below stays zero */
-+	memset(&item, 0, sizeof(item));
-+	item.iplug = item_plugin_by_id(BLACK_BOX_ID);
-+	item.length = length;
-+	item.user = 0;
-+	item.data = data;
-+
-+	init_lh(&lock);
-+	result = insert_by_key(tree, key,
-+			       &item, &coord, &lock, LEAF_LEVEL, CBK_UNIQUE);
-+
-+	/* on success the inserted item must have exactly @length bytes */
-+	assert("nikita-3413",
-+	       ergo(result == 0,
-+		    WITH_COORD(&coord,
-+			       item_length_by_coord(&coord) == length)));
-+
-+	done_lh(&lock);
-+	return result;
-+}
-+
-+/*
-+ * Look up a black box item in @tree and copy its body into @data.
-+ *
-+ * @key:    key to look for; overwritten with the key of the found unit when
-+ *          the body is copied;
-+ * @data:   destination buffer of @length bytes;
-+ * @exact:  non-zero - look up with FIND_EXACT, zero - with
-+ *          FIND_MAX_NOT_MORE_THAN.
-+ *
-+ * Returns the lookup or zload() error if either fails. When the item does
-+ * not fit into @length bytes nothing is copied; that is reported as -EIO
-+ * (with a warning) only if @exact is set, otherwise 0 is returned.
-+ */
-+int
-+load_black_box(reiser4_tree * tree,
-+	       reiser4_key * key, void *data, int length, int exact)
-+{
-+	int result;
-+	int ilen;
-+	coord_t coord;
-+	lock_handle lh;
-+
-+	init_lh(&lh);
-+	result = coord_by_key(tree, key,
-+			      &coord, &lh, ZNODE_READ_LOCK,
-+			      exact ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN,
-+			      LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
-+	if (result != 0)
-+		goto out;
-+
-+	result = zload(coord.node);
-+	if (result != 0)
-+		goto out;
-+
-+	ilen = item_length_by_coord(&coord);
-+	if (ilen <= length) {
-+		memcpy(data, item_body_by_coord(&coord), ilen);
-+		unit_key_by_coord(&coord, key);
-+	} else if (exact) {
-+		/*
-+		 * item is larger than buffer provided by the
-+		 * user. Only issue a warning if @exact is
-+		 * set. If @exact is false, we are iterating
-+		 * over all safe-links and here we are reaching
-+		 * the end of the iteration.
-+		 */
-+		warning("nikita-3415",
-+			"Wrong black box length: %i > %i",
-+			ilen, length);
-+		result = RETERR(-EIO);
-+	}
-+	zrelse(coord.node);
-+
-+      out:
-+	/* the lock handle is released on every path */
-+	done_lh(&lh);
-+	return result;
-+}
-+
-+/*
-+ * Overwrite the first @length bytes of the black box item stored under
-+ * exactly @key with @data.
-+ *
-+ * Returns the lookup or zload() error if either fails, and -EIO (with a
-+ * warning) when @length exceeds the length of the existing item.
-+ */
-+int
-+update_black_box(reiser4_tree * tree,
-+		 const reiser4_key * key, void *data, int length)
-+{
-+	int result;
-+	int ilen;
-+	coord_t coord;
-+	lock_handle lh;
-+
-+	init_lh(&lh);
-+	/* NOTE(review): the lookup asks for ZNODE_READ_LOCK although the item
-+	 * body is written below - confirm this is intended */
-+	result = coord_by_key(tree, key,
-+			      &coord, &lh, ZNODE_READ_LOCK,
-+			      FIND_EXACT,
-+			      LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
-+	if (result != 0)
-+		goto out;
-+
-+	result = zload(coord.node);
-+	if (result != 0)
-+		goto out;
-+
-+	ilen = item_length_by_coord(&coord);
-+	if (length <= ilen) {
-+		memcpy(item_body_by_coord(&coord), data, length);
-+	} else {
-+		warning("nikita-3437",
-+			"Wrong black box length: %i < %i",
-+			ilen, length);
-+		result = RETERR(-EIO);
-+	}
-+	zrelse(coord.node);
-+
-+      out:
-+	/* the lock handle is released on every path */
-+	done_lh(&lh);
-+	return result;
-+}
-+
-+/* Cut the key range [@key, @key] out of @tree with reiser4_cut_tree() and
-+ * return its result. */
-+int kill_black_box(reiser4_tree * tree, const reiser4_key * key)
-+{
-+	/* the range to cut starts and ends at the same key */
-+	const reiser4_key *from = key;
-+	const reiser4_key *to = key;
-+
-+	return reiser4_cut_tree(tree, from, to, NULL, 1);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/blackbox.h linux-2.6.20/fs/reiser4/plugin/item/blackbox.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/blackbox.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/blackbox.h 2007-05-06 14:50:43.799006970 +0400
-@@ -0,0 +1,33 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* "Black box" entry to contain fixed-width user supplied data */
-+
-+#if !defined( __FS_REISER4_BLACK_BOX_H__ )
-+#define __FS_REISER4_BLACK_BOX_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+extern int store_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length);
-+extern int load_black_box(reiser4_tree * tree,
-+ reiser4_key * key, void *data, int length, int exact);
-+extern int kill_black_box(reiser4_tree * tree, const reiser4_key * key);
-+extern int update_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length);
-+
-+/* __FS_REISER4_BLACK_BOX_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/cde.c linux-2.6.20/fs/reiser4/plugin/item/cde.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/cde.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/cde.c 2007-05-06 14:50:43.799006970 +0400
-@@ -0,0 +1,1008 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry implementation */
-+
-+/* DESCRIPTION:
-+
-+ This is "compound" directory item plugin implementation. This directory
-+ item type is compound (as opposed to the "simple directory item" in
-+ fs/reiser4/plugin/item/sde.[ch]), because it consists of several directory
-+ entries.
-+
-+ The reason behind this decision is disk space efficiency: all directory
-+ entries inside the same directory have identical fragment in their
-+ keys. This, of course, depends on key assignment policy. In our default key
-+ assignment policy, all directory entries have the same locality which is
-+ equal to the object id of their directory.
-+
-+ Composing directory item out of several directory entries for the same
-+ directory allows us to store said key fragment only once. That is, this is
-+ some ad hoc form of key compression (stem compression) that is implemented
-+ here, because general key compression is not supposed to be implemented in
-+ v4.0.
-+
-+ Another decision that was made regarding all directory item plugins, is
-+ that they will store entry keys unaligned. This is for the sake of disk
-+ space efficiency again.
-+
-+ It should be noted that storing keys unaligned increases CPU consumption,
-+ at least on some architectures.
-+
-+ Internal on-disk structure of the compound directory item is the following:
-+
-+ HEADER cde_item_format. Here number of entries is stored.
-+ ENTRY_HEADER_0 cde_unit_header. Here part of entry key and
-+ ENTRY_HEADER_1 offset of entry body are stored.
-+ ENTRY_HEADER_2 (basically two last parts of key)
-+ ...
-+ ENTRY_HEADER_N
-+ ENTRY_BODY_0 directory_entry_format. Here part of stat data key and
-+ ENTRY_BODY_1 NUL-terminated name are stored.
-+ ENTRY_BODY_2 (part of stat-data key in the
-+ sense that since all SDs have
-+ zero offset, this offset is not
-+ stored on disk).
-+ ...
-+ ENTRY_BODY_N
-+
-+ When it comes to the balancing, each directory entry in compound directory
-+ item is unit, that is, something that can be cut from one item and pasted
-+ into another item of the same type. Handling of unit cut and paste is major
-+ reason for the complexity of code below.
-+
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "sde.h"
-+#include "cde.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/quotaops.h>
-+
-+/* CHECKME(coord): optional consistency check of the item at @coord. The
-+ checking variant is compiled out by "#if 0", so CHECKME() currently expands
-+ to noop. NOTE(review): the disabled variant calls cde_check(), while the
-+ check routine defined in this file is reiser4_check_cde() - confirm the
-+ name before re-enabling. */
-+#if 0
-+#define CHECKME(coord) \
-+({ \
-+ const char *message; \
-+ coord_t dup; \
-+ \
-+ coord_dup_nocheck(&dup, (coord)); \
-+ dup.unit_pos = 0; \
-+ assert("nikita-2871", cde_check(&dup, &message) == 0); \
-+})
-+#else
-+#define CHECKME(coord) noop
-+#endif
-+
-+/* body of the item at @coord, viewed as a compound directory item header */
-+static inline cde_item_format *formatted_at(const coord_t * coord)
-+{
-+	cde_item_format *format;
-+
-+	assert("nikita-1282", coord != NULL);
-+	format = item_body_by_coord(coord);
-+	return format;
-+}
-+
-+/* header of the @idx-th entry of the compound directory item at @coord */
-+static inline cde_unit_header *header_at(const coord_t * coord /* item */ ,
-+					 int idx /* unit index */ )
-+{
-+	cde_item_format *format;
-+
-+	assert("nikita-1283", coord != NULL);
-+	format = formatted_at(coord);
-+	return format->entry + idx;
-+}
-+
-+/* number of entries recorded in the header of the item at @coord */
-+static int units(const coord_t * coord /* coord of item */ )
-+{
-+	cde_item_format *format = formatted_at(coord);
-+
-+	return le16_to_cpu(get_unaligned(&format->num_of_entries));
-+}
-+
-+/* offset of the body of the @idx-th entry in @coord. For @idx equal to the
-+   number of units the length of the item is returned, i.e. the offset just
-+   past the last body. Larger indices are reported through impossible(). */
-+static unsigned int offset_of(const coord_t * coord /* coord of item */ ,
-+			      int idx /* index of unit */ )
-+{
-+	int nr = units(coord);
-+
-+	if (idx == nr)
-+		return item_length_by_coord(coord);
-+	if (idx > nr) {
-+		impossible("nikita-1308", "Wrong idx");
-+		return 0;
-+	}
-+	return le16_to_cpu(get_unaligned(&header_at(coord, idx)->offset));
-+}
-+
-+/* store @offset (truncated to 16 bits, little-endian) as the body offset of
-+   the @idx-th entry in @coord */
-+static void set_offset(const coord_t * coord /* coord of item */ ,
-+		       int idx /* index of unit */ ,
-+		       unsigned int offset /* new offset */ )
-+{
-+	cde_unit_header *header = header_at(coord, idx);
-+	__u16 value = (__u16) offset;
-+
-+	put_unaligned(cpu_to_le16(value), &header->offset);
-+}
-+
-+/* add @delta to the stored body offset of the @idx-th entry in @coord */
-+static void adj_offset(const coord_t * coord /* coord of item */ ,
-+		       int idx /* index of unit */ ,
-+		       int delta /* offset change */ )
-+{
-+	d16 *slot = &header_at(coord, idx)->offset;
-+	__u16 value = le16_to_cpu(get_unaligned(slot));
-+
-+	value += delta;
-+	put_unaligned(cpu_to_le16(value), slot);
-+}
-+
-+/* address of the byte that lies @offset bytes into the body of @coord */
-+static char *address(const coord_t * coord /* coord of item */ ,
-+		     int offset)
-+{
-+	char *body = (char *)item_body_by_coord(coord);
-+
-+	return body + offset;
-+}
-+
-+/* body of the @idx-th entry in @coord, located through its stored offset */
-+static directory_entry_format *entry_at(const coord_t * coord /* item */ ,
-+					int idx /* index of unit */ )
-+{
-+	int body_offset = (int)offset_of(coord, idx);
-+
-+	return (directory_entry_format *) address(coord, body_offset);
-+}
-+
-+/* unit position stored in @coord, as an int */
-+static int idx_of(const coord_t * coord /* coord of item */ )
-+{
-+	int unit;
-+
-+	assert("nikita-1285", coord != NULL);
-+	unit = coord->unit_pos;
-+	return unit;
-+}
-+
-+/* find position where entry with @entry_key would be inserted into @coord.
-+
-+ While the candidate range is at least REISER4_SEQ_SEARCH_BREAK wide it is
-+ narrowed by bisection; the remainder is scanned sequentially.
-+
-+ Returns the index of the entry at which the search stopped: the first
-+ entry of a run comparing EQUAL_TO @entry_key, or the first entry that
-+ does not compare LESS_THAN it. If every scanned entry compares LESS_THAN,
-+ -ENOENT is returned through RETERR().
-+
-+ The result of the last comparison stored by this function is left in
-+ @last; if no comparison is performed (e.g. an item without entries) @last
-+ is not written. */
-+static int find(const coord_t * coord /* coord of item */ ,
-+ const reiser4_key * entry_key /* key to look for */ ,
-+ cmp_t * last /* result of last comparison */ )
-+{
-+ int entries;
-+
-+ int left;
-+ int right;
-+
-+ cde_unit_header *header;
-+
-+ assert("nikita-1295", coord != NULL);
-+ assert("nikita-1296", entry_key != NULL);
-+ assert("nikita-1297", last != NULL);
-+
-+ entries = units(coord);
-+ left = 0;
-+ right = entries - 1;
-+ /* bisection phase */
-+ while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
-+ int median;
-+
-+ median = (left + right) >> 1;
-+
-+ header = header_at(coord, median);
-+ *last = de_id_key_cmp(&header->hash, entry_key);
-+ switch (*last) {
-+ case LESS_THAN:
-+ left = median;
-+ break;
-+ case GREATER_THAN:
-+ right = median;
-+ break;
-+ case EQUAL_TO:{
-+ /* step back over the run of equal hashes so
-+ that the first of them is returned; @last
-+ keeps the EQUAL_TO stored above */
-+ do {
-+ median--;
-+ header--;
-+ } while (median >= 0 &&
-+ de_id_key_cmp(&header->hash,
-+ entry_key) == EQUAL_TO);
-+ return median + 1;
-+ }
-+ }
-+ }
-+ /* sequential phase: stop at the first entry that is not LESS_THAN */
-+ header = header_at(coord, left);
-+ for (; left < entries; ++left, ++header) {
-+ prefetch(header + 1);
-+ *last = de_id_key_cmp(&header->hash, entry_key);
-+ if (*last != LESS_THAN)
-+ break;
-+ }
-+ if (left < entries)
-+ return left;
-+ else
-+ return RETERR(-ENOENT);
-+
-+}
-+
-+/* expand @coord as to accommodate for insertion of @no new entries starting
-+ from @pos, with total bodies size @size.
-+
-+ Entry headers from @pos on are moved up by @no header slots, the bodies
-+ from @pos on are moved up by a further @data_size bytes, the entry counter
-+ is increased by @no and the stored offsets are adjusted to match. Apart
-+ from the offset of the first new header when appending, the new headers
-+ and bodies are not filled in here. Always returns 0.
-+
-+ NOTE(review): the callers in this file (expand() and copy_units_cde())
-+ pass the item length minus the newly added space as @size and the size of
-+ the new entry bodies as @data_size, which does not match the wording of
-+ the parameter comments below - confirm the intended meaning. */
-+static int expand_item(const coord_t * coord /* coord of item */ ,
-+ int pos /* unit position */ , int no /* number of new
-+ * units*/ ,
-+ int size /* total size of new units' data */ ,
-+ unsigned int data_size /* free space already reserved
-+ * in the item for insertion */ )
-+{
-+ int entries;
-+ cde_unit_header *header;
-+ char *dent;
-+ int i;
-+
-+ assert("nikita-1310", coord != NULL);
-+ assert("nikita-1311", pos >= 0);
-+ assert("nikita-1312", no > 0);
-+ assert("nikita-1313", data_size >= no * sizeof(directory_entry_format));
-+ assert("nikita-1343",
-+ item_length_by_coord(coord) >=
-+ (int)(size + data_size + no * sizeof *header));
-+
-+ entries = units(coord);
-+
-+ /* @dent: start of the bodies that have to move (or the end of the
-+ used area when appending) */
-+ if (pos == entries)
-+ dent = address(coord, size);
-+ else
-+ dent = (char *)entry_at(coord, pos);
-+ /* place where new header will be in */
-+ header = header_at(coord, pos);
-+ /* free space for new entry headers */
-+ memmove(header + no, header,
-+ (unsigned)(address(coord, size) - (char *)header));
-+ /* if adding to the end initialise first new header */
-+ if (pos == entries) {
-+ set_offset(coord, pos, (unsigned)size);
-+ }
-+
-+ /* adjust entry pointer and size */
-+ dent = dent + no * sizeof *header;
-+ size += no * sizeof *header;
-+ /* free space for new entries */
-+ memmove(dent + data_size, dent,
-+ (unsigned)(address(coord, size) - dent));
-+
-+ /* increase counter */
-+ entries += no;
-+ put_unaligned(cpu_to_le16((__u16) entries), &formatted_at(coord)->num_of_entries);
-+
-+ /* [ 0 ... pos ] entries were shifted by no * ( sizeof *header )
-+ bytes. */
-+ for (i = 0; i <= pos; ++i)
-+ adj_offset(coord, i, no * sizeof *header);
-+ /* [ pos + no ... +\infty ) entries were shifted by ( no *
-+ sizeof *header + data_size ) bytes */
-+ for (i = pos + no; i < entries; ++i)
-+ adj_offset(coord, i, no * sizeof *header + data_size);
-+ return 0;
-+}
-+
-+/* make room in the item at @coord for one new entry described by @entry and
-+   @dir_entry; the chosen unit position is returned through @pos. Always
-+   returns 0. */
-+static int expand(const coord_t * coord /* coord of item */ ,
-+		  cde_entry * entry /* entry to insert */ ,
-+		  int len /* length of @entry data */ ,
-+		  int *pos /* position to insert */ ,
-+		  reiser4_dir_entry_desc * dir_entry /* parameters for new
-+						      * entry */ )
-+{
-+	cmp_t unused_cmp;
-+	int body_size = sizeof(directory_entry_format);
-+	int where;
-+
-+	where = find(coord, &dir_entry->key, &unused_cmp);
-+	/* a negative result from find() means "append after the last unit" */
-+	*pos = (where < 0) ? units(coord) : where;
-+
-+	/* only long names occupy space in the entry body */
-+	if (is_longname(entry->name->name, entry->name->len))
-+		body_size += entry->name->len + 1;
-+
-+	expand_item(coord, *pos, 1, item_length_by_coord(coord) - len,
-+		    body_size);
-+	return 0;
-+}
-+
-+/* paste body of @entry into item: fill in the header and the body of the
-+   @pos-th unit of @coord. The space for the unit must already exist (in
-+   paste_cde() it is prepared by expand() right before this call).
-+
-+   The header hash is built from @dir_entry->key, the body id from
-+   @entry->obj. The name is copied into the body only when is_longname()
-+   says so. Always returns 0. */
-+static int paste_entry(const coord_t * coord /* coord of item */ ,
-+		       cde_entry * entry /* new entry */ ,
-+		       int pos /* position to insert */ ,
-+		       reiser4_dir_entry_desc * dir_entry /* parameters for
-+							   * new entry */ )
-+{
-+	cde_unit_header *header;
-+	directory_entry_format *dent;
-+	const char *name;
-+	int len;
-+
-+	header = header_at(coord, pos);
-+	dent = entry_at(coord, pos);
-+
-+	build_de_id_by_key(&dir_entry->key, &header->hash);
-+	build_inode_key_id(entry->obj, &dent->id);
-+
-+	name = entry->name->name;
-+	len = entry->name->len;
-+	if (is_longname(name, len)) {
-+		/* Copy exactly @len bytes and terminate explicitly. The
-+		   space reserved for a long name is len + 1 bytes (see
-+		   expand()), so a bounded copy cannot write past it, whereas
-+		   strcpy() relied on @name being NUL-terminated at @len. */
-+		memcpy((char *)dent->name, name, len);
-+		put_unaligned(0, &dent->name[len]);
-+	}
-+	return 0;
-+}
-+
-+/* compute how many bytes the entries described by @data need in an item.
-+   With @coord == NULL (insertion of a new item) the item header is
-+   included, otherwise (paste) it is not. The result is also stored in
-+   @data->length. */
-+int estimate_cde(const coord_t * coord /* coord of item */ ,
-+		 const reiser4_item_data * data /* parameters for new item */ )
-+{
-+	cde_entry_data *batch = (cde_entry_data *) data->data;
-+	int total;
-+	int k;
-+
-+	assert("nikita-1288", batch != NULL);
-+	assert("nikita-1289", batch->num_of_entries >= 0);
-+
-+	total = (coord == NULL) ? sizeof(cde_item_format) : 0;
-+
-+	/* fixed cost of every entry: unit header plus entry body */
-+	total += batch->num_of_entries *
-+	    (sizeof(cde_unit_header) + sizeof(directory_entry_format));
-+
-+	/* long names additionally take their length plus a terminating NUL */
-+	for (k = 0; k < batch->num_of_entries; ++k) {
-+		const char *name = batch->entry[k].name->name;
-+		int len = batch->entry[k].name->len;
-+
-+		assert("nikita-2054", strlen(name) == len);
-+		if (is_longname(name, len))
-+			total += len + 1;
-+	}
-+	((reiser4_item_data *) data)->length = total;
-+	return total;
-+}
-+
-+/* ->nr_units() method for this item plugin: number of entries in the item */
-+pos_in_node_t nr_units_cde(const coord_t * coord /* coord of item */ )
-+{
-+	pos_in_node_t count = units(coord);
-+
-+	return count;
-+}
-+
-+/* ->unit_key() method for this item plugin: build in @key the key of the
-+   unit @coord points to, from the item key and the hash kept in the unit
-+   header. Returns @key. */
-+reiser4_key *unit_key_cde(const coord_t * coord /* coord of item */ ,
-+			  reiser4_key * key /* resulting key */ )
-+{
-+	cde_unit_header *header;
-+
-+	assert("nikita-1452", coord != NULL);
-+	assert("nikita-1345", idx_of(coord) < units(coord));
-+	assert("nikita-1346", key != NULL);
-+
-+	header = header_at(coord, idx_of(coord));
-+	item_key_by_coord(coord, key);
-+	extract_key_from_de_id(extract_dir_id_from_key(key), &header->hash,
-+			       key);
-+	return key;
-+}
-+
-+/* mergeable_cde(): implementation of ->mergeable() item method.
-+
-+   Two items are mergeable when they are handled by the same item plugin and
-+   the directory ids extracted from their keys are equal.
-+*/
-+int mergeable_cde(const coord_t * p1 /* coord of first item */ ,
-+		  const coord_t * p2 /* coord of second item */ )
-+{
-+	reiser4_key k1;
-+	reiser4_key k2;
-+
-+	assert("nikita-1339", p1 != NULL);
-+	assert("nikita-1340", p2 != NULL);
-+
-+	if (item_plugin_by_coord(p1) != item_plugin_by_coord(p2))
-+		return 0;
-+
-+	return extract_dir_id_from_key(item_key_by_coord(p1, &k1)) ==
-+	    extract_dir_id_from_key(item_key_by_coord(p2, &k2));
-+}
-+
-+/* ->max_key_inside() method for this item plugin: start from the item key
-+   and replace its ordering, full object id and offset by those of
-+   reiser4_max_key(). Returns @result. */
-+reiser4_key *max_key_inside_cde(const coord_t * coord /* coord of item */ ,
-+				reiser4_key * result /* resulting key */ )
-+{
-+	const reiser4_key *max;
-+
-+	assert("nikita-1342", coord != NULL);
-+
-+	max = reiser4_max_key();
-+	item_key_by_coord(coord, result);
-+	set_key_ordering(result, get_key_ordering(max));
-+	set_key_fulloid(result, get_key_fulloid(max));
-+	set_key_offset(result, get_key_offset(max));
-+	return result;
-+}
-+
-+/* check whether a unit with @key, described by @data, may live in the item
-+   at @coord: the item must use the plugin named in @data and @key must
-+   carry the same directory id as the item key. */
-+int can_contain_key_cde(const coord_t * coord /* coord of item */ ,
-+			const reiser4_key * key /* key to check */ ,
-+			const reiser4_item_data * data /* parameters of new
-+							* item/unit being
-+							* created */ )
-+{
-+	reiser4_key item_key;
-+
-+	/* FIXME-VS: do not rely on anything but iplug field of @data. Only
-+	   data->iplug is initialized */
-+	assert("vs-457", data && data->iplug);
-+	item_key_by_coord(coord, &item_key);
-+
-+	if (item_plugin_by_coord(coord) != data->iplug)
-+		return 0;
-+
-+	return extract_dir_id_from_key(&item_key) ==
-+	    extract_dir_id_from_key(key);
-+}
-+
-+#if REISER4_DEBUG
-+/* reiser4_check_cde ->check() method for compressed directory items
-+
-+ used for debugging, every item should have here the most complete
-+ possible check of the consistency of the item that the inventor can
-+ construct
-+
-+ Returns 0 when no inconsistency was found; otherwise returns -1 and
-+ stores a static description of the first problem found in *@error.
-+*/
-+int reiser4_check_cde(const coord_t * coord /* coord of item to check */,
-+ const char **error /* where to store error message */)
-+{
-+ int i;
-+ int result;
-+ char *item_start;
-+ char *item_end;
-+ reiser4_key key;
-+
-+ /* NOTE(review): @c is only written by coord_dup() below, never read */
-+ coord_t c;
-+
-+ assert("nikita-1357", coord != NULL);
-+ assert("nikita-1358", error != NULL);
-+
-+ /* for an item that is not first in its node the item key is required
-+ to satisfy is_dot_key() */
-+ if (!ergo(coord->item_pos != 0,
-+ is_dot_key(item_key_by_coord(coord, &key)))) {
-+ *error = "CDE doesn't start with dot";
-+ return -1;
-+ }
-+ item_start = item_body_by_coord(coord);
-+ item_end = item_start + item_length_by_coord(coord);
-+
-+ coord_dup(&c, coord);
-+ result = 0;
-+ for (i = 0; i < units(coord); ++i) {
-+ directory_entry_format *entry;
-+
-+ /* the header array must leave room for at least one minimal
-+ entry body per unit at the end of the item */
-+ if ((char *)(header_at(coord, i) + 1) >
-+ item_end - units(coord) * sizeof *entry) {
-+ *error = "CDE header is out of bounds";
-+ result = -1;
-+ break;
-+ }
-+ /* the two checks below validate the position of the entry
-+ body, although their messages say "header" */
-+ entry = entry_at(coord, i);
-+ if ((char *)entry < item_start + sizeof(cde_item_format)) {
-+ *error = "CDE header is too low";
-+ result = -1;
-+ break;
-+ }
-+ if ((char *)(entry + 1) > item_end) {
-+ *error = "CDE header is too high";
-+ result = -1;
-+ break;
-+ }
-+ }
-+
-+ return result;
-+}
-+#endif
-+
-+/* ->init() method for this item plugin: record that the item at @coord
-+   holds no entries yet. @from and @data are not used. Always returns 0. */
-+int init_cde(coord_t * coord /* coord of item */ ,
-+	     coord_t * from UNUSED_ARG, reiser4_item_data * data /* structure used for insertion */
-+	     UNUSED_ARG)
-+{
-+	cde_item_format *format = formatted_at(coord);
-+
-+	put_unaligned(cpu_to_le16(0), &format->num_of_entries);
-+	return 0;
-+}
-+
-+/* ->lookup() method for this item plugin.
-+
-+ Positions @coord (unit_pos and between) inside the item according to
-+ where @key falls and returns:
-+ - keygt(item key, @key) holds: unit 0, BEFORE_UNIT, CBK_COORD_NOTFOUND;
-+ - find() stopped on an equal entry: that unit, AT_UNIT, CBK_COORD_FOUND;
-+ - find() stopped on a greater entry: that unit, BEFORE_UNIT, -ENOENT;
-+ - find() found no such entry: last unit, AFTER_UNIT, and
-+ CBK_COORD_FOUND only when @bias is FIND_MAX_NOT_MORE_THAN, otherwise
-+ CBK_COORD_NOTFOUND. */
-+lookup_result lookup_cde(const reiser4_key * key /* key to search for */ ,
-+ lookup_bias bias /* search bias */ ,
-+ coord_t * coord /* coord of item to lookup in */ )
-+{
-+ cmp_t last_comp;
-+ int pos;
-+
-+ reiser4_key utmost_key;
-+
-+ assert("nikita-1293", coord != NULL);
-+ assert("nikita-1294", key != NULL);
-+
-+ CHECKME(coord);
-+
-+ /* @utmost_key receives the key of the item itself here */
-+ if (keygt(item_key_by_coord(coord, &utmost_key), key)) {
-+ coord->unit_pos = 0;
-+ coord->between = BEFORE_UNIT;
-+ return CBK_COORD_NOTFOUND;
-+ }
-+ pos = find(coord, key, &last_comp);
-+ if (pos >= 0) {
-+ coord->unit_pos = (int)pos;
-+ switch (last_comp) {
-+ case EQUAL_TO:
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ case GREATER_THAN:
-+ coord->between = BEFORE_UNIT;
-+ return RETERR(-ENOENT);
-+ case LESS_THAN:
-+ default:
-+ /* find() does not return a position whose last
-+ comparison was LESS_THAN */
-+ impossible("nikita-1298", "Broken find");
-+ return RETERR(-EIO);
-+ }
-+ } else {
-+ /* find() reported a negative value: no entry stopped the
-+ search */
-+ coord->unit_pos = units(coord) - 1;
-+ coord->between = AFTER_UNIT;
-+ return (bias ==
-+ FIND_MAX_NOT_MORE_THAN) ? CBK_COORD_FOUND :
-+ CBK_COORD_NOTFOUND;
-+ }
-+}
-+
-+/* ->paste() method for this item plugin: for every entry described by
-+   @data make room in the item with expand() and fill the new unit in with
-+   paste_entry(). Stops at the first non-zero result and returns it. */
-+int paste_cde(coord_t * coord /* coord of item */ ,
-+	      reiser4_item_data * data /* parameters of new unit being
-+					* inserted */ ,
-+	      carry_plugin_info * info UNUSED_ARG /* todo carry queue */ )
-+{
-+	cde_entry_data *batch;
-+	int result = 0;
-+	int k;
-+
-+	CHECKME(coord);
-+	batch = (cde_entry_data *) data->data;
-+
-+	for (k = 0; k < batch->num_of_entries; ++k) {
-+		cde_entry *current_entry = batch->entry + k;
-+		int phantom_size = data->length;
-+		int pos;
-+
-+		/* while the item has no units the item header is not
-+		   counted in the size handed to expand() */
-+		if (units(coord) == 0)
-+			phantom_size -= sizeof(cde_item_format);
-+
-+		result = expand(coord, current_entry, phantom_size, &pos,
-+				data->arg);
-+		if (result == 0)
-+			result = paste_entry(coord, current_entry, pos,
-+					     data->arg);
-+		if (result != 0)
-+			break;
-+	}
-+	CHECKME(coord);
-+	return result;
-+}
-+
-+/* number of bytes taken by the item header together with the first
-+   @idx + 1 entries of @coord (their unit headers and their bodies) */
-+static unsigned int part_size(const coord_t * coord /* coord of item */ ,
-+			      int idx /* index of unit */ )
-+{
-+	unsigned int headers;
-+	unsigned int bodies;
-+
-+	assert("nikita-1299", coord != NULL);
-+	assert("nikita-1300", idx < (int)units(coord));
-+
-+	headers = sizeof(cde_item_format) +
-+	    (idx + 1) * sizeof(cde_unit_header);
-+	bodies = offset_of(coord, idx + 1) - offset_of(coord, 0);
-+	return headers + bodies;
-+}
-+
-+/* how many but not more than @want units of @source can be merged with
-+ item in @target node. If pend == append - we try to append last item
-+ of @target by first units of @source. If pend == prepend - we try to
-+ "prepend" first item in @target by last units of @source. @target
-+ node has @free_space bytes of free space. Total size of those units
-+ are returned via @size
-+
-+ Returns the number of units that fit; when it is 0, *@size is set to 0.
-+
-+ NOTE(review): the inline comment on @want says "bytes", but the code
-+ uses @want as a number of units (it is combined with units(coord)). */
-+int can_shift_cde(unsigned free_space /* free space in item */ ,
-+ coord_t * coord /* coord of source item */ ,
-+ znode * target /* target node */ ,
-+ shift_direction pend /* shift direction */ ,
-+ unsigned *size /* resulting number of shifted bytes */ ,
-+ unsigned want /* maximal number of bytes to shift */ )
-+{
-+ int shift;
-+
-+ CHECKME(coord);
-+ if (want == 0) {
-+ *size = 0;
-+ return 0;
-+ }
-+
-+ /* pend == SHIFT_LEFT <==> shifting to the left */
-+ if (pend == SHIFT_LEFT) {
-+ /* try ever shorter prefixes [0 .. shift] until one fits */
-+ for (shift = min((int)want - 1, units(coord)); shift >= 0;
-+ --shift) {
-+ *size = part_size(coord, shift);
-+ /* part_size() includes the item header; it is not
-+ counted when @target is given */
-+ if (target != NULL)
-+ *size -= sizeof(cde_item_format);
-+ if (*size <= free_space)
-+ break;
-+ }
-+ shift = shift + 1;
-+ } else {
-+ int total_size;
-+
-+ assert("nikita-1301", pend == SHIFT_RIGHT);
-+
-+ /* try ever shorter suffixes (shift .. last] until one fits */
-+ total_size = item_length_by_coord(coord);
-+ for (shift = units(coord) - want - 1; shift < units(coord) - 1;
-+ ++shift) {
-+ *size = total_size - part_size(coord, shift);
-+ if (target == NULL)
-+ *size += sizeof(cde_item_format);
-+ if (*size <= free_space)
-+ break;
-+ }
-+ /* convert the index of the last unit left behind into the
-+ number of units shifted */
-+ shift = units(coord) - shift - 1;
-+ }
-+ if (shift == 0)
-+ *size = 0;
-+ CHECKME(coord);
-+ return shift;
-+}
-+
-+/* ->copy_units() method for this item plugin.
-+
-+ Copies @count units of @source starting with unit @from into @target.
-+ For SHIFT_LEFT the units are appended after the existing units of
-+ @target (and @from must be 0); otherwise they are placed in front of
-+ them (and the copied range must end at the last unit of @source). The
-+ @free_space bytes are expected to be already present in @target; room
-+ inside the item is arranged with expand_item() and the offsets of the
-+ copied headers are rebased to @target. */
-+void copy_units_cde(coord_t * target /* coord of target item */ ,
-+ coord_t * source /* coord of source item */ ,
-+ unsigned from /* starting unit */ ,
-+ unsigned count /* how many units to copy */ ,
-+ shift_direction where_is_free_space /* shift direction */ ,
-+ unsigned free_space /* free space in item */ )
-+{
-+ char *header_from;
-+ char *header_to;
-+
-+ char *entry_from;
-+ char *entry_to;
-+
-+ int pos_in_target;
-+ int data_size;
-+ int data_delta;
-+ int i;
-+
-+ assert("nikita-1303", target != NULL);
-+ assert("nikita-1304", source != NULL);
-+ assert("nikita-1305", (int)from < units(source));
-+ assert("nikita-1307", (int)(from + count) <= units(source));
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ assert("nikita-1453", from == 0);
-+ pos_in_target = units(target);
-+ } else {
-+ assert("nikita-1309", (int)(from + count) == units(source));
-+ pos_in_target = 0;
-+ /* move the existing content of @target down to the start of
-+ the item body, so that the free space ends up at its end */
-+ memmove(item_body_by_coord(target),
-+ (char *)item_body_by_coord(target) + free_space,
-+ item_length_by_coord(target) - free_space);
-+ }
-+
-+ CHECKME(target);
-+ CHECKME(source);
-+
-+ /* expand @target */
-+ /* total size of the bodies being copied */
-+ data_size =
-+ offset_of(source, (int)(from + count)) - offset_of(source,
-+ (int)from);
-+
-+ if (units(target) == 0)
-+ free_space -= sizeof(cde_item_format);
-+
-+ expand_item(target, pos_in_target, (int)count,
-+ (int)(item_length_by_coord(target) - free_space),
-+ (unsigned)data_size);
-+
-+ /* copy first @count units of @source into @target */
-+ /* difference between body offsets in @target and in @source; applied
-+ to the copied headers below */
-+ data_delta =
-+ offset_of(target, pos_in_target) - offset_of(source, (int)from);
-+
-+ /* copy entries */
-+ entry_from = (char *)entry_at(source, (int)from);
-+ entry_to = (char *)entry_at(source, (int)(from + count));
-+ memmove(entry_at(target, pos_in_target), entry_from,
-+ (unsigned)(entry_to - entry_from));
-+
-+ /* copy headers */
-+ header_from = (char *)header_at(source, (int)from);
-+ header_to = (char *)header_at(source, (int)(from + count));
-+ memmove(header_at(target, pos_in_target), header_from,
-+ (unsigned)(header_to - header_from));
-+
-+ /* update offsets */
-+ for (i = pos_in_target; i < (int)(pos_in_target + count); ++i)
-+ adj_offset(target, i, data_delta);
-+ CHECKME(target);
-+ CHECKME(source);
-+}
-+
-+/* ->cut_units() method for this item plugin.
-+
-+ Removes units @from .. @to (inclusive) from the item at @coord and
-+ returns the number of bytes freed. When all units are to be removed the
-+ item is left untouched and its whole length is returned. If
-+ @smallest_removed is not NULL it receives the unit key taken at @coord;
-+ if @new_first is not NULL it receives the key of the unit following the
-+ one @coord points to. @cdata is not used. */
-+int cut_units_cde(coord_t * coord /* coord of item */ ,
-+ pos_in_node_t from /* start unit pos */ ,
-+ pos_in_node_t to /* stop unit pos */ ,
-+ struct carry_cut_data *cdata UNUSED_ARG,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ char *header_from;
-+ char *header_to;
-+
-+ char *entry_from;
-+ char *entry_to;
-+
-+ int size;
-+ int entry_delta;
-+ int header_delta;
-+ int i;
-+
-+ unsigned count;
-+
-+ CHECKME(coord);
-+
-+ count = to - from + 1;
-+
-+ assert("nikita-1454", coord != NULL);
-+ assert("nikita-1455", (int)(from + count) <= units(coord));
-+
-+ if (smallest_removed)
-+ unit_key_by_coord(coord, smallest_removed);
-+
-+ if (new_first) {
-+ coord_t next;
-+
-+ /* not everything is cut from item head */
-+ assert("vs-1527", from == 0);
-+ assert("vs-1528", to < units(coord) - 1);
-+
-+ coord_dup(&next, coord);
-+ next.unit_pos++;
-+ unit_key_by_coord(&next, new_first);
-+ }
-+
-+ size = item_length_by_coord(coord);
-+ /* the whole item goes away: nothing to rearrange */
-+ if (count == (unsigned)units(coord)) {
-+ return size;
-+ }
-+
-+ header_from = (char *)header_at(coord, (int)from);
-+ header_to = (char *)header_at(coord, (int)(from + count));
-+
-+ entry_from = (char *)entry_at(coord, (int)from);
-+ entry_to = (char *)entry_at(coord, (int)(from + count));
-+
-+ /* move headers */
-+ memmove(header_from, header_to,
-+ (unsigned)(address(coord, size) - header_to));
-+
-+ header_delta = header_to - header_from;
-+
-+ /* everything behind the removed headers has just moved down by
-+ @header_delta bytes */
-+ entry_from -= header_delta;
-+ entry_to -= header_delta;
-+ size -= header_delta;
-+
-+ /* copy entries */
-+ memmove(entry_from, entry_to,
-+ (unsigned)(address(coord, size) - entry_to));
-+
-+ entry_delta = entry_to - entry_from;
-+ size -= entry_delta;
-+
-+ /* update offsets */
-+
-+ /* bodies before the cut moved only by the removed headers */
-+ for (i = 0; i < (int)from; ++i)
-+ adj_offset(coord, i, -header_delta);
-+
-+ /* bodies after the cut moved by removed headers and bodies */
-+ for (i = from; i < units(coord) - (int)count; ++i)
-+ adj_offset(coord, i, -header_delta - entry_delta);
-+
-+ /* NOTE(review): the (__u16) cast binds to units(coord) only, the
-+ subtraction happens afterwards */
-+ put_unaligned(cpu_to_le16((__u16) units(coord) - count),
-+ &formatted_at(coord)->num_of_entries);
-+
-+ if (from == 0) {
-+ /* entries from head was removed - move remaining to right */
-+ memmove((char *)item_body_by_coord(coord) +
-+ header_delta + entry_delta, item_body_by_coord(coord),
-+ (unsigned)size);
-+ if (REISER4_DEBUG)
-+ memset(item_body_by_coord(coord), 0,
-+ (unsigned)header_delta + entry_delta);
-+ } else {
-+ /* freed space is already at the end of item */
-+ if (REISER4_DEBUG)
-+ memset((char *)item_body_by_coord(coord) + size, 0,
-+ (unsigned)header_delta + entry_delta);
-+ }
-+
-+ return header_delta + entry_delta;
-+}
-+
-+/* same as cut_units_cde(): @kdata is ignored and NULL is passed on in its
-+   place */
-+int kill_units_cde(coord_t * coord /* coord of item */ ,
-+		   pos_in_node_t from /* start unit pos */ ,
-+		   pos_in_node_t to /* stop unit pos */ ,
-+		   struct carry_kill_data *kdata UNUSED_ARG,
-+		   reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+	int freed;
-+
-+	freed = cut_units_cde(coord, from, to, NULL, smallest_removed,
-+			      new_first);
-+	return freed;
-+}
-+
-+/* ->s.dir.extract_key() method for this item plugin: build @key from the id
-+   stored in the body of the entry @coord points to */
-+int extract_key_cde(const coord_t * coord /* coord of item */ ,
-+		    reiser4_key * key /* resulting key */ )
-+{
-+	int unit;
-+
-+	assert("nikita-1155", coord != NULL);
-+	assert("nikita-1156", key != NULL);
-+
-+	unit = idx_of(coord);
-+	return extract_key_from_id(&entry_at(coord, unit)->id, key);
-+}
-+
-+/* directory item method (declared under plugin->u.item.s.dir.* in cde.h):
-+   store the object id built from @key in the body of the entry @coord
-+   points to and mark the node dirty. @lh is not used.
-+
-+   Returns 0 on success, or the error reported by build_obj_key_id(); in
-+   that case the entry is left unmodified and the node is not dirtied. */
-+int
-+update_key_cde(const coord_t * coord, const reiser4_key * key,
-+	       lock_handle * lh UNUSED_ARG)
-+{
-+	directory_entry_format *dent;
-+	obj_key_id obj_id;
-+	int result;
-+
-+	assert("nikita-2344", coord != NULL);
-+	assert("nikita-2345", key != NULL);
-+
-+	dent = entry_at(coord, idx_of(coord));
-+	result = build_obj_key_id(key, &obj_id);
-+	if (result == 0) {
-+		dent->id = obj_id;
-+		znode_make_dirty(coord->node);
-+	}
-+	/* report the failure instead of claiming success unconditionally */
-+	return result;
-+}
-+
-+/* ->s.dir.extract_name() method for this item plugin: hand the body of the
-+   entry @coord points to, together with @buf, to extract_dent_name() and
-+   return its result */
-+char *extract_name_cde(const coord_t * coord /* coord of item */ , char *buf)
-+{
-+	int unit;
-+
-+	assert("nikita-1157", coord != NULL);
-+
-+	unit = idx_of(coord);
-+	return extract_dent_name(coord, entry_at(coord, unit), buf);
-+}
-+
-+/* @data->length, less the size of the item header unless @pasting is set */
-+static int cde_bytes(int pasting, const reiser4_item_data * data)
-+{
-+	if (pasting)
-+		return data->length;
-+	return data->length - (int)sizeof(cde_item_format);
-+}
-+
-+/* ->s.dir.add_entry() method for this item plugin
-+
-+ Describes a single new entry (@dir, @dir_entry->obj, @name->d_name) in a
-+ reiser4_item_data, charges quota for it and then either inserts a new
-+ item (when is_dot_key() holds for @dir_entry->key) or resizes the item at
-+ @coord. Returns -EDQUOT if the quota allocation fails, otherwise the
-+ result of insert_by_coord() / reiser4_resize_item(). */
-+int add_entry_cde(struct inode *dir /* directory object */ ,
-+ coord_t * coord /* coord of item */ ,
-+ lock_handle * lh /* lock handle for insertion */ ,
-+ const struct dentry *name /* name to insert */ ,
-+ reiser4_dir_entry_desc * dir_entry /* parameters of new
-+ * directory entry */ )
-+{
-+ reiser4_item_data data;
-+ cde_entry entry;
-+ cde_entry_data edata;
-+ int result;
-+
-+ assert("nikita-1656", coord->node == lh->node);
-+ assert("nikita-1657", znode_is_write_locked(coord->node));
-+
-+ edata.num_of_entries = 1;
-+ edata.entry = &entry;
-+
-+ entry.dir = dir;
-+ entry.obj = dir_entry->obj;
-+ entry.name = &name->d_name;
-+
-+ data.data = (char *)&edata;
-+ data.user = 0; /* &edata is not user space */
-+ data.iplug = item_plugin_by_id(COMPOUND_DIR_ID);
-+ data.arg = dir_entry;
-+ assert("nikita-1302", data.iplug != NULL);
-+
-+ /* @result doubles as the "is this the dot entry" flag until the
-+ insertion below overwrites it */
-+ result = is_dot_key(&dir_entry->key);
-+ data.length = estimate_cde(result ? coord : NULL, &data);
-+
-+ /* NOTE-NIKITA quota plugin? */
-+ /* NOTE(review): the space charged here is not released in this
-+ function when the insertion below fails - confirm whether a caller
-+ takes care of that */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(dir, cde_bytes(result, &data)))
-+ return RETERR(-EDQUOT);
-+
-+ if (result)
-+ result = insert_by_coord(coord, &data, &dir_entry->key, lh, 0);
-+ else
-+ result = reiser4_resize_item(coord, &data, &dir_entry->key,
-+ lh, 0);
-+ return result;
-+}
-+
-+/* ->s.dir.rem_entry(): remove the entry @coord points to from directory
-+   @dir and release the quota accounted for it.
-+
-+   The accounted length is the size of a unit header plus an entry body,
-+   plus the name and its terminating NUL for long names. Returns -EIO when
-+   @dir accounts fewer bytes than that, otherwise the result of
-+   kill_node_content(). @lh and @entry are not used. */
-+int rem_entry_cde(struct inode *dir /* directory of item */ ,
-+		  const struct qstr *name, coord_t * coord /* coord of item */ ,
-+		  lock_handle * lh UNUSED_ARG /* lock handle for
-+					       * removal */ ,
-+		  reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
-+							     * directory entry
-+							     * being removed */ )
-+{
-+	coord_t shadow;
-+	int result;
-+	int length;
-+	ON_DEBUG(char buf[DE_NAME_BUF_LEN]);
-+
-+	assert("nikita-2870", strlen(name->name) == name->len);
-+	assert("nikita-2869",
-+	       !strcmp(name->name, extract_name_cde(coord, buf)));
-+
-+	length = sizeof(directory_entry_format) + sizeof(cde_unit_header);
-+	if (is_longname(name->name, name->len))
-+		length += name->len + 1;
-+
-+	if (inode_get_bytes(dir) < length) {
-+		/* both values are printed with %llu, so both are cast to
-+		   the type that conversion expects */
-+		warning("nikita-2628", "Dir is broke: %llu: %llu",
-+			(unsigned long long)get_inode_oid(dir),
-+			(unsigned long long)inode_get_bytes(dir));
-+
-+		return RETERR(-EIO);
-+	}
-+
-+	/* kill_node_content() is given pointers to _different_ coords,
-+	   because it may modify them without respect to possible aliasing.
-+	   To work around this, create temporary copy of @coord.
-+	 */
-+	coord_dup(&shadow, coord);
-+	result =
-+	    kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
-+	if (result == 0) {
-+		/* NOTE-NIKITA quota plugin? */
-+		DQUOT_FREE_SPACE_NODIRTY(dir, length);
-+	}
-+	return result;
-+}
-+
-+/* ->s.dir.max_name_len() method for this item plugin: the maximal item size
-+   of the node plugin of @dir's tree, less the item header, one unit header,
-+   one entry body and 2 more bytes */
-+int max_name_len_cde(const struct inode *dir /* directory */ )
-+{
-+	int overhead;
-+
-+	overhead = sizeof(directory_entry_format) + sizeof(cde_item_format) +
-+	    sizeof(cde_unit_header) + 2;
-+	return reiser4_tree_by_inode(dir)->nplug->max_item_size() - overhead;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/cde.h linux-2.6.20/fs/reiser4/plugin/item/cde.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/cde.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/cde.h 2007-05-06 14:50:43.803008220 +0400
-@@ -0,0 +1,87 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Compound directory item. See cde.c for description. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ )
-+#define __FS_REISER4_PLUGIN_COMPRESSED_DE_H__
-+
-+#include "../../forward.h"
-+#include "../../kassign.h"
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry, etc */
-+
-+/* on-disk header of one directory entry (ENTRY_HEADER in cde.c) */
-+typedef struct cde_unit_header {
-+ /* part of the entry key */
-+ de_id hash;
-+ /* offset of the entry body */
-+ d16 offset;
-+} cde_unit_header;
-+
-+/* on-disk header of the whole compound directory item (HEADER in cde.c) */
-+typedef struct cde_item_format {
-+ /* number of entries stored in the item */
-+ d16 num_of_entries;
-+ /* entry headers, one per entry */
-+ cde_unit_header entry[0];
-+} cde_item_format;
-+
-+/* in-memory description of one entry to be added to an item */
-+typedef struct cde_entry {
-+ const struct inode *dir;
-+ const struct inode *obj;
-+ const struct qstr *name;
-+} cde_entry;
-+
-+/* in-memory description of a set of entries to be added to an item */
-+typedef struct cde_entry_data {
-+ /* number of elements in @entry */
-+ int num_of_entries;
-+ cde_entry *entry;
-+} cde_entry_data;
-+
-+/* plugin->item.b.* : common item methods of the compound directory item */
-+reiser4_key *max_key_inside_cde(const coord_t * coord, reiser4_key * result);
-+int can_contain_key_cde(const coord_t * coord, const reiser4_key * key,
-+			const reiser4_item_data *);
-+int mergeable_cde(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_cde(const coord_t * coord);
-+reiser4_key *unit_key_cde(const coord_t * coord, reiser4_key * key);
-+int estimate_cde(const coord_t * coord, const reiser4_item_data * data);
-+/* declared once only; the second, identical declaration was dropped */
-+void print_cde(const char *prefix, coord_t * coord);
-+int init_cde(coord_t * coord, coord_t * from, reiser4_item_data * data);
-+lookup_result lookup_cde(const reiser4_key * key, lookup_bias bias,
-+			 coord_t * coord);
-+int paste_cde(coord_t * coord, reiser4_item_data * data,
-+	      carry_plugin_info * info UNUSED_ARG);
-+int can_shift_cde(unsigned free_space, coord_t * coord, znode * target,
-+		  shift_direction pend, unsigned *size, unsigned want);
-+void copy_units_cde(coord_t * target, coord_t * source, unsigned from,
-+		    unsigned count, shift_direction where_is_free_space,
-+		    unsigned free_space);
-+int cut_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+		  struct carry_cut_data *, reiser4_key * smallest_removed,
-+		  reiser4_key * new_first);
-+int kill_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+		   struct carry_kill_data *, reiser4_key * smallest_removed,
-+		   reiser4_key * new_first);
-+int reiser4_check_cde(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.dir.* : directory-specific item methods */
-+int extract_key_cde(const coord_t * coord, reiser4_key * key);
-+int update_key_cde(const coord_t * coord, const reiser4_key * key,
-+		   lock_handle * lh);
-+char *extract_name_cde(const coord_t * coord, char *buf);
-+int add_entry_cde(struct inode *dir, coord_t * coord,
-+		  lock_handle * lh, const struct dentry *name,
-+		  reiser4_dir_entry_desc * entry);
-+int rem_entry_cde(struct inode *dir, const struct qstr *name, coord_t * coord,
-+		  lock_handle * lh, reiser4_dir_entry_desc * entry);
-+int max_name_len_cde(const struct inode *dir);
-+
-+/* __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/ctail.c linux-2.6.20/fs/reiser4/plugin/item/ctail.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/ctail.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/ctail.c 2007-05-06 14:50:43.803008220 +0400
-@@ -0,0 +1,1570 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* ctails (aka "clustered tails") are items for cryptcompress objects */
-+
-+/* DESCRIPTION:
-+
-+Each cryptcompress object is stored on disk as a set of clusters sliced
-+into ctails.
-+
-+Internal on-disk structure:
-+
-+ HEADER (1) Here the disk cluster shift is stored
-+ BODY
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../object.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../context.h"
-+#include "../../page_cache.h"
-+#include "../cluster.h"
-+#include "../../flush.h"
-+#include "../../tree_walk.h"
-+
-+#include <linux/pagevec.h>
-+#include <linux/swap.h>
-+#include <linux/fs.h>
-+
-+/* body of the item at @coord, viewed as a ctail item */
-+static ctail_item_format *ctail_formatted_at(const coord_t * coord)
-+{
-+	ctail_item_format *format;
-+
-+	assert("edward-60", coord != NULL);
-+	format = item_body_by_coord(coord);
-+	return format;
-+}
-+
-+/* Fetch the cluster shift kept in the header of the ctail at @coord.
-+   The field is read with get_unaligned(). */
-+static int cluster_shift_by_coord(const coord_t * coord)
-+{
-+	ctail_item_format *fmt = ctail_formatted_at(coord);
-+
-+	return get_unaligned(&fmt->cluster_shift);
-+}
-+
-+/* Record in the ctail extension of @hint the cluster shift of the item
-+   the hint's coord points to; that item has to be a ctail. */
-+static inline void dclust_set_extension_shift(hint_t * hint)
-+{
-+	const coord_t *where = &hint->ext_coord.coord;
-+
-+	assert("edward-1270", item_id_by_coord(where) == CTAIL_ID);
-+	hint->ext_coord.extension.ctail.shift = cluster_shift_by_coord(where);
-+}
-+
-+/* Offset component of the key of the item at @coord. */
-+static loff_t off_by_coord(const coord_t * coord)
-+{
-+	reiser4_key item_key;
-+
-+	return get_key_offset(item_key_by_coord(coord, &item_key));
-+}
-+
-+/* Return non-zero when the ctail at @coord is "unprepped", i.e. the
-+   cluster shift stored in it equals the UCTAIL_SHIFT marker. */
-+int coord_is_unprepped_ctail(const coord_t * coord)
-+{
-+	int unprepped;
-+
-+	assert("edward-1233", coord != NULL);
-+	assert("edward-1234", item_id_by_coord(coord) == CTAIL_ID);
-+	/* an unprepped ctail always holds exactly UCTAIL_NR_UNITS units */
-+	assert("edward-1235",
-+	       ergo((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT,
-+		    nr_units_ctail(coord) == (pos_in_node_t) UCTAIL_NR_UNITS));
-+
-+	unprepped = ((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT);
-+	return unprepped;
-+}
-+
-+/* Index of the cluster the item at @coord belongs to: the offset of the
-+   item key shifted right by the cluster shift. The shift is taken from
-+   @inode when one is given, otherwise from the item itself (which then
-+   must not be unprepped). */
-+static cloff_t clust_by_coord(const coord_t * coord, struct inode *inode)
-+{
-+	int cl_shift;
-+
-+	if (inode == NULL) {
-+		assert("edward-1237", !coord_is_unprepped_ctail(coord));
-+		cl_shift = cluster_shift_by_coord(coord);
-+	} else {
-+		cl_shift = inode_cluster_shift(inode);
-+		assert("edward-1236",
-+		       ergo(!coord_is_unprepped_ctail(coord),
-+			    cl_shift == cluster_shift_by_coord(coord)));
-+	}
-+	return off_by_coord(coord) >> cl_shift;
-+}
-+
-+/* Size of the disk cluster the ctail at @coord belongs to, computed as
-+   1 << (cluster shift stored in the item). */
-+static int disk_cluster_size(const coord_t * coord)
-+{
-+	int cl_shift;
-+
-+	assert("edward-1156",
-+	       item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
-+	/* the size of a disk cluster is meaningless
-+	   for an unprepped ctail */
-+	assert("edward-1238", !coord_is_unprepped_ctail(coord));
-+
-+	cl_shift = cluster_shift_by_coord(coord);
-+	return 1 << cl_shift;
-+}
-+
-+/* Return true if @key is the key of the first item of a disk cluster:
-+   either the ctail at @coord is unprepped, or the offset of @key is a
-+   multiple of the disk cluster size. */
-+static int is_disk_cluster_key(const reiser4_key * key, const coord_t * coord)
-+{
-+	assert("edward-1239", item_id_by_coord(coord) == CTAIL_ID);
-+
-+	if (coord_is_unprepped_ctail(coord))
-+		return 1;
-+	return (get_key_offset(key) &
-+		((loff_t) disk_cluster_size(coord) - 1)) == 0;
-+}
-+
-+/* Address of the first unit of the ctail at @coord: the item body with
-+   the ctail_item_format header skipped. */
-+static char *first_unit(coord_t * coord)
-+{
-+	char *body = (char *)item_body_by_coord(coord);
-+
-+	/* cast to char * first, to avoid arithmetic on `void *' */
-+	return body + sizeof(ctail_item_format);
-+}
-+
-+/* plugin->u.item.b.max_key_inside :
-+ tail_max_key_inside */
-+
-+/* plugin->u.item.b.can_contain_key
-+   The ctail at @coord can take @key only if @data is meant for the same
-+   item plugin, the key belongs to the same object (locality and
-+   objectid), it immediately follows the last unit of the item, and it
-+   does not start a new disk cluster. Returns 1 if so, 0 otherwise. */
-+int
-+can_contain_key_ctail(const coord_t * coord, const reiser4_key * key,
-+		      const reiser4_item_data * data)
-+{
-+	reiser4_key first;
-+	int same_object;
-+
-+	if (item_plugin_by_coord(coord) != data->iplug)
-+		return 0;
-+
-+	item_key_by_coord(coord, &first);
-+	same_object = get_key_locality(key) == get_key_locality(&first) &&
-+	    get_key_objectid(key) == get_key_objectid(&first);
-+	if (!same_object)
-+		return 0;
-+
-+	/* @key has to continue the item without a gap */
-+	if (get_key_offset(&first) + nr_units_ctail(coord) !=
-+	    get_key_offset(key))
-+		return 0;
-+
-+	return is_disk_cluster_key(key, coord) ? 0 : 1;
-+}
-+
-+/* plugin->u.item.b.mergeable
-+   Two ctails can be merged when they are adjacent items of one object
-+   and the second one does not begin a disk cluster (c-tails of
-+   different clusters are not mergeable). Returns 1 if mergeable. */
-+int mergeable_ctail(const coord_t * p1, const coord_t * p2)
-+{
-+	reiser4_key left, right;
-+
-+	assert("edward-62", item_id_by_coord(p1) == CTAIL_ID);
-+	assert("edward-61", plugin_of_group(item_plugin_by_coord(p1),
-+					    UNIX_FILE_METADATA_ITEM_TYPE));
-+
-+	/* second item is of another type */
-+	if (item_id_by_coord(p2) != CTAIL_ID)
-+		return 0;
-+
-+	item_key_by_coord(p1, &left);
-+	item_key_by_coord(p2, &right);
-+
-+	/* items of different objects */
-+	if (get_key_locality(&left) != get_key_locality(&right))
-+		return 0;
-+	if (get_key_objectid(&left) != get_key_objectid(&right))
-+		return 0;
-+	if (get_key_type(&left) != get_key_type(&right))
-+		return 0;
-+
-+	/* not adjacent items */
-+	if (get_key_offset(&left) + nr_units_ctail(p1) !=
-+	    get_key_offset(&right))
-+		return 0;
-+
-+	return is_disk_cluster_key(&right, p2) ? 0 : 1;
-+}
-+
-+/* plugin->u.item.b.nr_units
-+   Number of units in the ctail at @coord: the item length minus the
-+   size of the cluster_shift field of the item header. */
-+pos_in_node_t nr_units_ctail(const coord_t * coord)
-+{
-+	/* the operand of sizeof is not evaluated, only its type is used */
-+	const size_t shift_size =
-+	    sizeof(ctail_formatted_at(coord)->cluster_shift);
-+
-+	return item_length_by_coord(coord) - shift_size;
-+}
-+
-+/* plugin->u.item.b.estimate:
-+   space needed to put @data->length bytes into a ctail. With a NULL
-+   @coord a new item is inserted and its header is accounted for;
-+   otherwise the bytes are pasted into the existing item at @coord. */
-+int estimate_ctail(const coord_t * coord /* coord of item */ ,
-+		   const reiser4_item_data *
-+		   data /* parameters for new item */ )
-+{
-+	if (coord != NULL)
-+		/* paste */
-+		return data->length;
-+
-+	/* insert */
-+	return (sizeof(ctail_item_format) + data->length);
-+}
-+
-+/* ->init() method for this item plugin.
-+   Store the cluster shift in the header of the item at @to. The shift
-+   is taken from @data->arg when @data is given (@data->length is then
-+   reduced by the header size), otherwise it is copied from the item at
-+   @from. Always returns 0. */
-+int init_ctail(coord_t * to /* coord of item */ ,
-+	       coord_t * from /* old_item */ ,
-+	       reiser4_item_data * data /* structure used for insertion */ )
-+{
-+	int shift;		/* cpu value to convert */
-+
-+	if (data == NULL) {
-+		assert("edward-464", from != NULL);
-+		assert("edward-855", ctail_ok(from));
-+		shift = (int)(cluster_shift_by_coord(from));
-+	} else {
-+		assert("edward-463", data->length > sizeof(ctail_item_format));
-+		shift = *((int *)(data->arg));
-+		data->length -= sizeof(ctail_item_format);
-+	}
-+	put_unaligned((d8)shift, &ctail_formatted_at(to)->cluster_shift);
-+	assert("edward-856", ctail_ok(to));
-+	return 0;
-+}
-+
-+/* plugin->u.item.b.lookup:
-+ NULL: We are looking for item keys only */
-+
-+#if REISER4_DEBUG
-+/* Debugging check: a ctail is sane if it is unprepped or if the cluster
-+   shift stored in it passes cluster_shift_ok(). */
-+int ctail_ok(const coord_t * coord)
-+{
-+	if (coord_is_unprepped_ctail(coord))
-+		return 1;
-+	return cluster_shift_ok(cluster_shift_by_coord(coord)) ? 1 : 0;
-+}
-+
-+/* plugin->u.item.b.check
-+   Returns 0 for a sane ctail. Otherwise returns 1 and, when @error is
-+   not NULL, makes *@error point to a description of the problem. */
-+int check_ctail(const coord_t * coord, const char **error)
-+{
-+	if (ctail_ok(coord))
-+		return 0;
-+
-+	if (error)
-+		*error = "bad cluster shift in ctail";
-+	return 1;
-+}
-+#endif
-+
-+/* plugin->u.item.b.paste
-+   Copy @data->length bytes from the kernel buffer @data->data into the
-+   ctail at @coord, whose length already accounts for the new bytes.
-+   The data goes either to the start of a newly created item or right
-+   behind the last old unit. Always returns 0. */
-+int
-+paste_ctail(coord_t * coord, reiser4_item_data * data,
-+	    carry_plugin_info * info UNUSED_ARG)
-+{
-+	unsigned nr_old;	/* units in the item before the paste */
-+
-+	assert("edward-268", data->data != NULL);
-+	/* copy only from kernel space */
-+	assert("edward-66", data->user == 0);
-+
-+	nr_old = item_length_by_coord(coord) - sizeof(ctail_item_format) -
-+	    data->length;
-+
-+	/* ctail items never get pasted in the middle */
-+
-+	if (coord->between == AT_UNIT && coord->unit_pos == 0) {
-+		/* paste at the beginning when create new item */
-+		assert("edward-450",
-+		       item_length_by_coord(coord) ==
-+		       data->length + sizeof(ctail_item_format));
-+		assert("edward-451", nr_old == 0);
-+	} else if (coord->between == AFTER_UNIT
-+		   && coord->unit_pos == nr_old - 1) {
-+		/* paste at the end */
-+		coord->unit_pos++;
-+	} else {
-+		impossible("edward-453", "bad paste position");
-+	}
-+
-+	memcpy(first_unit(coord) + coord->unit_pos, data->data, data->length);
-+
-+	assert("edward-857", ctail_ok(coord));
-+
-+	return 0;
-+}
-+
-+/* plugin->u.item.b.fast_paste */
-+
-+/* plugin->u.item.b.can_shift
-+   The number of units that can be shifted is the return value, the
-+   number of bytes is stored in @size; for ctail items a unit is a byte.
-+   Without a @target item a new one has to be created, so room for its
-+   header is taken out of the units that fit. */
-+int
-+can_shift_ctail(unsigned free_space, coord_t * source,
-+		znode * target, shift_direction direction UNUSED_ARG,
-+		unsigned *size /* number of bytes */ , unsigned want)
-+{
-+	/* make sure that we do not want to shift more than we have */
-+	assert("edward-68", want > 0 && want <= nr_units_ctail(source));
-+
-+	*size = min(want, free_space);
-+
-+	if (target)
-+		return *size;
-+
-+	/* new item will be created */
-+	if (*size > sizeof(ctail_item_format))
-+		return *size - sizeof(ctail_item_format);
-+
-+	/* not even one unit fits next to the header */
-+	*size = 0;
-+	return 0;
-+}
-+
-+/* plugin->u.item.b.copy_units
-+   cooperates with ->can_shift(): copy @count units of @source, starting
-+   with unit @from, into @target. When shifting left the units are
-+   appended to @target; otherwise they are put in front of it and the
-+   key of @target is updated. */
-+void
-+copy_units_ctail(coord_t * target, coord_t * source,
-+		 unsigned from, unsigned count /* units */ ,
-+		 shift_direction where_is_free_space,
-+		 unsigned free_space /* bytes */ )
-+{
-+	reiser4_key new_key;
-+
-+	/* make sure that item @target is expanded already */
-+	assert("edward-69", (unsigned)item_length_by_coord(target) >= count);
-+	assert("edward-70", free_space == count || free_space == count + 1);
-+
-+	assert("edward-858", ctail_ok(source));
-+
-+	if (where_is_free_space == SHIFT_LEFT) {
-+		/* append item @target with @count first bytes of @source:
-+		   this restriction came from ordinary tails */
-+		assert("edward-71", from == 0);
-+		assert("edward-860", ctail_ok(target));
-+
-+		memcpy(first_unit(target) + nr_units_ctail(target) - count,
-+		       first_unit(source), count);
-+		return;
-+	}
-+
-+	/* target item is moved to right already */
-+	assert("edward-72", nr_units_ctail(source) == from + count);
-+
-+	if (free_space != count) {
-+		/* new item has been created */
-+		assert("edward-862", ctail_ok(target));
-+	} else {
-+		init_ctail(target, source, NULL);
-+	}
-+	memcpy(first_unit(target), first_unit(source) + from, count);
-+
-+	assert("edward-863", ctail_ok(target));
-+
-+	/* new units are inserted before first unit in an item,
-+	   therefore, we have to update item key */
-+	item_key_by_coord(source, &new_key);
-+	set_key_offset(&new_key, get_key_offset(&new_key) + from);
-+
-+	node_plugin_by_node(target->node)->update_item_key(target, &new_key,
-+							   NULL /*info */);
-+}
-+
-+/* plugin->u.item.b.create_hook
-+   Mark the node holding the new ctail at @coord as convertible.
-+   @arg is not used. Always returns 0. */
-+int create_hook_ctail(const coord_t * coord, void *arg)
-+{
-+	/* the node is expected to be loaded at this point */
-+	assert("edward-864", znode_is_loaded(coord->node));
-+
-+	znode_set_convertible(coord->node);
-+
-+	return 0;
-+}
-+
-+/* plugin->u.item.b.kill_hook
-+   When @kdata carries an inode and the first item of a disk cluster is
-+   killed starting from its first unit, truncate the corresponding page
-+   cluster and subtract one cluster size from the byte count of the
-+   inode. @count is not used. Always returns 0. */
-+int
-+kill_hook_ctail(const coord_t * coord, pos_in_node_t from, pos_in_node_t count,
-+		carry_kill_data * kdata)
-+{
-+	struct inode *inode;
-+	reiser4_key key;
-+	cloff_t index;
-+
-+	assert("edward-1157", item_id_by_coord(coord) == CTAIL_ID);
-+	assert("edward-291", znode_is_write_locked(coord->node));
-+
-+	inode = kdata->inode;
-+	if (!inode)
-+		return 0;
-+
-+	item_key_by_coord(coord, &key);
-+	if (from != 0 || !is_disk_cluster_key(&key, coord))
-+		return 0;
-+
-+	/* disk cluster is killed */
-+	index = off_to_clust(get_key_offset(&key), inode);
-+	truncate_page_cluster_cryptcompress(inode, index,
-+					    kdata->params.truncate);
-+	inode_sub_bytes(inode, inode_cluster_size(inode));
-+	return 0;
-+}
-+
-+/* for shift_hook_ctail(),
-+   return true if the ctail at @coord is unprepped, or if the jnode found
-+   by the objectid and page index of the item exists and is dirty
-+*/
-+static int ctail_convertible(const coord_t * coord)
-+{
-+	int dirty;
-+	reiser4_key key;
-+	jnode *node;
-+
-+	assert("edward-477", coord != NULL);
-+	assert("edward-478", item_id_by_coord(coord) == CTAIL_ID);
-+
-+	/* unprepped ctail should be converted */
-+	if (coord_is_unprepped_ctail(coord))
-+		return 1;
-+
-+	item_key_by_coord(coord, &key);
-+	node = jlookup(current_tree, get_key_objectid(&key),
-+		       off_to_pg(off_by_coord(coord)));
-+	if (node == NULL)
-+		return 0;
-+
-+	dirty = JF_ISSET(node, JNODE_DIRTY);
-+	/* drop the reference obtained by jlookup() */
-+	jput(node);
-+	return dirty;
-+}
-+
-+/* FIXME-EDWARD */
-+/* plugin->u.item.b.shift_hook
-+   An item has been shifted from @old_node to @item->node. If the old
-+   node was convertible and the new one is not marked yet, mark the new
-+   node convertible when the item needs conversion. Always returns 0. */
-+int shift_hook_ctail(const coord_t * item /* coord of item */ ,
-+		     unsigned from UNUSED_ARG /* start unit */ ,
-+		     unsigned count UNUSED_ARG /* stop unit */ ,
-+		     znode * old_node /* old parent */ )
-+{
-+	assert("edward-479", item != NULL);
-+	assert("edward-480", item->node != old_node);
-+
-+	if (znode_convertible(old_node) && !znode_convertible(item->node) &&
-+	    ctail_convertible(item))
-+		znode_set_convertible(item->node);
-+	return 0;
-+}
-+
-+/* Common part of ->cut_units() and ->kill_units(): remove units
-+   @from..@to (inclusive) of the ctail at @coord. With @cut == 0 the kill
-+   hook is run first and @p is handed to it as carry_kill_data.
-+   If @smallest_removed is not NULL it receives the key of the first
-+   removed unit; if @new_first is not NULL it receives the key of the
-+   unit following the removed ones.
-+   Returns the number of bytes freed. */
-+static int
-+cut_or_kill_ctail_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+			int cut, void *p, reiser4_key * smallest_removed,
-+			reiser4_key * new_first)
-+{
-+	pos_in_node_t freed;	/* number of units to cut */
-+	char *body;
-+
-+	freed = to - from + 1;
-+	body = item_body_by_coord(coord);
-+
-+	assert("edward-74", ergo(from != 0, to == coord_last_unit_pos(coord)));
-+
-+	if (smallest_removed) {
-+		/* store smallest key removed */
-+		item_key_by_coord(coord, smallest_removed);
-+		set_key_offset(smallest_removed,
-+			       get_key_offset(smallest_removed) + from);
-+	}
-+
-+	if (new_first) {
-+		assert("vs-1531", from == 0);
-+
-+		item_key_by_coord(coord, new_first);
-+		set_key_offset(new_first,
-+			       get_key_offset(new_first) + from + freed);
-+	}
-+
-+	if (!cut)
-+		kill_hook_ctail(coord, from, 0, (struct carry_kill_data *)p);
-+
-+	if (from != 0) {
-+		/* units are removed from the end of the item */
-+		if (REISER4_DEBUG)
-+			memset(body + sizeof(ctail_item_format) + from, 0,
-+			       freed);
-+		return freed;
-+	}
-+
-+	if (freed == nr_units_ctail(coord)) {
-+		/* cut_units should not be called to cut everything */
-+		assert("vs-1532", ergo(cut, 0));
-+		/* whole item is cut, so more than the amount of space
-+		   occupied by units got freed */
-+		freed += sizeof(ctail_item_format);
-+	} else {
-+		/* part of item is removed, so move free space at the
-+		   beginning of the item and update item key */
-+		reiser4_key key;
-+
-+		memcpy(body + to + 1, body, sizeof(ctail_item_format));
-+		item_key_by_coord(coord, &key);
-+		set_key_offset(&key, get_key_offset(&key) + freed);
-+		node_plugin_by_node(coord->node)->update_item_key(coord,
-+								  &key,
-+								  NULL);
-+	}
-+	if (REISER4_DEBUG)
-+		memset(body, 0, freed);
-+	return freed;
-+}
-+
-+/* plugin->u.item.b.cut_units
-+   Remove units @from..@to of @item without running the kill hook.
-+   @cdata is not used. Returns what cut_or_kill_ctail_units() returns. */
-+int
-+cut_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
-+		carry_cut_data * cdata, reiser4_key * smallest_removed,
-+		reiser4_key * new_first)
-+{
-+	const int cut = 1;
-+
-+	return cut_or_kill_ctail_units(item, from, to, cut, NULL,
-+				       smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.kill_units
-+   Remove units @from..@to of @item; the kill hook is run with @kdata.
-+   Returns what cut_or_kill_ctail_units() returns. */
-+int
-+kill_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
-+		 struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+		 reiser4_key * new_first)
-+{
-+	const int cut = 0;
-+
-+	return cut_or_kill_ctail_units(item, from, to, cut, kdata,
-+				       smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.s.file.read
-+   Copy the whole ctail the coord of @hint points to into flow @f (when
-+   the flow has a data buffer), remember the cluster shift of the item
-+   in the hint and move the flow forward by the size of the item.
-+   Always returns 0. */
-+int read_ctail(struct file *file UNUSED_ARG, flow_t * f, hint_t * hint)
-+{
-+	coord_t *coord = &hint->ext_coord.coord;
-+
-+	assert("edward-127", f->user == 0);
-+	assert("edward-129", coord && coord->node);
-+	assert("edward-130", coord_is_existing_unit(coord));
-+	assert("edward-132", znode_is_loaded(coord->node));
-+
-+	/* start read only from the beginning of ctail */
-+	assert("edward-133", coord->unit_pos == 0);
-+	/* read only whole ctails */
-+	assert("edward-135", nr_units_ctail(coord) <= f->length);
-+
-+	assert("edward-136", reiser4_schedulable());
-+	assert("edward-886", ctail_ok(coord));
-+
-+	if (f->data) {
-+		char *src = (char *)first_unit(coord);
-+
-+		memcpy(f->data, src, (size_t) nr_units_ctail(coord));
-+	}
-+
-+	dclust_set_extension_shift(hint);
-+	mark_page_accessed(znode_page(coord->node));
-+	move_flow_forward(f, nr_units_ctail(coord));
-+
-+	return 0;
-+}
-+
-+/* Read the disk cluster described by @clust, which consists of ctail
-+   items, and attach a transform stream with plain text to it.
-+   @mode is the znode lock mode used for the lookup and has to be either
-+   ZNODE_READ_LOCK or ZNODE_WRITE_LOCK. After a read-locked lookup the
-+   hint is put back right away; a writer keeps its lock.
-+   Returns 0 on success (the transform cluster is then marked uptodate)
-+   or the error code of the step that failed. */
-+int ctail_read_disk_cluster(reiser4_cluster_t * clust, struct inode *inode,
-+			    znode_lock_mode mode)
-+{
-+	int result;
-+	/* both alternatives have to be compared with @mode: a bare
-+	   constant as the right operand does not test @mode at all */
-+	assert("edward-1450",
-+	       mode == ZNODE_READ_LOCK || mode == ZNODE_WRITE_LOCK);
-+	assert("edward-671", clust->hint != NULL);
-+	assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER);
-+	assert("edward-672", cryptcompress_inode_ok(inode));
-+
-+	/* set input stream */
-+	result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM);
-+	if (result)
-+		return result;
-+
-+	result = find_disk_cluster(clust, inode, 1 /* read items */, mode);
-+	assert("edward-1340", !result);
-+	if (result)
-+		return result;
-+	if (mode == ZNODE_READ_LOCK)
-+		/* write still need the lock to insert unprepped
-+		   items, etc... */
-+		put_hint_cluster(clust, inode, ZNODE_READ_LOCK);
-+
-+	/* fake and unprepped disk clusters have nothing to inflate */
-+	if (clust->dstat == FAKE_DISK_CLUSTER ||
-+	    clust->dstat == UNPR_DISK_CLUSTER) {
-+		tfm_cluster_set_uptodate(&clust->tc);
-+		return 0;
-+	}
-+	result = grab_coa(&clust->tc, inode_compression_plugin(inode));
-+	if (result)
-+		return result;
-+	result = reiser4_inflate_cluster(clust, inode);
-+	if (result)
-+		return result;
-+	tfm_cluster_set_uptodate(&clust->tc);
-+	return 0;
-+}
-+
-+/* Bring one locked @page of @inode up to date.
-+   If the transform cluster of @clust is not uptodate, the disk cluster
-+   containing the page is read first, with the page unlocked for the
-+   duration of that read. The page is then filled with zeroes for a
-+   fake or unprepped disk cluster, or with the plain text of the
-+   cluster for a prepped one.
-+   Returns 0 on success or an error code; the page is locked on return
-+   in either case. */
-+int do_readpage_ctail(struct inode * inode, reiser4_cluster_t * clust,
-+		      struct page *page, znode_lock_mode mode)
-+{
-+	int err;
-+	unsigned offset_in_cluster;
-+	char *kaddr;
-+	size_t nbytes;
-+	tfm_cluster_t *tc = &clust->tc;
-+
-+	assert("edward-212", PageLocked(page));
-+
-+	if (PageUptodate(page))
-+		return 0;
-+
-+	if (!tfm_cluster_is_uptodate(&clust->tc)) {
-+		clust->index = pg_to_clust(page->index, inode);
-+		unlock_page(page);
-+		err = ctail_read_disk_cluster(clust, inode, mode);
-+		lock_page(page);
-+		if (err)
-+			return err;
-+	}
-+	if (PageUptodate(page))
-+		/* races with another read/write */
-+		return 0;
-+
-+	/* bytes in the page */
-+	nbytes = cnt_to_pgcnt(i_size_read(inode), page->index);
-+
-+	if (nbytes == 0) {
-+		assert("edward-1290", 0);
-+		return RETERR(-EINVAL);
-+	}
-+	assert("edward-119", tfm_cluster_is_uptodate(tc));
-+
-+	switch (clust->dstat) {
-+	case UNPR_DISK_CLUSTER:
-+		assert("edward-1285", 0);
-+#if REISER4_DEBUG
-+		warning("edward-1168",
-+			"page %lu is not uptodate and disk cluster %lu (inode %llu) is unprepped\n",
-+			page->index, clust->index,
-+			(unsigned long long)get_inode_oid(inode));
-+#endif
-+		/* fall through: handled like a fake disk cluster */
-+	case FAKE_DISK_CLUSTER:
-+		/* fill the page by zeroes */
-+		kaddr = kmap_atomic(page, KM_USER0);
-+
-+		memset(kaddr, 0, PAGE_CACHE_SIZE);
-+		flush_dcache_page(page);
-+		kunmap_atomic(kaddr, KM_USER0);
-+		SetPageUptodate(page);
-+		break;
-+	case PREP_DISK_CLUSTER:
-+		/* fill the page by transformed data */
-+		assert("edward-1058", !PageUptodate(page));
-+		assert("edward-120", tc->len <= inode_cluster_size(inode));
-+
-+		/* start page offset in the cluster */
-+		offset_in_cluster = pg_to_off_to_cloff(page->index, inode);
-+
-+		kaddr = kmap(page);
-+		memcpy(kaddr,
-+		       tfm_stream_data(tc, OUTPUT_STREAM) + offset_in_cluster,
-+		       nbytes);
-+		/* zero the rest of the page behind the valid bytes */
-+		memset(kaddr + nbytes, 0, (size_t) PAGE_CACHE_SIZE - nbytes);
-+		flush_dcache_page(page);
-+		kunmap(page);
-+		SetPageUptodate(page);
-+		break;
-+	default:
-+		impossible("edward-1169", "bad disk cluster state");
-+	}
-+	return 0;
-+}
-+
-+/* plugin->u.item.s.file.readpage
-+   @vp is the reiser4_cluster_t handle to read with. A file hint is
-+   allocated and loaded for the time of the read and is saved and freed
-+   again before return. @page comes in locked and is unlocked on every
-+   path. Returns 0 on success or an error code. */
-+int readpage_ctail(void *vp, struct page *page)
-+{
-+	int ret;
-+	hint_t *hint;
-+	reiser4_cluster_t *clust = vp;
-+
-+	assert("edward-114", clust != NULL);
-+	assert("edward-115", PageLocked(page));
-+	assert("edward-116", !PageUptodate(page));
-+	assert("edward-117", !jprivate(page) && !PagePrivate(page));
-+	assert("edward-118", page->mapping && page->mapping->host);
-+	assert("edward-867", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+	if (!hint) {
-+		unlock_page(page);
-+		return RETERR(-ENOMEM);
-+	}
-+	clust->hint = hint;
-+
-+	ret = load_file_hint(clust->file, hint);
-+	if (ret != 0) {
-+		kfree(hint);
-+		unlock_page(page);
-+		return ret;
-+	}
-+	assert("vs-25", hint->ext_coord.lh == &hint->lh);
-+
-+	ret = do_readpage_ctail(page->mapping->host, clust, page,
-+				ZNODE_READ_LOCK);
-+
-+	assert("edward-213", PageLocked(page));
-+	assert("edward-1163", ergo(!ret, PageUptodate(page)));
-+	assert("edward-868",
-+	       ergo(!ret, tfm_cluster_is_uptodate(&clust->tc)));
-+
-+	unlock_page(page);
-+
-+	/* release the lock handle and hand the invalidated hint back */
-+	done_lh(&hint->lh);
-+	hint->ext_coord.valid = 0;
-+	save_file_hint(clust->file, hint);
-+	kfree(hint);
-+	tfm_cluster_clr_uptodate(&clust->tc);
-+
-+	return ret;
-+}
-+
-+/* Helper function for ->readpages()
-+   Prepare the page cluster of @clust, read its disk cluster under a
-+   read lock and fill every page of the cluster in turn, stopping at the
-+   first failure. The cluster pages are released before return.
-+   Returns 0 on success or an error code. */
-+static int
-+ctail_read_page_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+	int idx;
-+	int ret;
-+
-+	assert("edward-779", clust != NULL);
-+	assert("edward-1059", clust->win == NULL);
-+	assert("edward-780", inode != NULL);
-+
-+	ret = prepare_page_cluster(inode, clust, 0 /* do not capture */ );
-+	if (ret)
-+		return ret;
-+
-+	ret = ctail_read_disk_cluster(clust, inode, ZNODE_READ_LOCK);
-+	if (ret == 0) {
-+		/* at this point stream with valid plain text is attached */
-+		assert("edward-781", tfm_cluster_is_uptodate(&clust->tc));
-+
-+		for (idx = 0; idx < clust->nr_pages; idx++) {
-+			struct page *pg = clust->pages[idx];
-+
-+			lock_page(pg);
-+			ret = do_readpage_ctail(inode, clust, pg,
-+						ZNODE_READ_LOCK);
-+			unlock_page(pg);
-+			if (ret)
-+				break;
-+		}
-+		tfm_cluster_clr_uptodate(&clust->tc);
-+	}
-+	reiser4_release_cluster_pages(clust);
-+	return ret;
-+}
-+
-+/* filler for read_cache_pages()
-+   @data is the reiser4_cluster_t handle. @page comes in locked and is
-+   unlocked on every path. Unless the page is uptodate already, the whole
-+   page cluster it belongs to is read and then the page itself is filled.
-+   Returns 0 on success or an error code. */
-+static int ctail_readpages_filler(void * data, struct page * page)
-+{
-+	int err;
-+	int uptodate;
-+	reiser4_cluster_t * clust = data;
-+	struct inode * inode = clust->file->f_dentry->d_inode;
-+
-+	uptodate = PageUptodate(page);
-+	unlock_page(page);
-+	if (uptodate)
-+		return 0;
-+
-+	move_cluster_forward(clust, inode, page->index);
-+	err = ctail_read_page_cluster(clust, inode);
-+	if (err)
-+		return err;
-+	assert("edward-869", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+	lock_page(page);
-+	err = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK);
-+	assert("edward-1061", ergo(!err, PageUptodate(page)));
-+	unlock_page(page);
-+
-+	return err;
-+}
-+
-+/* We populate a bit more than upper readahead suggests:
-+   with each nominated page we read the whole page cluster
-+   this page belongs to.
-+   A cluster handle and a file hint are set up for the whole call and
-+   torn down before return. Returns 0 on success or an error code. */
-+int readpages_ctail(struct file *file, struct address_space *mapping,
-+		    struct list_head *pages)
-+{
-+	int err = 0;
-+	hint_t *hint;
-+	reiser4_cluster_t clust;
-+	struct inode *inode = mapping->host;
-+
-+	assert("edward-1521", inode == file->f_dentry->d_inode);
-+
-+	cluster_init_read(&clust, NULL);
-+	clust.file = file;
-+
-+	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+	if (!hint) {
-+		warning("vs-28", "failed to allocate hint");
-+		err = RETERR(-ENOMEM);
-+		goto out_put_handle;
-+	}
-+	clust.hint = hint;
-+
-+	err = load_file_hint(clust.file, hint);
-+	if (err) {
-+		warning("edward-1522", "failed to load hint");
-+		goto out_free_hint;
-+	}
-+	assert("vs-26", hint->ext_coord.lh == &hint->lh);
-+
-+	err = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+	if (err) {
-+		warning("edward-1523", "failed to alloc pgset");
-+		goto out_save_hint;
-+	}
-+	err = read_cache_pages(mapping, pages, ctail_readpages_filler, &clust);
-+
-+	assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc));
-+ out_save_hint:
-+	done_lh(&hint->lh);
-+	save_file_hint(file, hint);
-+	hint->ext_coord.valid = 0;
-+ out_free_hint:
-+	kfree(hint);
-+ out_put_handle:
-+	put_cluster_handle(&clust);
-+	return err;
-+}
-+
-+/*
-+   plugin->u.item.s.file.append_key
-+   Store in @key the key of the first item of the disk cluster that
-+   follows the one the ctail at @coord belongs to, and return @key.
-+*/
-+reiser4_key *append_key_ctail(const coord_t * coord, reiser4_key * key)
-+{
-+	int shift;
-+	__u64 next;		/* index of the next cluster */
-+
-+	assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID);
-+	assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord)));
-+
-+	item_key_by_coord(coord, key);
-+
-+	next = (__u64) (clust_by_coord(coord, NULL)) + 1;
-+	shift = cluster_shift_by_coord(coord);
-+	set_key_offset(key, next << shift);
-+	return key;
-+}
-+
-+/* Insert an unprepped ctail for cluster @clust->index of @inode at the
-+   coord of the cluster's hint: an item with UCTAIL_SHIFT as cluster
-+   shift and UCTAIL_NR_UNITS zero bytes as body.
-+   Returns the result of key construction or of insert_by_coord(). */
-+static int
-+insert_unprepped_ctail(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+	int ret;
-+	char zeroes[UCTAIL_NR_UNITS];
-+	reiser4_item_data idata;
-+	reiser4_key key;
-+	int shift = (int)UCTAIL_SHIFT;
-+	hint_t *hint;
-+
-+	memset(zeroes, 0, (size_t) UCTAIL_NR_UNITS);
-+
-+	ret = key_by_inode_cryptcompress(inode,
-+					 clust_to_off(clust->index, inode),
-+					 &key);
-+	if (ret)
-+		return ret;
-+
-+	idata.user = 0;
-+	idata.iplug = item_plugin_by_id(CTAIL_ID);
-+	/* the shift is handed to the item plugin through ->arg */
-+	idata.arg = &shift;
-+	idata.length = sizeof(ctail_item_format) + (size_t) UCTAIL_NR_UNITS;
-+	idata.data = zeroes;
-+
-+	hint = clust->hint;
-+	return insert_by_coord(&hint->ext_coord.coord,
-+			       &idata, &key, hint->ext_coord.lh, 0);
-+}
-+
-+/* Insert flow @f as ctail items at @coord by posting a COP_INSERT_FLOW
-+   carry operation on @coord->node. @lh is the lock handle tracked by
-+   carry; the cluster shift of @inode is passed to the item plugin via
-+   the item data. Returns the result of reiser4_carry(), or an error
-+   code if the carry pool or the operation could not be set up. */
-+static int
-+insert_cryptcompress_flow(coord_t * coord, lock_handle * lh, flow_t * f,
-+			  struct inode *inode)
-+{
-+	int ret;
-+	carry_pool *pool;
-+	carry_level *lowest_level;
-+	reiser4_item_data *idata;
-+	carry_op *op;
-+	int cluster_shift = inode_cluster_shift(inode);
-+
-+	/* one allocation holds the pool, three carry levels and the
-+	   item data, laid out in this order */
-+	pool =
-+	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+			    sizeof(*idata));
-+	if (IS_ERR(pool))
-+		return PTR_ERR(pool);
-+	lowest_level = (carry_level *) (pool + 1);
-+	init_carry_level(lowest_level, pool);
-+	idata = (reiser4_item_data *) (lowest_level + 3);
-+
-+	assert("edward-466", coord->between == AFTER_ITEM
-+	       || coord->between == AFTER_UNIT || coord->between == BEFORE_ITEM
-+	       || coord->between == EMPTY_NODE
-+	       || coord->between == BEFORE_UNIT);
-+
-+	if (coord->between == AFTER_UNIT) {
-+		coord->unit_pos = 0;
-+		coord->between = AFTER_ITEM;
-+	}
-+	op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
-+				0 /* operate directly on coord -> node */);
-+	if (op == NULL || IS_ERR(op)) {
-+		done_carry_pool(pool);
-+		return RETERR(op ? PTR_ERR(op) : -EIO);
-+	}
-+
-+	idata->user = 0;
-+	idata->iplug = item_plugin_by_id(CTAIL_ID);
-+	idata->arg = &cluster_shift;
-+	idata->length = 0;
-+	idata->data = NULL;
-+
-+	op->u.insert_flow.flags = COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT;
-+	op->u.insert_flow.insert_point = coord;
-+	op->u.insert_flow.flow = f;
-+	op->u.insert_flow.data = idata;
-+	op->u.insert_flow.new_nodes = 0;
-+
-+	lowest_level->track_type = CARRY_TRACK_CHANGE;
-+	lowest_level->tracked = lh;
-+
-+	ret = reiser4_carry(lowest_level, NULL);
-+	done_carry_pool(pool);
-+
-+	return ret;
-+}
-+
-+/* Implementation of CRC_APPEND_ITEM mode of ctail conversion
-+   Insert flow @f after the ctail item at @coord. The insertion works on
-+   a copy of @coord and on a copy of lock handle @lh, so neither of the
-+   caller's objects is handed to the insertion itself. */
-+static int insert_cryptcompress_flow_in_place(coord_t * coord,
-+					      lock_handle * lh, flow_t * f,
-+					      struct inode *inode)
-+{
-+	int err;
-+	coord_t where;
-+	lock_handle lh_copy;
-+
-+	assert("edward-674", f->length <= inode_scaled_cluster_size(inode));
-+	assert("edward-484", coord->between == AT_UNIT
-+	       || coord->between == AFTER_ITEM);
-+	assert("edward-485", item_id_by_coord(coord) == CTAIL_ID);
-+
-+	/* insertion point: right after the item */
-+	coord_dup(&where, coord);
-+	where.unit_pos = 0;
-+	where.between = AFTER_ITEM;
-+
-+	init_lh(&lh_copy);
-+	copy_lh(&lh_copy, lh);
-+
-+	err = insert_cryptcompress_flow(&where, &lh_copy, f, inode);
-+	done_lh(&lh_copy);
-+
-+	assert("edward-1347", znode_is_write_locked(lh->node));
-+	assert("edward-1228", !err);
-+	return err;
-+}
-+
-+/* Implementation of CRC_OVERWRITE_ITEM mode of ctail conversion
-+   Overwrite the units of the ctail at @coord with data of flow @f: as
-+   many bytes as the item has units, or the whole flow if it is shorter.
-+   The flow and @coord->unit_pos are advanced by the number of bytes
-+   copied. Always returns 0. */
-+static int overwrite_ctail(coord_t * coord, flow_t * f)
-+{
-+	unsigned nbytes;
-+
-+	assert("edward-269", f->user == 0);
-+	assert("edward-270", f->data != NULL);
-+	assert("edward-271", f->length > 0);
-+	assert("edward-272", coord_is_existing_unit(coord));
-+	assert("edward-273", coord->unit_pos == 0);
-+	assert("edward-274", znode_is_write_locked(coord->node));
-+	assert("edward-275", reiser4_schedulable());
-+	assert("edward-467", item_id_by_coord(coord) == CTAIL_ID);
-+	assert("edward-1243", ctail_ok(coord));
-+
-+	nbytes = nr_units_ctail(coord);
-+	if (f->length < nbytes)
-+		/* the flow ends inside of this item */
-+		nbytes = f->length;
-+
-+	memcpy(first_unit(coord), f->data, nbytes);
-+	move_flow_forward(f, nbytes);
-+	coord->unit_pos += nbytes;
-+	return 0;
-+}
-+
-+/* Implementation of CRC_CUT_ITEM mode of ctail conversion:
-+   cut the ctail from unit position @coord->unit_pos up to its last
-+   unit. Returns 0 if there is nothing to cut, otherwise the result of
-+   cut_node_content(). */
-+static int cut_ctail(coord_t * coord)
-+{
-+	coord_t last;
-+
-+	assert("edward-435", coord->between == AT_UNIT &&
-+	       coord->item_pos < coord_num_items(coord) &&
-+	       coord->unit_pos <= coord_num_units(coord));
-+
-+	if (coord->unit_pos != coord_num_units(coord)) {
-+		coord_dup(&last, coord);
-+		last.unit_pos = coord_last_unit_pos(coord);
-+
-+		return cut_node_content(coord, &last, NULL, NULL, NULL);
-+	}
-+	/* nothing to cut */
-+	return 0;
-+}
-+
-+/* Insert an unprepped ctail for the fake disk cluster @clust of @inode.
-+   The position of the cluster is looked up and locked for write first;
-+   the lookup is expected to end with CBK_COORD_NOTFOUND. After the
-+   insertion all_grabbed2free() is called.
-+   Returns a lookup error or the result of the insertion. */
-+int
-+ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, struct inode *inode)
-+{
-+	int ret;
-+
-+	assert("edward-1244", inode != NULL);
-+	assert("edward-1245", clust->hint != NULL);
-+	assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER);
-+	assert("edward-1247", clust->reserved == 1);
-+
-+	ret = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK);
-+	if (cbk_errored(ret))
-+		return ret;
-+
-+	assert("edward-1249", ret == CBK_COORD_NOTFOUND);
-+	assert("edward-1250", znode_is_write_locked(clust->hint->lh.node));
-+
-+	assert("edward-1295",
-+	       clust->hint->ext_coord.lh->node ==
-+	       clust->hint->ext_coord.coord.node);
-+
-+	coord_set_between_clusters(&clust->hint->ext_coord.coord);
-+
-+	ret = insert_unprepped_ctail(clust, inode);
-+	all_grabbed2free();
-+
-+	assert("edward-1251", !ret);
-+	assert("edward-1252", cryptcompress_inode_ok(inode));
-+	assert("edward-1253", znode_is_write_locked(clust->hint->lh.node));
-+	assert("edward-1254",
-+	       reiser4_clustered_blocks(reiser4_get_current_sb()));
-+	assert("edward-1255",
-+	       znode_convertible(clust->hint->ext_coord.coord.node));
-+
-+	return ret;
-+}
-+
-+/* Perform one step of ctail conversion at flush position @pos:
-+   CRC_APPEND_ITEM inserts the rest of the flow after the current item,
-+   CRC_OVERWRITE_ITEM overwrites the current item with flow data and, if
-+   that exhausts the flow, goes on to cut what remains of the item,
-+   CRC_CUT_ITEM cuts the item from the current unit position.
-+   Returns 0 on success or an error code. */
-+static int do_convert_ctail(flush_pos_t * pos, cryptcompress_write_mode_t mode)
-+{
-+	int ret = 0;
-+	convert_item_info_t *idata;
-+
-+	assert("edward-468", pos != NULL);
-+	assert("edward-469", pos->sq != NULL);
-+	assert("edward-845", item_convert_data(pos) != NULL);
-+
-+	idata = item_convert_data(pos);
-+	assert("edward-679", idata->flow.data != NULL);
-+
-+	switch (mode) {
-+	case CRC_APPEND_ITEM:
-+		assert("edward-1229", idata->flow.length != 0);
-+		assert("edward-1256",
-+		       cluster_shift_ok(cluster_shift_by_coord(&pos->coord)));
-+		ret = insert_cryptcompress_flow_in_place(&pos->coord,
-+							 &pos->lock,
-+							 &idata->flow,
-+							 idata->inode);
-+		break;
-+	case CRC_OVERWRITE_ITEM:
-+		assert("edward-1230", idata->flow.length != 0);
-+		overwrite_ctail(&pos->coord, &idata->flow);
-+		if (idata->flow.length != 0)
-+			break;
-+		/* the flow is exhausted: fall through and cut the rest */
-+	case CRC_CUT_ITEM:
-+		assert("edward-1231", idata->flow.length == 0);
-+		ret = cut_ctail(&pos->coord);
-+		break;
-+	default:
-+		ret = RETERR(-EIO);
-+		impossible("edward-244", "bad convert mode");
-+	}
-+	return ret;
-+}
-+
-+/* plugin->u.item.f.scan
-+   Nothing is done unless the scan goes left. Otherwise the parent node
-+   is made dirty if it is not yet, and is marked convertible when the
-+   scanned cluster page is dirty. Returns -EAGAIN if the parent is not
-+   convertible and the page is not dirty, 0 in all other cases. */
-+int scan_ctail(flush_scan * scan)
-+{
-+	struct page *page;
-+	struct inode *inode;
-+	jnode *node = scan->node;
-+
-+	assert("edward-227", scan->node != NULL);
-+	assert("edward-228", jnode_is_cluster_page(scan->node));
-+	assert("edward-639", znode_is_write_locked(scan->parent_lock.node));
-+
-+	page = jnode_page(node);
-+	inode = page->mapping->host;
-+
-+	if (!reiser4_scanning_left(scan))
-+		return 0;
-+
-+	if (!ZF_ISSET(scan->parent_lock.node, JNODE_DIRTY))
-+		znode_make_dirty(scan->parent_lock.node);
-+
-+	if (znode_convertible(scan->parent_lock.node))
-+		return 0;
-+
-+	if (!JF_ISSET(scan->node, JNODE_DIRTY)) {
-+		warning("edward-681",
-+			"cluster page is already processed");
-+		return -EAGAIN;
-+	}
-+	znode_set_convertible(scan->parent_lock.node);
-+	return 0;
-+}
-+
-+/* Look up the leftmost child of the ctail at @pos->coord and decide
-+   whether convert data should be attached: the child has to be dirty
-+   and belong to the same atom as the node of @pos->coord.
-+   If true is returned, the child stays attached to @pos->child;
-+   otherwise it is put and @pos->child is NULL on return. */
-+static int should_attach_convert_idata(flush_pos_t * pos)
-+{
-+	int attach;
-+
-+	assert("edward-431", pos != NULL);
-+	assert("edward-432", pos->child == NULL);
-+	assert("edward-619", znode_is_write_locked(pos->coord.node));
-+	assert("edward-470",
-+	       item_plugin_by_coord(&pos->coord) ==
-+	       item_plugin_by_id(CTAIL_ID));
-+
-+	/* check for leftmost child */
-+	utmost_child_ctail(&pos->coord, LEFT_SIDE, &pos->child);
-+
-+	if (pos->child == NULL)
-+		return 0;
-+
-+	spin_lock_jnode(pos->child);
-+	attach = (JF_ISSET(pos->child, JNODE_DIRTY) &&
-+		  pos->child->atom == ZJNODE(pos->coord.node)->atom);
-+	spin_unlock_jnode(pos->child);
-+
-+	if (attach)
-+		return attach;
-+
-+	if (pos->child) {
-+		/* existing child isn't to attach, clear up this one */
-+		jput(pos->child);
-+		pos->child = NULL;
-+	}
-+	return attach;
-+}
-+
-+/* plugin->init_convert_data()
-+   Bind @idata to @inode and reset its state: the current state becomes
-+   DC_FIRST_ITEM, the next state DC_INVALID_STATE. Always returns 0. */
-+static int
-+init_convert_data_ctail(convert_item_info_t * idata, struct inode *inode)
-+{
-+	assert("edward-813", idata != NULL);
-+	assert("edward-814", inode != NULL);
-+
-+	idata->d_next = DC_INVALID_STATE;
-+	idata->d_cur = DC_FIRST_ITEM;
-+	idata->inode = inode;
-+
-+	return 0;
-+}
-+
-+/* Allocate the per-item convert data of @sq, which must not have any
-+   yet. The memory is not initialized here.
-+   Returns 0 on success, RETERR(-ENOMEM) if the allocation failed. */
-+static int alloc_item_convert_data(convert_info_t * sq)
-+{
-+	assert("edward-816", sq != NULL);
-+	assert("edward-817", sq->itm == NULL);
-+
-+	sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
-+	if (sq->itm != NULL)
-+		return 0;
-+	return RETERR(-ENOMEM);
-+}
-+
-+/* Free the per-item convert data of @sq and reset the pointer, so that
-+   new item data can be allocated later. */
-+static void free_item_convert_data(convert_info_t * sq)
-+{
-+	assert("edward-818", sq != NULL);
-+	assert("edward-819", sq->itm != NULL);
-+	assert("edward-820", sq->iplug != NULL);
-+
-+	kfree(sq->itm);
-+	sq->itm = NULL;
-+}
-+
-+/* Allocate the zero-filled convert info of flush position @pos, which
-+   must not have any yet, and initialize the cluster handle embedded in
-+   it with cluster_init_write().
-+   Returns 0 on success, RETERR(-ENOMEM) if the allocation failed. */
-+static int alloc_convert_data(flush_pos_t * pos)
-+{
-+	assert("edward-821", pos != NULL);
-+	assert("edward-822", pos->sq == NULL);
-+
-+	/* kzalloc() allocates and clears the memory in one step */
-+	pos->sq = kzalloc(sizeof(*pos->sq), reiser4_ctx_gfp_mask_get());
-+	if (!pos->sq)
-+		return RETERR(-ENOMEM);
-+	cluster_init_write(&pos->sq->clust, 0);
-+	return 0;
-+}
-+
-+/* Tear down the convert info attached to @pos: free the per-item data if
-+   there is any, put the cluster handle, free the info itself and reset
-+   pos->sq to NULL. pos->sq must be set on entry. */
-+void free_convert_data(flush_pos_t * pos)
-+{
-+	convert_info_t *sq;
-+
-+	assert("edward-823", pos != NULL);
-+	assert("edward-824", pos->sq != NULL);
-+
-+	sq = pos->sq;
-+	if (sq->itm != NULL)
-+		free_item_convert_data(sq);
-+	put_cluster_handle(&sq->clust);
-+
-+	kfree(sq);
-+	pos->sq = NULL;
-+}
-+
-+/* Zero the already allocated per-item convert info of @pos and initialize
-+   it for @inode. Returns the result of init_convert_data_ctail(). */
-+static int init_item_convert_data(flush_pos_t * pos, struct inode *inode)
-+{
-+	assert("edward-825", pos != NULL);
-+	assert("edward-826", pos->sq != NULL);
-+	assert("edward-827", item_convert_data(pos) != NULL);
-+	assert("edward-828", inode != NULL);
-+
-+	memset(pos->sq->itm, 0, sizeof(*pos->sq->itm));
-+
-+	/* iplug->init_convert_data() */
-+	return init_convert_data_ctail(pos->sq->itm, inode);
-+}
-+
-+/* Create and attach the disk cluster info used by the 'convert' phase of
-+   the flush squalloc().
-+
-+   @pos:   flush position; pos->child must be set to the jnode to convert
-+   @inode: cryptcompress inode the disk cluster belongs to
-+
-+   Allocates pos->sq if it does not exist yet, sets the cluster up by the
-+   page of pos->child, allocates and initializes the per-item convert info,
-+   flushes the cluster pages, deflates the cluster and builds a flow over the
-+   transformed output stream.
-+
-+   pos->child is jput() on every exit path, success or failure; the pointer
-+   itself is left for the caller to reset.
-+
-+   Returns 0 on success or an error code. On errors that happen once pos->sq
-+   exists, the whole convert data is freed again. */
-+static int attach_convert_idata(flush_pos_t * pos, struct inode *inode)
-+{
-+	int ret = 0;
-+	convert_item_info_t *info;
-+	reiser4_cluster_t *clust;
-+	file_plugin *fplug = inode_file_plugin(inode);
-+	compression_plugin *cplug = inode_compression_plugin(inode);
-+
-+	assert("edward-248", pos != NULL);
-+	assert("edward-249", pos->child != NULL);
-+	assert("edward-251", inode != NULL);
-+	assert("edward-682", cryptcompress_inode_ok(inode));
-+	assert("edward-252",
-+	       fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+	assert("edward-473",
-+	       item_plugin_by_coord(&pos->coord) ==
-+	       item_plugin_by_id(CTAIL_ID));
-+
-+	if (!pos->sq) {
-+		ret = alloc_convert_data(pos);
-+		if (ret) {
-+			/* There is no convert data to free yet, so the err
-+			   label (which requires pos->sq) cannot be used, but
-+			   the child still has to be put like on every other
-+			   exit path. */
-+			jput(pos->child);
-+			return ret;
-+		}
-+	}
-+	clust = &pos->sq->clust;
-+	ret = grab_coa(&clust->tc, cplug);
-+	if (ret)
-+		goto err;
-+	ret = set_cluster_by_page(clust,
-+				  jnode_page(pos->child),
-+				  MAX_CLUSTER_NRPAGES);
-+	if (ret)
-+		goto err;
-+
-+	assert("edward-829", pos->sq != NULL);
-+	assert("edward-250", item_convert_data(pos) == NULL);
-+
-+	/* must be set before the item data is allocated: freeing the item
-+	   data asserts that iplug is present */
-+	pos->sq->iplug = item_plugin_by_id(CTAIL_ID);
-+
-+	ret = alloc_item_convert_data(pos->sq);
-+	if (ret)
-+		goto err;
-+	ret = init_item_convert_data(pos, inode);
-+	if (ret)
-+		goto err;
-+	info = item_convert_data(pos);
-+
-+	ret = flush_cluster_pages(clust, pos->child, inode);
-+	if (ret)
-+		goto err;
-+
-+	reiser4_deflate_cluster(clust, inode);
-+	inc_item_convert_count(pos);
-+
-+	/* make flow by transformed stream */
-+	fplug->flow_by_inode(info->inode,
-+			     (const char __user *)tfm_stream_data(&clust->tc, OUTPUT_STREAM),
-+			     0 /* kernel space */ ,
-+			     clust->tc.len,
-+			     clust_to_off(clust->index, inode),
-+			     WRITE_OP, &info->flow);
-+	jput(pos->child);
-+
-+	assert("edward-683", cryptcompress_inode_ok(inode));
-+	return 0;
-+      err:
-+	jput(pos->child);
-+	free_convert_data(pos);
-+	return ret;
-+}
-+
-+/* Clear up disk cluster info: drop the per-item convert data of @sq.
-+   The item data must be bound to an inode and its flow must be drained. */
-+static void detach_convert_idata(convert_info_t * sq)
-+{
-+	assert("edward-253", sq != NULL);
-+	assert("edward-840", sq->itm != NULL);
-+	assert("edward-255", sq->itm->inode != NULL);
-+	assert("edward-1212", sq->itm->flow.length == 0);
-+
-+	free_item_convert_data(sq);
-+}
-+
-+/* plugin->u.item.f.utmost_child */
-+
-+/* This function sets leftmost child for a first cluster item,
-+   if the child exists, and NULL in other cases.
-+   NOTE-EDWARD: Do not call this for RIGHT_SIDE
-+
-+   @coord: coord of a ctail item
-+   @side:  must be LEFT_SIDE
-+   @child: out parameter: result of jlookup() for the page the item key
-+           points to, or NULL if the key is not a disk cluster key
-+
-+   Always returns 0. */
-+
-+int utmost_child_ctail(const coord_t * coord, sideof side, jnode ** child)
-+{
-+	reiser4_key key;
-+
-+	/* validate the arguments before @coord is used to build the key */
-+	assert("edward-257", coord != NULL);
-+	assert("edward-258", child != NULL);
-+	assert("edward-259", side == LEFT_SIDE);
-+	assert("edward-260",
-+	       item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
-+
-+	item_key_by_coord(coord, &key);
-+
-+	if (!is_disk_cluster_key(&key, coord))
-+		*child = NULL;
-+	else
-+		/* @key already holds the item key, no need to rebuild it */
-+		*child = jlookup(current_tree,
-+				 get_key_objectid(&key),
-+				 off_to_pg(get_key_offset(&key)));
-+	return 0;
-+}
-+
-+/* Returns true if @p2 is the item that follows @p1 in the _same_ disk
-+ cluster; for ctails this is exactly the mergeable_ctail() test.
-+ A disk cluster is a set of items. If ->clustered() != NULL, the whole
-+ disk cluster should be read/modified together with each of its items.
-+*/
-+static int clustered_ctail(const coord_t * p1, const coord_t * p2)
-+{
-+ return mergeable_ctail(p1, p2);
-+}
-+
-+/* Determine the state of the item that follows the current one and store
-+ it in item_convert_data(pos)->d_next.
-+ d_next is DC_AFTER_CLUSTER unless the current item is the last item of
-+ its node and the first item of a right neighbor is clustered with it; in
-+ that case d_next is set to DC_CHAINED_ITEM.
-+ Empty right neighbors are skipped. Note, that right neighbors may be not
-+ in the slum because of races. If so, make them dirty and convertible.
-+ Returns 0 on success (a missing right neighbor is not an error), or the
-+ error returned while taking or loading a right neighbor.
-+*/
-+static int next_item_dc_stat(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ int stop = 0;
-+ znode *cur;
-+ coord_t coord;
-+ lock_handle lh;
-+ lock_handle right_lock;
-+
-+ assert("edward-1232", !node_is_empty(pos->coord.node));
-+ assert("edward-1014",
-+ pos->coord.item_pos < coord_num_items(&pos->coord));
-+ assert("edward-1015", chaining_data_present(pos));
-+ assert("edward-1017",
-+ item_convert_data(pos)->d_next == DC_INVALID_STATE);
-+
-+ /* default answer, overridden only if a chained item is found below */
-+ item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
-+
-+ if (item_convert_data(pos)->d_cur == DC_AFTER_CLUSTER)
-+ return ret;
-+ /* not the last item of the node: right neighbors are not inspected */
-+ if (pos->coord.item_pos < coord_num_items(&pos->coord) - 1)
-+ return ret;
-+
-+ /* check next slum item */
-+ init_lh(&right_lock);
-+ cur = pos->coord.node;
-+
-+ while (!stop) {
-+ init_lh(&lh);
-+ ret = reiser4_get_right_neighbor(&lh,
-+ cur,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (ret)
-+ break;
-+ ret = zload(lh.node);
-+ if (ret) {
-+ done_lh(&lh);
-+ break;
-+ }
-+ coord_init_before_first_item(&coord, lh.node);
-+
-+ if (node_is_empty(lh.node)) {
-+ /* empty neighbor: mark it and keep walking to the right */
-+ znode_make_dirty(lh.node);
-+ znode_set_convertible(lh.node);
-+ stop = 0;
-+ } else if (clustered_ctail(&pos->coord, &coord)) {
-+
-+ item_convert_data(pos)->d_next = DC_CHAINED_ITEM;
-+
-+ if (!ZF_ISSET(lh.node, JNODE_DIRTY)) {
-+ /*
-+ warning("edward-1024",
-+ "next slum item mergeable, "
-+ "but znode %p isn't dirty\n",
-+ lh.node);
-+ */
-+ znode_make_dirty(lh.node);
-+ }
-+ if (!znode_convertible(lh.node)) {
-+ /*
-+ warning("edward-1272",
-+ "next slum item mergeable, "
-+ "but znode %p isn't convertible\n",
-+ lh.node);
-+ */
-+ znode_set_convertible(lh.node);
-+ }
-+ stop = 1;
-+ } else
-+ /* first item of the neighbor is not part of this cluster */
-+ stop = 1;
-+ zrelse(lh.node);
-+ /* hand the lock over: right_lock releases the previously visited
-+ neighbor and takes over the one just examined, which becomes
-+ the starting point of the next iteration */
-+ done_lh(&right_lock);
-+ copy_lh(&right_lock, &lh);
-+ done_lh(&lh);
-+ cur = right_lock.node;
-+ }
-+ done_lh(&right_lock);
-+
-+ /* running out of right neighbors just ends the search */
-+ if (ret == -E_NO_NEIGHBOR)
-+ ret = 0;
-+ return ret;
-+}
-+
-+/* Choose the write mode for the current item from the state of @idata.
-+
-+   While the flow still has data, an item inside the disk cluster
-+   (DC_FIRST_ITEM, DC_CHAINED_ITEM) is overwritten and a position after the
-+   cluster (DC_AFTER_CLUSTER) is appended to. Once the flow is empty, an
-+   item inside the cluster is cut.
-+
-+   Returns 0 with *@mode set, or 1 (leaving *@mode untouched) when the flow
-+   is empty and the position is already after the cluster, i.e. there is
-+   nothing left to do. */
-+static int
-+assign_convert_mode(convert_item_info_t * idata,
-+		    cryptcompress_write_mode_t * mode)
-+{
-+	int done = 0;
-+
-+	assert("edward-1025", idata != NULL);
-+
-+	switch (idata->d_cur) {
-+	case DC_FIRST_ITEM:
-+	case DC_CHAINED_ITEM:
-+		if (idata->flow.length)
-+			/* overwrite */
-+			*mode = CRC_OVERWRITE_ITEM;
-+		else
-+			/* cut */
-+			*mode = CRC_CUT_ITEM;
-+		break;
-+	case DC_AFTER_CLUSTER:
-+		if (idata->flow.length)
-+			/* append */
-+			*mode = CRC_APPEND_ITEM;
-+		else
-+			/* invalidate */
-+			done = 1;
-+		break;
-+	default:
-+		if (idata->flow.length) {
-+			impossible("edward-1018", "wrong current item state");
-+		} else {
-+			impossible("edward-1019", "wrong current item state");
-+		}
-+	}
-+	return done;
-+}
-+
-+/* plugin->u.item.f.convert */
-+/* Write ctail in guessed mode.
-+ If no convert data is attached to @pos yet, try to attach it (the mode is
-+ then CRC_OVERWRITE_ITEM); otherwise derive the mode from the existing
-+ convert data. Then determine the state of the next item, convert the
-+ current item and update or detach the convert data according to the mode.
-+ Returns 0 on success, also when there was nothing to convert, or an error
-+ code. */
-+int convert_ctail(flush_pos_t * pos)
-+{
-+ int result;
-+ int nr_items;
-+ cryptcompress_write_mode_t mode = CRC_OVERWRITE_ITEM;
-+
-+ assert("edward-1020", pos != NULL);
-+ assert("edward-1213", coord_num_items(&pos->coord) != 0);
-+ assert("edward-1257", item_id_by_coord(&pos->coord) == CTAIL_ID);
-+ assert("edward-1258", ctail_ok(&pos->coord));
-+ assert("edward-261", pos->coord.node != NULL);
-+
-+ /* remembered to detect below whether the conversion killed the item */
-+ nr_items = coord_num_items(&pos->coord);
-+ if (!chaining_data_present(pos)) {
-+ if (should_attach_convert_idata(pos)) {
-+ /* attach convert item info */
-+ struct inode *inode;
-+
-+ assert("edward-264", pos->child != NULL);
-+ assert("edward-265", jnode_page(pos->child) != NULL);
-+ assert("edward-266",
-+ jnode_page(pos->child)->mapping != NULL);
-+
-+ inode = jnode_page(pos->child)->mapping->host;
-+
-+ assert("edward-267", inode != NULL);
-+
-+ /* attach item convert info by child and put the last one */
-+ result = attach_convert_idata(pos, inode);
-+ pos->child = NULL;
-+ if (result == -E_REPEAT) {
-+ /* jnode became clean, or there is no dirty
-+ pages (nothing to update in disk cluster) */
-+ warning("edward-1021",
-+ "convert_ctail: nothing to attach");
-+ return 0;
-+ }
-+ if (result != 0)
-+ return result;
-+ } else
-+ /* unconvertible */
-+ return 0;
-+ } else {
-+ /* use old convert info */
-+
-+ convert_item_info_t *idata;
-+
-+ idata = item_convert_data(pos);
-+
-+ result = assign_convert_mode(idata, &mode);
-+ if (result) {
-+ /* disk cluster is over,
-+ nothing to update anymore */
-+ detach_convert_idata(pos->sq);
-+ return 0;
-+ }
-+ }
-+
-+ assert("edward-433", chaining_data_present(pos));
-+ assert("edward-1022",
-+ pos->coord.item_pos < coord_num_items(&pos->coord));
-+
-+ result = next_item_dc_stat(pos);
-+ if (result) {
-+ detach_convert_idata(pos->sq);
-+ return result;
-+ }
-+ result = do_convert_ctail(pos, mode);
-+ if (result) {
-+ detach_convert_idata(pos->sq);
-+ return result;
-+ }
-+ switch (mode) {
-+ case CRC_CUT_ITEM:
-+ assert("edward-1214", item_convert_data(pos)->flow.length == 0);
-+ assert("edward-1215",
-+ coord_num_items(&pos->coord) == nr_items ||
-+ coord_num_items(&pos->coord) == nr_items - 1);
-+ /* more items of this cluster follow: keep the convert data */
-+ if (item_convert_data(pos)->d_next == DC_CHAINED_ITEM)
-+ break;
-+ if (coord_num_items(&pos->coord) != nr_items) {
-+ /* the item was killed, no more chained items */
-+ detach_convert_idata(pos->sq);
-+ if (!node_is_empty(pos->coord.node))
-+ /* make sure the next item will be scanned */
-+ coord_init_before_item(&pos->coord);
-+ break;
-+ }
-+ /* item survived and nothing is chained: fall through and detach */
-+ case CRC_APPEND_ITEM:
-+ assert("edward-434", item_convert_data(pos)->flow.length == 0);
-+ detach_convert_idata(pos->sq);
-+ break;
-+ case CRC_OVERWRITE_ITEM:
-+ if (coord_is_unprepped_ctail(&pos->coord)) {
-+ /* convert unprepped ctail to prepped one by storing the
-+ cluster shift of the inode in the item */
-+ int shift;
-+ shift =
-+ inode_cluster_shift(item_convert_data(pos)->inode);
-+ assert("edward-1259", cluster_shift_ok(shift));
-+ put_unaligned((d8)shift,
-+ &ctail_formatted_at(&pos->coord)->
-+ cluster_shift);
-+ }
-+ break;
-+ }
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/ctail.h linux-2.6.20/fs/reiser4/plugin/item/ctail.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/ctail.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/ctail.h 2007-05-06 14:50:43.803008220 +0400
-@@ -0,0 +1,97 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined( __FS_REISER4_CTAIL_H__ )
-+#define __FS_REISER4_CTAIL_H__
-+
-+/* Disk format of ctail item: a one-byte header followed by the body.
-+ The structure is packed. */
-+typedef struct ctail_item_format {
-+ /* packed shift; size of (prepped) disk cluster
-+ is calculated as (1 << cluster_shift) */
-+ d8 cluster_shift;
-+ /* ctail body; zero-length array, the body starts right after the
-+ header */
-+ d8 body[0];
-+} __attribute__ ((packed)) ctail_item_format;
-+
-+/* Unprepped disk cluster is represented by a single ctail item
-+ with the following "magic" attributes: */
-+/* "magic" cluster_shift value stored in place of a real shift */
-+#define UCTAIL_SHIFT 0xff
-+/* How many units unprepped ctail item has */
-+#define UCTAIL_NR_UNITS 1
-+
-+/* The following is a set of various item states in a disk cluster.
-+ Disk cluster is a set of items whose keys belong to the interval
-+ [dc_key , dc_key + disk_cluster_size - 1] */
-+typedef enum {
-+ DC_INVALID_STATE = 0, /* state is not determined (yet) */
-+ DC_FIRST_ITEM = 1, /* first item of a disk cluster */
-+ DC_CHAINED_ITEM = 2, /* further item of the same disk cluster */
-+ DC_AFTER_CLUSTER = 3 /* position after the disk cluster */
-+} dc_item_stat;
-+
-+/* ctail-specific extension.
-+ In particular this describes parameters of the disk cluster an item
-+ belongs to */
-+typedef struct {
-+ int shift; /* this contains cluster_shift extracted from
-+ ctail_item_format (above), or UCTAIL_SHIFT
-+ (the latter is the "magic" of unprepped disk clusters) */
-+ int dsize; /* size of a prepped disk cluster */
-+ int ncount; /* count of nodes occupied by a disk cluster */
-+} ctail_coord_extension_t;
-+
-+struct cut_list;
-+
-+/* plugin->item.b.* */
-+int can_contain_key_ctail(const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+int mergeable_ctail(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_ctail(const coord_t * coord);
-+int estimate_ctail(const coord_t * coord, const reiser4_item_data * data);
-+void print_ctail(const char *prefix, coord_t * coord);
-+lookup_result lookup_ctail(const reiser4_key *, lookup_bias, coord_t *);
-+
-+int paste_ctail(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG);
-+int init_ctail(coord_t *, coord_t *, reiser4_item_data *);
-+int can_shift_ctail(unsigned free_space, coord_t * coord,
-+ znode * target, shift_direction pend, unsigned *size,
-+ unsigned want);
-+void copy_units_ctail(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction where_is_free_space,
-+ unsigned free_space);
-+int cut_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int ctail_ok(const coord_t * coord);
-+int check_ctail(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.* */
-+int read_ctail(struct file *, flow_t *, hint_t *);
-+int readpage_ctail(void *, struct page *);
-+int readpages_ctail(struct file *, struct address_space *, struct list_head *);
-+reiser4_key *append_key_ctail(const coord_t *, reiser4_key *);
-+int create_hook_ctail(const coord_t * coord, void *arg);
-+int kill_hook_ctail(const coord_t *, pos_in_node_t, pos_in_node_t,
-+ carry_kill_data *);
-+int shift_hook_ctail(const coord_t *, unsigned, unsigned, znode *);
-+
-+/* plugin->u.item.f */
-+int utmost_child_ctail(const coord_t *, sideof, jnode **);
-+int scan_ctail(flush_scan *);
-+int convert_ctail(flush_pos_t *);
-+size_t inode_scaled_cluster_size(struct inode *);
-+
-+#endif /* __FS_REISER4_CTAIL_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/extent.c linux-2.6.20/fs/reiser4/plugin/item/extent.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/extent.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/extent.c 2007-05-06 14:50:43.807009470 +0400
-@@ -0,0 +1,197 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../key.h"
-+#include "../../super.h"
-+#include "../../carry.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../../flush.h"
-+#include "../object.h"
-+
-+/* Prepare structure reiser4_item_data for putting extent units into the
-+   tree: @data is set up to describe @nr_extents extent units stored at
-+   @ext_unit as an item of the EXTENT_POINTER_ID plugin. Returns @data. */
-+/* Audited by: green(2002.06.13) */
-+reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
-+				   int nr_extents)
-+{
-+	data->iplug = item_plugin_by_id(EXTENT_POINTER_ID);
-+	data->arg = NULL;
-+	data->length = sizeof(reiser4_extent) * nr_extents;
-+	/* the units live in kernel space, not in user space */
-+	data->user = 0;
-+	data->data = ext_unit;
-+	return data;
-+}
-+
-+/* How many bytes are addressed by the first @nr extents of the extent item
-+   at @coord: the sum of their widths times current_blocksize.
-+   @nr must not exceed the number of units of the item. */
-+reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr)
-+{
-+	reiser4_extent *unit = item_body_by_coord(coord);
-+	reiser4_extent *end;
-+	reiser4_block_nr total = 0;
-+
-+	assert("vs-263", nr <= nr_units_extent(coord));
-+
-+	for (end = unit + nr; unit != end; unit++)
-+		total += extent_get_width(unit);
-+
-+	return total * current_blocksize;
-+}
-+
-+/* Classify @ext by its start block: a start of 0 marks a hole, a start of 1
-+   an unallocated extent, any other start an allocated extent.
-+
-+   The start is compared at its full width. Narrowing it to int first (as
-+   this function used to do) makes an allocated extent whose start block
-+   number has only zeroes, or only a one, in its low bits - e.g. block
-+   1 << 32 - look like a hole or like an unallocated extent.
-+   NOTE(review): this assumes that extent_get_start() yields a
-+   reiser4_block_nr wider than int - confirm against its declaration. */
-+extent_state state_of_extent(reiser4_extent * ext)
-+{
-+	reiser4_block_nr start = extent_get_start(ext);
-+
-+	if (start == 0)
-+		return HOLE_EXTENT;
-+	if (start == 1)
-+		return UNALLOCATED_EXTENT;
-+	return ALLOCATED_EXTENT;
-+}
-+
-+/* Returns nonzero if the extent unit at @item, which must be an extent
-+   item, is in the UNALLOCATED_EXTENT state. */
-+int extent_is_unallocated(const coord_t * item)
-+{
-+	extent_state state;
-+
-+	assert("jmacd-5133", item_is_extent(item));
-+
-+	state = state_of_extent(extent_by_coord(item));
-+	return state == UNALLOCATED_EXTENT;
-+}
-+
-+/* Set extent's start and width: store @start and @width into @ext through
-+ the extent_set_start()/extent_set_width() accessors. */
-+void reiser4_set_extent(reiser4_extent * ext, reiser4_block_nr start,
-+ reiser4_block_nr width)
-+{
-+ extent_set_start(ext, start);
-+ extent_set_width(ext, width);
-+}
-+
-+/**
-+ * reiser4_replace_extent - replace extent and paste 1 or 2 after it
-+ * @h: replace handle. Inputs read from it: h->coord and h->lh (position and
-+ *	lock handle of the extent unit to be overwritten), h->overwrite (new
-+ *	contents of that unit), h->new_extents[] and h->nr_new_extents (units
-+ *	to paste after it), h->paste_key (key the new units are pasted with)
-+ *	and h->flags (passed on to insert_into_item()). The other fields
-+ *	touched here (item, coord_after, lh_after, watch, tmp, orig_ext) are
-+ *	used as working storage.
-+ * @return_inserted_position: selects what h->coord and h->lh are set to on
-+ *	return, see below
-+ *
-+ * Overwrites one extent, pastes 1 or 2 more ones after overwritten one. If
-+ * @return_inserted_position is not 0 - h->coord and h->lh are returned set
-+ * to first of newly inserted units, if it is 0 - h->coord and h->lh are
-+ * returned set to extent which was overwritten.
-+ *
-+ * Returns 0 on success, or the error of insert_into_item() or zload().
-+ */
-+int reiser4_replace_extent(struct replace_handle *h,
-+ int return_inserted_position)
-+{
-+ int result;
-+ znode *orig_znode;
-+ /*ON_DEBUG(reiser4_extent orig_ext);*/ /* this is for debugging */
-+
-+ assert("vs-990", coord_is_existing_unit(h->coord));
-+ assert("vs-1375", znode_is_write_locked(h->coord->node));
-+ assert("vs-1426", extent_get_width(&h->overwrite) != 0);
-+ assert("vs-1427", extent_get_width(&h->new_extents[0]) != 0);
-+ assert("vs-1427", ergo(h->nr_new_extents == 2,
-+ extent_get_width(&h->new_extents[1]) != 0));
-+
-+ /* compose structure for paste */
-+ init_new_extent(&h->item, &h->new_extents[0], h->nr_new_extents);
-+
-+ /* track the unit to be overwritten with a monitored tap, so that it can
-+ be found again after the insertion below */
-+ coord_dup(&h->coord_after, h->coord);
-+ init_lh(&h->lh_after);
-+ copy_lh(&h->lh_after, h->lh);
-+ reiser4_tap_init(&h->watch, &h->coord_after, &h->lh_after, ZNODE_WRITE_LOCK);
-+ reiser4_tap_monitor(&h->watch);
-+
-+ ON_DEBUG(h->orig_ext = *extent_by_coord(h->coord));
-+ orig_znode = h->coord->node;
-+
-+#if REISER4_DEBUG
-+ /* make sure that key is set properly */
-+ unit_key_by_coord(h->coord, &h->tmp);
-+ set_key_offset(&h->tmp,
-+ get_key_offset(&h->tmp) +
-+ extent_get_width(&h->overwrite) * current_blocksize);
-+ assert("vs-1080", keyeq(&h->tmp, &h->paste_key));
-+#endif
-+
-+ /* set insert point after unit to be replaced */
-+ h->coord->between = AFTER_UNIT;
-+
-+ result = insert_into_item(h->coord, return_inserted_position ? h->lh : NULL,
-+ &h->paste_key, &h->item, h->flags);
-+ if (!result) {
-+ /* now we have to replace the unit after which new units were
-+ inserted. Its position is tracked by @watch */
-+ reiser4_extent *ext;
-+ znode *node;
-+
-+ node = h->coord_after.node;
-+ if (node != orig_znode) {
-+ /* the tracked unit ended up in another node: load that node
-+ before its data is accessed */
-+ coord_clear_iplug(&h->coord_after);
-+ result = zload(node);
-+ }
-+
-+ if (likely(!result)) {
-+ ext = extent_by_coord(&h->coord_after);
-+
-+ assert("vs-987", znode_is_loaded(node));
-+ assert("vs-988", !memcmp(ext, &h->orig_ext, sizeof(*ext)));
-+
-+ /* overwrite extent unit */
-+ memcpy(ext, &h->overwrite, sizeof(reiser4_extent));
-+ znode_make_dirty(node);
-+
-+ /* pairs with the zload() above */
-+ if (node != orig_znode)
-+ zrelse(node);
-+
-+ if (return_inserted_position == 0) {
-+ /* coord and lh are to be set to overwritten
-+ extent */
-+ assert("vs-1662",
-+ WITH_DATA(node, !memcmp(&h->overwrite,
-+ extent_by_coord(
-+ &h->coord_after),
-+ sizeof(reiser4_extent))));
-+
-+ *h->coord = h->coord_after;
-+ done_lh(h->lh);
-+ copy_lh(h->lh, &h->lh_after);
-+ } else {
-+ /* h->coord and h->lh are to be set to first of
-+ inserted units */
-+ assert("vs-1663",
-+ WITH_DATA(h->coord->node,
-+ !memcmp(&h->new_extents[0],
-+ extent_by_coord(h->coord),
-+ sizeof(reiser4_extent))));
-+ assert("vs-1664", h->lh->node == h->coord->node);
-+ }
-+ }
-+ }
-+ reiser4_tap_done(&h->watch);
-+
-+ return result;
-+}
-+
-+/* Returns the lock handle that is first in the list of lock owners of
-+ @node. @node must be write locked, and write locked exactly once. */
-+lock_handle *znode_lh(znode *node)
-+{
-+ assert("vs-1371", znode_is_write_locked(node));
-+ assert("vs-1372", znode_is_wlocked_once(node));
-+ return list_entry(node->lock.owners.next, lock_handle, owners_link);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/extent_file_ops.c linux-2.6.20/fs/reiser4/plugin/item/extent_file_ops.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/extent_file_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/extent_file_ops.c 2007-05-06 14:50:43.807009470 +0400
-@@ -0,0 +1,1443 @@
-+/* COPYRIGHT 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../object.h"
-+
-+#include <linux/quotaops.h>
-+#include <linux/swap.h>
-+#include "../../../../mm/filemap.h"
-+
-+/* Returns the extent located @offset bytes from the start of the data of
-+   @node. */
-+static inline reiser4_extent *ext_by_offset(const znode *node, int offset)
-+{
-+	return (reiser4_extent *) (zdata(node) + offset);
-+}
-+
-+/**
-+ * check_uf_coord - verify coord extension
-+ * @uf_coord: coord with extent extension to verify
-+ * @key: key @uf_coord is expected to be set to, or NULL to skip this check
-+ *
-+ * Makes sure that all fields of @uf_coord are set properly. If @key is
-+ * specified - check whether @uf_coord is set correspondingly.
-+ * Does nothing unless REISER4_DEBUG is on.
-+ */
-+static void check_uf_coord(const uf_coord_t *uf_coord, const reiser4_key *key)
-+{
-+#if REISER4_DEBUG
-+ const coord_t *coord;
-+ const extent_coord_extension_t *ext_coord;
-+ reiser4_extent *ext;
-+
-+ coord = &uf_coord->coord;
-+ ext_coord = &uf_coord->extension.extent;
-+ ext = ext_by_offset(coord->node, uf_coord->extension.extent.ext_offset);
-+
-+ /* the cached extension has to agree with what is in the node */
-+ assert("",
-+ WITH_DATA(coord->node,
-+ (uf_coord->valid == 1 &&
-+ coord_is_iplug_set(coord) &&
-+ item_is_extent(coord) &&
-+ ext_coord->nr_units == nr_units_extent(coord) &&
-+ ext == extent_by_coord(coord) &&
-+ ext_coord->width == extent_get_width(ext) &&
-+ coord->unit_pos < ext_coord->nr_units &&
-+ ext_coord->pos_in_unit < ext_coord->width &&
-+ memcmp(ext, &ext_coord->extent,
-+ sizeof(reiser4_extent)) == 0)));
-+ if (key) {
-+ reiser4_key coord_key;
-+
-+ /* key of the position inside the unit: the unit key advanced by
-+ pos_in_unit pages */
-+ unit_key_by_coord(&uf_coord->coord, &coord_key);
-+ set_key_offset(&coord_key,
-+ get_key_offset(&coord_key) +
-+ (uf_coord->extension.extent.
-+ pos_in_unit << PAGE_CACHE_SHIFT));
-+ assert("", keyeq(key, &coord_key));
-+ }
-+#endif
-+}
-+
-+/* Returns the extent unit @uf_coord is set to, located through the offset
-+   cached in the extent coord extension. The extension is verified first
-+   (a no-op unless REISER4_DEBUG is on). */
-+static inline reiser4_extent *ext_by_ext_coord(const uf_coord_t *uf_coord)
-+{
-+	const extent_coord_extension_t *ext_coord = &uf_coord->extension.extent;
-+
-+	check_uf_coord(uf_coord, NULL);
-+
-+	return ext_by_offset(uf_coord->coord.node, ext_coord->ext_offset);
-+}
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * offset_is_in_unit - check whether a file offset falls into an extent unit
-+ * @coord: coord set to an extent unit
-+ * @off: file offset to test
-+ *
-+ * Returns 1 if @off lies in the byte range addressed by the unit @coord is
-+ * set to, i.e. in [unit key offset, unit key offset + unit width *
-+ * current_blocksize), and 0 otherwise. Nothing is modified.
-+ */
-+static int offset_is_in_unit(const coord_t *coord, loff_t off)
-+{
-+	reiser4_key unit_key;
-+	__u64 first;
-+	__u64 past_end;
-+	reiser4_extent *ext = extent_by_coord(coord);
-+
-+	unit_key_extent(coord, &unit_key);
-+	first = get_key_offset(&unit_key);
-+	past_end = first + (current_blocksize * extent_get_width(ext));
-+
-+	return (off >= first && off < past_end) ? 1 : 0;
-+}
-+
-+/* Returns 1 if the offset of @key is inside of the extent unit @coord is set
-+ to, 0 otherwise. @coord must be set to an existing unit and @key must be
-+ within the key range of the item: not less than the item key and less than
-+ its append key. */
-+static int
-+coord_matches_key_extent(const coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key item_key;
-+
-+ assert("vs-771", coord_is_existing_unit(coord));
-+ assert("vs-1258", keylt(key, append_key_extent(coord, &item_key)));
-+ assert("vs-1259", keyge(key, item_key_by_coord(coord, &item_key)));
-+
-+ return offset_is_in_unit(coord, get_key_offset(key));
-+}
-+
-+#endif
-+
-+/**
-+ * can_append - check whether a key continues an extent item
-+ * @key: key to check
-+ * @coord: coord set to an extent item
-+ *
-+ * Returns 1 if @key is equal to an append key of item @coord is set to
-+ */
-+static int can_append(const reiser4_key *key, const coord_t *coord)
-+{
-+ reiser4_key append_key;
-+
-+ return keyeq(key, append_key_extent(coord, &append_key));
-+}
-+
-+/**
-+ * append_hole - extend the last extent item of a file with a hole
-+ * @coord: coord set to the extent item, which is on twig level
-+ * @lh: lock handle passed to insert_into_item()
-+ * @key: key the hole has to reach; must not be less than the append key of
-+ * the item
-+ *
-+ * The hole covers the range from the append key of the item up to @key,
-+ * rounded up to whole blocks. If the last unit of the item is a hole already,
-+ * it is widened in place; otherwise a new hole unit is inserted after it.
-+ * Returns 0 or the error of insert_into_item().
-+ */
-+static int append_hole(coord_t *coord, lock_handle *lh,
-+ const reiser4_key *key)
-+{
-+ reiser4_key append_key;
-+ reiser4_block_nr hole_width;
-+ reiser4_extent *ext, new_ext;
-+ reiser4_item_data idata;
-+
-+ /* last item of file may have to be appended with hole */
-+ assert("vs-708", znode_get_level(coord->node) == TWIG_LEVEL);
-+ assert("vs-714", item_id_by_coord(coord) == EXTENT_POINTER_ID);
-+
-+ /* key of first byte which is not addressed by this extent */
-+ append_key_extent(coord, &append_key);
-+
-+ assert("", keyle(&append_key, key));
-+
-+ /*
-+ * extent item has to be appended with hole. Calculate length of that
-+ * hole
-+ */
-+ hole_width = ((get_key_offset(key) - get_key_offset(&append_key) +
-+ current_blocksize - 1) >> current_blocksize_bits);
-+ assert("vs-954", hole_width > 0);
-+
-+ /* set coord after last unit */
-+ coord_init_after_item_end(coord);
-+
-+ /* get last extent in the item */
-+ ext = extent_by_coord(coord);
-+ if (state_of_extent(ext) == HOLE_EXTENT) {
-+ /*
-+ * last extent of a file is hole extent. Widen that extent by
-+ * @hole_width blocks. Note that we do not worry about
-+ * overflowing - extent width is 64 bits
-+ */
-+ reiser4_set_extent(ext, HOLE_EXTENT_START,
-+ extent_get_width(ext) + hole_width);
-+ znode_make_dirty(coord->node);
-+ return 0;
-+ }
-+
-+ /* append last item of the file with hole extent unit */
-+ assert("vs-713", (state_of_extent(ext) == ALLOCATED_EXTENT ||
-+ state_of_extent(ext) == UNALLOCATED_EXTENT));
-+
-+ reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
-+ init_new_extent(&idata, &new_ext, 1);
-+ return insert_into_item(coord, lh, &append_key, &idata, 0);
-+}
-+
-+/**
-+ * check_jnodes - verify that a twig node covers a range of pages
-+ * @twig: longterm locked twig node
-+ * @key: key of the first page of the range
-+ * @count: number of pages in the range
-+ *
-+ * Asserts that the keys of all @count pages starting at @key are within the
-+ * key range of @twig: not less than the key of its first unit and less than
-+ * the key following its last unit (the append key of the last item, if the
-+ * item plugin provides one). Does nothing unless REISER4_DEBUG is on.
-+ */
-+static void check_jnodes(znode *twig, const reiser4_key *key, int count)
-+{
-+#if REISER4_DEBUG
-+ coord_t c;
-+ reiser4_key node_key, jnode_key;
-+
-+ jnode_key = *key;
-+
-+ assert("", twig != NULL);
-+ assert("", znode_get_level(twig) == TWIG_LEVEL);
-+ assert("", znode_is_write_locked(twig));
-+
-+ /* NOTE(review): the result of zload() is not checked here */
-+ zload(twig);
-+ /* get the smallest key in twig node */
-+ coord_init_first_unit(&c, twig);
-+ unit_key_by_coord(&c, &node_key);
-+ assert("", keyle(&node_key, &jnode_key));
-+
-+ /* get the key which follows the last unit of twig node */
-+ coord_init_last_unit(&c, twig);
-+ unit_key_by_coord(&c, &node_key);
-+ if (item_plugin_by_coord(&c)->s.file.append_key)
-+ item_plugin_by_coord(&c)->s.file.append_key(&c, &node_key);
-+ /* key of the last byte of the last of @count pages */
-+ set_key_offset(&jnode_key,
-+ get_key_offset(&jnode_key) + (loff_t)count * PAGE_CACHE_SIZE - 1);
-+ assert("", keylt(&jnode_key, &node_key));
-+ zrelse(twig);
-+#endif
-+}
-+
-+/**
-+ * append_last_extent - append last file item
-+ * @uf_coord: coord to start insertion from; must be set after the last unit
-+ * of the item
-+ * @key: key of the first byte to be appended
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ *
-+ * There is already at least one extent item of the file in the tree. Append
-+ * the last of them with unallocated extent unit of width @count. Assign
-+ * fake block numbers to jnodes corresponding to the inserted extent.
-+ *
-+ * If @key is not the append key of the item, only a hole is appended up to
-+ * @key, no jnodes are processed and the result of append_hole() is returned
-+ * (0 on success).
-+ *
-+ * Returns @count when the jnodes were captured, 0 if @count is 0 or a hole
-+ * was appended, or an error code.
-+ */
-+static int append_last_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count)
-+{
-+ int result;
-+ reiser4_extent new_ext;
-+ reiser4_item_data idata;
-+ coord_t *coord;
-+ extent_coord_extension_t *ext_coord;
-+ reiser4_extent *ext;
-+ reiser4_block_nr block;
-+ jnode *node;
-+ int i;
-+
-+ coord = &uf_coord->coord;
-+ ext_coord = &uf_coord->extension.extent;
-+ ext = ext_by_ext_coord(uf_coord);
-+
-+ /* check correctness of position in the item */
-+ assert("vs-228", coord->unit_pos == coord_last_unit_pos(coord));
-+ assert("vs-1311", coord->between == AFTER_UNIT);
-+ assert("vs-1302", ext_coord->pos_in_unit == ext_coord->width - 1);
-+
-+ if (!can_append(key, coord)) {
-+ /* hole extent has to be inserted */
-+ result = append_hole(coord, uf_coord->lh, key);
-+ uf_coord->valid = 0;
-+ return result;
-+ }
-+
-+ if (count == 0)
-+ return 0;
-+
-+ assert("", get_key_offset(key) == (loff_t)index_jnode(jnodes[0]) * PAGE_CACHE_SIZE);
-+
-+ /* a failure to allocate quota for @count blocks is treated as a bug */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host,
-+ count);
-+ BUG_ON(result != 0);
-+
-+ switch (state_of_extent(ext)) {
-+ case UNALLOCATED_EXTENT:
-+ /*
-+ * last extent unit of the file is unallocated one. Increase
-+ * its width by @count
-+ */
-+ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START,
-+ extent_get_width(ext) + count);
-+ znode_make_dirty(coord->node);
-+
-+ /* update coord extension */
-+ ext_coord->width += count;
-+ ON_DEBUG(extent_set_width
-+ (&uf_coord->extension.extent.extent,
-+ ext_coord->width));
-+ break;
-+
-+ case HOLE_EXTENT:
-+ case ALLOCATED_EXTENT:
-+ /*
-+ * last extent unit of the file is either hole or allocated
-+ * one. Append one unallocated extent of width @count
-+ */
-+ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
-+ init_new_extent(&idata, &new_ext, 1);
-+ result = insert_into_item(coord, uf_coord->lh, key, &idata, 0);
-+ uf_coord->valid = 0;
-+ if (result)
-+ return result;
-+ break;
-+
-+ default:
-+ return RETERR(-EIO);
-+ }
-+
-+ /*
-+ * make sure that we hold long term locked twig node containing all
-+ * jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, key, count);
-+
-+ /*
-+ * assign fake block numbers to all jnodes. FIXME: make sure whether
-+ * twig node containing inserted extent item is locked
-+ */
-+ block = fake_blocknr_unformatted(count);
-+ for (i = 0; i < count; i ++, block ++) {
-+ node = jnodes[i];
-+ /* each jnode is marked created, given its block number, captured
-+ and made dirty under its spin lock */
-+ spin_lock_jnode(node);
-+ JF_SET(node, JNODE_CREATED);
-+ jnode_set_block(node, &block);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+ return count;
-+}
-+
-+/**
-+ * insert_first_hole - insert hole extent into tree
-+ * @coord: coord set between items, where the new item is to be inserted
-+ * @lh: lock handle passed to insert_extent_by_coord()
-+ * @key: key the hole has to reach
-+ *
-+ * Inserts a new extent item consisting of one hole unit. The item key is @key
-+ * with offset 0 and the hole covers the range from offset 0 up to the offset
-+ * of @key, rounded up to whole blocks.
-+ * Returns the result of insert_extent_by_coord().
-+ */
-+static int insert_first_hole(coord_t *coord, lock_handle *lh,
-+ const reiser4_key *key)
-+{
-+ reiser4_extent new_ext;
-+ reiser4_item_data idata;
-+ reiser4_key item_key;
-+ reiser4_block_nr hole_width;
-+
-+ /* @coord must be set for inserting of new item */
-+ assert("vs-711", coord_is_between_items(coord));
-+
-+ item_key = *key;
-+ set_key_offset(&item_key, 0ull);
-+
-+ hole_width = ((get_key_offset(key) + current_blocksize - 1) >>
-+ current_blocksize_bits);
-+ assert("vs-710", hole_width > 0);
-+
-+ /* compose body of hole extent and insert item into tree */
-+ reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
-+ init_new_extent(&idata, &new_ext, 1);
-+ return insert_extent_by_coord(coord, &idata, &item_key, lh);
-+}
-+
-+
-+/**
-+ * insert_first_extent - insert first file item
-+ * @uf_coord: coord to start insertion from; set between items on leaf level
-+ * @key: key of the first byte to be written
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ * @inode: inode of file
-+ *
-+ * There are no items of file @inode in the tree yet. Insert unallocated extent
-+ * of width @count into tree or hole extent if writing not to the
-+ * beginning. Assign fake block numbers to jnodes corresponding to the inserted
-+ * unallocated extent. Returns number of jnodes or error code.
-+ *
-+ * When a hole is inserted (offset of @key is not 0), no jnodes are processed
-+ * and the result of insert_first_hole() is returned (0 on success). In all
-+ * cases where the tree was modified, @uf_coord is invalidated.
-+ */
-+static int insert_first_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count,
-+ struct inode *inode)
-+{
-+ int result;
-+ int i;
-+ reiser4_extent new_ext;
-+ reiser4_item_data idata;
-+ reiser4_block_nr block;
-+ unix_file_info_t *uf_info;
-+ jnode *node;
-+
-+ /* first extent insertion starts at leaf level */
-+ assert("vs-719", znode_get_level(uf_coord->coord.node) == LEAF_LEVEL);
-+ assert("vs-711", coord_is_between_items(&uf_coord->coord));
-+
-+ if (get_key_offset(key) != 0) {
-+ result = insert_first_hole(&uf_coord->coord, uf_coord->lh, key);
-+ uf_coord->valid = 0;
-+ uf_info = unix_file_inode_data(inode);
-+
-+ /*
-+ * first item insertion is only possible when writing to empty
-+ * file or performing tail conversion
-+ */
-+ assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
-+ (reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED) &&
-+ reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV))));
-+ /* if file was empty - update its state */
-+ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ return result;
-+ }
-+
-+ if (count == 0)
-+ return 0;
-+
-+ /* a failure to allocate quota for @count blocks is treated as a bug */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host, count);
-+ BUG_ON(result != 0);
-+
-+ /*
-+ * prepare for tree modification: compose body of item and item data
-+ * structure needed for insertion
-+ */
-+ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
-+ init_new_extent(&idata, &new_ext, 1);
-+
-+ /* insert extent item into the tree */
-+ result = insert_extent_by_coord(&uf_coord->coord, &idata, key,
-+ uf_coord->lh);
-+ if (result)
-+ return result;
-+
-+ /*
-+ * make sure that we hold long term locked twig node containing all
-+ * jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, key, count);
-+ /*
-+ * assign fake block numbers to all jnodes, capture and mark them dirty
-+ */
-+ block = fake_blocknr_unformatted(count);
-+ for (i = 0; i < count; i ++, block ++) {
-+ node = jnodes[i];
-+ /* all per-jnode updates are done under the jnode spin lock */
-+ spin_lock_jnode(node);
-+ JF_SET(node, JNODE_CREATED);
-+ jnode_set_block(node, &block);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+
-+ /*
-+ * invalidate coordinate, research must be performed to continue
-+ * because write will continue on twig level
-+ */
-+ uf_coord->valid = 0;
-+ return count;
-+}
-+
-+/**
-+ * plug_hole - replace hole extent with unallocated and holes
-+ * @uf_coord: coordinate of a block within a hole extent unit
-+ * @key: key of that block, only used by check_uf_coord()
-+ * @how: set to code of the way the hole was plugged, see below
-+ *
-+ * Creates an unallocated extent of width 1 within a hole. In worst case two
-+ * additional extents can be created.
-+ *
-+ * Values stored in *@how:
-+ * 1 - hole of width 1 is turned into unallocated extent in place
-+ * 2 - first block of hole is given to unallocated unit on the left
-+ * 3 - first block of hole becomes unallocated unit, shortened hole is
-+ * inserted after it
-+ * 4 - last block of hole is given to unallocated unit on the right
-+ * 5 - hole is shortened by one block, unallocated unit is inserted after it
-+ * 6 - block is in the middle: hole is replaced with hole, unallocated unit
-+ * and hole
-+ *
-+ * In cases 1, 2 and 4 the item is modified in place, coord extension is
-+ * updated and 0 is returned. In cases 3, 5 and 6 @uf_coord is invalidated and
-+ * result of reiser4_replace_extent() is returned.
-+ */
-+static int plug_hole(uf_coord_t *uf_coord, const reiser4_key *key, int *how)
-+{
-+ struct replace_handle rh;
-+ reiser4_extent *ext;
-+ reiser4_block_nr width, pos_in_unit;
-+ coord_t *coord;
-+ extent_coord_extension_t *ext_coord;
-+ int return_inserted_position;
-+
-+ check_uf_coord(uf_coord, key);
-+
-+ rh.coord = coord_by_uf_coord(uf_coord);
-+ rh.lh = uf_coord->lh;
-+ rh.flags = 0;
-+
-+ coord = coord_by_uf_coord(uf_coord);
-+ ext_coord = ext_coord_by_uf_coord(uf_coord);
-+ ext = ext_by_ext_coord(uf_coord);
-+
-+ /* width of the hole and position of the block to plug within it */
-+ width = ext_coord->width;
-+ pos_in_unit = ext_coord->pos_in_unit;
-+
-+ *how = 0;
-+ if (width == 1) {
-+ /* hole consists of one block only: convert it in place */
-+ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, 1);
-+ znode_make_dirty(coord->node);
-+ /* update uf_coord */
-+ ON_DEBUG(ext_coord->extent = *ext);
-+ *how = 1;
-+ return 0;
-+ } else if (pos_in_unit == 0) {
-+ /* we deal with first element of extent */
-+ if (coord->unit_pos) {
-+ /* there is an extent to the left */
-+ if (state_of_extent(ext - 1) == UNALLOCATED_EXTENT) {
-+ /*
-+ * left neighboring unit is an unallocated
-+ * extent. Increase its width and decrease
-+ * width of hole
-+ */
-+ extent_set_width(ext - 1,
-+ extent_get_width(ext - 1) + 1);
-+ extent_set_width(ext, width - 1);
-+ znode_make_dirty(coord->node);
-+
-+ /*
-+ * update coord extension: it is set to the
-+ * last block of the left unit now
-+ */
-+ coord->unit_pos--;
-+ ext_coord->width = extent_get_width(ext - 1);
-+ ext_coord->pos_in_unit = ext_coord->width - 1;
-+ ext_coord->ext_offset -= sizeof(reiser4_extent);
-+ ON_DEBUG(ext_coord->extent =
-+ *extent_by_coord(coord));
-+ *how = 2;
-+ return 0;
-+ }
-+ }
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, UNALLOCATED_EXTENT_START, 1);
-+ /* extent to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], HOLE_EXTENT_START,
-+ width - 1);
-+ rh.nr_new_extents = 1;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to unit which was replaced */
-+ return_inserted_position = 0;
-+ *how = 3;
-+ } else if (pos_in_unit == width - 1) {
-+ /* we deal with last element of extent */
-+ if (coord->unit_pos < nr_units_extent(coord) - 1) {
-+ /* there is an extent unit to the right */
-+ if (state_of_extent(ext + 1) == UNALLOCATED_EXTENT) {
-+ /*
-+ * right neighboring unit is an unallocated
-+ * extent. Increase its width and decrease
-+ * width of hole
-+ */
-+ extent_set_width(ext + 1,
-+ extent_get_width(ext + 1) + 1);
-+ extent_set_width(ext, width - 1);
-+ znode_make_dirty(coord->node);
-+
-+ /*
-+ * update coord extension: it is set to the
-+ * first block of the right unit now
-+ */
-+ coord->unit_pos++;
-+ ext_coord->width = extent_get_width(ext + 1);
-+ ext_coord->pos_in_unit = 0;
-+ ext_coord->ext_offset += sizeof(reiser4_extent);
-+ ON_DEBUG(ext_coord->extent =
-+ *extent_by_coord(coord));
-+ *how = 4;
-+ return 0;
-+ }
-+ }
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, width - 1);
-+ /* extent to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
-+ 1);
-+ rh.nr_new_extents = 1;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to unit which was inserted */
-+ return_inserted_position = 1;
-+ *how = 5;
-+ } else {
-+ /* block is neither first nor last one of the hole */
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START,
-+ pos_in_unit);
-+ /* extents to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
-+ 1);
-+ reiser4_set_extent(&rh.new_extents[1], HOLE_EXTENT_START,
-+ width - pos_in_unit - 1);
-+ rh.nr_new_extents = 2;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to first of units which were inserted */
-+ return_inserted_position = 1;
-+ *how = 6;
-+ }
-+ /*
-+ * rh.paste_key is key of the unit advanced by the width of the
-+ * extent which overwrites the unit
-+ */
-+ unit_key_by_coord(coord, &rh.paste_key);
-+ set_key_offset(&rh.paste_key, get_key_offset(&rh.paste_key) +
-+ extent_get_width(&rh.overwrite) * current_blocksize);
-+
-+ uf_coord->valid = 0;
-+ return reiser4_replace_extent(&rh, return_inserted_position);
-+}
-+
-+/**
-+ * overwrite_one_block - set block number of jnode of already existing block
-+ * @uf_coord: coordinate of the block, must be set AT_UNIT
-+ * @key: key of the block, passed to plug_hole()
-+ * @node: jnode to set block number of
-+ * @hole_plugged: if not NULL, 1 is stored there when a hole was plugged
-+ *
-+ * If @node corresponds to hole extent - create unallocated extent for it and
-+ * assign fake block number. If @node corresponds to allocated extent - assign
-+ * block number of jnode
-+ *
-+ * Returns 0 on success, error of plug_hole() or -EIO if the extent is neither
-+ * allocated nor hole.
-+ */
-+static int overwrite_one_block(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode *node, int *hole_plugged)
-+{
-+ int result;
-+ extent_coord_extension_t *ext_coord;
-+ reiser4_extent *ext;
-+ reiser4_block_nr block;
-+ /* filled by plug_hole(), not looked at here */
-+ int how;
-+
-+ assert("vs-1312", uf_coord->coord.between == AT_UNIT);
-+
-+ result = 0;
-+ ext_coord = ext_coord_by_uf_coord(uf_coord);
-+ ext = ext_by_ext_coord(uf_coord);
-+ assert("", state_of_extent(ext) != UNALLOCATED_EXTENT);
-+
-+ switch (state_of_extent(ext)) {
-+ case ALLOCATED_EXTENT:
-+ /* block is on disk already: start of extent + position in it */
-+ block = extent_get_start(ext) + ext_coord->pos_in_unit;
-+ break;
-+
-+ case HOLE_EXTENT:
-+ /*
-+ * account one block in quota; failure is not handled, it ends
-+ * in BUG
-+ */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(node)->host, 1);
-+ BUG_ON(result != 0);
-+ result = plug_hole(uf_coord, key, &how);
-+ /*
-+ * NOTE(review): the block accounted above is not given back to
-+ * quota here when plug_hole() fails - confirm
-+ */
-+ if (result)
-+ return result;
-+ block = fake_blocknr_unformatted(1);
-+ if (hole_plugged)
-+ *hole_plugged = 1;
-+ JF_SET(node, JNODE_CREATED);
-+ break;
-+
-+ default:
-+ return RETERR(-EIO);
-+ }
-+
-+ jnode_set_block(node, &block);
-+ return 0;
-+}
-+
-+/**
-+ * move_coord - advance extent coordinate by one block
-+ * @uf_coord: coordinate to advance
-+ *
-+ * Steps to the next block pointer: within the current unit if possible,
-+ * otherwise to the first block of the next unit of the item. Returns 0 if
-+ * the coordinate was moved. Returns 1 if @uf_coord was invalid on entry or
-+ * if the last block of the item was passed; in the latter case @uf_coord is
-+ * marked invalid.
-+ */
-+static int move_coord(uf_coord_t *uf_coord)
-+{
-+	extent_coord_extension_t *ext_coord;
-+	reiser4_extent *next;
-+
-+	if (uf_coord->valid == 0)
-+		return 1;
-+
-+	ext_coord = &uf_coord->extension.extent;
-+	if (++ext_coord->pos_in_unit < ext_coord->width)
-+		/* still within the same unit */
-+		return 0;
-+
-+	/* the unit is exhausted, step over to the next one */
-+	ext_coord->pos_in_unit = 0;
-+	uf_coord->coord.unit_pos++;
-+	if (uf_coord->coord.unit_pos >= ext_coord->nr_units) {
-+		/* that was the last unit: end of item is reached */
-+		uf_coord->valid = 0;
-+		return 1;
-+	}
-+
-+	/* refresh cached data of the unit the coordinate is set to now */
-+	ext_coord->ext_offset += sizeof(reiser4_extent);
-+	next = ext_by_offset(uf_coord->coord.node, ext_coord->ext_offset);
-+	ext_coord->width = extent_get_width(next);
-+	ON_DEBUG(ext_coord->extent = *next);
-+	return 0;
-+}
-+
-+/**
-+ * overwrite_extent - handle jnodes which fall into already existing units
-+ * @uf_coord: coordinate of the block the first jnode corresponds to
-+ * @key: key of that block
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ * @plugged_hole: passed down to overwrite_one_block()
-+ *
-+ * Every jnode which has no block number yet gets one via
-+ * overwrite_one_block(). Every jnode is captured and marked dirty. Processing
-+ * stops as soon as @uf_coord becomes invalid or can not be moved forward.
-+ *
-+ * Returns number of handled jnodes or error code of overwrite_one_block().
-+ */
-+static int overwrite_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count, int *plugged_hole)
-+{
-+ int result;
-+ reiser4_key k;
-+ int i;
-+ jnode *node;
-+
-+ /* @k is key of the block which corresponds to jnodes[i] */
-+ k = *key;
-+ for (i = 0; i < count; i ++) {
-+ node = jnodes[i];
-+ /* block number 0 means that jnode is not mapped to a block yet */
-+ if (*jnode_get_block(node) == 0) {
-+ result = overwrite_one_block(uf_coord, &k, node, plugged_hole);
-+ if (result)
-+ return result;
-+ }
-+ /*
-+ * make sure that we hold long term locked twig node containing
-+ * all jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, &k, 1);
-+ /*
-+ * capture jnode and mark it dirty
-+ */
-+ spin_lock_jnode(node);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+
-+ /*
-+ * coordinate is not valid anymore (plug_hole() invalidates it
-+ * when it has to replace the extent): this jnode is done,
-+ * the rest is left to the caller
-+ */
-+ if (uf_coord->valid == 0)
-+ return i + 1;
-+
-+ check_uf_coord(uf_coord, &k);
-+
-+ if (move_coord(uf_coord)) {
-+ /*
-+ * failed to move to the next node pointer. Either end
-+ * of file or end of twig node is reached. In the later
-+ * case we might go to the right neighbor.
-+ */
-+ uf_coord->valid = 0;
-+ return i + 1;
-+ }
-+ set_key_offset(&k, get_key_offset(&k) + PAGE_CACHE_SIZE);
-+ }
-+
-+ return count;
-+}
-+
-+/**
-+ * reiser4_update_extent - update extent item for one jnode
-+ * @inode: inode of file
-+ * @node: jnode of the page at offset @pos
-+ * @pos: offset in the file
-+ * @plugged_hole: passed down to overwrite_extent()
-+ *
-+ * Looks for the item of @inode at offset @pos without using a hint and,
-+ * depending on what is found, appends to the last extent, overwrites existing
-+ * extent or inserts the first extent of the file for the single jnode @node.
-+ *
-+ * Returns 0 if the jnode was handled, error code otherwise.
-+ */
-+int reiser4_update_extent(struct inode *inode, jnode *node, loff_t pos,
-+ int *plugged_hole)
-+{
-+ int result;
-+ znode *loaded;
-+ uf_coord_t uf_coord;
-+ coord_t *coord;
-+ lock_handle lh;
-+ reiser4_key key;
-+
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+
-+ key_by_inode_and_offset_common(inode, pos, &key);
-+
-+ init_uf_coord(&uf_coord, &lh);
-+ coord = &uf_coord.coord;
-+ result = find_file_item_nohint(coord, &lh, &key,
-+ ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result)) {
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+ }
-+
-+ result = zload(coord->node);
-+ BUG_ON(result != 0);
-+ /* remember the node to zrelse: coord may change below */
-+ loaded = coord->node;
-+
-+ if (coord->between == AFTER_UNIT) {
-+ /*
-+ * append existing extent item with unallocated extent of width
-+ * nr_jnodes
-+ */
-+ init_coord_extension_extent(&uf_coord,
-+ get_key_offset(&key));
-+ result = append_last_extent(&uf_coord, &key,
-+ &node, 1);
-+ } else if (coord->between == AT_UNIT) {
-+ /*
-+ * overwrite
-+ * not optimal yet. Will be optimized if new write will show
-+ * performance win.
-+ */
-+ init_coord_extension_extent(&uf_coord,
-+ get_key_offset(&key));
-+ result = overwrite_extent(&uf_coord, &key,
-+ &node, 1, plugged_hole);
-+ } else {
-+ /*
-+ * there are no items of this file in the tree yet. Create
-+ * first item of the file inserting one unallocated extent of
-+ * width nr_jnodes
-+ */
-+ result = insert_first_extent(&uf_coord, &key, &node, 1, inode);
-+ }
-+ /*
-+ * each of the helpers returns number of handled jnodes or error. One
-+ * jnode was passed, so 1 is the only expected non-error result
-+ */
-+ assert("", result == 1 || result < 0);
-+ zrelse(loaded);
-+ done_lh(&lh);
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return (result == 1) ? 0 : result;
-+}
-+
-+/**
-+ * update_extents - update extent items for an array of jnodes
-+ * @file: file being written, its hint is loaded and saved here
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array, 0 for expanding truncate
-+ * @pos: offset in the file, only used when @count is 0; otherwise the offset
-+ * is calculated from index of the first jnode
-+ *
-+ * Repeatedly looks for the item of the file, appends to, overwrites or inserts
-+ * the first extent for as many jnodes as one call can handle and goes on with
-+ * the rest until all jnodes are processed or an error happens.
-+ *
-+ * Returns error code of the tree lookup or result of the last helper call:
-+ * number of jnodes it handled or error code.
-+ */
-+static int update_extents(struct file *file, jnode **jnodes, int count, loff_t pos)
-+{
-+ struct inode *inode;
-+ struct hint hint;
-+ reiser4_key key;
-+ int result;
-+ znode *loaded;
-+
-+ result = load_file_hint(file, &hint);
-+ BUG_ON(result != 0);
-+
-+ inode = file->f_dentry->d_inode;
-+ if (count != 0)
-+ /*
-+ * count == 0 is special case: expanding truncate
-+ */
-+ pos = (loff_t)index_jnode(jnodes[0]) << PAGE_CACHE_SHIFT;
-+ key_by_inode_and_offset_common(inode, pos, &key);
-+
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+
-+ do {
-+ result = find_file_item(&hint, &key, ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result)) {
-+ /* NOTE(review): hint is not saved on this path */
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+ }
-+
-+ result = zload(hint.ext_coord.coord.node);
-+ BUG_ON(result != 0);
-+ /* remember the node to zrelse: coord may change below */
-+ loaded = hint.ext_coord.coord.node;
-+
-+ if (hint.ext_coord.coord.between == AFTER_UNIT) {
-+ /*
-+ * append existing extent item with unallocated extent
-+ * of width nr_jnodes
-+ */
-+ if (hint.ext_coord.valid == 0)
-+ /* NOTE: get statistics on this */
-+ init_coord_extension_extent(&hint.ext_coord,
-+ get_key_offset(&key));
-+ result = append_last_extent(&hint.ext_coord, &key,
-+ jnodes, count);
-+ } else if (hint.ext_coord.coord.between == AT_UNIT) {
-+ /*
-+ * overwrite
-+ * not optimal yet. Will be optimized if new write will
-+ * show performance win.
-+ */
-+ if (hint.ext_coord.valid == 0)
-+ /* NOTE: get statistics on this */
-+ init_coord_extension_extent(&hint.ext_coord,
-+ get_key_offset(&key));
-+ result = overwrite_extent(&hint.ext_coord, &key,
-+ jnodes, count, NULL);
-+ } else {
-+ /*
-+ * there are no items of this file in the tree
-+ * yet. Create first item of the file inserting one
-+ * unallocated extent of width nr_jnodes
-+ */
-+ result = insert_first_extent(&hint.ext_coord, &key,
-+ jnodes, count, inode);
-+ }
-+ zrelse(loaded);
-+ if (result < 0) {
-+ done_lh(hint.ext_coord.lh);
-+ break;
-+ }
-+
-+ /* @result jnodes are done, go on with the rest of the array */
-+ jnodes += result;
-+ count -= result;
-+ set_key_offset(&key, get_key_offset(&key) + result * PAGE_CACHE_SIZE);
-+
-+ /* seal and unlock znode */
-+ if (hint.ext_coord.valid)
-+ reiser4_set_hint(&hint, &key, ZNODE_WRITE_LOCK);
-+ else
-+ reiser4_unset_hint(&hint);
-+
-+ } while (count > 0);
-+
-+ save_file_hint(file, &hint);
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+}
-+
-+/**
-+ * write_extent_reserve_space - reserve space for extent write operation
-+ * @inode: inode of the file which is being written
-+ *
-+ * Estimates and reserves disk space which writing WRITE_GRANULARITY pages of
-+ * the file by extents may require. Returns result of reiser4_grab_space().
-+ */
-+static int write_extent_reserve_space(struct inode *inode)
-+{
-+	reiser4_tree *tree = reiser4_tree_by_inode(inode);
-+	__u64 for_empty_node;
-+	__u64 for_pages;
-+	__u64 for_stat_data;
-+
-+	/*
-+	 * 1. find_file_item may have to insert empty node to the tree (empty
-+	 * leaf node between two extent items). This requires 1 block and
-+	 * number of blocks which are necessary to perform insertion of an
-+	 * internal item into twig level.
-+	 */
-+	for_empty_node = estimate_one_insert_item(tree);
-+
-+	/*
-+	 * 2. for each of written pages there might be needed 1 block and
-+	 * number of blocks which might be necessary to perform insertion of
-+	 * or paste to an extent item.
-+	 */
-+	for_pages = WRITE_GRANULARITY *
-+		(1 + estimate_one_insert_into_item(tree));
-+
-+	/* 3. stat data update */
-+	for_stat_data = estimate_one_insert_item(tree);
-+
-+	grab_space_enable();
-+	return reiser4_grab_space(for_empty_node + for_pages + for_stat_data,
-+				  0 /* flags */);
-+}
-+
-+/**
-+ * reiser4_write_extent - write method of extent item plugin
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @pos: position in file to write to
-+ *
-+ * Works in three steps: gets pages and jnodes for the range being written,
-+ * copies user data into the pages, and then either updates extent items (if
-+ * at least one jnode has no block number yet) or just captures and dirties
-+ * the jnodes. @count == 0 is the truncate case: only extents are updated.
-+ *
-+ * Returns number of bytes copied from @buf. If nothing was copied, returns
-+ * error code (or 0). *@pos is not changed here.
-+ */
-+ssize_t reiser4_write_extent(struct file *file, const char __user *buf,
-+ size_t count, loff_t *pos)
-+{
-+ int have_to_update_extent;
-+ int nr_pages, nr_dirty;
-+ struct page *page;
-+ jnode *jnodes[WRITE_GRANULARITY + 1];
-+ struct inode *inode;
-+ unsigned long index;
-+ unsigned long end;
-+ int i;
-+ int to_page, page_off;
-+ size_t left, written;
-+ int result = 0;
-+
-+ inode = file->f_dentry->d_inode;
-+ if (write_extent_reserve_space(inode))
-+ return RETERR(-ENOSPC);
-+
-+ if (count == 0) {
-+ /* truncate case */
-+ /* NOTE(review): result of update_extents() is ignored */
-+ update_extents(file, jnodes, 0, *pos);
-+ return 0;
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ left = count;
-+ index = *pos >> PAGE_CACHE_SHIFT;
-+ /* calculate number of pages which are to be written */
-+ end = ((*pos + count - 1) >> PAGE_CACHE_SHIFT);
-+ nr_pages = end - index + 1;
-+ nr_dirty = 0;
-+ /*
-+ * jnodes[] has room for WRITE_GRANULARITY + 1 entries only; this is
-+ * checked by the assertion alone
-+ */
-+ assert("", nr_pages <= WRITE_GRANULARITY + 1);
-+
-+ /* get pages and jnodes */
-+ for (i = 0; i < nr_pages; i ++) {
-+ page = find_or_create_page(inode->i_mapping, index + i,
-+ reiser4_ctx_gfp_mask_get());
-+ if (page == NULL) {
-+ /* only the first @i pages and jnodes are to be released */
-+ nr_pages = i;
-+ result = RETERR(-ENOMEM);
-+ goto out;
-+ }
-+
-+ jnodes[i] = jnode_of_page(page);
-+ if (IS_ERR(jnodes[i])) {
-+ unlock_page(page);
-+ page_cache_release(page);
-+ nr_pages = i;
-+ result = RETERR(-ENOMEM);
-+ goto out;
-+ }
-+ /* prevent jnode and page from disconnecting */
-+ JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
-+ unlock_page(page);
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ have_to_update_extent = 0;
-+
-+ /* only the first page may be written starting not from its beginning */
-+ page_off = (*pos & (PAGE_CACHE_SIZE - 1));
-+ for (i = 0; i < nr_pages; i ++) {
-+ /* number of bytes to copy into this page */
-+ to_page = PAGE_CACHE_SIZE - page_off;
-+ if (to_page > left)
-+ to_page = left;
-+ page = jnode_page(jnodes[i]);
-+ if (page_offset(page) < inode->i_size &&
-+ !PageUptodate(page) && to_page != PAGE_CACHE_SIZE) {
-+ /*
-+ * the above is not optimal for partial write to last
-+ * page of file when file size is not at boundary of
-+ * page
-+ */
-+ lock_page(page);
-+ if (!PageUptodate(page)) {
-+ result = readpage_unix_file(NULL, page);
-+ BUG_ON(result != 0);
-+ /* wait for read completion */
-+ /*
-+ * presumably the page is unlocked when read
-+ * completes, so this lock_page() returns only
-+ * after that - TODO confirm
-+ */
-+ lock_page(page);
-+ BUG_ON(!PageUptodate(page));
-+ } else
-+ result = 0;
-+ unlock_page(page);
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+ fault_in_pages_readable(buf, to_page);
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ lock_page(page);
-+ if (!PageUptodate(page) && to_page != PAGE_CACHE_SIZE) {
-+ /*
-+ * page is written partially and has no valid content:
-+ * zero the parts which user data will not cover
-+ */
-+ void *kaddr;
-+
-+ kaddr = kmap_atomic(page, KM_USER0);
-+ memset(kaddr, 0, page_off);
-+ memset(kaddr + page_off + to_page, 0,
-+ PAGE_CACHE_SIZE - (page_off + to_page));
-+ flush_dcache_page(page);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ }
-+
-+ written = filemap_copy_from_user(page, page_off, buf, to_page);
-+ if (unlikely(written != to_page)) {
-+ /* pages copied so far are still processed below */
-+ unlock_page(page);
-+ result = RETERR(-EFAULT);
-+ break;
-+ }
-+
-+ flush_dcache_page(page);
-+ reiser4_set_page_dirty_internal(page);
-+ unlock_page(page);
-+ nr_dirty++;
-+
-+ mark_page_accessed(page);
-+ SetPageUptodate(page);
-+
-+ /* jnode without block number requires extent item update */
-+ if (jnodes[i]->blocknr == 0)
-+ have_to_update_extent ++;
-+
-+ page_off = 0;
-+ buf += to_page;
-+ left -= to_page;
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+ }
-+
-+ if (have_to_update_extent) {
-+ /* NOTE(review): result of update_extents() is ignored */
-+ update_extents(file, jnodes, nr_dirty, *pos);
-+ } else {
-+ /* all jnodes are mapped to blocks: capture and dirty them */
-+ for (i = 0; i < nr_dirty; i ++) {
-+ int ret;
-+ spin_lock_jnode(jnodes[i]);
-+ ret = reiser4_try_capture(jnodes[i],
-+ ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(ret != 0);
-+ jnode_make_dirty_locked(jnodes[i]);
-+ spin_unlock_jnode(jnodes[i]);
-+ }
-+ }
-+out:
-+ /* drop references to pages and jnodes obtained in the first loop */
-+ for (i = 0; i < nr_pages; i ++) {
-+ page_cache_release(jnode_page(jnodes[i]));
-+ JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
-+ jput(jnodes[i]);
-+ }
-+
-+ /* the only errors handled so far is ENOMEM and
-+ EFAULT on copy_from_user */
-+
-+ return (count - left) ? (count - left) : result;
-+}
-+
-+/* Fill @page with zeros, mark it up to date and unlock it. */
-+static inline void zero_page(struct page *page)
-+{
-+	void *vaddr;
-+
-+	vaddr = kmap_atomic(page, KM_USER0);
-+	memset(vaddr, 0, PAGE_CACHE_SIZE);
-+	flush_dcache_page(page);
-+	kunmap_atomic(vaddr, KM_USER0);
-+
-+	SetPageUptodate(page);
-+	unlock_page(page);
-+}
-+
-+/*
-+ * Read @page which corresponds to block number @pos within extent unit @ext.
-+ *
-+ * Hole extent: page is zeroed, unless there is a jnode for it with non-zero
-+ * io block. Allocated extent: jnode of the page gets block number (start of
-+ * extent + @pos) if it has none. Unallocated extent: jnode must exist
-+ * already, page is attached to it. Unless the page was zeroed, read is
-+ * submitted with reiser4_page_io().
-+ *
-+ * Returns 0, error of jnode_of_page() or -EIO if state of extent is
-+ * unknown. Result of reiser4_page_io() is not checked.
-+ */
-+int reiser4_do_readpage_extent(reiser4_extent * ext, reiser4_block_nr pos,
-+ struct page *page)
-+{
-+ jnode *j;
-+ struct address_space *mapping;
-+ unsigned long index;
-+ oid_t oid;
-+ reiser4_block_nr block;
-+
-+ mapping = page->mapping;
-+ /* NOTE(review): @oid is assigned but not used in this function */
-+ oid = get_inode_oid(mapping->host);
-+ index = page->index;
-+
-+ switch (state_of_extent(ext)) {
-+ case HOLE_EXTENT:
-+ /*
-+ * it is possible to have hole page with jnode, if page was
-+ * eflushed previously.
-+ */
-+ j = jfind(mapping, index);
-+ if (j == NULL) {
-+ zero_page(page);
-+ return 0;
-+ }
-+ spin_lock_jnode(j);
-+ if (!jnode_page(j)) {
-+ jnode_attach_page(j, page);
-+ } else {
-+ BUG_ON(jnode_page(j) != page);
-+ assert("vs-1504", jnode_page(j) == page);
-+ }
-+ block = *jnode_get_io_block(j);
-+ spin_unlock_jnode(j);
-+ if (block == 0) {
-+ /* nothing to read from disk */
-+ zero_page(page);
-+ jput(j);
-+ return 0;
-+ }
-+ break;
-+
-+ case ALLOCATED_EXTENT:
-+ j = jnode_of_page(page);
-+ if (IS_ERR(j))
-+ return PTR_ERR(j);
-+ if (*jnode_get_block(j) == 0) {
-+ reiser4_block_nr blocknr;
-+
-+ blocknr = extent_get_start(ext) + pos;
-+ jnode_set_block(j, &blocknr);
-+ } else
-+ assert("vs-1403",
-+ j->blocknr == extent_get_start(ext) + pos);
-+ break;
-+
-+ case UNALLOCATED_EXTENT:
-+ j = jfind(mapping, index);
-+ assert("nikita-2688", j);
-+ assert("vs-1426", jnode_page(j) == NULL);
-+
-+ spin_lock_jnode(j);
-+ jnode_attach_page(j, page);
-+ spin_unlock_jnode(j);
-+ break;
-+
-+ default:
-+ warning("vs-957", "wrong extent\n");
-+ return RETERR(-EIO);
-+ }
-+
-+ BUG_ON(j == 0);
-+ reiser4_page_io(page, j, READ, reiser4_ctx_gfp_mask_get());
-+ /* drop the reference to jnode obtained in the switch above */
-+ jput(j);
-+ return 0;
-+}
-+
-+/*
-+ * Implements plugin->u.item.s.file.read operation for extent items.
-+ *
-+ * Copies @flow->length bytes of @file, starting from the offset stored in
-+ * @flow->key, to the user buffer @flow->data, page by page. @flow is moved
-+ * forward as data get copied. @hint contains the coordinate of the extent
-+ * unit to start from; it is sealed and its lock is released before the first
-+ * page is read.
-+ *
-+ * Returns 0 on success, error of read_mapping_page(), -EIO if a page is not
-+ * up to date after it was read, -EFAULT if the user buffer can not be written
-+ * to. Reference to the page is dropped on every path which returns after the
-+ * page was obtained.
-+ */
-+int reiser4_read_extent(struct file *file, flow_t *flow, hint_t *hint)
-+{
-+	int result;
-+	struct page *page;
-+	unsigned long cur_page;
-+	unsigned long page_off, count;
-+	struct address_space *mapping;
-+	loff_t file_off;
-+	uf_coord_t *uf_coord;
-+	coord_t *coord;
-+	char *kaddr;
-+
-+	assert("vs-1353", current_blocksize == PAGE_CACHE_SIZE);
-+	assert("vs-572", flow->user == 1);
-+	assert("vs-1351", flow->length > 0);
-+
-+	uf_coord = &hint->ext_coord;
-+
-+	check_uf_coord(uf_coord, NULL);
-+	assert("vs-33", uf_coord->lh == &hint->lh);
-+
-+	coord = &uf_coord->coord;
-+	assert("vs-1119", znode_is_rlocked(coord->node));
-+	assert("vs-1120", znode_is_loaded(coord->node));
-+	assert("vs-1256", coord_matches_key_extent(coord, &flow->key));
-+
-+	mapping = file->f_dentry->d_inode->i_mapping;
-+
-+	/* offset in a file to start read from */
-+	file_off = get_key_offset(&flow->key);
-+	/* offset within the page to start read from */
-+	page_off = (unsigned long)(file_off & (PAGE_CACHE_SIZE - 1));
-+	/* bytes which can be read from the page which contains file_off */
-+	count = PAGE_CACHE_SIZE - page_off;
-+
-+	/* index of page containing offset read is to start from */
-+	cur_page = (unsigned long)(file_off >> PAGE_CACHE_SHIFT);
-+
-+	/* we start having twig node read locked. However, we do not want to
-+	   keep that lock all the time readahead works. So, set a seal and
-+	   release twig node. */
-+	reiser4_set_hint(hint, &flow->key, ZNODE_READ_LOCK);
-+	/* &hint->lh is done-ed */
-+
-+	do {
-+		reiser4_txn_restart_current();
-+		/* on success the page is returned with a reference held */
-+		page = read_mapping_page(mapping, cur_page, file);
-+		if (IS_ERR(page))
-+			return PTR_ERR(page);
-+		lock_page(page);
-+		if (!PageUptodate(page)) {
-+			unlock_page(page);
-+			page_cache_release(page);
-+			warning("jmacd-97178", "extent_read: page is not up to date");
-+			return RETERR(-EIO);
-+		}
-+		mark_page_accessed(page);
-+		unlock_page(page);
-+
-+		/* If users can be writing to this page using arbitrary virtual
-+		   addresses, take care about potential aliasing before reading
-+		   the page on the kernel side.
-+		 */
-+		if (mapping_writably_mapped(mapping))
-+			flush_dcache_page(page);
-+
-+		assert("nikita-3034", reiser4_schedulable());
-+
-+		/* number of bytes which are to be read from the page */
-+		if (count > flow->length)
-+			count = flow->length;
-+
-+		result = fault_in_pages_writeable(flow->data, count);
-+		if (result) {
-+			page_cache_release(page);
-+			return RETERR(-EFAULT);
-+		}
-+
-+		/* fast path: copy under atomic kmap, page faults not allowed */
-+		kaddr = kmap_atomic(page, KM_USER0);
-+		result = __copy_to_user_inatomic(flow->data,
-+						 kaddr + page_off, count);
-+		kunmap_atomic(kaddr, KM_USER0);
-+		if (result != 0) {
-+			/* slow path: repeat the copy with page faults allowed */
-+			kaddr = kmap(page);
-+			result = __copy_to_user(flow->data, kaddr + page_off, count);
-+			kunmap(page);
-+			if (unlikely(result)) {
-+				/* do not leak the page reference on error */
-+				page_cache_release(page);
-+				return RETERR(-EFAULT);
-+			}
-+		}
-+
-+		page_cache_release(page);
-+
-+		/* increase key (flow->key), update user area pointer (flow->data) */
-+		move_flow_forward(flow, count);
-+
-+		/* all pages but the first one are read from their beginning */
-+		page_off = 0;
-+		cur_page++;
-+		count = PAGE_CACHE_SIZE;
-+	} while (flow->length);
-+
-+	return 0;
-+}
-+
-+/*
-+ plugin->s.file.readpage
-+ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->extent_readpage
-+ or
-+ filemap_nopage->reiser4_readpage->readpage_unix_file->readpage_extent
-+
-+ @vp is pointer to uf_coord_t.
-+
-+ At the beginning: coord->node is read locked, zloaded, page is
-+ locked, coord is set to existing unit inside of extent item (it is not necessary that coord matches to page->index)
-+
-+ The page is read by reiser4_do_readpage_extent() from the extent unit and the
-+ position within it which are stored in the coordinate; its result is
-+ returned.
-+*/
-+int reiser4_readpage_extent(void *vp, struct page *page)
-+{
-+ uf_coord_t *uf_coord = vp;
-+ /* @coord and @key are only needed by the assertions below */
-+ ON_DEBUG(coord_t * coord = &uf_coord->coord);
-+ ON_DEBUG(reiser4_key key);
-+
-+ assert("vs-1040", PageLocked(page));
-+ assert("vs-1050", !PageUptodate(page));
-+ assert("vs-1039", page->mapping && page->mapping->host);
-+
-+ assert("vs-1044", znode_is_loaded(coord->node));
-+ assert("vs-758", item_is_extent(coord));
-+ assert("vs-1046", coord_is_existing_unit(coord));
-+ assert("vs-1045", znode_is_rlocked(coord->node));
-+ /* the item must belong to the file the page belongs to */
-+ assert("vs-1047",
-+ page->mapping->host->i_ino ==
-+ get_key_objectid(item_key_by_coord(coord, &key)));
-+ check_uf_coord(uf_coord, NULL);
-+
-+ return reiser4_do_readpage_extent(
-+ ext_by_ext_coord(uf_coord),
-+ uf_coord->extension.extent.pos_in_unit, page);
-+}
-+
-+/**
-+ * get_block_address_extent - map block of file to block on disk
-+ * @coord: coordinate of the extent unit which covers @block
-+ * @block: number of block within the file
-+ * @result: where to store the number of block on disk
-+ *
-+ * Stores 0 in *@result if the unit is not an allocated extent. Returns 0 on
-+ * success, -EINVAL if @coord is not set to an existing unit.
-+ */
-+int get_block_address_extent(const coord_t *coord, sector_t block,
-+			     sector_t *result)
-+{
-+	reiser4_extent *ext;
-+	reiser4_key key;
-+
-+	if (!coord_is_existing_unit(coord))
-+		return RETERR(-EINVAL);
-+
-+	ext = extent_by_coord(coord);
-+	if (state_of_extent(ext) != ALLOCATED_EXTENT) {
-+		/* FIXME: bad things may happen if it is unallocated extent */
-+		*result = 0;
-+		return 0;
-+	}
-+
-+	/* @block must be one of the blocks addressed by the unit */
-+	unit_key_by_coord(coord, &key);
-+	assert("vs-1645",
-+	       block >= get_key_offset(&key) >> current_blocksize_bits);
-+	assert("vs-1646",
-+	       block <
-+	       (get_key_offset(&key) >> current_blocksize_bits) +
-+	       extent_get_width(ext));
-+
-+	/* start of extent plus position of @block within the unit */
-+	*result = extent_get_start(ext) +
-+		(block - (get_key_offset(&key) >> current_blocksize_bits));
-+	return 0;
-+}
-+
-+/*
-+ * plugin->u.item.s.file.append_key
-+ * Set @key to the key of the first byte which follows the last byte addressed
-+ * by the extent item @coord is set to. Returns @key.
-+ */
-+reiser4_key *append_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+	reiser4_block_nr item_size;
-+
-+	/* size of all units of the item, in units of key offset */
-+	item_size = reiser4_extent_size(coord, nr_units_extent(coord));
-+
-+	/* key of the item, advanced by the size of the item */
-+	item_key_by_coord(coord, key);
-+	set_key_offset(key, get_key_offset(key) + item_size);
-+
-+	assert("vs-610", get_key_offset(key)
-+	       && (get_key_offset(key) & (current_blocksize - 1)) == 0);
-+	return key;
-+}
-+
-+/*
-+ * plugin->u.item.s.file.init_coord_extension
-+ *
-+ * Fill extent specific part of @uf_coord (number of units, offset of the unit
-+ * within the node, width of the unit, position within the unit) from its
-+ * coord and mark @uf_coord valid. @lookuped is the file offset which was
-+ * looked for; it determines the position within the unit when coord is
-+ * AT_UNIT. Nothing is done if coord is neither AFTER_UNIT nor AT_UNIT.
-+ */
-+void init_coord_extension_extent(uf_coord_t * uf_coord, loff_t lookuped)
-+{
-+ coord_t *coord;
-+ extent_coord_extension_t *ext_coord;
-+ reiser4_key key;
-+ loff_t offset;
-+
-+ assert("vs-1295", uf_coord->valid == 0);
-+
-+ coord = &uf_coord->coord;
-+ assert("vs-1288", coord_is_iplug_set(coord));
-+ assert("vs-1327", znode_is_loaded(coord->node));
-+
-+ if (coord->between != AFTER_UNIT && coord->between != AT_UNIT)
-+ return;
-+
-+ ext_coord = &uf_coord->extension.extent;
-+ ext_coord->nr_units = nr_units_extent(coord);
-+ /* the unit is remembered as an offset from the beginning of node data */
-+ ext_coord->ext_offset =
-+ (char *)extent_by_coord(coord) - zdata(coord->node);
-+ ext_coord->width = extent_get_width(extent_by_coord(coord));
-+ ON_DEBUG(ext_coord->extent = *extent_by_coord(coord));
-+ uf_coord->valid = 1;
-+
-+ /* pos_in_unit is the only uninitialized field in extended coord */
-+ if (coord->between == AFTER_UNIT) {
-+ /* coord is after the last unit: point to its last block */
-+ assert("vs-1330",
-+ coord->unit_pos == nr_units_extent(coord) - 1);
-+
-+ ext_coord->pos_in_unit = ext_coord->width - 1;
-+ } else {
-+ /* AT_UNIT */
-+ unit_key_by_coord(coord, &key);
-+ offset = get_key_offset(&key);
-+
-+ /* @lookuped must be within the range addressed by the unit */
-+ assert("vs-1328", offset <= lookuped);
-+ assert("vs-1329",
-+ lookuped <
-+ offset + ext_coord->width * current_blocksize);
-+ /* number of the block within the unit which contains @lookuped */
-+ ext_coord->pos_in_unit =
-+ ((lookuped - offset) >> current_blocksize_bits);
-+ }
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/extent_flush_ops.c linux-2.6.20/fs/reiser4/plugin/item/extent_flush_ops.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/extent_flush_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/extent_flush_ops.c 2007-05-06 14:50:43.811010720 +0400
-@@ -0,0 +1,1028 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../tree.h"
-+#include "../../jnode.h"
-+#include "../../super.h"
-+#include "../../flush.h"
-+#include "../../carry.h"
-+#include "../object.h"
-+
-+#include <linux/pagemap.h>
-+
-+static reiser4_block_nr extent_unit_start(const coord_t * item);
-+
-+/*
-+ * Return the first (@side == LEFT_SIDE) or the last (@side == RIGHT_SIDE)
-+ * extent unit of the item @coord is set to. *@pos_in_unit is set to the
-+ * first or, respectively, to the last block position of the returned unit.
-+ */
-+static reiser4_extent *extent_utmost_ext(const coord_t * coord, sideof side,
-+					 reiser4_block_nr * pos_in_unit)
-+{
-+	reiser4_extent *last;
-+
-+	if (side == LEFT_SIDE) {
-+		/* first extent of item and first position within it */
-+		*pos_in_unit = 0;
-+		return extent_item(coord);
-+	}
-+
-+	/* last extent of item and last position within it */
-+	assert("vs-363", side == RIGHT_SIDE);
-+	last = extent_item(coord) + coord_last_unit_pos(coord);
-+	*pos_in_unit = extent_get_width(last) - 1;
-+	return last;
-+}
-+
-+/* item_plugin->f.utmost_child */
-+/* Return the child. Coord is set to extent item. Find jnode corresponding
-+ either to first or to last unformatted node pointed by the item.
-+ *@childp is set to NULL if the utmost extent is a hole, otherwise to the
-+ result of jlookup() for the first or last page addressed by the
-+ item. Always returns 0. */
-+int utmost_child_extent(const coord_t * coord, sideof side, jnode ** childp)
-+{
-+ reiser4_extent *ext;
-+ /* filled by extent_utmost_ext(), not used otherwise */
-+ reiser4_block_nr pos_in_unit;
-+
-+ ext = extent_utmost_ext(coord, side, &pos_in_unit);
-+
-+ switch (state_of_extent(ext)) {
-+ case HOLE_EXTENT:
-+ /* hole has no unformatted nodes */
-+ *childp = NULL;
-+ return 0;
-+ case ALLOCATED_EXTENT:
-+ case UNALLOCATED_EXTENT:
-+ break;
-+ default:
-+ /* this should never happen */
-+ assert("vs-1417", 0);
-+ }
-+
-+ {
-+ reiser4_key key;
-+ reiser4_tree *tree;
-+ unsigned long index;
-+
-+ if (side == LEFT_SIDE) {
-+ /* get key of first byte addressed by the extent */
-+ item_key_by_coord(coord, &key);
-+ } else {
-+ /* get key of byte which next after last byte addressed by the extent */
-+ append_key_extent(coord, &key);
-+ }
-+
-+ assert("vs-544",
-+ (get_key_offset(&key) >> PAGE_CACHE_SHIFT) < ~0ul);
-+ /* index of first or last (depending on @side) page addressed
-+ by the extent */
-+ index =
-+ (unsigned long)(get_key_offset(&key) >> PAGE_CACHE_SHIFT);
-+ /* for the right side @key is one byte past the item: step back
-+ to the last page of the item */
-+ if (side == RIGHT_SIDE)
-+ index--;
-+
-+ tree = coord->node->zjnode.tree;
-+ *childp = jlookup(tree, get_key_objectid(&key), index);
-+ }
-+
-+ return 0;
-+}
-+
-+/* item_plugin->f.utmost_child_real_block */
-+/* Store in *@block the disk block number of the first (LEFT_SIDE) or the last
-+   (RIGHT_SIDE) block of the extent unit @coord is set to, or 0 if the unit
-+   is not allocated. Always returns 0. */
-+int
-+utmost_child_real_block_extent(const coord_t * coord, sideof side,
-+			       reiser4_block_nr * block)
-+{
-+	reiser4_extent *unit = extent_by_coord(coord);
-+
-+	switch (state_of_extent(unit)) {
-+	case HOLE_EXTENT:
-+	case UNALLOCATED_EXTENT:
-+		/* there are no blocks on disk for this unit */
-+		*block = 0;
-+		break;
-+	case ALLOCATED_EXTENT:
-+		if (side == RIGHT_SIDE)
-+			*block = extent_get_start(unit) +
-+				extent_get_width(unit) - 1;
-+		else
-+			*block = extent_get_start(unit);
-+		break;
-+	default:
-+		/* this should never happen */
-+		assert("vs-1418", 0);
-+	}
-+
-+	return 0;
-+}
-+
-+/* item_plugin->f.scan */
-+/* Performs scanning (leftward or rightward, depending on the scan direction) starting
-+ from an unformatted node and its parent coordinate. This scan continues, advancing
-+ the parent coordinate, until either it encounters a formatted child or it finishes
-+ scanning this node.
-+
-+ If unallocated, the entire extent must be dirty and in the same atom. (Actually, I'm
-+ not sure this last property (same atom) is enforced, but it should be the case since
-+ one atom must write the parent and the others must read the parent, thus fusing?). In
-+ any case, the code below asserts this case for unallocated extents. Unallocated
-+ extents are thus optimized because we can skip to the endpoint when scanning.
-+
-+ It returns control to its caller, which (presumably) handles these terminating
-+ conditions, e.g., by loading the next twig.
-+
-+ Returns 0, or the non-zero result of scan_set_current(). scan->stop is set to 1
-+ when the scan has to end within this parent node.
-+*/
-+int reiser4_scan_extent(flush_scan * scan)
-+{
-+ coord_t coord;
-+ jnode *neighbor;
-+ unsigned long scan_index, unit_index, unit_width, scan_max, scan_dist;
-+ reiser4_block_nr unit_start;
-+ __u64 oid;
-+ reiser4_key key;
-+ int ret = 0, allocated, incr;
-+ reiser4_tree *tree;
-+
-+ if (!JF_ISSET(scan->node, JNODE_DIRTY)) {
-+ scan->stop = 1;
-+ return 0; /* Race with truncate, this node is already
-+ * truncated. */
-+ }
-+
-+ /* work on a private copy of the parent coordinate */
-+ coord_dup(&coord, &scan->parent_coord);
-+
-+ assert("jmacd-1404", !reiser4_scan_finished(scan));
-+ assert("jmacd-1405", jnode_get_level(scan->node) == LEAF_LEVEL);
-+ assert("jmacd-1406", jnode_is_unformatted(scan->node));
-+
-+ /* The scan_index variable corresponds to the current page index of the
-+ unformatted block scan position. */
-+ scan_index = index_jnode(scan->node);
-+
-+ assert("jmacd-7889", item_is_extent(&coord));
-+
-+ repeat:
-+ /* objectid of file */
-+ oid = get_key_objectid(item_key_by_coord(&coord, &key));
-+
-+ /* NOTE: a hole also counts as "allocated" here, because the flag is
-+ just the negation of extent_is_unallocated() */
-+ allocated = !extent_is_unallocated(&coord);
-+ /* Get the values of this extent unit: */
-+ unit_index = extent_unit_index(&coord);
-+ unit_width = extent_unit_width(&coord);
-+ unit_start = extent_unit_start(&coord);
-+
-+ /* the scan position must lie inside the current unit */
-+ assert("jmacd-7187", unit_width > 0);
-+ assert("jmacd-7188", scan_index >= unit_index);
-+ assert("jmacd-7189", scan_index <= unit_index + unit_width - 1);
-+
-+ /* Depending on the scan direction, we set different maximum values for scan_index
-+ (scan_max) and the number of nodes that would be passed if the scan goes the
-+ entire way (scan_dist). Incr is an integer reflecting the incremental
-+ direction of scan_index.
-+ NOTE(review): for a rightward scan scan_dist is measured from unit_index, not
-+ from scan_index; the two differ only on the first pass, when scan_index may be
-+ in the middle of the unit - confirm this is intended. */
-+ if (reiser4_scanning_left(scan)) {
-+ scan_max = unit_index;
-+ scan_dist = scan_index - unit_index;
-+ incr = -1;
-+ } else {
-+ scan_max = unit_index + unit_width - 1;
-+ scan_dist = scan_max - unit_index;
-+ incr = +1;
-+ }
-+
-+ tree = coord.node->zjnode.tree;
-+
-+ /* If the extent is allocated we have to check each of its blocks. If the extent
-+ is unallocated we can skip to the scan_max. */
-+ if (allocated) {
-+ do {
-+ neighbor = jlookup(tree, oid, scan_index);
-+ if (neighbor == NULL)
-+ goto stop_same_parent;
-+
-+ if (scan->node != neighbor
-+ && !reiser4_scan_goto(scan, neighbor)) {
-+ /* @neighbor was jput() by reiser4_scan_goto */
-+ goto stop_same_parent;
-+ }
-+
-+ ret = scan_set_current(scan, neighbor, 1, &coord);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ /* reference to @neighbor is stored in @scan, no need
-+ to jput(). */
-+ scan_index += incr;
-+
-+ /* loop until scan_index has stepped one position past scan_max */
-+ } while (incr + scan_max != scan_index);
-+
-+ } else {
-+ /* Optimized case for unallocated extents, skip to the end. */
-+ neighbor = jlookup(tree, oid, scan_max /*index */ );
-+ if (neighbor == NULL) {
-+ /* Race with truncate */
-+ scan->stop = 1;
-+ ret = 0;
-+ goto exit;
-+ }
-+
-+ assert("zam-1043",
-+ reiser4_blocknr_is_fake(jnode_get_block(neighbor)));
-+
-+ ret = scan_set_current(scan, neighbor, scan_dist, &coord);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+ }
-+
-+ /* try to step @coord to the neighboring unit in the scan direction */
-+ if (coord_sideof_unit(&coord, scan->direction) == 0
-+ && item_is_extent(&coord)) {
-+ /* Continue as long as there are more extent units. */
-+
-+ /* enter the new unit from the side we are coming from: its last
-+ block when scanning left, its first block otherwise */
-+ scan_index =
-+ extent_unit_index(&coord) +
-+ (reiser4_scanning_left(scan) ?
-+ extent_unit_width(&coord) - 1 : 0);
-+ goto repeat;
-+ }
-+
-+ /* the body of this if (0) is reachable only through the
-+ stop_same_parent label */
-+ if (0) {
-+ stop_same_parent:
-+
-+ /* If we are scanning left and we stop in the middle of an allocated
-+ extent, we know the preceder immediately.. */
-+ /* middle of extent is (scan_index - unit_index) != 0. */
-+ if (reiser4_scanning_left(scan) &&
-+ (scan_index - unit_index) != 0) {
-+ /* FIXME(B): Someone should step-through and verify that this preceder
-+ calculation is indeed correct. */
-+ /* @unit_start is starting block (number) of extent
-+ unit. Flush stopped at the @scan_index block from
-+ the beginning of the file, which is (scan_index -
-+ unit_index) block within extent.
-+ */
-+ if (unit_start) {
-+ /* skip preceder update when we are at hole */
-+ scan->preceder_blk =
-+ unit_start + scan_index - unit_index;
-+ check_preceder(scan->preceder_blk);
-+ }
-+ }
-+
-+ /* In this case, we leave coord set to the parent of scan->node. */
-+ scan->stop = 1;
-+
-+ } else {
-+ /* In this case, we are still scanning, coord is set to the next item which is
-+ either off-the-end of the node or not an extent. */
-+ assert("jmacd-8912", scan->stop == 0);
-+ assert("jmacd-7812",
-+ (coord_is_after_sideof_unit(&coord, scan->direction)
-+ || !item_is_extent(&coord)));
-+ }
-+
-+ ret = 0;
-+ exit:
-+ return ret;
-+}
-+
-+/*
-+ * Request @wanted_count blocks from the block allocator, using @preceder as
-+ * the allocation hint. On return *first_allocated holds the first block
-+ * obtained and *allocated the number of blocks obtained; @preceder->blk is
-+ * advanced to the last block of that range.
-+ */
-+static void extent_allocate_blocks(reiser4_blocknr_hint *preceder,
-+				   reiser4_block_nr wanted_count,
-+				   reiser4_block_nr *first_allocated,
-+				   reiser4_block_nr *allocated,
-+				   block_stage_t block_stage)
-+{
-+	/* the wanted_count blocks are currently accounted either as
-+	   UNALLOCATED or as GRABBED; tell the allocator which one */
-+	preceder->block_stage = block_stage;
-+	/* 0 means: scan whole disk, if needed */
-+	preceder->max_dist = 0;
-+	/* in: how many blocks we ask for */
-+	*allocated = wanted_count;
-+
-+	/* FIXME: allocation errors are not handled here yet */
-+	check_me("vs-420",
-+		 reiser4_alloc_blocks(preceder, first_allocated, allocated,
-+				      BA_PERMANENT) == 0);
-+
-+	/* move flush_pos's preceder to the last allocated block number */
-+	preceder->blk = *first_allocated + *allocated - 1;
-+}
-+
-+/*
-+ * At flush time one unallocated extent may have to be replaced with a set of
-+ * allocated extents. insert_into_item is then called and may need to add new
-+ * nodes to the tree; space for that is taken from the inviolable reserve (5%).
-+ *
-+ * Returns the number of blocks the current context had grabbed before the
-+ * extra reservation was made, to be handed to free_replace_reserved() later.
-+ */
-+static reiser4_block_nr reserve_replace(void)
-+{
-+	reiser4_block_nr grabbed_before;
-+
-+	/* remember the grab counter before we touch it */
-+	grabbed_before = get_current_context()->grabbed_blocks;
-+	/* force-grab enough for one insert_into_item */
-+	check_me("vpf-340",
-+		 !reiser4_grab_space_force(estimate_one_insert_into_item
-+					   (current_tree), BA_RESERVED));
-+	return grabbed_before;
-+}
-+
-+/*
-+ * Give back whatever the current context still holds grabbed above the
-+ * @grabbed level (the value returned earlier by reserve_replace()).
-+ */
-+static void free_replace_reserved(reiser4_block_nr grabbed)
-+{
-+	reiser4_context *context = get_current_context();
-+	reiser4_block_nr surplus = context->grabbed_blocks - grabbed;
-+
-+	grabbed2free(context, get_super_private(context->super), surplus);
-+}
-+
-+/*
-+ * Offset within the file, in blocks, of the first block addressed by the
-+ * extent unit @item is set to: the unit key's byte offset shifted right by
-+ * current_blocksize_bits.
-+ */
-+__u64 extent_unit_index(const coord_t * item)
-+{
-+	reiser4_key unit_key;
-+	__u64 byte_offset;
-+
-+	assert("vs-648", coord_is_existing_unit(item));
-+
-+	unit_key_by_coord(item, &unit_key);
-+	byte_offset = get_key_offset(&unit_key);
-+	return byte_offset >> current_blocksize_bits;
-+}
-+
-+/* Width, in blocks, of the extent unit @item is set to (see width_by_coord()).
-+
-+ AUDIT shouldn't the return value be of reiser4_block_nr type?
-+ Josh's answer: who knows? Is a "number of blocks" the same type as "block offset"? */
-+__u64 extent_unit_width(const coord_t * item)
-+{
-+ assert("vs-649", coord_is_existing_unit(item));
-+ return width_by_coord(item);
-+}
-+
-+/* Value of the start field of the extent unit @item is set to */
-+static reiser4_block_nr extent_unit_start(const coord_t * item)
-+{
-+	reiser4_extent *unit = extent_by_coord(item);
-+
-+	return extent_get_start(unit);
-+}
-+
-+/**
-+ * split_allocated_extent - replace allocated extent with two allocated extents
-+ * @coord: coordinate of the allocated extent unit to split
-+ * @pos_in_unit: number of blocks which stay in the first of the two extents;
-+ * must be less than the width of the unit
-+ *
-+ * The unit [start, width] becomes [start, pos_in_unit] followed by
-+ * [start + pos_in_unit, width - pos_in_unit]. Returns -ENOMEM if the replace
-+ * handle can not be allocated, otherwise the result of
-+ * reiser4_replace_extent().
-+ */
-+static int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit)
-+{
-+ int result;
-+ struct replace_handle *h;
-+ reiser4_extent *ext;
-+ reiser4_block_nr grabbed;
-+
-+ ext = extent_by_coord(coord);
-+ assert("vs-1410", state_of_extent(ext) == ALLOCATED_EXTENT);
-+ assert("vs-1411", extent_get_width(ext) > pos_in_unit);
-+
-+ /* the handle is kmalloc-ed rather than placed on the stack */
-+ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
-+ if (h == NULL)
-+ return RETERR(-ENOMEM);
-+ h->coord = coord;
-+ h->lh = znode_lh(coord->node);
-+ h->pkey = &h->key;
-+ /* key of the second extent: key of the unit advanced by pos_in_unit
-+ blocks */
-+ unit_key_by_coord(coord, h->pkey);
-+ set_key_offset(h->pkey,
-+ (get_key_offset(h->pkey) +
-+ pos_in_unit * current_blocksize));
-+ /* first part overwrites the unit in place ... */
-+ reiser4_set_extent(&h->overwrite, extent_get_start(ext),
-+ pos_in_unit);
-+ /* ... the remainder is the new extent passed along with it */
-+ reiser4_set_extent(&h->new_extents[0],
-+ extent_get_start(ext) + pos_in_unit,
-+ extent_get_width(ext) - pos_in_unit);
-+ h->nr_new_extents = 1;
-+ h->flags = COPI_DONT_SHIFT_LEFT;
-+ h->paste_key = h->key;
-+
-+ /* reserve space for extent unit paste, @grabbed is reserved before */
-+ grabbed = reserve_replace();
-+ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
-+ extent */);
-+ /* restore reserved */
-+ free_replace_reserved(grabbed);
-+ kfree(h);
-+ return result;
-+}
-+
-+/* replace extent @ext by extent @replace. Try to merge @replace with previous extent of the item (if there is
-+ one). Return 1 if it succeeded, 0 - otherwise.
-+
-+ Merging is possible only when the previous unit is an allocated extent which ends exactly where @replace
-+ starts. On success the previous unit is widened by the width of @replace, @ext is either narrowed by that
-+ width or removed from the item, and @coord is moved back to the previous unit. */
-+static int try_to_merge_with_left(coord_t *coord, reiser4_extent *ext,
-+ reiser4_extent *replace)
-+{
-+ assert("vs-1415", extent_by_coord(coord) == ext);
-+
-+ if (coord->unit_pos == 0
-+ || state_of_extent(ext - 1) != ALLOCATED_EXTENT)
-+ /* there is no unit to the left of @ext, or that unit is not an allocated extent */
-+ return 0;
-+ if (extent_get_start(ext - 1) + extent_get_width(ext - 1) !=
-+ extent_get_start(replace))
-+ /* @replace does not start right after the blocks of the left unit */
-+ return 0;
-+
-+ /* we can glue, widen previous unit */
-+ extent_set_width(ext - 1,
-+ extent_get_width(ext - 1) + extent_get_width(replace));
-+
-+ if (extent_get_width(ext) != extent_get_width(replace)) {
-+ /* make current extent narrower */
-+ /* the start is advanced only for an allocated extent */
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ extent_get_width(replace));
-+ extent_set_width(ext,
-+ extent_get_width(ext) -
-+ extent_get_width(replace));
-+ } else {
-+ /* current extent completely glued with its left neighbor, remove it */
-+ coord_t from, to;
-+
-+ /* @from and @to both point to the last unit of the item */
-+ coord_dup(&from, coord);
-+ from.unit_pos = nr_units_extent(coord) - 1;
-+ coord_dup(&to, &from);
-+
-+ /* currently cut from extent can cut either from the beginning or from the end. Move place which got
-+ freed after unit removal to end of item */
-+ memmove(ext, ext + 1,
-+ (from.unit_pos -
-+ coord->unit_pos) * sizeof(reiser4_extent));
-+ /* wipe part of item which is going to be cut, so that node_check will not be confused */
-+ cut_node_content(&from, &to, NULL, NULL, NULL);
-+ }
-+ znode_make_dirty(coord->node);
-+ /* move coord back */
-+ coord->unit_pos--;
-+ return 1;
-+}
-+
-+/**
-+ * conv_extent - replace extent with 2 ones
-+ * @coord: coordinate of extent to be replaced
-+ * @replace: extent to overwrite the one @coord is set to
-+ *
-+ * Overwrites extent @coord is set to and paste one extent unit after
-+ * overwritten one if @replace is shorter than initial extent.
-+ *
-+ * Three cases are handled, in this order: @replace can be glued to the left
-+ * neighbor (try_to_merge_with_left()); @replace has the same width as the
-+ * extent and simply overwrites it; @replace is narrower, so the extent is
-+ * replaced with @replace plus a padding extent covering the rest.
-+ *
-+ * Returns 0 on success, -ENOMEM if the replace handle can not be allocated,
-+ * or the result of reiser4_replace_extent().
-+ */
-+static int conv_extent(coord_t *coord, reiser4_extent *replace)
-+{
-+ int result;
-+ struct replace_handle *h;
-+ reiser4_extent *ext;
-+ reiser4_block_nr start, width, new_width;
-+ reiser4_block_nr grabbed;
-+ extent_state state;
-+
-+ /* sample the extent before anything modifies it */
-+ ext = extent_by_coord(coord);
-+ state = state_of_extent(ext);
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+ new_width = extent_get_width(replace);
-+
-+ assert("vs-1458", (state == UNALLOCATED_EXTENT ||
-+ state == ALLOCATED_EXTENT));
-+ assert("vs-1459", width >= new_width);
-+
-+ if (try_to_merge_with_left(coord, ext, replace)) {
-+ /* merged @replace with left neighbor. Current unit is either
-+ removed or narrowed */
-+ return 0;
-+ }
-+
-+ if (width == new_width) {
-+ /* replace current extent with @replace */
-+ *ext = *replace;
-+ znode_make_dirty(coord->node);
-+ return 0;
-+ }
-+
-+ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
-+ if (h == NULL)
-+ return RETERR(-ENOMEM);
-+ h->coord = coord;
-+ h->lh = znode_lh(coord->node);
-+ h->pkey = &h->key;
-+ /* key of the padding extent: key of the unit advanced by new_width
-+ blocks */
-+ unit_key_by_coord(coord, h->pkey);
-+ set_key_offset(h->pkey,
-+ (get_key_offset(h->pkey) + new_width * current_blocksize));
-+ h->overwrite = *replace;
-+
-+ /* replace @ext with @replace and padding extent */
-+ /* the padding keeps the state of the original extent: it continues the
-+ old block range if that was allocated, otherwise it is marked
-+ unallocated */
-+ reiser4_set_extent(&h->new_extents[0],
-+ (state == ALLOCATED_EXTENT) ?
-+ (start + new_width) :
-+ UNALLOCATED_EXTENT_START,
-+ width - new_width);
-+ h->nr_new_extents = 1;
-+ h->flags = COPI_DONT_SHIFT_LEFT;
-+ h->paste_key = h->key;
-+
-+ /* reserve space for extent unit paste, @grabbed is reserved before */
-+ grabbed = reserve_replace();
-+ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
-+ extent */);
-+
-+ /* restore reserved */
-+ free_replace_reserved(grabbed);
-+ kfree(h);
-+ return result;
-+}
-+
-+/**
-+ * assign_real_blocknrs
-+ * @flush_pos: flush position; its flush queue (flush_pos->fq) gives the atom
-+ * and receives the relocated nodes
-+ * @oid: objectid of file jnodes to assign block number to belongs to
-+ * @index: first jnode on the range
-+ * @count: number of jnodes to assign block numbers to
-+ * @first: start of allocated block range
-+ *
-+ * Assigns block numbers to each of @count jnodes. Index of first jnode is
-+ * @index. Jnodes get lookuped with jlookup. The i-th jnode gets block number
-+ * @first + i, has JNODE_FLUSH_RESERVED cleared and is passed to
-+ * unformatted_make_reloc(). Every jnode of the range must exist (BUG_ON
-+ * otherwise).
-+ *
-+ * The atom is obtained locked from atom_locked_by_fq() and is unlocked before
-+ * return.
-+ */
-+static void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
-+ unsigned long index, reiser4_block_nr count,
-+ reiser4_block_nr first)
-+{
-+ /* same type as @count, so that the loop bound is never truncated */
-+ reiser4_block_nr i;
-+ reiser4_tree *tree;
-+ txn_atom *atom;
-+
-+ atom = atom_locked_by_fq(flush_pos->fq);
-+ assert("vs-1468", atom);
-+ BUG_ON(atom == NULL);
-+
-+ tree = current_tree;
-+ for (i = 0; i < count; ++i, ++index) {
-+ jnode *node;
-+
-+ node = jlookup(tree, oid, index);
-+ assert("", node != NULL);
-+ BUG_ON(node == NULL);
-+
-+ spin_lock_jnode(node);
-+ assert("", !jnode_is_flushprepped(node));
-+ assert("vs-1475", node->atom == atom);
-+ assert("vs-1476", atomic_read(&node->x_count) > 0);
-+
-+ JF_CLR(node, JNODE_FLUSH_RESERVED);
-+ jnode_set_block(node, &first);
-+ unformatted_make_reloc(node, flush_pos->fq);
-+ ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
-+ FQ_LIST, 0));
-+ spin_unlock_jnode(node);
-+ /* next jnode gets the next block of the range */
-+ first++;
-+
-+ /* drop the reference taken by jlookup() */
-+ atomic_dec(&node->x_count);
-+ }
-+
-+ spin_unlock_atom(atom);
-+}
-+
-+/**
-+ * make_node_ovrwr - assign node to overwrite set
-+ * @jnodes: overwrite set list head
-+ * @node: jnode to belong to overwrite set
-+ *
-+ * Sets OVRWR jnode state bit and puts @node to the end of list head @jnodes
-+ * which is an accumulator for nodes before they get to overwrite set list of
-+ * atom. @node must have neither JNODE_RELOC nor JNODE_OVRWR set. The jnode
-+ * spin lock is taken and released inside.
-+ */
-+static void make_node_ovrwr(struct list_head *jnodes, jnode *node)
-+{
-+ spin_lock_jnode(node);
-+
-+ assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
-+ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
-+
-+ JF_SET(node, JNODE_OVRWR);
-+ /* unlink @node from the list it is on and append it to @jnodes */
-+ list_move_tail(&node->capture_link, jnodes);
-+ ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0));
-+
-+ spin_unlock_jnode(node);
-+}
-+
-+/**
-+ * mark_jnodes_overwrite - put bunch of jnodes to overwrite set
-+ * @flush_pos: flush position
-+ * @oid: objectid of file jnodes belong to
-+ * @index: starting index
-+ * @width: extent width
-+ *
-+ * Walks jnodes of one extent of file @oid, beginning with the jnode at
-+ * @index, and moves each of them to the overwrite set of the atom. The walk
-+ * covers positions flush_pos->pos_in_unit .. @width - 1 of the extent. It ends
-+ * early, with the flush position's state set to POS_INVALID, at the first
-+ * jnode which is missing, is already flushprepped, or belongs to another atom
-+ * (end of slum).
-+ */
-+static void mark_jnodes_overwrite(flush_pos_t *flush_pos, oid_t oid,
-+				  unsigned long index, reiser4_block_nr width)
-+{
-+	unsigned long pos;
-+	reiser4_tree *tree;
-+	txn_atom *atom;
-+	LIST_HEAD(collected);
-+
-+	tree = current_tree;
-+
-+	/* the atom comes back locked; it is unlocked at the end */
-+	atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
-+	assert("vs-1478", atom);
-+
-+	for (pos = flush_pos->pos_in_unit; pos < width; pos++, index++) {
-+		jnode *node;
-+		int end_of_slum;
-+
-+		node = jlookup(tree, oid, index);
-+		if (node == NULL) {
-+			flush_pos->state = POS_INVALID;
-+			break;
-+		}
-+
-+		end_of_slum = jnode_check_flushprepped(node)
-+		    || node->atom != atom;
-+		if (end_of_slum)
-+			flush_pos->state = POS_INVALID;
-+		else
-+			make_node_ovrwr(&collected, node);
-+
-+		/* drop the reference taken by jlookup() */
-+		atomic_dec(&node->x_count);
-+		if (end_of_slum)
-+			break;
-+	}
-+
-+	/* hand everything collected over to the atom's overwrite list */
-+	list_splice_init(&collected, ATOM_OVRWR_LIST(atom)->prev);
-+	spin_unlock_atom(atom);
-+}
-+
-+/**
-+ * allocated_extent_slum_size - count leading not flushprepped jnodes
-+ * @flush_pos: flush position, its flush queue identifies the atom
-+ * @oid: objectid of file the jnodes belong to
-+ * @index: index of the first jnode to look at
-+ * @count: maximal number of jnodes to look at
-+ *
-+ * Returns how many consecutive jnodes, starting at @index, exist, are not
-+ * flushprepped and belong to the atom of @flush_pos. Counting stops at the
-+ * first jnode which fails any of these tests.
-+ */
-+static int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
-+				      unsigned long index, unsigned long count)
-+{
-+	unsigned long seen;
-+	reiser4_tree *tree;
-+	txn_atom *atom;
-+	int found;
-+
-+	/* the atom comes back locked; it is unlocked before return */
-+	atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
-+	assert("vs-1468", atom);
-+
-+	found = 0;
-+	tree = current_tree;
-+	for (seen = 0; seen < count; ++seen, ++index) {
-+		jnode *node;
-+		int end_of_slum;
-+
-+		node = jlookup(tree, oid, index);
-+		if (node == NULL)
-+			break;
-+
-+		/*
-+		 * a jnode of another atom is possible on overwrite:
-+		 * extent_write may capture several unformatted nodes without
-+		 * capturing any formatted nodes.
-+		 */
-+		end_of_slum = jnode_check_flushprepped(node)
-+		    || node->atom != atom;
-+		if (!end_of_slum) {
-+			assert("vs-1476", atomic_read(&node->x_count) > 1);
-+			found++;
-+		}
-+
-+		/* drop the reference taken by jlookup() */
-+		atomic_dec(&node->x_count);
-+		if (end_of_slum)
-+			break;
-+	}
-+
-+	spin_unlock_atom(atom);
-+	return found;
-+}
-+
-+/**
-+ * alloc_extent
-+ * @flush_pos: flush position; flush_pos->coord is set to the extent unit to
-+ * process and flush_pos->pos_in_unit to the position of the slum within it
-+ *
-+ * this is called by handle_pos_on_twig to proceed extent unit flush_pos->coord
-+ * is set to. It is to prepare for flushing sequence of not flushprepped nodes
-+ * (slum). It supposes that slum starts at flush_pos->pos_in_unit position
-+ * within the extent. Slum gets to relocate set if flush_pos->leaf_relocate is
-+ * set to 1 (or if the extent is unallocated) and to overwrite set otherwise.
-+ *
-+ * When the slum starts in the middle of a unit which is to be relocated, the
-+ * unit is only split at that position and the function returns; the
-+ * relocation itself is left to a later call.
-+ *
-+ * Returns 0, the result of split_allocated_extent(), or an error of
-+ * conv_extent() other than -ENOMEM.
-+ */
-+int reiser4_alloc_extent(flush_pos_t *flush_pos)
-+{
-+ coord_t *coord;
-+ reiser4_extent *ext;
-+ reiser4_extent replace_ext;
-+ oid_t oid;
-+ reiser4_block_nr protected;
-+ reiser4_block_nr start;
-+ __u64 index;
-+ __u64 width;
-+ extent_state state;
-+ int result;
-+ reiser4_block_nr first_allocated;
-+ __u64 allocated;
-+ reiser4_key key;
-+ block_stage_t block_stage;
-+
-+ assert("vs-1468", flush_pos->state == POS_ON_EPOINT);
-+ assert("vs-1469", coord_is_existing_unit(&flush_pos->coord)
-+ && item_is_extent(&flush_pos->coord));
-+
-+ coord = &flush_pos->coord;
-+
-+ ext = extent_by_coord(coord);
-+ state = state_of_extent(ext);
-+ if (state == HOLE_EXTENT) {
-+ /* nothing to flush in a hole */
-+ flush_pos->state = POS_INVALID;
-+ return 0;
-+ }
-+
-+ item_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key);
-+ /* index of the first jnode of the slum */
-+ index = extent_unit_index(coord) + flush_pos->pos_in_unit;
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+
-+ assert("vs-1457", width > flush_pos->pos_in_unit);
-+
-+ if (flush_pos->leaf_relocate || state == UNALLOCATED_EXTENT) {
-+ /* relocate */
-+ if (flush_pos->pos_in_unit) {
-+ /* split extent unit into two */
-+ /* NOTE(review): split_allocated_extent() asserts that the
-+ extent is allocated, while this branch is also entered
-+ for unallocated extents - confirm pos_in_unit is always
-+ 0 for those */
-+ result =
-+ split_allocated_extent(coord,
-+ flush_pos->pos_in_unit);
-+ flush_pos->pos_in_unit = 0;
-+ return result;
-+ }
-+
-+ /* limit number of nodes to allocate */
-+ if (flush_pos->nr_to_write < width)
-+ width = flush_pos->nr_to_write;
-+
-+ if (state == ALLOCATED_EXTENT) {
-+ /*
-+ * all protected nodes are not flushprepped, therefore
-+ * they are counted as flush_reserved
-+ */
-+ block_stage = BLOCK_FLUSH_RESERVED;
-+ protected = allocated_extent_slum_size(flush_pos, oid,
-+ index, width);
-+ if (protected == 0) {
-+ /* no slum at this position */
-+ flush_pos->state = POS_INVALID;
-+ flush_pos->pos_in_unit = 0;
-+ return 0;
-+ }
-+ } else {
-+ /* for an unallocated extent all @width nodes are taken */
-+ block_stage = BLOCK_UNALLOCATED;
-+ protected = width;
-+ }
-+
-+ /*
-+ * look at previous unit if possible. If it is allocated, make
-+ * preceder more precise
-+ */
-+ if (coord->unit_pos &&
-+ (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
-+ reiser4_pos_hint(flush_pos)->blk =
-+ extent_get_start(ext - 1) +
-+ extent_get_width(ext - 1);
-+
-+ /* allocate new block numbers for protected nodes */
-+ extent_allocate_blocks(reiser4_pos_hint(flush_pos),
-+ protected,
-+ &first_allocated, &allocated,
-+ block_stage);
-+
-+ if (state == ALLOCATED_EXTENT)
-+ /*
-+ * on relocating - free nodes which are going to be
-+ * relocated
-+ */
-+ reiser4_dealloc_blocks(&start, &allocated,
-+ BLOCK_ALLOCATED, BA_DEFER);
-+
-+ /* assign new block numbers to protected nodes */
-+ assign_real_blocknrs(flush_pos, oid, index, allocated, first_allocated);
-+
-+ /* prepare extent which will replace current one */
-+ reiser4_set_extent(&replace_ext, first_allocated, allocated);
-+
-+ /* adjust extent item */
-+ result = conv_extent(coord, &replace_ext);
-+ /* NOTE(review): -ENOMEM is deliberately let through here and is
-+ not reported to the caller (0 is returned below) - confirm
-+ this is intended */
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("vs-1461",
-+ "Failed to allocate extent. Should not happen\n");
-+ return result;
-+ }
-+
-+ /*
-+ * break flush: we prepared for flushing as many blocks as we
-+ * were asked for
-+ */
-+ if (flush_pos->nr_to_write == allocated)
-+ flush_pos->state = POS_INVALID;
-+ } else {
-+ /* overwrite */
-+ mark_jnodes_overwrite(flush_pos, oid, index, width);
-+ }
-+ flush_pos->pos_in_unit = 0;
-+ return 0;
-+}
-+
-+/*
-+ * Decide how a unit with key @key has to be added after the item @coord is
-+ * set to. Returns 0 if that item is an extent item and @key is exactly the key
-+ * which follows its last byte, so the unit can be glued to the item; returns
-+ * 1 if a new item must be inserted.
-+ */
-+static int must_insert(const coord_t *coord, const reiser4_key *key)
-+{
-+	reiser4_key next_key;
-+
-+	/* only extent items can be appended to */
-+	if (item_id_by_coord(coord) != EXTENT_POINTER_ID)
-+		return 1;
-+
-+	return !keyeq(append_key_extent(coord, &next_key), key);
-+}
-+
-+/* copy extent @copy_ext to the end of @node. It may have to either insert new item after the last one, or append last item,
-+ or modify last unit of last item to have greater width.
-+
-+ @key is the key of the unit being copied. Returns 0 on success or -E_NODE_FULL (asserted to be the only
-+ possible failure). */
-+static int put_unit_to_end(znode *node, const reiser4_key *key,
-+ reiser4_extent *copy_ext)
-+{
-+ int result;
-+ coord_t coord;
-+ cop_insert_flag flags;
-+ reiser4_extent *last_ext;
-+ reiser4_item_data data;
-+
-+ /* set coord after last unit in an item */
-+ coord_init_last_unit(&coord, node);
-+ coord.between = AFTER_UNIT;
-+
-+ /* @node must take the unit as is: no shifting, no new nodes */
-+ flags =
-+ COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT | COPI_DONT_ALLOCATE;
-+ if (must_insert(&coord, key)) {
-+ /* @key does not continue the last item: insert a new item */
-+ result =
-+ insert_by_coord(&coord, init_new_extent(&data, copy_ext, 1),
-+ key, NULL /*lh */ , flags);
-+
-+ } else {
-+ /* try to glue with last unit */
-+ last_ext = extent_by_coord(&coord);
-+ /* state_of_extent() is used as a truth value: HOLE_EXTENT is 0, so
-+ the test passes for unallocated and allocated extents */
-+ if (state_of_extent(last_ext) &&
-+ extent_get_start(last_ext) + extent_get_width(last_ext) ==
-+ extent_get_start(copy_ext)) {
-+ /* widen last unit of node */
-+ extent_set_width(last_ext,
-+ extent_get_width(last_ext) +
-+ extent_get_width(copy_ext));
-+ znode_make_dirty(node);
-+ return 0;
-+ }
-+
-+ /* FIXME: put an assertion here that we can not merge last unit in @node and new unit */
-+ /* append @copy_ext as a new unit of the last item */
-+ result =
-+ insert_into_item(&coord, NULL /*lh */ , key,
-+ init_new_extent(&data, copy_ext, 1),
-+ flags);
-+ }
-+
-+ assert("vs-438", result == 0 || result == -E_NODE_FULL);
-+ return result;
-+}
-+
-+/* @coord is set to extent unit.
-+
-+ Copies that unit to the end of @left, either relocated to newly allocated blocks (when the unit is unallocated,
-+ or allocated and flush_pos->leaf_relocate is set) or as it is (overwrite). On success *stop_key is set to the
-+ key which follows the copied blocks and SQUEEZE_CONTINUE is returned. If @left has no room, newly allocated
-+ blocks are freed again and SQUEEZE_TARGET_FULL is returned. */
-+squeeze_result squalloc_extent(znode *left, const coord_t *coord,
-+ flush_pos_t *flush_pos,
-+ reiser4_key *stop_key)
-+{
-+ reiser4_extent *ext;
-+ __u64 index;
-+ __u64 width;
-+ reiser4_block_nr start;
-+ extent_state state;
-+ oid_t oid;
-+ reiser4_block_nr first_allocated;
-+ __u64 allocated;
-+ __u64 protected;
-+ reiser4_extent copy_extent;
-+ reiser4_key key;
-+ int result;
-+ block_stage_t block_stage;
-+
-+ assert("vs-1457", flush_pos->pos_in_unit == 0);
-+ assert("vs-1467", coord_is_leftmost_unit(coord));
-+ assert("vs-1467", item_is_extent(coord));
-+
-+ ext = extent_by_coord(coord);
-+ index = extent_unit_index(coord);
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+ state = state_of_extent(ext);
-+ unit_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key);
-+
-+ if ((flush_pos->leaf_relocate && state == ALLOCATED_EXTENT) ||
-+ (state == UNALLOCATED_EXTENT)) {
-+ /* relocate */
-+ if (state == ALLOCATED_EXTENT) {
-+ /* all protected nodes are not flushprepped, therefore
-+ * they are counted as flush_reserved */
-+ block_stage = BLOCK_FLUSH_RESERVED;
-+ protected = allocated_extent_slum_size(flush_pos, oid,
-+ index, width);
-+ if (protected == 0) {
-+ /* no slum at the beginning of the unit */
-+ /* NOTE(review): a literal 0 is returned where a
-+ squeeze_result is expected - confirm which
-+ enumerator that corresponds to */
-+ flush_pos->state = POS_INVALID;
-+ flush_pos->pos_in_unit = 0;
-+ return 0;
-+ }
-+ } else {
-+ block_stage = BLOCK_UNALLOCATED;
-+ protected = width;
-+ }
-+
-+ /*
-+ * look at previous unit if possible. If it is allocated, make
-+ * preceder more precise
-+ */
-+ if (coord->unit_pos &&
-+ (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
-+ reiser4_pos_hint(flush_pos)->blk =
-+ extent_get_start(ext - 1) +
-+ extent_get_width(ext - 1);
-+
-+ /* allocate new block numbers for protected nodes */
-+ extent_allocate_blocks(reiser4_pos_hint(flush_pos),
-+ protected,
-+ &first_allocated, &allocated,
-+ block_stage);
-+
-+ /* prepare extent which will be copied to left */
-+ reiser4_set_extent(&copy_extent, first_allocated, allocated);
-+
-+ result = put_unit_to_end(left, &key, &copy_extent);
-+ if (result == -E_NODE_FULL) {
-+ int target_block_stage;
-+
-+ /* free blocks which were just allocated */
-+ target_block_stage =
-+ (state ==
-+ ALLOCATED_EXTENT) ? BLOCK_FLUSH_RESERVED :
-+ BLOCK_UNALLOCATED;
-+ reiser4_dealloc_blocks(&first_allocated, &allocated,
-+ target_block_stage,
-+ BA_PERMANENT);
-+
-+ /* rewind the preceder. */
-+ flush_pos->preceder.blk = first_allocated;
-+ check_preceder(flush_pos->preceder.blk);
-+
-+ return SQUEEZE_TARGET_FULL;
-+ }
-+
-+ if (state == ALLOCATED_EXTENT) {
-+ /* free nodes which were relocated */
-+ reiser4_dealloc_blocks(&start, &allocated,
-+ BLOCK_ALLOCATED, BA_DEFER);
-+ }
-+
-+ /* assign new block numbers to protected nodes */
-+ assign_real_blocknrs(flush_pos, oid, index, allocated,
-+ first_allocated);
-+
-+ /* advance the key past the blocks which were copied */
-+ set_key_offset(&key,
-+ get_key_offset(&key) +
-+ (allocated << current_blocksize_bits));
-+ } else {
-+ /*
-+ * overwrite: try to copy unit as it is to left neighbor and
-+ * make all first not flushprepped nodes overwrite nodes
-+ */
-+ reiser4_set_extent(&copy_extent, start, width);
-+ result = put_unit_to_end(left, &key, &copy_extent);
-+ if (result == -E_NODE_FULL)
-+ return SQUEEZE_TARGET_FULL;
-+
-+ if (state != HOLE_EXTENT)
-+ mark_jnodes_overwrite(flush_pos, oid, index, width);
-+ /* advance the key past the whole unit */
-+ set_key_offset(&key,
-+ get_key_offset(&key) +
-+ (width << current_blocksize_bits));
-+ }
-+ *stop_key = key;
-+ return SQUEEZE_CONTINUE;
-+}
-+
-+/* Build in @key the key for offset @off of @inode. Extents have nothing of
-+ their own here: the work is delegated to key_by_inode_and_offset_common()
-+ and its result is returned. */
-+int key_by_offset_extent(struct inode *inode, loff_t off, reiser4_key * key)
-+{
-+ return key_by_inode_and_offset_common(inode, off, key);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/extent.h linux-2.6.20/fs/reiser4/plugin/item/extent.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/extent.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/extent.h 2007-05-06 14:50:43.811010720 +0400
-@@ -0,0 +1,231 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __REISER4_EXTENT_H__
-+#define __REISER4_EXTENT_H__
-+
-+/* on disk extent: a run of @width blocks beginning at block @start. Both
-+ fields are 8 bytes wide and are kept little-endian; use extent_get_*() and
-+ extent_set_*() below instead of touching them directly */
-+typedef struct {
-+ reiser4_dblock_nr start;
-+ reiser4_dblock_nr width;
-+} reiser4_extent;
-+
-+/* counters of units and blocks, one pair per extent state.
-+ NOTE(review): presumably filled by item_stat_extent() - confirm */
-+typedef struct extent_stat {
-+ int unallocated_units;
-+ int unallocated_blocks;
-+ int allocated_units;
-+ int allocated_blocks;
-+ int hole_units;
-+ int hole_blocks;
-+} extent_stat;
-+
-+/* extents in an extent item can be either holes, or unallocated or allocated
-+ extents. HOLE_EXTENT is the first enumerator, i.e. 0, so an extent_state
-+ used as a truth value means "not a hole" */
-+typedef enum {
-+ HOLE_EXTENT,
-+ UNALLOCATED_EXTENT,
-+ ALLOCATED_EXTENT
-+} extent_state;
-+
-+/* special values of the start field of an extent which has no blocks on disk.
-+ NOTE(review): the role of UNALLOCATED_EXTENT_START2 is not visible here -
-+ confirm against state_of_extent() */
-+#define HOLE_EXTENT_START 0
-+#define UNALLOCATED_EXTENT_START 1
-+#define UNALLOCATED_EXTENT_START2 2
-+
-+/* extent specific data attached to a coord.
-+ NOTE(review): presumably set up by init_coord_extension_extent() - confirm */
-+typedef struct {
-+ reiser4_block_nr pos_in_unit;
-+ reiser4_block_nr width; /* width of current unit */
-+ pos_in_node_t nr_units; /* number of units */
-+ int ext_offset; /* offset from the beginning of zdata() */
-+ unsigned long expected_page;
-+#if REISER4_DEBUG
-+ reiser4_extent extent; /* present in debugging builds only */
-+#endif
-+} extent_coord_extension_t;
-+
-+/* inline helpers to set/get fields of on-disk extent. Fields are stored
-+ little-endian; the helpers convert to and from cpu byte order */
-+/* start field of @ext in cpu byte order */
-+static inline reiser4_block_nr extent_get_start(const reiser4_extent * ext)
-+{
-+ return le64_to_cpu(ext->start);
-+}
-+
-+/* width field of @ext in cpu byte order */
-+static inline reiser4_block_nr extent_get_width(const reiser4_extent * ext)
-+{
-+ return le64_to_cpu(ext->width);
-+}
-+
-+extern __u64 reiser4_current_block_count(void);
-+
-+/* store @start into the start field of @ext, converted to little-endian. The
-+ store is done with put_unaligned(). Values greater than 1 are asserted to
-+ be below reiser4_current_block_count() (ergo() is presumably logical
-+ implication - confirm) */
-+static inline void
-+extent_set_start(reiser4_extent * ext, reiser4_block_nr start)
-+{
-+ cassert(sizeof(ext->start) == 8);
-+ assert("nikita-2510",
-+ ergo(start > 1, start < reiser4_current_block_count()));
-+ put_unaligned(cpu_to_le64(start), &ext->start);
-+}
-+
-+/* store @width into the width field of @ext, converted to little-endian. The
-+ store is done with put_unaligned(). @width must not be 0. When the start of
-+ @ext is greater than 1, the extent is asserted to end within
-+ reiser4_current_block_count() blocks */
-+static inline void
-+extent_set_width(reiser4_extent * ext, reiser4_block_nr width)
-+{
-+ cassert(sizeof(ext->width) == 8);
-+ assert("", width > 0);
-+ put_unaligned(cpu_to_le64(width), &ext->width);
-+ /* checked after the store; the start is read back from @ext */
-+ assert("nikita-2511",
-+ ergo(extent_get_start(ext) > 1,
-+ extent_get_start(ext) + width <=
-+ reiser4_current_block_count()));
-+}
-+
-+/* pointer to the first extent unit of the extent item @coord is set to
-+ (the item body cast to reiser4_extent *). @coord is evaluated twice */
-+#define extent_item(coord) \
-+({ \
-+ assert("nikita-3143", item_is_extent(coord)); \
-+ ((reiser4_extent *)item_body_by_coord (coord)); \
-+})
-+
-+/* pointer to the extent unit @coord is set to: unit number (coord)->unit_pos
-+ of the extent item. @coord is evaluated more than once */
-+#define extent_by_coord(coord) \
-+({ \
-+ assert("nikita-3144", item_is_extent(coord)); \
-+ (extent_item (coord) + (coord)->unit_pos); \
-+})
-+
-+/* width, in cpu byte order, of the extent unit @coord is set to. @coord is
-+ evaluated more than once */
-+#define width_by_coord(coord) \
-+({ \
-+ assert("nikita-3145", item_is_extent(coord)); \
-+ extent_get_width (extent_by_coord(coord)); \
-+})
-+
-+struct carry_cut_data;
-+struct carry_kill_data;
-+
-+/* plugin->u.item.b.* */
-+reiser4_key *max_key_inside_extent(const coord_t *, reiser4_key *);
-+int can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data *);
-+int mergeable_extent(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_extent(const coord_t *);
-+lookup_result lookup_extent(const reiser4_key *, lookup_bias, coord_t *);
-+void init_coord_extent(coord_t *);
-+int init_extent(coord_t *, reiser4_item_data *);
-+int paste_extent(coord_t *, reiser4_item_data *, carry_plugin_info *);
-+int can_shift_extent(unsigned free_space,
-+ coord_t * source, znode * target, shift_direction,
-+ unsigned *size, unsigned want);
-+void copy_units_extent(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction where_is_free_space,
-+ unsigned free_space);
-+int kill_hook_extent(const coord_t *, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *);
-+int create_hook_extent(const coord_t * coord, void *arg);
-+int cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+reiser4_key *unit_key_extent(const coord_t *, reiser4_key *);
-+reiser4_key *max_unit_key_extent(const coord_t *, reiser4_key *);
-+void print_extent(const char *, coord_t *);
-+int utmost_child_extent(const coord_t * coord, sideof side, jnode ** child);
-+int utmost_child_real_block_extent(const coord_t * coord, sideof side,
-+ reiser4_block_nr * block);
-+void item_stat_extent(const coord_t * coord, void *vp);
-+int reiser4_check_extent(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.file.* */
-+ssize_t reiser4_write_extent(struct file *, const char __user *,
-+ size_t, loff_t *);
-+int reiser4_read_extent(struct file *, flow_t *, hint_t *);
-+int reiser4_readpage_extent(void *, struct page *);
-+int reiser4_do_readpage_extent(reiser4_extent*, reiser4_block_nr, struct page*);
-+reiser4_key *append_key_extent(const coord_t *, reiser4_key *);
-+void init_coord_extension_extent(uf_coord_t *, loff_t offset);
-+int get_block_address_extent(const coord_t *, sector_t block,
-+ sector_t * result);
-+
-+/* these are used in flush.c
-+ FIXME-VS: should they be somewhere in item_plugin? */
-+int allocate_extent_item_in_place(coord_t *, lock_handle *, flush_pos_t * pos);
-+int allocate_and_copy_extent(znode * left, coord_t * right, flush_pos_t * pos,
-+ reiser4_key * stop_key);
-+
-+int extent_is_unallocated(const coord_t * item); /* True if this extent is unallocated (i.e., not a hole, not allocated). */
-+__u64 extent_unit_index(const coord_t * item); /* Block offset of this unit. */
-+__u64 extent_unit_width(const coord_t * item); /* Number of blocks in this unit. */
-+
-+/* plugin->u.item.f. */
-+int reiser4_scan_extent(flush_scan * scan);
-+extern int key_by_offset_extent(struct inode *, loff_t, reiser4_key *);
-+
-+reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
-+ int nr_extents);
-+reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr);
-+extent_state state_of_extent(reiser4_extent * ext);
-+void reiser4_set_extent(reiser4_extent *, reiser4_block_nr start,
-+ reiser4_block_nr width);
-+int reiser4_update_extent(struct inode *, jnode *, loff_t pos,
-+ int *plugged_hole);
-+
-+#include "../../coord.h"
-+#include "../../lock.h"
-+#include "../../tap.h"
-+
-+struct replace_handle { /* in/out state handed to reiser4_replace_extent() */
-+ /* these are to be set before calling reiser4_replace_extent */
-+ coord_t *coord;
-+ lock_handle *lh;
-+ reiser4_key key;
-+ reiser4_key *pkey;
-+ reiser4_extent overwrite;
-+ reiser4_extent new_extents[2]; /* room for at most two new extents */
-+ int nr_new_extents; /* presumably how many entries of new_extents[] are valid - TODO confirm */
-+ unsigned flags;
-+
-+ /* these are used by reiser4_replace_extent */
-+ reiser4_item_data item;
-+ coord_t coord_after;
-+ lock_handle lh_after;
-+ tap_t watch;
-+ reiser4_key paste_key;
-+#if REISER4_DEBUG
-+ reiser4_extent orig_ext; /* exists in debug builds only */
-+ reiser4_key tmp; /* exists in debug builds only */
-+#endif
-+};
-+
-+/* this structure is kmalloced before calling make_extent to avoid excessive
-+ stack consumption on plug_hole->reiser4_replace_extent */
-+struct make_extent_handle {
-+ uf_coord_t *uf_coord;
-+ reiser4_block_nr blocknr;
-+ int created;
-+ struct inode *inode;
-+ union { /* per-operation state */
-+ struct {
-+ } append; /* empty: the append case carries no extra state */
-+ struct replace_handle replace; /* replace case: argument block for reiser4_replace_extent() */
-+ } u;
-+};
-+
-+int reiser4_replace_extent(struct replace_handle *,
-+ int return_inserted_position);
-+lock_handle *znode_lh(znode *);
-+
-+/* the reiser4 repacker support */
-+struct repacker_cursor;
-+extern int process_extent_backward_for_repacking(tap_t *,
-+ struct repacker_cursor *);
-+extern int mark_extent_for_repacking(tap_t *, int);
-+
-+#define coord_by_uf_coord(uf_coord) (&((uf_coord)->coord))
-+#define ext_coord_by_uf_coord(uf_coord) (&((uf_coord)->extension.extent))
-+
-+/* __REISER4_EXTENT_H__ */
-+#endif
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/extent_item_ops.c linux-2.6.20/fs/reiser4/plugin/item/extent_item_ops.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/extent_item_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/extent_item_ops.c 2007-05-06 14:50:43.815011970 +0400
-@@ -0,0 +1,889 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../tree_walk.h" /* check_sibling_list() */
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+
-+#include <linux/quotaops.h>
-+
-+/* item_plugin->b.max_key_inside: fill @key with the key of the item at @coord, offset replaced by that of reiser4_max_key() */
-+reiser4_key *max_key_inside_extent(const coord_t * coord, reiser4_key * key)
-+{
-+ item_key_by_coord(coord, key); /* start from the item's own key */
-+ set_key_offset(key, get_key_offset(reiser4_max_key())); /* only the offset field is changed */
-+ return key; /* same pointer the caller passed in */
-+}
-+
-+/* item_plugin->b.can_contain_key
-+ returns 1 when @data uses the plugin of the item at @coord and @key matches that item's key in all checked fields */
-+int
-+can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data * data)
-+{
-+ reiser4_key item_key;
-+
-+ if (item_plugin_by_coord(coord) != data->iplug) /* @data belongs to a different item plugin */
-+ return 0;
-+
-+ item_key_by_coord(coord, &item_key);
-+ if (get_key_locality(key) != get_key_locality(&item_key) ||
-+ get_key_objectid(key) != get_key_objectid(&item_key) ||
-+ get_key_ordering(key) != get_key_ordering(&item_key)) /* key offset and type are not compared here */
-+ return 0;
-+
-+ return 1;
-+}
-+
-+/* item_plugin->b.mergeable
-+ first item is of extent type; returns 1 if item @p2 directly continues item @p1, 0 otherwise */
-+/* Audited by: green(2002.06.13) */
-+int mergeable_extent(const coord_t * p1, const coord_t * p2)
-+{
-+ reiser4_key key1, key2;
-+
-+ assert("vs-299", item_id_by_coord(p1) == EXTENT_POINTER_ID);
-+ /* FIXME-VS: Which is it? Assert or return 0 */
-+ if (item_id_by_coord(p2) != EXTENT_POINTER_ID) {
-+ return 0;
-+ }
-+
-+ item_key_by_coord(p1, &key1);
-+ item_key_by_coord(p2, &key2);
-+ if (get_key_locality(&key1) != get_key_locality(&key2) ||
-+ get_key_objectid(&key1) != get_key_objectid(&key2) ||
-+ get_key_ordering(&key1) != get_key_ordering(&key2) ||
-+ get_key_type(&key1) != get_key_type(&key2)) /* every key field except offset must agree */
-+ return 0;
-+ if (get_key_offset(&key1) +
-+ reiser4_extent_size(p1, nr_units_extent(p1)) !=
-+ get_key_offset(&key2)) /* @p2 must start at offset of @p1 plus reiser4_extent_size() over all units of @p1 */
-+ return 0;
-+ return 1;
-+}
-+
-+/* item_plugin->b.nr_units: number of reiser4_extent units in the item at @coord (item length / unit size) */
-+pos_in_node_t nr_units_extent(const coord_t * coord)
-+{
-+ /* length of extent item has to be multiple of extent size */
-+ assert("vs-1424",
-+ (item_length_by_coord(coord) % sizeof(reiser4_extent)) == 0);
-+ return item_length_by_coord(coord) / sizeof(reiser4_extent);
-+}
-+
-+/* item_plugin->b.lookup: point @coord at the unit covering the offset of @key, else after the last unit */
-+lookup_result
-+lookup_extent(const reiser4_key * key, lookup_bias bias UNUSED_ARG,
-+ coord_t * coord)
-+{ /* znode and item_pos are
-+ set to an extent item to
-+ look through */
-+ reiser4_key item_key;
-+ reiser4_block_nr lookuped, offset;
-+ unsigned i, nr_units;
-+ reiser4_extent *ext;
-+ unsigned blocksize;
-+ unsigned char blocksize_bits;
-+
-+ item_key_by_coord(coord, &item_key);
-+ offset = get_key_offset(&item_key); /* running byte offset, starts at the item's first byte */
-+
-+ /* key we are looking for must be greater than key of item @coord */
-+ assert("vs-414", keygt(key, &item_key));
-+
-+ assert("umka-99945",
-+ !keygt(key, max_key_inside_extent(coord, &item_key))); /* when evaluated this overwrites item_key; offset was read above */
-+
-+ ext = extent_item(coord);
-+ assert("vs-1350", (char *)ext == (zdata(coord->node) + coord->offset));
-+
-+ blocksize = current_blocksize; /* NOTE(review): assigned but never read in this function */
-+ blocksize_bits = current_blocksize_bits;
-+
-+ /* offset we are looking for */
-+ lookuped = get_key_offset(key);
-+
-+ nr_units = nr_units_extent(coord);
-+ /* walk the extents until the one that covers the wanted offset */
-+ for (i = 0; i < nr_units; i++, ext++) {
-+ offset += (extent_get_width(ext) << blocksize_bits); /* offset is now the first byte past unit i */
-+ if (offset > lookuped) {
-+ /* desired byte is somewhere in this extent */
-+ coord->unit_pos = i;
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ }
-+ }
-+
-+ /* set coord after last unit */
-+ coord->unit_pos = nr_units - 1;
-+ coord->between = AFTER_UNIT;
-+ return CBK_COORD_FOUND; /* this function has no other return value */
-+}
-+
-+/* item_plugin->b.paste
-+ the item @coord is set to has already been extended by @data->length bytes of
-+ free space. data->data contains the units to be pasted into the item at
-+ position @coord->unit_pos. They must fit into that free space.
-+ @coord must be set between units (or at unit 0 of an item with no old units).
-+*/
-+int
-+paste_extent(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG)
-+{
-+ unsigned old_nr_units;
-+ reiser4_extent *ext;
-+ int item_length;
-+
-+ ext = extent_item(coord);
-+ item_length = item_length_by_coord(coord);
-+ old_nr_units = (item_length - data->length) / sizeof(reiser4_extent); /* units present before the free space was added */
-+
-+ /* this is also used to copy extent into newly created item, so
-+ old_nr_units could be 0 */
-+ assert("vs-260", item_length >= data->length);
-+
-+ /* make sure that coord is set properly */
-+ assert("vs-35",
-+ ((!coord_is_existing_unit(coord))
-+ || (!old_nr_units && !coord->unit_pos)));
-+
-+ /* first unit to be moved */
-+ switch (coord->between) {
-+ case AFTER_UNIT:
-+ coord->unit_pos++; /* fall through */
-+ case BEFORE_UNIT:
-+ coord->between = AT_UNIT;
-+ break;
-+ case AT_UNIT:
-+ assert("vs-331", !old_nr_units && !coord->unit_pos);
-+ break;
-+ default:
-+ impossible("vs-330", "coord is set improperly");
-+ }
-+
-+ /* prepare space for new units */
-+ memmove(ext + coord->unit_pos + data->length / sizeof(reiser4_extent),
-+ ext + coord->unit_pos,
-+ (old_nr_units - coord->unit_pos) * sizeof(reiser4_extent)); /* regions may overlap, hence memmove */
-+
-+ /* copy new data from kernel space */
-+ assert("vs-556", data->user == 0);
-+ memcpy(ext + coord->unit_pos, data->data, (unsigned)data->length);
-+
-+ /* after paste @coord is set to first of pasted units */
-+ assert("vs-332", coord_is_existing_unit(coord));
-+ assert("vs-333",
-+ !memcmp(data->data, extent_by_coord(coord),
-+ (unsigned)data->length));
-+ return 0; /* the only return value of this function */
-+}
-+
-+/* item_plugin->b.can_shift: returns how many whole units can be shifted; *@size receives the same amount in bytes */
-+int
-+can_shift_extent(unsigned free_space, coord_t * source,
-+ znode * target UNUSED_ARG, shift_direction pend UNUSED_ARG,
-+ unsigned *size, unsigned want)
-+{
-+ *size = item_length_by_coord(source); /* start from the whole item */
-+ if (*size > free_space)
-+ /* never split a unit of extent item */
-+ *size = free_space - free_space % sizeof(reiser4_extent);
-+
-+ /* we can shift *size bytes, calculate how many do we want to shift */
-+ if (*size > want * sizeof(reiser4_extent))
-+ *size = want * sizeof(reiser4_extent);
-+
-+ if (*size % sizeof(reiser4_extent) != 0)
-+ impossible("vs-119", "Wrong extent size: %u %zu", *size,
-+ sizeof(reiser4_extent)); /* %u/%zu: both arguments are unsigned (unsigned, size_t) */
-+ return *size / sizeof(reiser4_extent);
-+
-+}
-+
-+/* item_plugin->b.copy_units: copy @count units (@free_space bytes) of @source into the item at @target */
-+void
-+copy_units_extent(coord_t * target, coord_t * source,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space, unsigned free_space)
-+{
-+ char *from_ext, *to_ext;
-+
-+ assert("vs-217", free_space == count * sizeof(reiser4_extent));
-+
-+ from_ext = item_body_by_coord(source);
-+ to_ext = item_body_by_coord(target);
-+
-+ if (where_is_free_space == SHIFT_LEFT) { /* leading units of @source go to the end of @target */
-+ assert("vs-215", from == 0);
-+
-+ /* At this moment, item length was already updated in the item
-+ header by shifting code, hence nr_units_extent() will
-+ return "new" number of units---one we obtain after copying
-+ units.
-+ */
-+ to_ext +=
-+ (nr_units_extent(target) - count) * sizeof(reiser4_extent);
-+ } else { /* trailing units of @source go to the start of @target */
-+ reiser4_key key;
-+ coord_t coord;
-+
-+ assert("vs-216",
-+ from + count == coord_last_unit_pos(source) + 1);
-+
-+ from_ext += item_length_by_coord(source) - free_space; /* skip to the last @free_space bytes of @source */
-+
-+ /* new units are inserted before first unit in an item,
-+ therefore, we have to update item key */
-+ coord = *source;
-+ coord.unit_pos = from;
-+ unit_key_extent(&coord, &key); /* key of the first copied unit becomes the new item key */
-+
-+ node_plugin_by_node(target->node)->update_item_key(target, &key,
-+ NULL /*info */);
-+ }
-+
-+ memcpy(to_ext, from_ext, free_space);
-+}
-+
-+/* item_plugin->b.create_hook
-+ @arg, if not NULL, is a coord in the leaf node for which we need to update right delimiting key; always returns 0 */
-+int create_hook_extent(const coord_t * coord, void *arg)
-+{
-+ coord_t *child_coord;
-+ znode *node;
-+ reiser4_key key;
-+ reiser4_tree *tree;
-+
-+ if (!arg) /* nothing to update */
-+ return 0;
-+
-+ child_coord = arg;
-+ tree = znode_get_tree(coord->node);
-+
-+ assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL);
-+
-+ write_lock_tree(tree); /* tree lock is taken before dk lock and released after it */
-+ write_lock_dk(tree);
-+ /* pick the node whose right delimiting key has to be updated: the left
-+ neighbor of the child's node, or that node itself */
-+ if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) {
-+ assert("vs-411", znode_is_left_connected(child_coord->node));
-+ node = child_coord->node->left; /* may be NULL, checked below */
-+ } else {
-+ assert("vs-412", coord_wrt(child_coord) == COORD_ON_THE_RIGHT);
-+ node = child_coord->node;
-+ assert("nikita-3314", node != NULL);
-+ }
-+
-+ if (node != NULL) {
-+ znode_set_rd_key(node, item_key_by_coord(coord, &key)); /* right delimiting key := key of the new extent item */
-+
-+ assert("nikita-3282", check_sibling_list(node));
-+ /* break sibling links */
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && node->right) {
-+ ON_DEBUG(node->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ node->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ node->right->left = NULL; /* must be cleared before node->right itself */
-+ node->right = NULL;
-+ }
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+ return 0;
-+}
-+
-+#define ITEM_TAIL_KILLED 0 /* kill_hook_extent() result: tail of the item was removed */
-+#define ITEM_HEAD_KILLED 1 /* kill_hook_extent() result: head of the item was removed */
-+#define ITEM_KILLED 2 /* kill_hook_extent() result: the whole item was removed */
-+
-+/* item_plugin->b.kill_hook
-+ this is called when @count units starting from @from-th one are going to be removed
-+ returns one of ITEM_KILLED, ITEM_TAIL_KILLED, ITEM_HEAD_KILLED */
-+int
-+kill_hook_extent(const coord_t * coord, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *kdata)
-+{
-+ reiser4_extent *ext;
-+ reiser4_block_nr start, length;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ struct inode *inode;
-+ reiser4_tree *tree;
-+ pgoff_t from_off, to_off, offset, skip;
-+ int retval;
-+
-+ /* these are located in memory kmalloc-ed by kill_node_content */
-+ reiser4_key *min_item_key, *max_item_key, *from_key, *to_key, *key;
-+ coord_t *dup, *next;
-+
-+ assert("zam-811", znode_is_write_locked(coord->node));
-+ assert("nikita-3315", kdata != NULL);
-+ assert("vs-34", kdata->buf != NULL);
-+
-+ /* map structures to kdata->buf */
-+ min_item_key = (reiser4_key *) (kdata->buf); /* buf layout: five keys followed by two coords */
-+ max_item_key = min_item_key + 1;
-+ from_key = max_item_key + 1;
-+ to_key = from_key + 1;
-+ key = to_key + 1;
-+ dup = (coord_t *) (key + 1);
-+ next = dup + 1;
-+
-+ item_key_by_coord(coord, min_item_key);
-+ max_item_key_by_coord(coord, max_item_key);
-+
-+ if (kdata->params.from_key) { /* caller supplied an explicit key range */
-+ pfrom_key = kdata->params.from_key;
-+ pto_key = kdata->params.to_key;
-+ } else { /* derive the key range from units @from .. @from + @count - 1 */
-+ assert("vs-1549", from == coord->unit_pos);
-+ unit_key_by_coord(coord, from_key);
-+ pfrom_key = from_key;
-+
-+ coord_dup(dup, coord);
-+ dup->unit_pos = from + count - 1;
-+ max_unit_key_by_coord(dup, to_key);
-+ pto_key = to_key;
-+ }
-+
-+ if (!keylt(pto_key, max_item_key)) { /* range reaches the end of the item */
-+ if (!keygt(pfrom_key, min_item_key)) { /* ...and starts at or before its first key */
-+ znode *left, *right;
-+
-+ /* item is to be removed completely */
-+ assert("nikita-3316", kdata->left != NULL
-+ && kdata->right != NULL);
-+
-+ left = kdata->left->node;
-+ right = kdata->right->node;
-+
-+ tree = current_tree;
-+ /* we have to do two things:
-+ *
-+ * 1. link left and right formatted neighbors of
-+ * extent being removed, and
-+ *
-+ * 2. update their delimiting keys.
-+ *
-+ * atomicity of these operations is protected by
-+ * taking dk-lock and tree-lock.
-+ */
-+ /* if neighbors of item being removed are znodes -
-+ * link them */
-+ write_lock_tree(tree);
-+ write_lock_dk(tree);
-+ link_left_and_right(left, right);
-+ if (left) {
-+ /* update right delimiting key of left
-+ * neighbor of extent item */
-+ /*coord_t next;
-+ reiser4_key key; */
-+
-+ coord_dup(next, coord);
-+
-+ if (coord_next_item(next)) /* nonzero: no next item, use the node's right delimiting key */
-+ *key = *znode_get_rd_key(coord->node);
-+ else
-+ item_key_by_coord(next, key);
-+ znode_set_rd_key(left, key);
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+
-+ from_off =
-+ get_key_offset(min_item_key) >> PAGE_CACHE_SHIFT;
-+ to_off =
-+ (get_key_offset(max_item_key) +
-+ 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_KILLED;
-+ } else {
-+ /* tail of item is to be removed */
-+ from_off =
-+ (get_key_offset(pfrom_key) + PAGE_CACHE_SIZE -
-+ 1) >> PAGE_CACHE_SHIFT; /* rounded up to a page boundary */
-+ to_off =
-+ (get_key_offset(max_item_key) +
-+ 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_TAIL_KILLED;
-+ }
-+ } else {
-+ /* head of item is to be removed */
-+ assert("vs-1571", keyeq(pfrom_key, min_item_key));
-+ assert("vs-1572",
-+ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ 0);
-+ assert("vs-1573",
-+ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE -
-+ 1)) == 0);
-+
-+ if (kdata->left->node) { /* NOTE(review): kdata->left is not asserted non-NULL on this path, unlike the ITEM_KILLED path */
-+ /* update right delimiting key of left neighbor of extent item */
-+ /*reiser4_key key; */
-+
-+ *key = *pto_key;
-+ set_key_offset(key, get_key_offset(pto_key) + 1);
-+
-+ write_lock_dk(current_tree);
-+ znode_set_rd_key(kdata->left->node, key);
-+ write_unlock_dk(current_tree);
-+ }
-+
-+ from_off = get_key_offset(pfrom_key) >> PAGE_CACHE_SHIFT;
-+ to_off = (get_key_offset(pto_key) + 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_HEAD_KILLED;
-+ }
-+
-+ inode = kdata->inode;
-+ assert("vs-1545", inode != NULL);
-+ if (inode != NULL)
-+ /* take care of pages and jnodes corresponding to part of item being killed */
-+ reiser4_invalidate_pages(inode->i_mapping, from_off,
-+ to_off - from_off,
-+ kdata->params.truncate);
-+
-+ ext = extent_item(coord) + from;
-+ offset =
-+ (get_key_offset(min_item_key) +
-+ reiser4_extent_size(coord, from)) >> PAGE_CACHE_SHIFT; /* page index at which unit @from starts */
-+
-+ assert("vs-1551", from_off >= offset);
-+ assert("vs-1552", from_off - offset <= extent_get_width(ext));
-+ skip = from_off - offset; /* leading part of unit @from that is kept */
-+ offset = from_off;
-+
-+ while (offset < to_off) { /* one unit per iteration; from_off, to_off and offset are page indices */
-+ length = extent_get_width(ext) - skip;
-+ if (state_of_extent(ext) == HOLE_EXTENT) { /* nothing is released for a hole: just advance */
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ continue;
-+ }
-+
-+ if (offset + length > to_off) { /* clamp to the end of the killed range */
-+ length = to_off - offset;
-+ }
-+
-+ DQUOT_FREE_BLOCK_NODIRTY(inode, length); /* NOTE(review): inode is used here without the NULL check applied above */
-+
-+ if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
-+ /* some jnodes corresponding to this unallocated extent */
-+ fake_allocated2free(length, 0 /* unformatted */ );
-+
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ continue;
-+ }
-+
-+ assert("vs-1218", state_of_extent(ext) == ALLOCATED_EXTENT);
-+
-+ if (length != 0) {
-+ start = extent_get_start(ext) + skip;
-+
-+ /* BA_DEFER bit parameter is turned on because blocks which get freed are not safe to be freed
-+ immediately */
-+ reiser4_dealloc_blocks(&start, &length,
-+ 0 /* not used */ ,
-+ BA_DEFER
-+ /* unformatted with defer */ );
-+ }
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ }
-+ return retval;
-+}
-+
-+/* item_plugin->b.kill_units: kill units @from..@to of the item at @coord; returns bytes of whole units removed */
-+int
-+kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ reiser4_extent *ext;
-+ reiser4_key item_key;
-+ pos_in_node_t count;
-+ reiser4_key from_key, to_key;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ loff_t off;
-+ int result;
-+
-+ assert("vs-1541",
-+ ((kdata->params.from_key == NULL && kdata->params.to_key == NULL)
-+ || (kdata->params.from_key != NULL
-+ && kdata->params.to_key != NULL))); /* either both keys are given or neither */
-+
-+ if (kdata->params.from_key) {
-+ pfrom_key = kdata->params.from_key;
-+ pto_key = kdata->params.to_key;
-+ } else {
-+ coord_t dup;
-+
-+ /* calculate key range of kill */
-+ assert("vs-1549", from == coord->unit_pos);
-+ unit_key_by_coord(coord, &from_key);
-+ pfrom_key = &from_key;
-+
-+ coord_dup(&dup, coord);
-+ dup.unit_pos = to;
-+ max_unit_key_by_coord(&dup, &to_key);
-+ pto_key = &to_key;
-+ }
-+
-+ item_key_by_coord(coord, &item_key);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_key max_item_key;
-+
-+ max_item_key_by_coord(coord, &max_item_key);
-+
-+ if (new_first) {
-+ /* head of item is to be cut */
-+ assert("vs-1542", keyeq(pfrom_key, &item_key));
-+ assert("vs-1538", keylt(pto_key, &max_item_key));
-+ } else {
-+ /* tail of item is to be cut */
-+ assert("vs-1540", keygt(pfrom_key, &item_key));
-+ assert("vs-1543", !keylt(pto_key, &max_item_key));
-+ }
-+ }
-+#endif
-+
-+ if (smallest_removed) /* optional output: first key of the killed range */
-+ *smallest_removed = *pfrom_key;
-+
-+ if (new_first) {
-+ /* item head is cut. Item key will change. This new key is calculated here */
-+ assert("vs-1556",
-+ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ (PAGE_CACHE_SIZE - 1));
-+ *new_first = *pto_key;
-+ set_key_offset(new_first, get_key_offset(new_first) + 1);
-+ }
-+
-+ count = to - from + 1; /* units @from..@to inclusive */
-+ result = kill_hook_extent(coord, from, count, kdata);
-+ if (result == ITEM_TAIL_KILLED) {
-+ assert("vs-1553",
-+ get_key_offset(pfrom_key) >=
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from));
-+ off =
-+ get_key_offset(pfrom_key) -
-+ (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from)); /* bytes of unit @from that precede the killed range */
-+ if (off) {
-+ /* unit @from is to be cut partially. Its width decreases */
-+ ext = extent_item(coord) + from;
-+ extent_set_width(ext,
-+ (off + PAGE_CACHE_SIZE -
-+ 1) >> PAGE_CACHE_SHIFT);
-+ count--; /* unit @from stays in the item */
-+ }
-+ } else { /* every result other than ITEM_TAIL_KILLED lands here; the assert expects ITEM_HEAD_KILLED */
-+ __u64 max_to_offset;
-+ __u64 rest;
-+
-+ assert("vs-1575", result == ITEM_HEAD_KILLED);
-+ assert("", from == 0);
-+ assert("",
-+ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE -
-+ 1)) == 0);
-+ assert("",
-+ get_key_offset(pto_key) + 1 >
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to));
-+ max_to_offset =
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1; /* last byte covered by unit @to */
-+ assert("", get_key_offset(pto_key) <= max_to_offset);
-+
-+ rest =
-+ (max_to_offset -
-+ get_key_offset(pto_key)) >> PAGE_CACHE_SHIFT; /* pages of unit @to that survive the kill */
-+ if (rest) {
-+ /* unit @to is to be cut partially */
-+ ext = extent_item(coord) + to;
-+
-+ assert("", extent_get_width(ext) > rest);
-+
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ (extent_get_width(ext) -
-+ rest)); /* start moves forward by the number of blocks removed */
-+
-+ extent_set_width(ext, rest);
-+ count--; /* unit @to stays in the item */
-+ }
-+ }
-+ return count * sizeof(reiser4_extent);
-+}
-+
-+/* item_plugin->b.cut_units
-+ this is too similar to kill_units_extent; unlike it, kill_hook_extent() is not called here */
-+int
-+cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *cdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ reiser4_extent *ext;
-+ reiser4_key item_key;
-+ pos_in_node_t count;
-+ reiser4_key from_key, to_key;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ loff_t off;
-+
-+ assert("vs-1541",
-+ ((cdata->params.from_key == NULL && cdata->params.to_key == NULL)
-+ || (cdata->params.from_key != NULL
-+ && cdata->params.to_key != NULL))); /* either both keys are given or neither */
-+
-+ if (cdata->params.from_key) {
-+ pfrom_key = cdata->params.from_key;
-+ pto_key = cdata->params.to_key;
-+ } else {
-+ coord_t dup;
-+
-+ /* calculate key range of cut */
-+ coord_dup(&dup, coord);
-+ dup.unit_pos = from;
-+ unit_key_by_coord(&dup, &from_key);
-+
-+ dup.unit_pos = to;
-+ max_unit_key_by_coord(&dup, &to_key);
-+
-+ pfrom_key = &from_key;
-+ pto_key = &to_key;
-+ }
-+
-+ assert("vs-1555",
-+ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) == 0); /* range starts on a page boundary */
-+ assert("vs-1556",
-+ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ (PAGE_CACHE_SIZE - 1)); /* range ends on the last byte of a page */
-+
-+ item_key_by_coord(coord, &item_key);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_key max_item_key;
-+
-+ assert("vs-1584",
-+ get_key_locality(pfrom_key) ==
-+ get_key_locality(&item_key));
-+ assert("vs-1585",
-+ get_key_type(pfrom_key) == get_key_type(&item_key));
-+ assert("vs-1586",
-+ get_key_objectid(pfrom_key) ==
-+ get_key_objectid(&item_key));
-+ assert("vs-1587",
-+ get_key_ordering(pfrom_key) ==
-+ get_key_ordering(&item_key));
-+
-+ max_item_key_by_coord(coord, &max_item_key);
-+
-+ if (new_first != NULL) {
-+ /* head of item is to be cut */
-+ assert("vs-1542", keyeq(pfrom_key, &item_key));
-+ assert("vs-1538", keylt(pto_key, &max_item_key));
-+ } else {
-+ /* tail of item is to be cut */
-+ assert("vs-1540", keygt(pfrom_key, &item_key));
-+ assert("vs-1543", keyeq(pto_key, &max_item_key));
-+ }
-+ }
-+#endif
-+
-+ if (smallest_removed) /* optional output: first key of the cut range */
-+ *smallest_removed = *pfrom_key;
-+
-+ if (new_first) {
-+ /* item head is cut. Item key will change. This new key is calculated here */
-+ *new_first = *pto_key;
-+ set_key_offset(new_first, get_key_offset(new_first) + 1);
-+ }
-+
-+ count = to - from + 1; /* units @from..@to inclusive */
-+
-+ assert("vs-1553",
-+ get_key_offset(pfrom_key) >=
-+ get_key_offset(&item_key) + reiser4_extent_size(coord, from));
-+ off =
-+ get_key_offset(pfrom_key) - (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from)); /* bytes of unit @from that precede the cut range */
-+ if (off) {
-+ /* tail of unit @from is to be cut partially. Its width decreases */
-+ assert("vs-1582", new_first == NULL);
-+ ext = extent_item(coord) + from;
-+ extent_set_width(ext, off >> PAGE_CACHE_SHIFT);
-+ count--; /* unit @from stays in the item */
-+ }
-+
-+ assert("vs-1554",
-+ get_key_offset(pto_key) <=
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1);
-+ off =
-+ (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1) -
-+ get_key_offset(pto_key); /* bytes of unit @to that follow the cut range */
-+ if (off) {
-+ /* @to_key is smaller than max key of unit @to. Unit @to will not be removed. It gets start increased
-+ and width decreased. */
-+ assert("vs-1583", (off & (PAGE_CACHE_SIZE - 1)) == 0);
-+ ext = extent_item(coord) + to;
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ (extent_get_width(ext) -
-+ (off >> PAGE_CACHE_SHIFT)));
-+
-+ extent_set_width(ext, (off >> PAGE_CACHE_SHIFT));
-+ count--; /* unit @to stays in the item */
-+ }
-+ return count * sizeof(reiser4_extent); /* bytes of whole units removed */
-+}
-+
-+/* item_plugin->b.unit_key: fill @key with the item key advanced by reiser4_extent_size(coord, unit_pos); returns @key */
-+reiser4_key *unit_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-300", coord_is_existing_unit(coord));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key,
-+ (get_key_offset(key) +
-+ reiser4_extent_size(coord, coord->unit_pos))); /* size is taken over the units before unit_pos */
-+
-+ return key;
-+}
-+
-+/* item_plugin->b.max_unit_key: fill @key with the last key covered by the unit at @coord; returns @key */
-+reiser4_key *max_unit_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-300", coord_is_existing_unit(coord)); /* NOTE(review): label "vs-300" is also used in unit_key_extent() */
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key,
-+ (get_key_offset(key) +
-+ reiser4_extent_size(coord, coord->unit_pos + 1) - 1)); /* one byte before the start of the next unit */
-+ return key;
-+}
-+
-+/* item_plugin->b.estimate
-+ item_plugin->b.item_data_by_flow */
-+
-+#if REISER4_DEBUG
-+
-+/* item_plugin->b.check
-+ used for debugging, every item should have here the most complete
-+ possible check of the consistency of the item that the inventor can
-+ construct. Returns 0 if the item looks sane, -1 with *@error set otherwise
-+*/
-+int reiser4_check_extent(const coord_t * coord /* coord of item to check */,
-+ const char **error /* where to store error message */)
-+{
-+ reiser4_extent *ext, *first;
-+ unsigned i, j;
-+ reiser4_block_nr start, width, blk_cnt;
-+ unsigned num_units;
-+ reiser4_tree *tree;
-+ oid_t oid;
-+ reiser4_key key;
-+ coord_t scan;
-+
-+ assert("vs-933", REISER4_DEBUG);
-+
-+ if (znode_get_level(coord->node) != TWIG_LEVEL) {
-+ *error = "Extent on the wrong level";
-+ return -1;
-+ }
-+ if (item_length_by_coord(coord) % sizeof(reiser4_extent) != 0) {
-+ *error = "Wrong item size";
-+ return -1;
-+ }
-+ ext = first = extent_item(coord);
-+ blk_cnt = reiser4_block_count(reiser4_get_current_sb());
-+ num_units = coord_num_units(coord);
-+ tree = znode_get_tree(coord->node); /* read only by the disabled #if 0 block below */
-+ item_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key); /* read only by the disabled #if 0 block below */
-+ coord_dup(&scan, coord);
-+
-+ for (i = 0; i < num_units; ++i, ++ext) {
-+ __u64 index;
-+
-+ scan.unit_pos = i;
-+ index = extent_unit_index(&scan); /* read only by the disabled #if 0 block below */
-+
-+#if 0
-+ /* check that all jnodes are present for the unallocated
-+ * extent */
-+ if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
-+ for (j = 0; j < extent_get_width(ext); j++) {
-+ jnode *node;
-+
-+ node = jlookup(tree, oid, index + j);
-+ if (node == NULL) {
-+ print_coord("scan", &scan, 0);
-+ *error = "Jnode missing";
-+ return -1;
-+ }
-+ jput(node);
-+ }
-+ }
-+#endif
-+
-+ start = extent_get_start(ext);
-+ if (start < 2) /* presumably start values 0 and 1 mark hole/unallocated extents - TODO confirm against state_of_extent() */
-+ continue;
-+ /* extent is allocated one */
-+ width = extent_get_width(ext);
-+ if (start >= blk_cnt) {
-+ *error = "Start too large";
-+ return -1;
-+ }
-+ if (start + width > blk_cnt) {
-+ *error = "End too large";
-+ return -1;
-+ }
-+ /* make sure that this extent does not overlap with other
-+ allocated extents that precede it in this item */
-+ for (j = 0; j < i; j++) {
-+ if (state_of_extent(first + j) != ALLOCATED_EXTENT)
-+ continue;
-+ if (!
-+ ((extent_get_start(ext) >=
-+ extent_get_start(first + j) +
-+ extent_get_width(first + j))
-+ || (extent_get_start(ext) +
-+ extent_get_width(ext) <=
-+ extent_get_start(first + j)))) { /* neither entirely after nor entirely before unit j */
-+ *error = "Extent overlaps with others";
-+ return -1;
-+ }
-+ }
-+
-+ }
-+
-+ return 0;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/internal.c linux-2.6.20/fs/reiser4/plugin/item/internal.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/internal.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/internal.c 2007-05-06 14:50:43.815011970 +0400
-@@ -0,0 +1,396 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Implementation of internal-item plugin methods. */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "internal.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../../jnode.h"
-+#include "../../znode.h"
-+#include "../../tree_walk.h"
-+#include "../../tree_mod.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../block_alloc.h"
-+
-+/* see internal.h for explanation */
-+
-+/* plugin->u.item.b.mergeable: always returns 0, both arguments are ignored */
-+int mergeable_internal(const coord_t * p1 UNUSED_ARG /* first item */ ,
-+ const coord_t * p2 UNUSED_ARG /* second item */ )
-+{
-+ /* internal items are not mergeable */
-+ return 0;
-+}
-+
-+/* ->lookup() method for internal items: compares the unit key at @coord with @key */
-+lookup_result lookup_internal(const reiser4_key * key /* key to look up */ ,
-+ lookup_bias bias UNUSED_ARG /* lookup bias */ ,
-+ coord_t * coord /* coord of item */ )
-+{
-+ reiser4_key ukey;
-+
-+ switch (keycmp(unit_key_by_coord(coord, &ukey), key)) {
-+ default:
-+ impossible("", "keycmp()?!"); /* no break: continues into LESS_THAN if impossible() returns */
-+ case LESS_THAN:
-+ /* FIXME-VS: AFTER_ITEM used to be here. But with new coord
-+ item plugin can not be taken using coord set this way */
-+ assert("vs-681", coord->unit_pos == 0);
-+ coord->between = AFTER_UNIT; /* fall through */
-+ case EQUAL_TO:
-+ return CBK_COORD_FOUND;
-+ case GREATER_THAN: /* unit key is greater than @key */
-+ return CBK_COORD_NOTFOUND;
-+ }
-+}
-+
-+/* return body of internal item at @coord, cast to internal_item_layout */
-+static internal_item_layout *internal_at(const coord_t * coord /* coord of
-+ * item */ )
-+{
-+ assert("nikita-607", coord != NULL);
-+ assert("nikita-1650",
-+ item_plugin_by_coord(coord) ==
-+ item_plugin_by_id(NODE_POINTER_ID)); /* item at @coord must use the NODE_POINTER_ID plugin */
-+ return (internal_item_layout *) item_body_by_coord(coord);
-+}
-+
-+void reiser4_update_internal(const coord_t * coord, /* overwrite the pointer of the internal item at @coord with *@blocknr */
-+ const reiser4_block_nr * blocknr)
-+{
-+ internal_item_layout *item = internal_at(coord);
-+ assert("nikita-2959", reiser4_blocknr_is_sane(blocknr));
-+
-+ put_unaligned(cpu_to_le64(*blocknr), &item->pointer); /* stored as little-endian 64 bit, written with an unaligned store */
-+}
-+
-+/* return child block number stored in the internal item at @coord, in cpu byte order */
-+static reiser4_block_nr pointer_at(const coord_t * coord /* coord of item */ )
-+{
-+ assert("nikita-608", coord != NULL);
-+ return le64_to_cpu(get_unaligned(&internal_at(coord)->pointer)); /* inverse of the store in reiser4_update_internal() */
-+}
-+
-+/* get znode pointed to by internal @item; thin wrapper around child_znode() */
-+static znode *znode_at(const coord_t * item /* coord of item */ ,
-+ znode * parent /* parent node */ )
-+{
-+ return child_znode(item, parent, 1, 0); /* callers in this file check the result for NULL and zput() it */
-+}
-+
-+/* store pointer from internal item into "block". Implementation of
-+ ->down_link() method; @key is used by the debug assertion only */
-+void down_link_internal(const coord_t * coord /* coord of item */ ,
-+ const reiser4_key * key UNUSED_ARG /* key to get
-+ * pointer for */ ,
-+ reiser4_block_nr * block /* resulting block number */ )
-+{
-+ ON_DEBUG(reiser4_key item_key);
-+
-+ assert("nikita-609", coord != NULL);
-+ assert("nikita-611", block != NULL);
-+ assert("nikita-612", (key == NULL) ||
-+ /* twig horrors */
-+ (znode_get_level(coord->node) == TWIG_LEVEL)
-+ || keyle(item_key_by_coord(coord, &item_key), key));
-+
-+ *block = pointer_at(coord);
-+ assert("nikita-2960", reiser4_blocknr_is_sane(block));
-+}
-+
-+/* Get the child's block number into *@block, or 0 if the stored number is fake. Always returns 0. */
-+int
-+utmost_child_real_block_internal(const coord_t * coord, sideof side UNUSED_ARG,
-+ reiser4_block_nr * block)
-+{
-+ assert("jmacd-2059", coord != NULL);
-+
-+ *block = pointer_at(coord);
-+ assert("nikita-2961", reiser4_blocknr_is_sane(block));
-+
-+ if (reiser4_blocknr_is_fake(block)) { /* fake block numbers are reported as 0 */
-+ *block = 0;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Return the child: looks up the znode for the stored block number and stores its jnode in *@childp. */
-+int
-+utmost_child_internal(const coord_t * coord, sideof side UNUSED_ARG,
-+ jnode ** childp)
-+{
-+ reiser4_block_nr block = pointer_at(coord);
-+ znode *child;
-+
-+ assert("jmacd-2059", childp != NULL); /* NOTE(review): label "jmacd-2059" is also used in utmost_child_real_block_internal() */
-+ assert("nikita-2962", reiser4_blocknr_is_sane(&block));
-+
-+ child = zlook(znode_get_tree(coord->node), &block);
-+
-+ if (IS_ERR(child)) { /* error pointer from zlook() is returned as an error code */
-+ return PTR_ERR(child);
-+ }
-+
-+ *childp = ZJNODE(child); /* NOTE(review): if zlook() can return NULL, it reaches ZJNODE() unchecked - confirm */
-+
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+static void check_link(znode * left, znode * right) /* debug: assert sibling links on the walk from @left towards @right */
-+{
-+ znode *scan;
-+
-+ for (scan = left; scan != right; scan = scan->right) {
-+ if (ZF_ISSET(scan, JNODE_RIP)) /* stop at a node flagged JNODE_RIP */
-+ break;
-+ if (znode_is_right_connected(scan) && scan->right != NULL) {
-+ if (ZF_ISSET(scan->right, JNODE_RIP))
-+ break;
-+ assert("nikita-3285",
-+ znode_is_left_connected(scan->right));
-+ assert("nikita-3265",
-+ ergo(scan != left,
-+ ZF_ISSET(scan, JNODE_HEARD_BANSHEE))); /* every node after @left on the walk must have this flag */
-+ assert("nikita-3284", scan->right->left == scan);
-+ } else /* chain ends before @right is reached */
-+ break;
-+ }
-+}
-+
-+int check__internal(const coord_t * coord, const char **error)
-+{
-+ reiser4_block_nr blk;
-+ znode *child;
-+ coord_t cpy;
-+
-+ blk = pointer_at(coord);
-+ if (!reiser4_blocknr_is_sane(&blk)) {
-+ *error = "Invalid pointer";
-+ return -1;
-+ }
-+ coord_dup(&cpy, coord);
-+ child = znode_at(&cpy, cpy.node);
-+ if (child != NULL) {
-+ znode *left_child;
-+ znode *right_child;
-+
-+ left_child = right_child = NULL;
-+
-+ assert("nikita-3256", znode_invariant(child));
-+ if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) {
-+ left_child = znode_at(&cpy, cpy.node);
-+ if (left_child != NULL) {
-+ read_lock_tree(znode_get_tree(child));
-+ check_link(left_child, child);
-+ read_unlock_tree(znode_get_tree(child));
-+ zput(left_child);
-+ }
-+ }
-+ coord_dup(&cpy, coord);
-+ if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) {
-+ right_child = znode_at(&cpy, cpy.node);
-+ if (right_child != NULL) {
-+ read_lock_tree(znode_get_tree(child));
-+ check_link(child, right_child);
-+ read_unlock_tree(znode_get_tree(child));
-+ zput(right_child);
-+ }
-+ }
-+ zput(child);
-+ }
-+ return 0;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* return true only if this item really points to "block" */
-+/* Audited by: green(2002.06.14) */
-+int has_pointer_to_internal(const coord_t * coord /* coord of item */ ,
-+ const reiser4_block_nr * block /* block number to
-+ * check */ )
-+{
-+ assert("nikita-613", coord != NULL);
-+ assert("nikita-614", block != NULL);
-+
-+ return pointer_at(coord) == *block;
-+}
-+
-+/* hook called by ->create_item() method of node plugin after new internal
-+ item was just created.
-+
-+ This is point where pointer to new node is inserted into tree. Initialize
-+ parent pointer in child znode, insert child into sibling list and slum.
-+
-+*/
-+int create_hook_internal(const coord_t * item /* coord of item */ ,
-+ void *arg /* child's left neighbor, if any */ )
-+{
-+ znode *child;
-+ __u64 child_ptr;
-+
-+ assert("nikita-1252", item != NULL);
-+ assert("nikita-1253", item->node != NULL);
-+ assert("nikita-1181", znode_get_level(item->node) > LEAF_LEVEL);
-+ assert("nikita-1450", item->unit_pos == 0);
-+
-+ /*
-+ * When preparing an item for insertion, build_child_ptr_data points the
-+ * data to be inserted at the jnode's blocknr, which is in cpu byte
-+ * order. Node's create_item simply copies those data. As a result the
-+ * child pointer is in cpu byte order. Convert the content of the internal
-+ * item to little endian byte order.
-+ */
-+ child_ptr = get_unaligned((__u64 *)item_body_by_coord(item));
-+ reiser4_update_internal(item, &child_ptr);
-+
-+ child = znode_at(item, item->node);
-+ if (child != NULL && !IS_ERR(child)) {
-+ znode *left;
-+ int result = 0;
-+ reiser4_tree *tree;
-+
-+ left = arg;
-+ tree = znode_get_tree(item->node);
-+ write_lock_tree(tree);
-+ write_lock_dk(tree);
-+ assert("nikita-1400", (child->in_parent.node == NULL)
-+ || (znode_above_root(child->in_parent.node)));
-+ ++item->node->c_count;
-+ coord_to_parent_coord(item, &child->in_parent);
-+ sibling_list_insert_nolock(child, left);
-+
-+ assert("nikita-3297", ZF_ISSET(child, JNODE_ORPHAN));
-+ ZF_CLR(child, JNODE_ORPHAN);
-+
-+ if ((left != NULL) && !keyeq(znode_get_rd_key(left),
-+ znode_get_rd_key(child))) {
-+ znode_set_rd_key(child, znode_get_rd_key(left));
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return result;
-+ } else {
-+ if (child == NULL)
-+ child = ERR_PTR(-EIO);
-+ return PTR_ERR(child);
-+ }
-+}
-+
-+/* hook called by ->cut_and_kill() method of node plugin just before internal
-+ item is removed.
-+
-+ This is point where empty node is removed from the tree. Clear parent
-+ pointer in child, and mark node for pending deletion.
-+
-+ Node will actually be deleted later, in several installments:
-+
-+ . when last lock on this node will be released, node will be removed from
-+ the sibling list and its lock will be invalidated
-+
-+ . when last reference to this node will be dropped, bitmap will be updated
-+ and node will be actually removed from the memory.
-+
-+*/
-+int kill_hook_internal(const coord_t * item /* coord of item */ ,
-+ pos_in_node_t from UNUSED_ARG /* start unit */ ,
-+ pos_in_node_t count UNUSED_ARG /* stop unit */ ,
-+ struct carry_kill_data *p UNUSED_ARG)
-+{
-+ znode *child;
-+
-+ assert("nikita-1222", item != NULL);
-+ assert("nikita-1224", from == 0);
-+ assert("nikita-1225", count == 1);
-+
-+ child = znode_at(item, item->node);
-+ if (IS_ERR(child))
-+ return PTR_ERR(child);
-+ else if (node_is_empty(child)) {
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1397", znode_is_write_locked(child));
-+ assert("nikita-1398", child->c_count == 0);
-+ assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE));
-+
-+ tree = znode_get_tree(item->node);
-+ write_lock_tree(tree);
-+ init_parent_coord(&child->in_parent, NULL);
-+ --item->node->c_count;
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return 0;
-+ } else {
-+ warning("nikita-1223",
-+ "Cowardly refuse to remove link to non-empty node");
-+ zput(child);
-+ return RETERR(-EIO);
-+ }
-+}
-+
-+/* hook called by ->shift() node plugin method when internal item was just
-+ moved from one node to another.
-+
-+ Update parent pointer in child and c_counts in old and new parent
-+
-+*/
-+int shift_hook_internal(const coord_t * item /* coord of item */ ,
-+ unsigned from UNUSED_ARG /* start unit */ ,
-+ unsigned count UNUSED_ARG /* stop unit */ ,
-+ znode * old_node /* old parent */ )
-+{
-+ znode *child;
-+ znode *new_node;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1276", item != NULL);
-+ assert("nikita-1277", from == 0);
-+ assert("nikita-1278", count == 1);
-+ assert("nikita-1451", item->unit_pos == 0);
-+
-+ new_node = item->node;
-+ assert("nikita-2132", new_node != old_node);
-+ tree = znode_get_tree(item->node);
-+ child = child_znode(item, old_node, 1, 0);
-+ if (child == NULL)
-+ return 0;
-+ if (!IS_ERR(child)) {
-+ write_lock_tree(tree);
-+ ++new_node->c_count;
-+ assert("nikita-1395", znode_parent(child) == old_node);
-+ assert("nikita-1396", old_node->c_count > 0);
-+ coord_to_parent_coord(item, &child->in_parent);
-+ assert("nikita-1781", znode_parent(child) == new_node);
-+ assert("nikita-1782",
-+ check_tree_pointer(item, child) == NS_FOUND);
-+ --old_node->c_count;
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return 0;
-+ } else
-+ return PTR_ERR(child);
-+}
-+
-+/* plugin->u.item.b.max_key_inside - not defined */
-+
-+/* plugin->u.item.b.nr_units - item.c:single_unit */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/internal.h linux-2.6.20/fs/reiser4/plugin/item/internal.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/internal.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/internal.h 2007-05-06 14:50:43.815011970 +0400
-@@ -0,0 +1,57 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Internal item contains down-link to the child of the internal/twig
-+ node in a tree. It is internal items that are actually used during
-+ tree traversal. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ )
-+#define __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+
-+/* on-disk layout of internal item */
-+typedef struct internal_item_layout {
-+ /* 0 */ reiser4_dblock_nr pointer;
-+ /* 8 */
-+} internal_item_layout;
-+
-+struct cut_list;
-+
-+int mergeable_internal(const coord_t * p1, const coord_t * p2);
-+lookup_result lookup_internal(const reiser4_key * key, lookup_bias bias,
-+ coord_t * coord);
-+/* store pointer from internal item into "block". Implementation of
-+ ->down_link() method */
-+extern void down_link_internal(const coord_t * coord, const reiser4_key * key,
-+ reiser4_block_nr * block);
-+extern int has_pointer_to_internal(const coord_t * coord,
-+ const reiser4_block_nr * block);
-+extern int create_hook_internal(const coord_t * item, void *arg);
-+extern int kill_hook_internal(const coord_t * item, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *);
-+extern int shift_hook_internal(const coord_t * item, unsigned from,
-+ unsigned count, znode * old_node);
-+extern void reiser4_print_internal(const char *prefix, coord_t * coord);
-+
-+extern int utmost_child_internal(const coord_t * coord, sideof side,
-+ jnode ** child);
-+int utmost_child_real_block_internal(const coord_t * coord, sideof side,
-+ reiser4_block_nr * block);
-+
-+extern void reiser4_update_internal(const coord_t * coord,
-+ const reiser4_block_nr * blocknr);
-+/* FIXME: reiserfs has check_internal */
-+extern int check__internal(const coord_t * coord, const char **error);
-+
-+/* __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/item.c linux-2.6.20/fs/reiser4/plugin/item/item.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/item.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/item.c 2007-05-06 14:50:43.815011970 +0400
-@@ -0,0 +1,719 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* definition of item plugins. */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "sde.h"
-+#include "internal.h"
-+#include "item.h"
-+#include "static_stat.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../tree.h"
-+#include "../../context.h"
-+#include "ctail.h"
-+
-+/* return pointer to item body */
-+void item_body_by_coord_hard(coord_t * coord /* coord to query */ )
-+{
-+ assert("nikita-324", coord != NULL);
-+ assert("nikita-325", coord->node != NULL);
-+ assert("nikita-326", znode_is_loaded(coord->node));
-+ assert("nikita-3200", coord->offset == INVALID_OFFSET);
-+
-+ coord->offset =
-+ node_plugin_by_node(coord->node)->item_by_coord(coord) -
-+ zdata(coord->node);
-+ ON_DEBUG(coord->body_v = coord->node->times_locked);
-+}
-+
-+void *item_body_by_coord_easy(const coord_t * coord /* coord to query */ )
-+{
-+ return zdata(coord->node) + coord->offset;
-+}
-+
-+#if REISER4_DEBUG
-+
-+int item_body_is_valid(const coord_t * coord)
-+{
-+ return
-+ coord->offset ==
-+ node_plugin_by_node(coord->node)->item_by_coord(coord) -
-+ zdata(coord->node);
-+}
-+
-+#endif
-+
-+/* return length of item at @coord */
-+pos_in_node_t item_length_by_coord(const coord_t * coord /* coord to query */ )
-+{
-+ int len;
-+
-+ assert("nikita-327", coord != NULL);
-+ assert("nikita-328", coord->node != NULL);
-+ assert("nikita-329", znode_is_loaded(coord->node));
-+
-+ len = node_plugin_by_node(coord->node)->length_by_coord(coord);
-+ return len;
-+}
-+
-+void obtain_item_plugin(const coord_t * coord)
-+{
-+ assert("nikita-330", coord != NULL);
-+ assert("nikita-331", coord->node != NULL);
-+ assert("nikita-332", znode_is_loaded(coord->node));
-+
-+ coord_set_iplug((coord_t *) coord,
-+ node_plugin_by_node(coord->node)->
-+ plugin_by_coord(coord));
-+ assert("nikita-2479",
-+ coord_iplug(coord) ==
-+ node_plugin_by_node(coord->node)->plugin_by_coord(coord));
-+}
-+
-+/* return id of item */
-+/* Audited by: green(2002.06.15) */
-+item_id item_id_by_coord(const coord_t * coord /* coord to query */ )
-+{
-+ assert("vs-539", coord != NULL);
-+ assert("vs-538", coord->node != NULL);
-+ assert("vs-537", znode_is_loaded(coord->node));
-+ assert("vs-536", item_plugin_by_coord(coord) != NULL);
-+ assert("vs-540",
-+ item_id_by_plugin(item_plugin_by_coord(coord)) < LAST_ITEM_ID);
-+
-+ return item_id_by_plugin(item_plugin_by_coord(coord));
-+}
-+
-+/* return key of item at @coord */
-+/* Audited by: green(2002.06.15) */
-+reiser4_key *item_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-338", coord != NULL);
-+ assert("nikita-339", coord->node != NULL);
-+ assert("nikita-340", znode_is_loaded(coord->node));
-+
-+ return node_plugin_by_node(coord->node)->key_at(coord, key);
-+}
-+
-+/* this returns max key in the item */
-+reiser4_key *max_item_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ coord_t last;
-+
-+ assert("nikita-338", coord != NULL);
-+ assert("nikita-339", coord->node != NULL);
-+ assert("nikita-340", znode_is_loaded(coord->node));
-+
-+ /* make coord point to the last unit of the item */
-+ coord_dup(&last, coord);
-+ last.unit_pos = coord_num_units(&last) - 1;
-+ assert("vs-1560", coord_is_existing_unit(&last));
-+
-+ max_unit_key_by_coord(&last, key);
-+ return key;
-+}
-+
-+/* return key of unit at @coord */
-+reiser4_key *unit_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-772", coord != NULL);
-+ assert("nikita-774", coord->node != NULL);
-+ assert("nikita-775", znode_is_loaded(coord->node));
-+
-+ if (item_plugin_by_coord(coord)->b.unit_key != NULL)
-+ return item_plugin_by_coord(coord)->b.unit_key(coord, key);
-+ else
-+ return item_key_by_coord(coord, key);
-+}
-+
-+/* return the biggest key contained in the unit at @coord */
-+reiser4_key *max_unit_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-772", coord != NULL);
-+ assert("nikita-774", coord->node != NULL);
-+ assert("nikita-775", znode_is_loaded(coord->node));
-+
-+ if (item_plugin_by_coord(coord)->b.max_unit_key != NULL)
-+ return item_plugin_by_coord(coord)->b.max_unit_key(coord, key);
-+ else
-+ return unit_key_by_coord(coord, key);
-+}
-+
-+/* ->max_key_inside() method for items consisting of exactly one key (like
-+ stat-data) */
-+static reiser4_key *max_key_inside_single_key(const coord_t *
-+ coord /* coord of item */ ,
-+ reiser4_key *
-+ result /* resulting key */ )
-+{
-+ assert("nikita-604", coord != NULL);
-+
-+ /* coord -> key is starting key of this item and it has to be already
-+ filled in */
-+ return unit_key_by_coord(coord, result);
-+}
-+
-+/* ->nr_units() method for items consisting of exactly one unit always */
-+pos_in_node_t
-+nr_units_single_unit(const coord_t * coord UNUSED_ARG /* coord of item */ )
-+{
-+ return 1;
-+}
-+
-+static int
-+paste_no_paste(coord_t * coord UNUSED_ARG,
-+ reiser4_item_data * data UNUSED_ARG,
-+ carry_plugin_info * info UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* default ->fast_paste() method */
-+static int
-+agree_to_fast_op(const coord_t * coord UNUSED_ARG /* coord of item */ )
-+{
-+ return 1;
-+}
-+
-+int item_can_contain_key(const coord_t * item /* coord of item */ ,
-+ const reiser4_key * key /* key to check */ ,
-+ const reiser4_item_data * data /* parameters of item
-+ * being created */ )
-+{
-+ item_plugin *iplug;
-+ reiser4_key min_key_in_item;
-+ reiser4_key max_key_in_item;
-+
-+ assert("nikita-1658", item != NULL);
-+ assert("nikita-1659", key != NULL);
-+
-+ iplug = item_plugin_by_coord(item);
-+ if (iplug->b.can_contain_key != NULL)
-+ return iplug->b.can_contain_key(item, key, data);
-+ else {
-+ assert("nikita-1681", iplug->b.max_key_inside != NULL);
-+ item_key_by_coord(item, &min_key_in_item);
-+ iplug->b.max_key_inside(item, &max_key_in_item);
-+
-+ /* can contain key if
-+ min_key_in_item <= key &&
-+ key <= max_key_in_item
-+ */
-+ return keyle(&min_key_in_item, key)
-+ && keyle(key, &max_key_in_item);
-+ }
-+}
-+
-+/* mergeable method for non mergeable items */
-+static int
-+not_mergeable(const coord_t * i1 UNUSED_ARG, const coord_t * i2 UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* return 0 if @i1 and @i2 are not mergeable, !0 - otherwise */
-+int are_items_mergeable(const coord_t * i1 /* coord of first item */ ,
-+ const coord_t * i2 /* coord of second item */ )
-+{
-+ item_plugin *iplug;
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ assert("nikita-1336", i1 != NULL);
-+ assert("nikita-1337", i2 != NULL);
-+
-+ iplug = item_plugin_by_coord(i1);
-+ assert("nikita-1338", iplug != NULL);
-+
-+ /* NOTE-NIKITA are_items_mergeable() is also called by assertions in
-+ shifting code when nodes are in "suspended" state. */
-+ assert("nikita-1663",
-+ keyle(item_key_by_coord(i1, &k1), item_key_by_coord(i2, &k2)));
-+
-+ if (iplug->b.mergeable != NULL) {
-+ return iplug->b.mergeable(i1, i2);
-+ } else if (iplug->b.max_key_inside != NULL) {
-+ iplug->b.max_key_inside(i1, &k1);
-+ item_key_by_coord(i2, &k2);
-+
-+ /* mergeable if ->max_key_inside() >= key of i2; */
-+ return keyge(iplug->b.max_key_inside(i1, &k1),
-+ item_key_by_coord(i2, &k2));
-+ } else {
-+ item_key_by_coord(i1, &k1);
-+ item_key_by_coord(i2, &k2);
-+
-+ return
-+ (get_key_locality(&k1) == get_key_locality(&k2)) &&
-+ (get_key_objectid(&k1) == get_key_objectid(&k2))
-+ && (iplug == item_plugin_by_coord(i2));
-+ }
-+}
-+
-+int item_is_extent(const coord_t * item)
-+{
-+ assert("vs-482", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == EXTENT_POINTER_ID;
-+}
-+
-+int item_is_tail(const coord_t * item)
-+{
-+ assert("vs-482", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == FORMATTING_ID;
-+}
-+
-+#if REISER4_DEBUG
-+
-+int item_is_statdata(const coord_t * item)
-+{
-+ assert("vs-516", coord_is_existing_item(item));
-+ return plugin_of_group(item_plugin_by_coord(item), STAT_DATA_ITEM_TYPE);
-+}
-+
-+int item_is_ctail(const coord_t * item)
-+{
-+ assert("edward-xx", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == CTAIL_ID;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+static int change_item(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ /* cannot change constituent item (sd, or dir_item) */
-+ return RETERR(-EINVAL);
-+}
-+
-+static reiser4_plugin_ops item_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_item
-+};
-+
-+item_plugin item_plugins[LAST_ITEM_ID] = {
-+ [STATIC_STAT_DATA_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = STATIC_STAT_DATA_ID,
-+ .groups = (1 << STAT_DATA_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "sd",
-+ .desc = "stat-data",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_single_key,
-+ .can_contain_key = NULL,
-+ .mergeable = not_mergeable,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = paste_no_paste,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .sd = {
-+ .init_inode = init_inode_static_sd,
-+ .save_len = save_len_static_sd,
-+ .save = save_static_sd
-+ }
-+ }
-+ },
-+ [SIMPLE_DIR_ENTRY_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = SIMPLE_DIR_ENTRY_ID,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "de",
-+ .desc = "directory entry",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_single_key,
-+ .can_contain_key = NULL,
-+ .mergeable = NULL,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .dir = {
-+ .extract_key = extract_key_de,
-+ .update_key = update_key_de,
-+ .extract_name = extract_name_de,
-+ .extract_file_type = extract_file_type_de,
-+ .add_entry = add_entry_de,
-+ .rem_entry = rem_entry_de,
-+ .max_name_len = max_name_len_de
-+ }
-+ }
-+ },
-+ [COMPOUND_DIR_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = COMPOUND_DIR_ID,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "cde",
-+ .desc = "compressed directory entry",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_cde,
-+ .can_contain_key = can_contain_key_cde,
-+ .mergeable = mergeable_cde,
-+ .nr_units = nr_units_cde,
-+ .lookup = lookup_cde,
-+ .init = init_cde,
-+ .paste = paste_cde,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_cde,
-+ .copy_units = copy_units_cde,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_cde,
-+ .kill_units = kill_units_cde,
-+ .unit_key = unit_key_cde,
-+ .max_unit_key = unit_key_cde,
-+ .estimate = estimate_cde,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = reiser4_check_cde
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .dir = {
-+ .extract_key = extract_key_cde,
-+ .update_key = update_key_cde,
-+ .extract_name = extract_name_cde,
-+ .extract_file_type = extract_file_type_de,
-+ .add_entry = add_entry_cde,
-+ .rem_entry = rem_entry_cde,
-+ .max_name_len = max_name_len_cde
-+ }
-+ }
-+ },
-+ [NODE_POINTER_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = NODE_POINTER_ID,
-+ .groups = (1 << INTERNAL_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "internal",
-+ .desc = "internal item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = NULL,
-+ .can_contain_key = NULL,
-+ .mergeable = mergeable_internal,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = lookup_internal,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = create_hook_internal,
-+ .kill_hook = kill_hook_internal,
-+ .shift_hook = shift_hook_internal,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = check__internal
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_internal,
-+ .utmost_child_real_block =
-+ utmost_child_real_block_internal,
-+ .update = reiser4_update_internal,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .internal = {
-+ .down_link = down_link_internal,
-+ .has_pointer_to = has_pointer_to_internal
-+ }
-+ }
-+ },
-+ [EXTENT_POINTER_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = EXTENT_POINTER_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "extent",
-+ .desc = "extent item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_extent,
-+ .can_contain_key = can_contain_key_extent,
-+ .mergeable = mergeable_extent,
-+ .nr_units = nr_units_extent,
-+ .lookup = lookup_extent,
-+ .init = NULL,
-+ .paste = paste_extent,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_extent,
-+ .create_hook = create_hook_extent,
-+ .copy_units = copy_units_extent,
-+ .kill_hook = kill_hook_extent,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_extent,
-+ .kill_units = kill_units_extent,
-+ .unit_key = unit_key_extent,
-+ .max_unit_key = max_unit_key_extent,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = reiser4_check_extent
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_extent,
-+ .utmost_child_real_block =
-+ utmost_child_real_block_extent,
-+ .update = NULL,
-+ .scan = reiser4_scan_extent,
-+ .convert = NULL,
-+ .key_by_offset = key_by_offset_extent
-+ },
-+ .s = {
-+ .file = {
-+ .write = reiser4_write_extent,
-+ .read = reiser4_read_extent,
-+ .readpage = reiser4_readpage_extent,
-+ .get_block = get_block_address_extent,
-+ .append_key = append_key_extent,
-+ .init_coord_extension =
-+ init_coord_extension_extent
-+ }
-+ }
-+ },
-+ [FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = FORMATTING_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "body",
-+ .desc = "body (or tail?) item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_tail,
-+ .can_contain_key = can_contain_key_tail,
-+ .mergeable = mergeable_tail,
-+ .nr_units = nr_units_tail,
-+ .lookup = lookup_tail,
-+ .init = NULL,
-+ .paste = paste_tail,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_tail,
-+ .create_hook = NULL,
-+ .copy_units = copy_units_tail,
-+ .kill_hook = kill_hook_tail,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_tail,
-+ .kill_units = kill_units_tail,
-+ .unit_key = unit_key_tail,
-+ .max_unit_key = unit_key_tail,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .file = {
-+ .write = reiser4_write_tail,
-+ .read = reiser4_read_tail,
-+ .readpage = readpage_tail,
-+ .get_block = get_block_address_tail,
-+ .append_key = append_key_tail,
-+ .init_coord_extension =
-+ init_coord_extension_tail
-+ }
-+ }
-+ },
-+ [CTAIL_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = CTAIL_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "ctail",
-+ .desc = "cryptcompress tail item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_tail,
-+ .can_contain_key = can_contain_key_ctail,
-+ .mergeable = mergeable_ctail,
-+ .nr_units = nr_units_ctail,
-+ .lookup = NULL,
-+ .init = init_ctail,
-+ .paste = paste_ctail,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_ctail,
-+ .create_hook = create_hook_ctail,
-+ .copy_units = copy_units_ctail,
-+ .kill_hook = kill_hook_ctail,
-+ .shift_hook = shift_hook_ctail,
-+ .cut_units = cut_units_ctail,
-+ .kill_units = kill_units_ctail,
-+ .unit_key = unit_key_tail,
-+ .max_unit_key = unit_key_tail,
-+ .estimate = estimate_ctail,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = check_ctail
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_ctail,
-+ /* FIXME-EDWARD: write this */
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = scan_ctail,
-+ .convert = convert_ctail
-+ },
-+ .s = {
-+ .file = {
-+ .write = NULL,
-+ .read = read_ctail,
-+ .readpage = readpage_ctail,
-+ .get_block = get_block_address_tail,
-+ .append_key = append_key_ctail,
-+ .init_coord_extension =
-+ init_coord_extension_tail
-+ }
-+ }
-+ },
-+ [BLACK_BOX_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = BLACK_BOX_ID,
-+ .groups = (1 << OTHER_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "blackbox",
-+ .desc = "black box item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = NULL,
-+ .can_contain_key = NULL,
-+ .mergeable = not_mergeable,
-+ .nr_units = nr_units_single_unit,
-+ /* no need for ->lookup method */
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ }
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/item.h linux-2.6.20/fs/reiser4/plugin/item/item.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/item.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/item.h 2007-05-06 14:50:43.819013220 +0400
-@@ -0,0 +1,400 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* first read balance.c comments before reading this */
-+
-+/* An item_plugin implements all of the operations required for
-+ balancing that are item specific. */
-+
-+/* an item plugin also implements other operations that are specific to that
-+ item. These go into the item specific operations portion of the item
-+ handler, and all of the item specific portions of the item handler are put
-+ into a union. */
-+
-+#if !defined( __REISER4_ITEM_H__ )
-+#define __REISER4_ITEM_H__
-+
-+#include "../../forward.h"
-+#include "../plugin_header.h"
-+#include "../../dformat.h"
-+#include "../../seal.h"
-+#include "../../plugin/file/file.h"
-+
-+#include <linux/fs.h> /* for struct file, struct inode */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+typedef enum {
-+ STAT_DATA_ITEM_TYPE,
-+ DIR_ENTRY_ITEM_TYPE,
-+ INTERNAL_ITEM_TYPE,
-+ UNIX_FILE_METADATA_ITEM_TYPE,
-+ OTHER_ITEM_TYPE
-+} item_type_id;
-+
-+/* this is the part of each item plugin that all items are expected to
-+ support or at least explicitly fail to support by setting the
-+ pointer to null. */
-+typedef struct {
-+ /* operations called by balancing
-+
-+ It is interesting to consider that some of these item
-+ operations could be given sources or targets that are not
-+ really items in nodes. This could be ok/useful.
-+
-+ */
-+ /* maximal key that can _possibly_ be occupied by this item
-+
-+ When inserting, and node ->lookup() method (called by
-+ coord_by_key()) reaches an item after binary search,
-+ the ->max_key_inside() item plugin method is used to determine
-+ whether new item should be pasted into existing item
-+ (new_key<=max_key_inside()) or new item has to be created
-+ (new_key>max_key_inside()).
-+
-+ For items that occupy exactly one key (like stat-data)
-+ this method should return this key. For items that can
-+ grow indefinitely (extent, directory item) this should
-+ return reiser4_max_key().
-+
-+ For example extent with the key
-+
-+ (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
-+
-+ ->max_key_inside is (LOCALITY,4,OBJID,0xffffffffffffffff).
-+ */
-+ reiser4_key *(*max_key_inside) (const coord_t *, reiser4_key *);
-+
-+ /* true if item @coord can merge data at @key. */
-+ int (*can_contain_key) (const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+ /* mergeable() - check items for mergeability
-+
-+ Optional method. Returns true if two items can be merged.
-+
-+ */
-+ int (*mergeable) (const coord_t *, const coord_t *);
-+
-+ /* number of atomic things in an item.
-+ NOTE FOR CONTRIBUTORS: use a generic method
-+ nr_units_single_unit() for solid (atomic) items, as
-+ tree operations use it as a criterion of solidness
-+ (see is_solid_item macro) */
-+ pos_in_node_t(*nr_units) (const coord_t *);
-+
-+ /* search within item for a unit within the item, and return a
-+ pointer to it. This can be used to calculate how many
-+ bytes to shrink an item if you use pointer arithmetic and
-+ compare to the start of the item body if the item's data
-+ are continuous in the node, if the item's data are not
-+ continuous in the node, all sorts of other things are maybe
-+ going to break as well. */
-+ lookup_result(*lookup) (const reiser4_key *, lookup_bias, coord_t *);
-+ /* method called by node_plugin->create_item() to initialise new
-+ item */
-+ int (*init) (coord_t * target, coord_t * from,
-+ reiser4_item_data * data);
-+ /* method called (e.g., by reiser4_resize_item()) to place new data
-+ into item when it grows */
-+ int (*paste) (coord_t *, reiser4_item_data *, carry_plugin_info *);
-+ /* return true if paste into @coord is allowed to skip
-+ carry. That is, if such paste would require any changes
-+ at the parent level
-+ */
-+ int (*fast_paste) (const coord_t *);
-+ /* how many but not more than @want units of @source can be
-+ shifted into @target node. If pend == append - we try to
-+ append last item of @target by first units of @source. If
-+ pend == prepend - we try to "prepend" first item in @target
-+ by last units of @source. @target node has @free_space
-+ bytes of free space. Total size of those units are returned
-+ via @size.
-+
-+ @target is not NULL if shifting to the mergeable item and
-+ NULL if a new item will be created during shifting.
-+ */
-+ int (*can_shift) (unsigned free_space, coord_t *,
-+ znode *, shift_direction, unsigned *size,
-+ unsigned want);
-+
-+ /* starting off @from-th unit of item @source append or
-+ prepend @count units to @target. @target has been already
-+ expanded by @free_space bytes. That must be exactly what is
-+ needed for those items in @target. If @where_is_free_space
-+ == SHIFT_LEFT - free space is at the end of @target item,
-+ otherwise - it is in the beginning of it. */
-+ void (*copy_units) (coord_t *, coord_t *,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space,
-+ unsigned free_space);
-+
-+ int (*create_hook) (const coord_t *, void *);
-+ /* do whatever is necessary to do when @count units starting
-+ from @from-th one are removed from the tree */
-+ /* FIXME-VS: this is used to be here for, in particular,
-+ extents and items of internal type to free blocks they point
-+ to at the same time with removing items from a
-+ tree. Problems start, however, when dealloc_block fails due
-+ to some reason. Item gets removed, but blocks it pointed to
-+ are not freed. It is not clear how to fix this for items of
-+ internal type because a need to remove internal item may
-+ appear in the middle of balancing, and there is no way to
-+ undo changes made. OTOH, if space allocator involves
-+ balancing to perform dealloc_block - this will probably
-+ break balancing due to deadlock issues
-+ */
-+ int (*kill_hook) (const coord_t *, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *);
-+ int (*shift_hook) (const coord_t *, unsigned from, unsigned count,
-+ znode * _node);
-+
-+ /* unit @*from contains @from_key. unit @*to contains @to_key. Cut all keys between @from_key and @to_key
-+ including boundaries. When units are cut from item beginning - move space which gets freed to head of
-+ item. When units are cut from item end - move freed space to item end. When units are cut from the middle of
-+ item - move freed space to item head. Return amount of space which got freed. Save smallest removed key in
-+ @smallest_removed if it is not 0. Save new first item key in @new_first_key if it is not 0
-+ */
-+ int (*cut_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *,
-+ reiser4_key * smallest_removed,
-+ reiser4_key * new_first_key);
-+
-+ /* like cut_units, except that these units are removed from the
-+ tree, not only from a node */
-+ int (*kill_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *,
-+ reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+
-+ /* if @key_of_coord == 1 - returned key of coord, otherwise -
-+ key of unit is returned. If @coord is not set to certain
-+ unit - ERR_PTR(-ENOENT) is returned */
-+ reiser4_key *(*unit_key) (const coord_t *, reiser4_key *);
-+ reiser4_key *(*max_unit_key) (const coord_t *, reiser4_key *);
-+ /* estimate how much space is needed for paste @data into item at
-+ @coord. if @coord==0 - estimate insertion, otherwise - estimate
-+ pasting
-+ */
-+ int (*estimate) (const coord_t *, const reiser4_item_data *);
-+
-+ /* converts flow @f to item data. @coord == 0 on insert */
-+ int (*item_data_by_flow) (const coord_t *, const flow_t *,
-+ reiser4_item_data *);
-+
-+ /*void (*show) (struct seq_file *, coord_t *); */
-+
-+#if REISER4_DEBUG
-+ /* used for debugging, every item should have here the most
-+ complete possible check of the consistency of the item that
-+ the inventor can construct */
-+ int (*check) (const coord_t *, const char **error);
-+#endif
-+
-+} balance_ops;
-+
-+/* Item methods used during flush. struct item_plugin embeds one instance of
-+ this table as its ->f member. */
-+typedef struct {
-+ /* return the right or left child of @coord, only if it is in memory */
-+ /* @side selects which child; the child is handed back through @child */
-+ int (*utmost_child) (const coord_t *, sideof side, jnode ** child);
-+
-+ /* return whether the right or left child of @coord has a non-fake
-+ block number. */
-+ int (*utmost_child_real_block) (const coord_t *, sideof side,
-+ reiser4_block_nr *);
-+ /* relocate child at @coord to the @block */
-+ void (*update) (const coord_t *, const reiser4_block_nr *);
-+ /* count unformatted nodes per item for leave relocation policy, etc.. */
-+ int (*scan) (flush_scan * scan);
-+ /* convert item by flush */
-+ int (*convert) (flush_pos_t * pos);
-+ /* backward mapping from jnode offset to a key. */
-+ int (*key_by_offset) (struct inode *, loff_t, reiser4_key *);
-+} flush_ops;
-+
-+/* operations specific to the directory item; reachable through the
-+ ->s.dir member of struct item_plugin */
-+typedef struct {
-+ /* extract stat-data key from directory entry at @coord and place it
-+ into @key. */
-+ int (*extract_key) (const coord_t *, reiser4_key * key);
-+ /* update object key in item. */
-+ int (*update_key) (const coord_t *, const reiser4_key *, lock_handle *);
-+ /* extract name from directory entry at @coord and return it */
-+ /* @buf is caller-provided scratch space the name may be built in */
-+ char *(*extract_name) (const coord_t *, char *buf);
-+ /* extract file type (DT_* stuff) from directory entry at @coord and
-+ return it */
-+ unsigned (*extract_file_type) (const coord_t *);
-+ /* NOTE(review): presumably adds an entry for @name to directory @dir,
-+ described by @entry - confirm against the implementations */
-+ int (*add_entry) (struct inode * dir,
-+ coord_t *, lock_handle *,
-+ const struct dentry * name,
-+ reiser4_dir_entry_desc * entry);
-+ /* NOTE(review): presumably the inverse of ->add_entry: removes the
-+ entry at the given coord from @dir - confirm */
-+ int (*rem_entry) (struct inode * dir, const struct qstr * name,
-+ coord_t *, lock_handle *,
-+ reiser4_dir_entry_desc * entry);
-+ /* NOTE(review): presumably the longest name this item type can store
-+ in @dir - confirm */
-+ int (*max_name_len) (const struct inode * dir);
-+} dir_entry_ops;
-+
-+/* operations specific to items regular (unix) file metadata are built of;
-+ reachable through the ->s.file member of struct item_plugin */
-+typedef struct {
-+ /* NOTE(review): the four I/O entries below carry no contract in this
-+ header; their semantics are defined by the implementing item
-+ plugins - consult those before relying on return values */
-+ int (*write) (struct file *, const char __user *, size_t, loff_t *pos);
-+ int (*read) (struct file *, flow_t *, hint_t *);
-+ int (*readpage) (void *, struct page *);
-+ int (*get_block) (const coord_t *, sector_t, sector_t *);
-+ /*
-+ * key of first byte which is not addressed by the item @coord is set
-+ * to.
-+ * For example, for extent item with the key
-+ *
-+ * (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
-+ *
-+ * ->append_key is
-+ *
-+ * (LOCALITY,4,OBJID,STARTING-OFFSET + BLK * block_size)
-+ */
-+ reiser4_key *(*append_key) (const coord_t *, reiser4_key *);
-+
-+ /* NOTE(review): presumably prepares the item-specific part of the
-+ uf_coord_t for the given file offset - confirm */
-+ void (*init_coord_extension) (uf_coord_t *, loff_t);
-+} file_ops;
-+
-+/* operations specific to items of stat data type; reachable through the
-+ ->s.sd member of struct item_plugin */
-+typedef struct {
-+ /* copy data from the on-disk stat-data body @sd (@len bytes) into
-+ @inode; see init_inode_static_sd() */
-+ int (*init_inode) (struct inode * inode, char *sd, int len);
-+ /* size of stat-data required to store @inode; see
-+ save_len_static_sd() */
-+ int (*save_len) (struct inode * inode);
-+ /* save @inode into stat-data at *@area, advancing *@area; see
-+ save_static_sd() */
-+ int (*save) (struct inode * inode, char **area);
-+} sd_ops;
-+
-+/* operations specific to internal item; reachable through the ->s.internal
-+ member of struct item_plugin */
-+typedef struct {
-+ /* all tree traversal want to know from internal item is where
-+ to go next. */
-+ /* the block to descend into for @key is returned through @block */
-+ void (*down_link) (const coord_t * coord,
-+ const reiser4_key * key, reiser4_block_nr * block);
-+ /* check that given internal item contains given pointer. */
-+ int (*has_pointer_to) (const coord_t * coord,
-+ const reiser4_block_nr * block);
-+} internal_item_ops;
-+
-+/* Item plugin: the generic plugin header followed by the method tables an
-+ item type provides. ->b and ->f are present for every item type; ->s is a
-+ union, so only the member matching the item's type is meaningful. */
-+struct item_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+
-+ /* methods common for all item types */
-+ balance_ops b;
-+ /* methods used during flush */
-+ flush_ops f;
-+
-+ /* methods specific to particular type of item */
-+ union {
-+ dir_entry_ops dir;
-+ file_ops file;
-+ sd_ops sd;
-+ internal_item_ops internal;
-+ } s;
-+
-+};
-+
-+/* true if @iplug uses the generic nr_units_single_unit() as its ->nr_units
-+ method, which is the criterion tree operations use for a solid (atomic)
-+ item */
-+#define is_solid_item(iplug) ((iplug)->b.nr_units == nr_units_single_unit)
-+
-+/* Return the id stored in the generic plugin header of @plugin. */
-+static inline item_id item_id_by_plugin(item_plugin * plugin)
-+{
-+	const plugin_header *header = &plugin->h;
-+
-+	return header->id;
-+}
-+
-+/* Return the id of @iplug narrowed to a char. The assertions require a
-+   non-NULL plugin whose id is below 0xff. */
-+static inline char get_iplugid(item_plugin * iplug)
-+{
-+	item_id id;
-+
-+	assert("nikita-2838", iplug != NULL);
-+	assert("nikita-2839", iplug->h.id < 0xff);
-+
-+	id = item_id_by_plugin(iplug);
-+	return (char)id;
-+}
-+
-+extern unsigned long znode_times_locked(const znode * z);
-+
-+/* Remember the id of @iplug in @coord. Under ON_DEBUG() the current
-+   znode_times_locked() value of the coord's node is stored in ->plug_v as
-+   well. */
-+static inline void coord_set_iplug(coord_t * coord, item_plugin * iplug)
-+{
-+	char plugin_id;
-+
-+	assert("nikita-2837", coord != NULL);
-+	assert("nikita-2838", iplug != NULL);
-+
-+	plugin_id = get_iplugid(iplug);
-+	coord->iplugid = plugin_id;
-+	ON_DEBUG(coord->plug_v = znode_times_locked(coord->node));
-+}
-+
-+/* Look up the item plugin whose id is cached in @coord. The assertions
-+   require that an id has been set and that ->plug_v still equals
-+   znode_times_locked() of the coord's node. */
-+static inline item_plugin *coord_iplug(const coord_t * coord)
-+{
-+	item_plugin *iplug;
-+
-+	assert("nikita-2833", coord != NULL);
-+	assert("nikita-2834", coord->iplugid != INVALID_PLUGID);
-+	assert("nikita-3549", coord->plug_v == znode_times_locked(coord->node));
-+
-+	iplug = (item_plugin *) plugin_by_id(REISER4_ITEM_PLUGIN_TYPE,
-+					     coord->iplugid);
-+	return iplug;
-+}
-+
-+extern int item_can_contain_key(const coord_t * item, const reiser4_key * key,
-+ const reiser4_item_data *);
-+extern int are_items_mergeable(const coord_t * i1, const coord_t * i2);
-+extern int item_is_extent(const coord_t *);
-+extern int item_is_tail(const coord_t *);
-+extern int item_is_statdata(const coord_t * item);
-+extern int item_is_ctail(const coord_t *);
-+
-+extern pos_in_node_t item_length_by_coord(const coord_t * coord);
-+extern pos_in_node_t nr_units_single_unit(const coord_t * coord);
-+extern item_id item_id_by_coord(const coord_t * coord /* coord to query */ );
-+extern reiser4_key *item_key_by_coord(const coord_t * coord, reiser4_key * key);
-+extern reiser4_key *max_item_key_by_coord(const coord_t *, reiser4_key *);
-+extern reiser4_key *unit_key_by_coord(const coord_t * coord, reiser4_key * key);
-+extern reiser4_key *max_unit_key_by_coord(const coord_t * coord,
-+ reiser4_key * key);
-+extern void obtain_item_plugin(const coord_t * coord);
-+
-+#if defined(REISER4_DEBUG)
-+extern int znode_is_loaded(const znode * node);
-+#endif
-+
-+/* Item plugin of the item at @coord. When the coord has no plugin id set
-+   yet, obtain_item_plugin() is called before the lookup. */
-+static inline item_plugin *item_plugin_by_coord(const coord_t * coord)
-+{
-+	assert("nikita-330", coord != NULL);
-+	assert("nikita-331", coord->node != NULL);
-+	assert("nikita-332", znode_is_loaded(coord->node));
-+
-+	if (unlikely(!coord_is_iplug_set(coord))) {
-+		/* slow path: plugin id is not cached in the coord */
-+		obtain_item_plugin(coord);
-+		return coord_iplug(coord);
-+	}
-+	return coord_iplug(coord);
-+}
-+
-+/* Non-zero when the plugin of the item at @item belongs to the
-+   INTERNAL_ITEM_TYPE group. @item must be an existing item. */
-+static inline int item_is_internal(const coord_t * item)
-+{
-+	item_plugin *iplug;
-+
-+	assert("vs-483", coord_is_existing_item(item));
-+
-+	iplug = item_plugin_by_coord(item);
-+	return plugin_of_group(iplug, INTERNAL_ITEM_TYPE);
-+}
-+
-+extern void item_body_by_coord_hard(coord_t * coord);
-+extern void *item_body_by_coord_easy(const coord_t * coord);
-+#if REISER4_DEBUG
-+extern int item_body_is_valid(const coord_t * coord);
-+#endif
-+
-+/* return pointer to item body
-+
-+ If the offset cached in @coord is INVALID_OFFSET, item_body_by_coord_hard()
-+ is called first; the pointer itself always comes from
-+ item_body_by_coord_easy(). */
-+static inline void *item_body_by_coord(const coord_t *
-+ coord /* coord to query */ )
-+{
-+ assert("nikita-324", coord != NULL);
-+ assert("nikita-325", coord->node != NULL);
-+ assert("nikita-326", znode_is_loaded(coord->node));
-+
-+ /* const is cast away here: the _hard() variant takes a non-const
-+ coord even though this function's parameter is const */
-+ if (coord->offset == INVALID_OFFSET)
-+ item_body_by_coord_hard((coord_t *) coord);
-+ assert("nikita-3201", item_body_is_valid(coord));
-+ assert("nikita-3550", coord->body_v == znode_times_locked(coord->node));
-+ return item_body_by_coord_easy(coord);
-+}
-+
-+/* __REISER4_ITEM_H__ */
-+#endif
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/Makefile linux-2.6.20/fs/reiser4/plugin/item/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/item/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/Makefile 2007-05-06 14:50:43.819013220 +0400
-@@ -0,0 +1,18 @@
-+# Kbuild fragment: the objects listed in item_plugins-objs are combined into
-+# item_plugins.o, which is built according to CONFIG_REISER4_FS.
-+obj-$(CONFIG_REISER4_FS) += item_plugins.o
-+
-+item_plugins-objs := \
-+ item.o \
-+ static_stat.o \
-+ sde.o \
-+ cde.o \
-+ blackbox.o \
-+ internal.o \
-+ tail.o \
-+ ctail.o \
-+ extent.o \
-+ extent_item_ops.o \
-+ extent_file_ops.o \
-+ extent_flush_ops.o
-+
-+
-+
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/sde.c linux-2.6.20/fs/reiser4/plugin/item/sde.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/sde.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/sde.c 2007-05-06 14:50:43.819013220 +0400
-@@ -0,0 +1,190 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry implementation */
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../coord.h"
-+#include "sde.h"
-+#include "item.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/quotaops.h>
-+
-+/* ->extract_key() method of the simple directory item plugin: hands the
-+   object id stored in the entry at @coord to extract_key_from_id(), which
-+   fills @key, and returns that function's result. */
-+int extract_key_de(const coord_t * coord /* coord of item */ ,
-+		   reiser4_key * key /* resulting key */ )
-+{
-+	directory_entry_format *entry;
-+
-+	assert("nikita-1458", coord != NULL);
-+	assert("nikita-1459", key != NULL);
-+
-+	entry = (directory_entry_format *) item_body_by_coord(coord);
-+	/* the item must be large enough to hold the fixed part of an entry */
-+	assert("nikita-1158",
-+	       item_length_by_coord(coord) >= (int)sizeof *entry);
-+
-+	return extract_key_from_id(&entry->id, key);
-+}
-+
-+/* Update the object key stored in the directory entry at @coord.
-+
-+   The object id is built from @key with build_obj_key_id() and written
-+   into the entry; the node holding the entry is then marked dirty.
-+   @lh is unused.
-+
-+   Returns 0 on success. If build_obj_key_id() fails, its error code is
-+   returned and the entry is left untouched (previously the failure was
-+   silently reported as success). */
-+int
-+update_key_de(const coord_t * coord, const reiser4_key * key,
-+	      lock_handle * lh UNUSED_ARG)
-+{
-+	directory_entry_format *dent;
-+	obj_key_id obj_id;
-+	int result;
-+
-+	assert("nikita-2342", coord != NULL);
-+	assert("nikita-2343", key != NULL);
-+
-+	dent = (directory_entry_format *) item_body_by_coord(coord);
-+	result = build_obj_key_id(key, &obj_id);
-+	if (result != 0)
-+		/* do not touch the entry and let the caller see the error */
-+		return result;
-+
-+	dent->id = obj_id;
-+	znode_make_dirty(coord->node);
-+	return 0;
-+}
-+
-+/* Return the name of the directory entry @dent located at @coord.
-+
-+   The unit key at @coord decides where the name comes from: for a
-+   long-name key it is the string stored in @dent, for the dot key it is
-+   the literal ".", and otherwise it is recovered from the key itself by
-+   extract_name_from_key() using @buf. */
-+char *extract_dent_name(const coord_t * coord, directory_entry_format * dent,
-+			char *buf)
-+{
-+	reiser4_key key;
-+
-+	unit_key_by_coord(coord, &key);
-+	/* unexpected key type: report the block and carry on */
-+	if (get_key_type(&key) != KEY_FILE_NAME_MINOR)
-+		reiser4_print_address("oops", znode_get_block(coord->node));
-+
-+	if (is_longname_key(&key))
-+		return (char *)dent->name;
-+	if (is_dot_key(&key))
-+		return (char *)".";
-+	return extract_name_from_key(&key, buf);
-+}
-+
-+/* ->extract_name() method of the simple directory item plugin: resolves
-+   the entry body at @coord and delegates to extract_dent_name(). */
-+char *extract_name_de(const coord_t * coord /* coord of item */ , char *buf)
-+{
-+	directory_entry_format *entry;
-+
-+	assert("nikita-1460", coord != NULL);
-+
-+	entry = (directory_entry_format *) item_body_by_coord(coord);
-+
-+	return extract_dent_name(coord, entry, buf);
-+}
-+
-+/* ->extract_file_type() method of the simple directory item plugin.
-+
-+   The file type is not stored in the directory entry yet (but see the
-+   comments at kassign.h:obj_key_id), so DT_UNKNOWN is always returned. */
-+unsigned extract_file_type_de(const coord_t * coord UNUSED_ARG /* coord of
-+								 * item */ )
-+{
-+	assert("nikita-1764", coord != NULL);
-+
-+	return DT_UNKNOWN;
-+}
-+
-+/* Insert a simple directory entry for the name of @de into directory @dir.
-+
-+   A new item keyed by @entry->key is inserted at @coord under lock handle
-+   @lh. The item holds the object id of @entry->obj and, for long names
-+   only, the NUL-terminated name itself.
-+
-+   The item's size is charged to the quota of @dir before insertion. If the
-+   insertion fails, that charge is released again, so a failed call no
-+   longer leaks quota.
-+
-+   Returns 0 on success, -EDQUOT if the quota charge is refused, or the
-+   error returned by insert_by_coord(). */
-+int add_entry_de(struct inode *dir /* directory of item */ ,
-+		 coord_t * coord /* coord of item */ ,
-+		 lock_handle * lh /* insertion lock handle */ ,
-+		 const struct dentry *de /* name to add */ ,
-+		 reiser4_dir_entry_desc * entry /* parameters of new directory
-+						 * entry */ )
-+{
-+	reiser4_item_data data;
-+	directory_entry_format *dent;
-+	int result;
-+	const char *name;
-+	int len;
-+	int longname;
-+
-+	name = de->d_name.name;
-+	len = de->d_name.len;
-+	assert("nikita-1163", strlen(name) == len);
-+
-+	longname = is_longname(name, len);
-+
-+	/* fixed part, plus name and terminating NUL for long names */
-+	data.length = sizeof *dent;
-+	if (longname)
-+		data.length += len + 1;
-+	data.data = NULL;
-+	data.user = 0;
-+	data.iplug = item_plugin_by_id(SIMPLE_DIR_ENTRY_ID);
-+
-+	/* NOTE-NIKITA quota plugin */
-+	if (DQUOT_ALLOC_SPACE_NODIRTY(dir, data.length))
-+		return RETERR(-EDQUOT);
-+
-+	result = insert_by_coord(coord, &data, &entry->key, lh, 0 /*flags */ );
-+	if (result != 0) {
-+		/* nothing was inserted: give the charged space back */
-+		DQUOT_FREE_SPACE_NODIRTY(dir, data.length);
-+		return result;
-+	}
-+
-+	dent = (directory_entry_format *) item_body_by_coord(coord);
-+	build_inode_key_id(entry->obj, &dent->id);
-+	if (longname) {
-+		memcpy(dent->name, name, len);
-+		put_unaligned(0, &dent->name[len]);
-+	}
-+	return 0;
-+}
-+
-+/* Remove the directory entry item at @coord from directory @dir.
-+
-+   @name, @lh and @entry are unused. If @dir accounts for fewer bytes than
-+   the item occupies, a warning is printed and -EIO is returned. Otherwise
-+   the item is killed and, on success, its length is released from the
-+   quota of @dir. Returns the result of kill_node_content(). */
-+int rem_entry_de(struct inode *dir /* directory of item */ ,
-+		 const struct qstr *name UNUSED_ARG,
-+		 coord_t * coord /* coord of item */ ,
-+		 lock_handle * lh UNUSED_ARG /* lock handle for
-+					      * removal */ ,
-+		 reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
-+							    * directory entry
-+							    * being removed */ )
-+{
-+	coord_t to;
-+	int item_len;
-+	int ret;
-+
-+	item_len = item_length_by_coord(coord);
-+	if (inode_get_bytes(dir) < item_len) {
-+		warning("nikita-2627", "Dir is broke: %llu: %llu",
-+			(unsigned long long)get_inode_oid(dir),
-+			inode_get_bytes(dir));
-+
-+		return RETERR(-EIO);
-+	}
-+
-+	/* kill_node_content() modifies both of its coords without regard
-+	   to aliasing, so the range end has to be a separate copy of
-+	   @coord. */
-+	coord_dup(&to, coord);
-+	ret = kill_node_content(coord, &to, NULL, NULL, NULL, NULL, NULL, 0);
-+	if (ret != 0)
-+		return ret;
-+
-+	/* NOTE-NIKITA quota plugin */
-+	DQUOT_FREE_SPACE_NODIRTY(dir, item_len);
-+	return ret;
-+}
-+
-+/* Longest name for a simple directory entry in @dir: the maximal item size
-+   of the tree's node plugin, less the fixed entry header and 2 more
-+   bytes. */
-+int max_name_len_de(const struct inode *dir)
-+{
-+	const size_t overhead = sizeof(directory_entry_format) + 2;
-+
-+	return reiser4_tree_by_inode(dir)->nplug->max_item_size() - overhead;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/sde.h linux-2.6.20/fs/reiser4/plugin/item/sde.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/sde.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/sde.h 2007-05-06 14:50:43.819013220 +0400
-@@ -0,0 +1,66 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
-+#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+#include <linux/fs.h>
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+/* Body of a simple directory entry item: the object id, optionally followed
-+ by the entry name. add_entry_de() in sde.c reserves room for the name (and
-+ its terminating NUL) only when is_longname() reports a long name. */
-+typedef struct directory_entry_format {
-+ /* key of object stat-data. It's not necessary to store whole
-+ key here, because it's always key of stat-data, so minor
-+ packing locality and offset can be omitted here. But this
-+ relies on particular key allocation scheme for stat-data, so,
-+ for extensibility sake, whole key can be stored here.
-+
-+ We store key as array of bytes, because we don't want 8-byte
-+ alignment of dir entries.
-+ */
-+ obj_key_id id;
-+ /* file name. Null terminated string. */
-+ /* zero-length trailing array: contributes nothing to sizeof */
-+ d8 name[0];
-+} directory_entry_format;
-+
-+void print_de(const char *prefix, coord_t * coord);
-+int extract_key_de(const coord_t * coord, reiser4_key * key);
-+int update_key_de(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh);
-+char *extract_name_de(const coord_t * coord, char *buf);
-+unsigned extract_file_type_de(const coord_t * coord);
-+int add_entry_de(struct inode *dir, coord_t * coord,
-+ lock_handle * lh, const struct dentry *name,
-+ reiser4_dir_entry_desc * entry);
-+int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
-+ lock_handle * lh, reiser4_dir_entry_desc * entry);
-+int max_name_len_de(const struct inode *dir);
-+
-+int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
-+
-+char *extract_dent_name(const coord_t * coord,
-+ directory_entry_format * dent, char *buf);
-+
-+/* NOTE(review): presumably the size of the scratch buffer callers pass as
-+ @buf to extract_name_de()/extract_dent_name(); it depends on whether large
-+ keys are configured - confirm against the callers */
-+#if REISER4_LARGE_KEY
-+#define DE_NAME_BUF_LEN (24)
-+#else
-+#define DE_NAME_BUF_LEN (16)
-+#endif
-+
-+/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/static_stat.c linux-2.6.20/fs/reiser4/plugin/item/static_stat.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/static_stat.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/static_stat.c 2007-05-06 14:50:43.823014469 +0400
-@@ -0,0 +1,1107 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* stat data manipulation. */
-+
-+#include "../../forward.h"
-+#include "../../super.h"
-+#include "../../vfs_ops.h"
-+#include "../../inode.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../object.h"
-+#include "../plugin.h"
-+#include "../plugin_header.h"
-+#include "static_stat.h"
-+#include "item.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+
-+/* see static_stat.h for explanation */
-+
-+/* Cursor helper for dumping/loading inode/plugin state to/from stat-data:
-+   advances *@area by @size_of bytes and shrinks *@length by the same
-+   amount. The remaining length must not become negative. */
-+static void move_on(int *length /* space remaining in stat-data */ ,
-+		    char **area /* current coord in stat data */ ,
-+		    int size_of /* how many bytes to move forward */ )
-+{
-+	assert("nikita-615", length != NULL);
-+	assert("nikita-616", area != NULL);
-+
-+	*area += size_of;
-+	*length -= size_of;
-+
-+	assert("nikita-617", *length >= 0);
-+}
-+
-+/* Loading helper: warn that the stat-data of @inode holds less space than
-+   the part named by @where needs, and return -EINVAL. Per the original
-+   note this can only happen on disk corruption. */
-+static int not_enough_space(struct inode *inode /* object being processed */ ,
-+			    const char *where /* error message */ )
-+{
-+	unsigned long long oid;
-+
-+	assert("nikita-618", inode != NULL);
-+
-+	oid = (unsigned long long)get_inode_oid(inode);
-+	warning("nikita-619", "Not enough space in %llu while loading %s",
-+		oid, where);
-+
-+	return RETERR(-EINVAL);
-+}
-+
-+/* Loading helper: warn that plugin id @id found in the stat-data of @inode
-+   is not valid, and return -EINVAL. */
-+static int unknown_plugin(reiser4_plugin_id id /* invalid id */ ,
-+			  struct inode *inode /* object being processed */ )
-+{
-+	unsigned long long oid;
-+
-+	oid = (unsigned long long)get_inode_oid(inode);
-+	warning("nikita-620", "Unknown plugin %i in %llu", id, oid);
-+
-+	return RETERR(-EINVAL);
-+}
-+
-+/* this is installed as ->init_inode() method of
-+ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+ Copies data from on-disk stat-data format into inode.
-+ Handles stat-data extensions.
-+
-+ The stat-data starts with a base part holding a 16-bit extension mask.
-+ Bits 0..14 of each 16-bit mask chunk select extensions; the top bit of a
-+ chunk says that another mask chunk follows in the stat-data. For every
-+ extension bit the ->present() method is called if the bit is set, and
-+ ->absent() (if any) otherwise.
-+
-+ Returns 0 on success, -EINVAL for an unknown extension, a truncated mask
-+ or too many mask chunks, or whatever ->present()/->absent() returned. */
-+/* was sd_load */
-+int init_inode_static_sd(struct inode *inode /* object being processed */ ,
-+ char *sd /* stat-data body */ ,
-+ int len /* length of stat-data */ )
-+{
-+ int result;
-+ int bit;
-+ int chunk;
-+ __u16 mask;
-+ __u64 bigmask;
-+ reiser4_stat_data_base *sd_base;
-+ reiser4_inode *state;
-+
-+ assert("nikita-625", inode != NULL);
-+ assert("nikita-626", sd != NULL);
-+
-+ result = 0;
-+ sd_base = (reiser4_stat_data_base *) sd;
-+ state = reiser4_inode_data(inode);
-+ /* @mask is the chunk being walked, @bigmask accumulates all chunks */
-+ mask = le16_to_cpu(get_unaligned(&sd_base->extmask));
-+ bigmask = mask;
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+
-+ move_on(&len, &sd, sizeof *sd_base);
-+ /* keep going past an exhausted mask until every "important"
-+ extension has had its ->absent() chance */
-+ for (bit = 0, chunk = 0;
-+ mask != 0 || bit <= LAST_IMPORTANT_SD_EXTENSION;
-+ ++bit, mask >>= 1) {
-+ if (((bit + 1) % 16) != 0) {
-+ /* handle extension */
-+ sd_ext_plugin *sdplug;
-+
-+ if (bit >= LAST_SD_EXTENSION) {
-+ warning("vpf-1904",
-+ "No such extension %i in inode %llu",
-+ bit,
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+
-+ sdplug = sd_ext_plugin_by_id(bit);
-+ if (sdplug == NULL) {
-+ warning("nikita-627",
-+ "No such extension %i in inode %llu",
-+ bit,
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ if (mask & 1) {
-+ assert("nikita-628", sdplug->present);
-+ /* alignment is not supported in node layout
-+ plugin yet.
-+ result = align( inode, &len, &sd,
-+ sdplug -> alignment );
-+ if( result != 0 )
-+ return result; */
-+ result = sdplug->present(inode, &sd, &len);
-+ } else if (sdplug->absent != NULL)
-+ result = sdplug->absent(inode);
-+ if (result)
-+ break;
-+ /* else, we are looking at the last bit in 16-bit
-+ portion of bitmask */
-+ } else if (mask & 1) {
-+ /* next portion of bitmask */
-+ if (len < (int)sizeof(d16)) {
-+ warning("nikita-629",
-+ "No space for bitmap in inode %llu",
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ mask = le16_to_cpu(get_unaligned((d16 *)sd));
-+ /* NOTE(review): earlier chunks are shifted towards the high
-+ bits here, whereas save_static_sd() writes the low 16 bits
-+ of ->extmask first - confirm the two agree when more than
-+ one chunk is present */
-+ bigmask <<= 16;
-+ bigmask |= mask;
-+ move_on(&len, &sd, sizeof(d16));
-+ ++chunk;
-+ if (chunk == 3) {
-+ /* NOTE(review): the branch below is taken only when
-+ bit 0x8000 is already clear, so the "clear last
-+ bit" statement has no effect - confirm intent */
-+ if (!(mask & 0x8000)) {
-+ /* clear last bit */
-+ mask &= ~0x8000;
-+ continue;
-+ }
-+ /* too much */
-+ warning("nikita-630",
-+ "Too many extensions in %llu",
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ } else
-+ /* bitmask exhausted */
-+ break;
-+ }
-+ state->extmask = bigmask;
-+ /* common initialisations */
-+ /* NOTE(review): sizeof makes the subtraction below unsigned, so the
-+ "> 0" test is true for any non-zero difference, including when
-+ @len is the smaller operand - confirm this is intended */
-+ if (len - (bit / 16 * sizeof(d16)) > 0) {
-+ /* alignment in save_len_static_sd() is taken into account
-+ -edward */
-+ warning("nikita-631", "unused space in inode %llu",
-+ (unsigned long long)get_inode_oid(inode));
-+ }
-+
-+ return result;
-+}
-+
-+/* Estimate the size of stat-data required to store @inode: the base part,
-+   plus ->save_len() of every extension set in the inode's extension mask,
-+   plus one d16 for every 16 mask bits walked.
-+   Installed as ->save_len() method of
-+   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */
-+/* was sd_len */
-+int save_len_static_sd(struct inode *inode /* object being processed */ )
-+{
-+	unsigned int total;
-+	__u64 pending;
-+	int bit;
-+
-+	assert("nikita-632", inode != NULL);
-+
-+	total = sizeof(reiser4_stat_data_base);
-+	pending = reiser4_inode_data(inode)->extmask;
-+	bit = 0;
-+	while (pending != 0) {
-+		if (pending & 1) {
-+			sd_ext_plugin *ext;
-+
-+			ext = sd_ext_plugin_by_id(bit);
-+			assert("nikita-633", ext != NULL);
-+			/* extension alignment is not supported */
-+			total += ext->save_len(inode);
-+		}
-+		pending >>= 1;
-+		++bit;
-+	}
-+	total += bit / 16 * sizeof(d16);
-+	return total;
-+}
-+
-+/* saves inode into stat-data.
-+   Installed as ->save() method of
-+   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+
-+   The low 16 bits of the inode's extension mask are written into the base
-+   part at *@area. The mask is then walked bit by bit: a set bit in
-+   positions 0..14 of a 16-bit group calls ->save() of the matching
-+   extension plugin, a set bit in position 15 writes the next 16 mask bits
-+   as a d16. *@area is advanced past everything written.
-+
-+   Returns 0 on success, or the first non-zero result of an extension's
-+   ->save() method, which stops the walk. */
-+/* was sd_save */
-+int save_static_sd(struct inode *inode /* object being processed */ ,
-+		   char **area /* where to save stat-data */ )
-+{
-+	int result;
-+	__u64 emask;
-+	int bit;
-+	reiser4_stat_data_base *sd_base;
-+
-+	assert("nikita-634", inode != NULL);
-+	assert("nikita-635", area != NULL);
-+
-+	result = 0;
-+	emask = reiser4_inode_data(inode)->extmask;
-+	sd_base = (reiser4_stat_data_base *) * area;
-+	put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), &sd_base->extmask);
-+
-+	*area += sizeof *sd_base;
-+	for (bit = 0; emask != 0; ++bit, emask >>= 1) {
-+		if (!(emask & 1))
-+			continue;
-+
-+		if ((bit + 1) % 16 != 0) {
-+			sd_ext_plugin *sdplug;
-+
-+			sdplug = sd_ext_plugin_by_id(bit);
-+			assert("nikita-636", sdplug != NULL);
-+			/* extension alignment is not supported yet */
-+			result = sdplug->save(inode, area);
-+			if (result)
-+				break;
-+		} else {
-+			/* last bit of a 16-bit group: emit next mask chunk */
-+			put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)),
-+				      (d16 *)(*area));
-+			*area += sizeof(d16);
-+		}
-+	}
-+	return result;
-+}
-+
-+/* stat-data extension handling functions. */
-+
-+/* Load the light-weight stat extension at *@area into @inode: mode, link
-+   count and size. A mode of S_IFREG | S_IFIFO has S_IFIFO stripped and the
-+   inode flagged REISER4_PART_MIXED. Advances *@area and shrinks *@len;
-+   returns 0, or the result of not_enough_space() if *@len is too small. */
-+static int present_lw_sd(struct inode *inode /* object being processed */ ,
-+			 char **area /* position in stat-data */ ,
-+			 int *len /* remaining length */ )
-+{
-+	reiser4_light_weight_stat *sd_lw;
-+
-+	if (*len < (int)sizeof(reiser4_light_weight_stat))
-+		return not_enough_space(inode, "lw sd");
-+
-+	sd_lw = (reiser4_light_weight_stat *) * area;
-+
-+	inode->i_mode = le16_to_cpu(get_unaligned(&sd_lw->mode));
-+	inode->i_nlink = le32_to_cpu(get_unaligned(&sd_lw->nlink));
-+	inode->i_size = le64_to_cpu(get_unaligned(&sd_lw->size));
-+	if ((inode->i_mode & S_IFMT) == (S_IFREG | S_IFIFO)) {
-+		inode->i_mode &= ~S_IFIFO;
-+		warning("", "partially converted file is encountered");
-+		reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+	}
-+	move_on(len, area, sizeof *sd_lw);
-+	return 0;
-+}
-+
-+/* Size of the light-weight stat extension; @inode is not consulted. */
-+static int save_len_lw_sd(struct inode *inode UNUSED_ARG /* object being
-+							   * processed */ )
-+{
-+	return (int)sizeof(reiser4_light_weight_stat);
-+}
-+
-+/* Store the light-weight stat extension of @inode at *@area and advance
-+   *@area past it. S_IFIFO is or-ed into the saved mode when the inode
-+   carries the REISER4_PART_MIXED flag. Always returns 0. */
-+static int save_lw_sd(struct inode *inode /* object being processed */ ,
-+		      char **area /* position in stat-data */ )
-+{
-+	reiser4_light_weight_stat *sd;
-+	mode_t delta;
-+
-+	assert("nikita-2705", inode != NULL);
-+	assert("nikita-2706", area != NULL);
-+	assert("nikita-2707", *area != NULL);
-+
-+	sd = (reiser4_light_weight_stat *) * area;
-+
-+	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
-+		delta = S_IFIFO;
-+	else
-+		delta = 0;
-+
-+	put_unaligned(cpu_to_le16(inode->i_mode | delta), &sd->mode);
-+	put_unaligned(cpu_to_le32(inode->i_nlink), &sd->nlink);
-+	put_unaligned(cpu_to_le64((__u64) inode->i_size), &sd->size);
-+
-+	*area += sizeof *sd;
-+	return 0;
-+}
-+
-+/* Load the unix stat extension at *@area into @inode: uid, gid, the
-+   seconds part of atime/mtime/ctime, and either the device number (block
-+   and character devices) or the byte count (everything else). Advances
-+   *@area and shrinks *@len; returns 0, or the result of not_enough_space()
-+   if *@len is too small. */
-+static int present_unix_sd(struct inode *inode /* object being processed */ ,
-+			   char **area /* position in stat-data */ ,
-+			   int *len /* remaining length */ )
-+{
-+	reiser4_unix_stat *sd;
-+
-+	assert("nikita-637", inode != NULL);
-+	assert("nikita-638", area != NULL);
-+	assert("nikita-639", *area != NULL);
-+	assert("nikita-640", len != NULL);
-+	assert("nikita-641", *len > 0);
-+
-+	if (*len < (int)sizeof(reiser4_unix_stat))
-+		return not_enough_space(inode, "unix sd");
-+
-+	sd = (reiser4_unix_stat *) * area;
-+
-+	inode->i_uid = le32_to_cpu(get_unaligned(&sd->uid));
-+	inode->i_gid = le32_to_cpu(get_unaligned(&sd->gid));
-+	inode->i_atime.tv_sec = le32_to_cpu(get_unaligned(&sd->atime));
-+	inode->i_mtime.tv_sec = le32_to_cpu(get_unaligned(&sd->mtime));
-+	inode->i_ctime.tv_sec = le32_to_cpu(get_unaligned(&sd->ctime));
-+	if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
-+		inode_set_bytes(inode, (loff_t) le64_to_cpu(get_unaligned(&sd->u.bytes)));
-+	else
-+		inode->i_rdev = le64_to_cpu(get_unaligned(&sd->u.rdev));
-+
-+	move_on(len, area, sizeof *sd);
-+	return 0;
-+}
-+
-+/* Fill in @inode when its stat-data has no unix extension: owner and group
-+   come from the super block defaults, all three timestamps are set to the
-+   same CURRENT_TIME value, and the byte count is set to i_size. Always
-+   returns 0. */
-+static int absent_unix_sd(struct inode *inode /* object being processed */ )
-+{
-+	inode->i_uid = get_super_private(inode->i_sb)->default_uid;
-+	inode->i_gid = get_super_private(inode->i_sb)->default_gid;
-+
-+	/* sample the clock once and propagate the same value */
-+	inode->i_ctime = CURRENT_TIME;
-+	inode->i_mtime = inode->i_ctime;
-+	inode->i_atime = inode->i_mtime;
-+
-+	inode_set_bytes(inode, inode->i_size);
-+
-+	/* mark inode as lightweight, so that caller (lookup_common) will
-+	   complete initialisation by copying [ug]id from a parent. */
-+	reiser4_inode_set_flag(inode, REISER4_LIGHT_WEIGHT);
-+	return 0;
-+}
-+
-+/* Size of the unix stat extension; @inode is not consulted. */
-+/* Audited by: green(2002.06.14) */
-+static int save_len_unix_sd(struct inode *inode UNUSED_ARG /* object being
-+							     * processed */ )
-+{
-+	return (int)sizeof(reiser4_unix_stat);
-+}
-+
-+/* Store the unix stat extension of @inode at *@area and advance *@area
-+   past it: uid, gid, the seconds part of the three timestamps, and either
-+   the device number (block and character devices) or the byte count
-+   (everything else). Always returns 0. */
-+static int save_unix_sd(struct inode *inode /* object being processed */ ,
-+			char **area /* position in stat-data */ )
-+{
-+	reiser4_unix_stat *sd;
-+
-+	assert("nikita-642", inode != NULL);
-+	assert("nikita-643", area != NULL);
-+	assert("nikita-644", *area != NULL);
-+
-+	sd = (reiser4_unix_stat *) * area;
-+
-+	put_unaligned(cpu_to_le32(inode->i_uid), &sd->uid);
-+	put_unaligned(cpu_to_le32(inode->i_gid), &sd->gid);
-+
-+	put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_sec), &sd->atime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_sec), &sd->mtime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_sec), &sd->ctime);
-+
-+	if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
-+		put_unaligned(cpu_to_le64((__u64) inode_get_bytes(inode)), &sd->u.bytes);
-+	else
-+		put_unaligned(cpu_to_le64(inode->i_rdev), &sd->u.rdev);
-+
-+	*area += sizeof *sd;
-+	return 0;
-+}
-+
-+static int
-+present_large_times_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ ,
-+ int *len /* remaining length */ )
-+{
-+ if (*len >= (int)sizeof(reiser4_large_times_stat)) {
-+ reiser4_large_times_stat *sd_lt;
-+
-+ sd_lt = (reiser4_large_times_stat *) * area;
-+
-+ inode->i_atime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->atime));
-+ inode->i_mtime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->mtime));
-+ inode->i_ctime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->ctime));
-+
-+ move_on(len, area, sizeof *sd_lt);
-+ return 0;
-+ } else
-+ return not_enough_space(inode, "large times sd");
-+}
-+
-+static int
-+save_len_large_times_sd(struct inode *inode UNUSED_ARG
-+ /* object being processed */ )
-+{
-+ return sizeof(reiser4_large_times_stat);
-+}
-+
-+static int
-+save_large_times_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ )
-+{
-+ reiser4_large_times_stat *sd;
-+
-+ assert("nikita-2817", inode != NULL);
-+ assert("nikita-2818", area != NULL);
-+ assert("nikita-2819", *area != NULL);
-+
-+ sd = (reiser4_large_times_stat *) * area;
-+
-+ put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_nsec), &sd->atime);
-+ put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_nsec), &sd->ctime);
-+ put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_nsec), &sd->mtime);
-+
-+ *area += sizeof *sd;
-+ return 0;
-+}
-+
-+/* symlink stat data extension */
-+
-+/* allocate memory for symlink target and attach it to inode->i_private */
-+static int
-+symlink_target_to_inode(struct inode *inode, const char *target, int len)
-+{
-+ assert("vs-845", inode->i_private == NULL);
-+ assert("vs-846", !reiser4_inode_get_flag(inode,
-+ REISER4_GENERIC_PTR_USED));
-+ /* FIXME-VS: this is prone to deadlock. Not more than other similar
-+ places, though */
-+ inode->i_private = kmalloc((size_t) len + 1,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!inode->i_private)
-+ return RETERR(-ENOMEM);
-+
-+ memcpy((char *)(inode->i_private), target, (size_t) len);
-+ ((char *)(inode->i_private))[len] = 0;
-+ reiser4_inode_set_flag(inode, REISER4_GENERIC_PTR_USED);
-+ return 0;
-+}
-+
-+/* this is called on read_inode. There is nothing to do actually, but some
-+ sanity checks */
-+static int present_symlink_sd(struct inode *inode, char **area, int *len)
-+{
-+ int result;
-+ int length;
-+ reiser4_symlink_stat *sd;
-+
-+ length = (int)inode->i_size;
-+ /*
-+ * *len is number of bytes in stat data item from *area to the end of
-+ * item. It must be not less than size of symlink + 1 for ending 0
-+ */
-+ if (length > *len)
-+ return not_enough_space(inode, "symlink");
-+
-+ if (*(*area + length) != 0) {
-+ warning("vs-840", "Symlink is not zero terminated");
-+ return RETERR(-EIO);
-+ }
-+
-+ sd = (reiser4_symlink_stat *) * area;
-+ result = symlink_target_to_inode(inode, sd->body, length);
-+
-+ move_on(len, area, length + 1);
-+ return result;
-+}
-+
-+static int save_len_symlink_sd(struct inode *inode)
-+{
-+ return inode->i_size + 1;
-+}
-+
-+/* this is called on create and update stat data. Do nothing on update but
-+ update @area */
-+static int save_symlink_sd(struct inode *inode, char **area)
-+{
-+ int result;
-+ int length;
-+ reiser4_symlink_stat *sd;
-+
-+ length = (int)inode->i_size;
-+ /* inode->i_size must be set already */
-+ assert("vs-841", length);
-+
-+ result = 0;
-+ sd = (reiser4_symlink_stat *) * area;
-+ if (!reiser4_inode_get_flag(inode, REISER4_GENERIC_PTR_USED)) {
-+ const char *target;
-+
-+ target = (const char *)(inode->i_private);
-+ inode->i_private = NULL;
-+
-+ result = symlink_target_to_inode(inode, target, length);
-+
-+ /* copy symlink to stat data */
-+ memcpy(sd->body, target, (size_t) length);
-+ (*area)[length] = 0;
-+ } else {
-+ /* there is nothing to do in update but move area */
-+ assert("vs-844",
-+ !memcmp(inode->i_private, sd->body,
-+ (size_t) length + 1));
-+ }
-+
-+ *area += (length + 1);
-+ return result;
-+}
-+
-+static int present_flags_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ ,
-+ int *len /* remaining length */ )
-+{
-+ assert("nikita-645", inode != NULL);
-+ assert("nikita-646", area != NULL);
-+ assert("nikita-647", *area != NULL);
-+ assert("nikita-648", len != NULL);
-+ assert("nikita-649", *len > 0);
-+
-+ if (*len >= (int)sizeof(reiser4_flags_stat)) {
-+ reiser4_flags_stat *sd;
-+
-+ sd = (reiser4_flags_stat *) * area;
-+ inode->i_flags = le32_to_cpu(get_unaligned(&sd->flags));
-+ move_on(len, area, sizeof *sd);
-+ return 0;
-+ } else
-+ return not_enough_space(inode, "generation and attrs");
-+}
-+
-+/* Audited by: green(2002.06.14) */
-+static int save_len_flags_sd(struct inode *inode UNUSED_ARG /* object being
-+ * processed */ )
-+{
-+ return sizeof(reiser4_flags_stat);
-+}
-+
-+static int save_flags_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ )
-+{
-+ reiser4_flags_stat *sd;
-+
-+ assert("nikita-650", inode != NULL);
-+ assert("nikita-651", area != NULL);
-+ assert("nikita-652", *area != NULL);
-+
-+ sd = (reiser4_flags_stat *) * area;
-+ put_unaligned(cpu_to_le32(inode->i_flags), &sd->flags);
-+ *area += sizeof *sd;
-+ return 0;
-+}
-+
-+static int absent_plugin_sd(struct inode *inode);
-+static int present_plugin_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ ,
-+ int *len /* remaining length */,
-+ int is_pset /* 1 if plugin set, 0 if heir set. */)
-+{
-+ reiser4_plugin_stat *sd;
-+ reiser4_plugin *plugin;
-+ reiser4_inode *info;
-+ int i;
-+ __u16 mask;
-+ int result;
-+ int num_of_plugins;
-+
-+ assert("nikita-653", inode != NULL);
-+ assert("nikita-654", area != NULL);
-+ assert("nikita-655", *area != NULL);
-+ assert("nikita-656", len != NULL);
-+ assert("nikita-657", *len > 0);
-+
-+ if (*len < (int)sizeof(reiser4_plugin_stat))
-+ return not_enough_space(inode, "plugin");
-+
-+ sd = (reiser4_plugin_stat *) * area;
-+ info = reiser4_inode_data(inode);
-+
-+ mask = 0;
-+ num_of_plugins = le16_to_cpu(get_unaligned(&sd->plugins_no));
-+ move_on(len, area, sizeof *sd);
-+ result = 0;
-+ for (i = 0; i < num_of_plugins; ++i) {
-+ reiser4_plugin_slot *slot;
-+ reiser4_plugin_type type;
-+ pset_member memb;
-+
-+ slot = (reiser4_plugin_slot *) * area;
-+ if (*len < (int)sizeof *slot)
-+ return not_enough_space(inode, "additional plugin");
-+
-+ memb = le16_to_cpu(get_unaligned(&slot->pset_memb));
-+ type = aset_member_to_type_unsafe(memb);
-+
-+ if (type == REISER4_PLUGIN_TYPES) {
-+ warning("nikita-3502",
-+ "wrong %s member (%i) for %llu", is_pset ?
-+ "pset" : "hset", memb,
-+ (unsigned long long)get_inode_oid(inode));
-+ return RETERR(-EINVAL);
-+ }
-+ plugin = plugin_by_disk_id(reiser4_tree_by_inode(inode),
-+ type, &slot->id);
-+ if (plugin == NULL)
-+ return unknown_plugin(le16_to_cpu(get_unaligned(&slot->id)), inode);
-+
-+ /* plugin is loaded into inode, mark this into inode's
-+ bitmask of loaded non-standard plugins */
-+ if (!(mask & (1 << memb))) {
-+ mask |= (1 << memb);
-+ } else {
-+ warning("nikita-658", "duplicate plugin for %llu",
-+ (unsigned long long)get_inode_oid(inode));
-+ return RETERR(-EINVAL);
-+ }
-+ move_on(len, area, sizeof *slot);
-+ /* load plugin data, if any */
-+ if (plugin->h.pops != NULL && plugin->h.pops->load)
-+ result = plugin->h.pops->load(inode, plugin, area, len);
-+ else
-+ result = aset_set_unsafe(is_pset ? &info->pset :
-+ &info->hset, memb, plugin);
-+ if (result)
-+ return result;
-+ }
-+ if (is_pset) {
-+ /* if object plugin wasn't loaded from stat-data, guess it by
-+ mode bits */
-+ plugin = file_plugin_to_plugin(inode_file_plugin(inode));
-+ if (plugin == NULL)
-+ result = absent_plugin_sd(inode);
-+ info->plugin_mask = mask;
-+ } else
-+ info->heir_mask = mask;
-+
-+ return result;
-+}
-+
-+static int present_pset_sd(struct inode *inode, char **area, int *len) {
-+ return present_plugin_sd(inode, area, len, 1 /* pset */);
-+}
-+
-+/* Determine object plugin for @inode based on i_mode.
-+
-+ Many objects in reiser4 file system are controlled by standard object
-+ plugins that emulate traditional unix objects: unix file, directory, symlink, fifo, and so on.
-+
-+ For such files we don't explicitly store plugin id in object stat
-+ data. Rather required plugin is guessed from mode bits, where file "type"
-+ is encoded (see stat(2)).
-+*/
-+static int
-+guess_plugin_by_mode(struct inode *inode /* object to guess plugins for */ )
-+{
-+ int fplug_id;
-+ int dplug_id;
-+ reiser4_inode *info;
-+
-+ assert("nikita-736", inode != NULL);
-+
-+ dplug_id = fplug_id = -1;
-+
-+ switch (inode->i_mode & S_IFMT) {
-+ case S_IFSOCK:
-+ case S_IFBLK:
-+ case S_IFCHR:
-+ case S_IFIFO:
-+ fplug_id = SPECIAL_FILE_PLUGIN_ID;
-+ break;
-+ case S_IFLNK:
-+ fplug_id = SYMLINK_FILE_PLUGIN_ID;
-+ break;
-+ case S_IFDIR:
-+ fplug_id = DIRECTORY_FILE_PLUGIN_ID;
-+ dplug_id = HASHED_DIR_PLUGIN_ID;
-+ break;
-+ default:
-+ warning("nikita-737", "wrong file mode: %o", inode->i_mode);
-+ return RETERR(-EIO);
-+ case S_IFREG:
-+ fplug_id = UNIX_FILE_PLUGIN_ID;
-+ break;
-+ }
-+ info = reiser4_inode_data(inode);
-+ set_plugin(&info->pset, PSET_FILE, (fplug_id >= 0) ?
-+ plugin_by_id(REISER4_FILE_PLUGIN_TYPE, fplug_id) : NULL);
-+ set_plugin(&info->pset, PSET_DIR, (dplug_id >= 0) ?
-+ plugin_by_id(REISER4_DIR_PLUGIN_TYPE, dplug_id) : NULL);
-+ return 0;
-+}
-+
-+/* Audited by: green(2002.06.14) */
-+static int absent_plugin_sd(struct inode *inode /* object being processed */ )
-+{
-+ int result;
-+
-+ assert("nikita-659", inode != NULL);
-+
-+ result = guess_plugin_by_mode(inode);
-+ /* if mode was wrong, guess_plugin_by_mode() returns "regular file",
-+ but setup_inode_ops() will call make_bad_inode().
-+ Another, more logical but bit more complex solution is to add
-+ "bad-file plugin". */
-+ /* FIXME-VS: activate was called here */
-+ return result;
-+}
-+
-+/* helper function for plugin_sd_save_len(): calculate how much space
-+ required to save state of given plugin */
-+/* Audited by: green(2002.06.14) */
-+static int len_for(reiser4_plugin * plugin /* plugin to save */ ,
-+ struct inode *inode /* object being processed */ ,
-+ pset_member memb,
-+ int len, int is_pset)
-+{
-+ reiser4_inode *info;
-+ assert("nikita-661", inode != NULL);
-+
-+ if (plugin == NULL)
-+ return len;
-+
-+ info = reiser4_inode_data(inode);
-+ if (is_pset ?
-+ info->plugin_mask & (1 << memb) :
-+ info->heir_mask & (1 << memb)) {
-+ len += sizeof(reiser4_plugin_slot);
-+ if (plugin->h.pops && plugin->h.pops->save_len != NULL) {
-+ /* non-standard plugin, call method */
-+ /* commented as it is incompatible with alignment
-+ * policy in save_plug() -edward */
-+ /* len = round_up(len, plugin->h.pops->alignment); */
-+ len += plugin->h.pops->save_len(inode, plugin);
-+ }
-+ }
-+ return len;
-+}
-+
-+/* calculate how much space is required to save state of all plugins,
-+ associated with inode */
-+static int save_len_plugin_sd(struct inode *inode /* object being processed */,
-+ int is_pset)
-+{
-+ int len;
-+ int last;
-+ reiser4_inode *state;
-+ pset_member memb;
-+
-+ assert("nikita-663", inode != NULL);
-+
-+ state = reiser4_inode_data(inode);
-+
-+ /* common case: no non-standard plugins */
-+ if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
-+ return 0;
-+ len = sizeof(reiser4_plugin_stat);
-+ last = PSET_LAST;
-+
-+ for (memb = 0; memb < last; ++memb) {
-+ len = len_for(aset_get(is_pset ? state->pset : state->hset, memb),
-+ inode, memb, len, is_pset);
-+ }
-+ assert("nikita-664", len > (int)sizeof(reiser4_plugin_stat));
-+ return len;
-+}
-+
-+static int save_len_pset_sd(struct inode *inode) {
-+ return save_len_plugin_sd(inode, 1 /* pset */);
-+}
-+
-+/* helper function for plugin_sd_save(): save plugin, associated with
-+ inode. */
-+static int save_plug(reiser4_plugin * plugin /* plugin to save */ ,
-+ struct inode *inode /* object being processed */ ,
-+ int memb /* what element of pset is saved */ ,
-+ char **area /* position in stat-data */ ,
-+ int *count /* incremented if plugin were actually saved. */,
-+ int is_pset /* 1 for plugin set, 0 for heir set */)
-+{
-+ reiser4_plugin_slot *slot;
-+ int fake_len;
-+ int result;
-+
-+ assert("nikita-665", inode != NULL);
-+ assert("nikita-666", area != NULL);
-+ assert("nikita-667", *area != NULL);
-+
-+ if (plugin == NULL)
-+ return 0;
-+
-+ if (is_pset ?
-+ !(reiser4_inode_data(inode)->plugin_mask & (1 << memb)) :
-+ !(reiser4_inode_data(inode)->heir_mask & (1 << memb)))
-+ return 0;
-+ slot = (reiser4_plugin_slot *) * area;
-+ put_unaligned(cpu_to_le16(memb), &slot->pset_memb);
-+ put_unaligned(cpu_to_le16(plugin->h.id), &slot->id);
-+ fake_len = (int)0xffff;
-+ move_on(&fake_len, area, sizeof *slot);
-+ ++*count;
-+ result = 0;
-+ if (plugin->h.pops != NULL) {
-+ if (plugin->h.pops->save != NULL)
-+ result = plugin->h.pops->save(inode, plugin, area);
-+ }
-+ return result;
-+}
-+
-+/* save state of all non-standard plugins associated with inode */
-+static int save_plugin_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */,
-+ int is_pset /* 1 for pset, 0 for hset */)
-+{
-+ int fake_len;
-+ int result = 0;
-+ int num_of_plugins;
-+ reiser4_plugin_stat *sd;
-+ reiser4_inode *state;
-+ pset_member memb;
-+
-+ assert("nikita-669", inode != NULL);
-+ assert("nikita-670", area != NULL);
-+ assert("nikita-671", *area != NULL);
-+
-+ state = reiser4_inode_data(inode);
-+ if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
-+ return 0;
-+ sd = (reiser4_plugin_stat *) * area;
-+ fake_len = (int)0xffff;
-+ move_on(&fake_len, area, sizeof *sd);
-+
-+ num_of_plugins = 0;
-+ for (memb = 0; memb < PSET_LAST; ++memb) {
-+ result = save_plug(aset_get(is_pset ? state->pset : state->hset,
-+ memb),
-+ inode, memb, area, &num_of_plugins, is_pset);
-+ if (result != 0)
-+ break;
-+ }
-+
-+ put_unaligned(cpu_to_le16((__u16)num_of_plugins), &sd->plugins_no);
-+ return result;
-+}
-+
-+static int save_pset_sd(struct inode *inode, char **area) {
-+ return save_plugin_sd(inode, area, 1 /* pset */);
-+}
-+
-+static int present_hset_sd(struct inode *inode, char **area, int *len) {
-+ return present_plugin_sd(inode, area, len, 0 /* hset */);
-+}
-+
-+static int save_len_hset_sd(struct inode *inode) {
-+ return save_len_plugin_sd(inode, 0 /* pset */);
-+}
-+
-+static int save_hset_sd(struct inode *inode, char **area) {
-+ return save_plugin_sd(inode, area, 0 /* hset */);
-+}
-+
-+/* helper function for crypto_sd_present(), crypto_sd_save.
-+ Allocates memory for crypto stat, keyid and attaches it to the inode */
-+static int extract_crypto_stat (struct inode * inode,
-+ reiser4_crypto_stat * sd)
-+{
-+ crypto_stat_t * info;
-+ assert("edward-11", !inode_crypto_stat(inode));
-+ assert("edward-1413",
-+ !reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED));
-+ /* create and attach a crypto-stat without secret key loaded */
-+ info = reiser4_alloc_crypto_stat(inode);
-+ if (IS_ERR(info))
-+ return PTR_ERR(info);
-+ info->keysize = le16_to_cpu(get_unaligned(&sd->keysize));
-+ memcpy(info->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize);
-+ reiser4_attach_crypto_stat(inode, info);
-+ reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
-+ return 0;
-+}
-+
-+/* crypto stat-data extension */
-+
-+static int present_crypto_sd(struct inode *inode, char **area, int *len)
-+{
-+ int result;
-+ reiser4_crypto_stat *sd;
-+ digest_plugin *dplug = inode_digest_plugin(inode);
-+
-+ assert("edward-06", dplug != NULL);
-+ assert("edward-684", dplug->fipsize);
-+ assert("edward-07", area != NULL);
-+ assert("edward-08", *area != NULL);
-+ assert("edward-09", len != NULL);
-+ assert("edward-10", *len > 0);
-+
-+ if (*len < (int)sizeof(reiser4_crypto_stat)) {
-+ return not_enough_space(inode, "crypto-sd");
-+ }
-+ /* *len is number of bytes in stat data item from *area to the end of
-+ item. It must be not less than size of this extension */
-+ assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len);
-+
-+ sd = (reiser4_crypto_stat *) * area;
-+ result = extract_crypto_stat(inode, sd);
-+ move_on(len, area, sizeof(*sd) + dplug->fipsize);
-+
-+ return result;
-+}
-+
-+static int save_len_crypto_sd(struct inode *inode)
-+{
-+ return sizeof(reiser4_crypto_stat) +
-+ inode_digest_plugin(inode)->fipsize;
-+}
-+
-+static int save_crypto_sd(struct inode *inode, char **area)
-+{
-+ int result = 0;
-+ reiser4_crypto_stat *sd;
-+ crypto_stat_t * info = inode_crypto_stat(inode);
-+ digest_plugin *dplug = inode_digest_plugin(inode);
-+
-+ assert("edward-12", dplug != NULL);
-+ assert("edward-13", area != NULL);
-+ assert("edward-14", *area != NULL);
-+ assert("edward-15", info != NULL);
-+ assert("edward-1414", info->keyid != NULL);
-+ assert("edward-1415", info->keysize != 0);
-+ assert("edward-76", reiser4_inode_data(inode) != NULL);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) {
-+ /* file is just created */
-+ sd = (reiser4_crypto_stat *) *area;
-+ /* copy everything but private key to the disk stat-data */
-+ put_unaligned(cpu_to_le16(info->keysize), &sd->keysize);
-+ memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize);
-+ reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
-+ }
-+ *area += (sizeof(*sd) + dplug->fipsize);
-+ return result;
-+}
-+
-+static int eio(struct inode *inode, char **area, int *len)
-+{
-+ return RETERR(-EIO);
-+}
-+
-+sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION] = {
-+ [LIGHT_WEIGHT_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = LIGHT_WEIGHT_STAT,
-+ .pops = NULL,
-+ .label = "light-weight sd",
-+ .desc = "sd for light-weight files",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_lw_sd,
-+ .absent = NULL,
-+ .save_len = save_len_lw_sd,
-+ .save = save_lw_sd,
-+ .alignment = 8
-+ },
-+ [UNIX_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = UNIX_STAT,
-+ .pops = NULL,
-+ .label = "unix-sd",
-+ .desc = "unix stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_unix_sd,
-+ .absent = absent_unix_sd,
-+ .save_len = save_len_unix_sd,
-+ .save = save_unix_sd,
-+ .alignment = 8
-+ },
-+ [LARGE_TIMES_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = LARGE_TIMES_STAT,
-+ .pops = NULL,
-+ .label = "64time-sd",
-+ .desc = "nanosecond resolution for times",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_large_times_sd,
-+ .absent = NULL,
-+ .save_len = save_len_large_times_sd,
-+ .save = save_large_times_sd,
-+ .alignment = 8
-+ },
-+ [SYMLINK_STAT] = {
-+ /* stat data of symlink has this extension */
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = SYMLINK_STAT,
-+ .pops = NULL,
-+ .label = "symlink-sd",
-+ .desc =
-+ "stat data is appended with symlink name",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_symlink_sd,
-+ .absent = NULL,
-+ .save_len = save_len_symlink_sd,
-+ .save = save_symlink_sd,
-+ .alignment = 8
-+ },
-+ [PLUGIN_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = PLUGIN_STAT,
-+ .pops = NULL,
-+ .label = "plugin-sd",
-+ .desc = "plugin stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_pset_sd,
-+ .absent = absent_plugin_sd,
-+ .save_len = save_len_pset_sd,
-+ .save = save_pset_sd,
-+ .alignment = 8
-+ },
-+ [HEIR_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = HEIR_STAT,
-+ .pops = NULL,
-+ .label = "heir-plugin-sd",
-+ .desc = "heir plugin stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_hset_sd,
-+ .absent = NULL,
-+ .save_len = save_len_hset_sd,
-+ .save = save_hset_sd,
-+ .alignment = 8
-+ },
-+ [FLAGS_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = FLAGS_STAT,
-+ .pops = NULL,
-+ .label = "flags-sd",
-+ .desc = "inode bit flags",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = present_flags_sd,
-+ .absent = NULL,
-+ .save_len = save_len_flags_sd,
-+ .save = save_flags_sd,
-+ .alignment = 8
-+ },
-+ [CAPABILITIES_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = CAPABILITIES_STAT,
-+ .pops = NULL,
-+ .label = "capabilities-sd",
-+ .desc = "capabilities",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = eio,
-+ .absent = NULL,
-+ .save_len = save_len_flags_sd,
-+ .save = save_flags_sd,
-+ .alignment = 8
-+ },
-+ [CRYPTO_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = CRYPTO_STAT,
-+ .pops = NULL,
-+ .label = "crypto-sd",
-+ .desc = "secret key size and id",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = present_crypto_sd,
-+ .absent = NULL,
-+ .save_len = save_len_crypto_sd,
-+ .save = save_crypto_sd,
-+ .alignment = 8
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/static_stat.h linux-2.6.20/fs/reiser4/plugin/item/static_stat.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/static_stat.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/static_stat.h 2007-05-06 14:50:43.823014469 +0400
-@@ -0,0 +1,224 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This describes the static_stat item, used to hold all information needed by the stat() syscall.
-+
-+In the case where each file has not less than the fields needed by the
-+stat() syscall, it is more compact to store those fields in this
-+struct.
-+
-+If this item does not exist, then all stats are dynamically resolved.
-+At the moment, we either resolve all stats dynamically or all of them
-+statically. If you think this is not fully optimal, and the rest of
-+reiser4 is working, then fix it...:-)
-+
-+*/
-+
-+#if !defined( __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ )
-+#define __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+
-+/* Stat data layout: goals and implementation.
-+
-+ We want to be able to have lightweight files which have complete flexibility in what semantic metadata is attached to
-+ them, including not having semantic metadata attached to them.
-+
-+ There is one problem with doing that, which is that if in fact you have exactly the same metadata for most files you
-+ want to store, then it takes more space to store that metadata in a dynamically sized structure than in a statically
-+ sized structure because the statically sized structure knows without recording it what the names and lengths of the
-+ attributes are.
-+
-+ This leads to a natural compromise, which is to special case those files which have simply the standard unix file
-+ attributes, and only employ the full dynamic stat data mechanism for those files that differ from the standard unix
-+ file in their use of file attributes.
-+
-+ Yet this compromise deserves to be compromised a little.
-+
-+ We accommodate the case where you have no more than the standard unix file attributes by using an "extension
-+ bitmask": each bit in it indicates presence or absence of or particular stat data extension (see sd_ext_bits enum).
-+
-+ If the first bit of the extension bitmask bit is 0, we have light-weight file whose attributes are either inherited
-+ from parent directory (as uid, gid) or initialised to some sane values.
-+
-+ To capitalize on existing code infrastructure, extensions are
-+ implemented as plugins of type REISER4_SD_EXT_PLUGIN_TYPE.
-+ Each stat-data extension plugin implements four methods:
-+
-+ ->present() called by sd_load() when this extension is found in stat-data
-+ ->absent() called by sd_load() when this extension is not found in stat-data
-+ ->save_len() called by sd_len() to calculate total length of stat-data
-+ ->save() called by sd_save() to store extension data into stat-data
-+
-+ Implementation is in fs/reiser4/plugin/item/static_stat.c
-+*/
-+
-+/* stat-data extension. Please order this by presumed frequency of use */
-+typedef enum {
-+ /* support for light-weight files */
-+ LIGHT_WEIGHT_STAT,
-+ /* data required to implement unix stat(2) call. Layout is in
-+ reiser4_unix_stat. If this is not present, file is light-weight */
-+ UNIX_STAT,
-+ /* this contains additional set of 32bit [anc]time fields to implement
-+ nanosecond resolution. Layout is in reiser4_large_times_stat. Usage
-+ if this extension is governed by 32bittimes mount option. */
-+ LARGE_TIMES_STAT,
-+ /* stat data has link name included */
-+ SYMLINK_STAT,
-+ /* on-disk slots of non-standard plugins for main plugin table
-+ (@reiser4_inode->pset), that is, plugins that cannot be deduced
-+ from file mode bits), for example, aggregation, interpolation etc. */
-+ PLUGIN_STAT,
-+ /* this extension contains persistent inode flags. These flags are
-+ single bits: immutable, append, only, etc. Layout is in
-+ reiser4_flags_stat. */
-+ FLAGS_STAT,
-+ /* this extension contains capabilities sets, associated with this
-+ file. Layout is in reiser4_capabilities_stat */
-+ CAPABILITIES_STAT,
-+ /* this extension contains size and public id of the secret key.
-+ Layout is in reiser4_crypto_stat */
-+ CRYPTO_STAT,
-+ /* on-disk slots of non-default plugins for inheritance, which
-+ are extracted to special plugin table (@reiser4_inode->hset).
-+ By default, children of the object will inherit plugins from
-+ its main plugin table (pset). */
-+ HEIR_STAT,
-+ LAST_SD_EXTENSION,
-+ /*
-+ * init_inode_static_sd() iterates over extension mask until all
-+ * non-zero bits are processed. This means, that neither ->present(),
-+ * nor ->absent() methods will be called for stat-data extensions that
-+ * go after last present extension. But some basic extensions, we want
-+ * either ->absent() or ->present() method to be called, because these
-+ * extensions set up something in inode even when they are not
-+ * present. This is what LAST_IMPORTANT_SD_EXTENSION is for: for all
-+ * extensions before and including LAST_IMPORTANT_SD_EXTENSION either
-+ * ->present(), or ->absent() method will be called, independently of
-+ * what other extensions are present.
-+ */
-+ LAST_IMPORTANT_SD_EXTENSION = PLUGIN_STAT
-+} sd_ext_bits;
-+
-+/* minimal stat-data. This allows to support light-weight files. */
-+typedef struct reiser4_stat_data_base {
-+ /* 0 */ __le16 extmask;
-+ /* 2 */
-+} PACKED reiser4_stat_data_base;
-+
-+typedef struct reiser4_light_weight_stat {
-+ /* 0 */ __le16 mode;
-+ /* 2 */ __le32 nlink;
-+ /* 6 */ __le64 size;
-+ /* size in bytes */
-+ /* 14 */
-+} PACKED reiser4_light_weight_stat;
-+
-+typedef struct reiser4_unix_stat {
-+ /* owner id */
-+ /* 0 */ __le32 uid;
-+ /* group id */
-+ /* 4 */ __le32 gid;
-+ /* access time */
-+ /* 8 */ __le32 atime;
-+ /* modification time */
-+ /* 12 */ __le32 mtime;
-+ /* change time */
-+ /* 16 */ __le32 ctime;
-+ union {
-+ /* minor:major for device files */
-+ /* 20 */ __le64 rdev;
-+ /* bytes used by file */
-+ /* 20 */ __le64 bytes;
-+ } u;
-+ /* 28 */
-+} PACKED reiser4_unix_stat;
-+
-+/* symlink stored as part of inode */
-+typedef struct reiser4_symlink_stat {
-+ char body[0];
-+} PACKED reiser4_symlink_stat;
-+
-+typedef struct reiser4_plugin_slot {
-+ /* 0 */ __le16 pset_memb;
-+ /* 2 */ __le16 id;
-+ /* 4 *//* here plugin stores its persistent state */
-+} PACKED reiser4_plugin_slot;
-+
-+/* stat-data extension for files with non-standard plugin. */
-+typedef struct reiser4_plugin_stat {
-+ /* number of additional plugins, associated with this object */
-+ /* 0 */ __le16 plugins_no;
-+ /* 2 */ reiser4_plugin_slot slot[0];
-+ /* 2 */
-+} PACKED reiser4_plugin_stat;
-+
-+/* stat-data extension for inode flags. Currently it is just fixed-width 32
-+ * bit mask. If need arise, this can be replaced with variable width
-+ * bitmask. */
-+typedef struct reiser4_flags_stat {
-+ /* 0 */ __le32 flags;
-+ /* 4 */
-+} PACKED reiser4_flags_stat;
-+
-+typedef struct reiser4_capabilities_stat {
-+ /* 0 */ __le32 effective;
-+ /* 8 */ __le32 permitted;
-+ /* 16 */
-+} PACKED reiser4_capabilities_stat;
-+
-+typedef struct reiser4_cluster_stat {
-+/* this defines cluster size (an attribute of cryptcompress objects) as PAGE_SIZE << cluster shift */
-+ /* 0 */ d8 cluster_shift;
-+ /* 1 */
-+} PACKED reiser4_cluster_stat;
-+
-+typedef struct reiser4_crypto_stat {
-+ /* secret key size, bits */
-+ /* 0 */ d16 keysize;
-+ /* secret key id */
-+ /* 2 */ d8 keyid[0];
-+ /* 2 */
-+} PACKED reiser4_crypto_stat;
-+
-+typedef struct reiser4_large_times_stat {
-+ /* access time */
-+ /* 0 */ d32 atime;
-+ /* modification time */
-+ /* 4 */ d32 mtime;
-+ /* change time */
-+ /* 8 */ d32 ctime;
-+ /* 12 */
-+} PACKED reiser4_large_times_stat;
-+
-+/* this structure is filled by sd_item_stat */
-+typedef struct sd_stat {
-+ int dirs;
-+ int files;
-+ int others;
-+} sd_stat;
-+
-+/* plugin->item.common.* */
-+extern void print_sd(const char *prefix, coord_t * coord);
-+extern void item_stat_static_sd(const coord_t * coord, void *vp);
-+
-+/* plugin->item.s.sd.* */
-+extern int init_inode_static_sd(struct inode *inode, char *sd, int len);
-+extern int save_len_static_sd(struct inode *inode);
-+extern int save_static_sd(struct inode *inode, char **area);
-+
-+/* __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/tail.c linux-2.6.20/fs/reiser4/plugin/item/tail.c
---- linux-2.6.20.orig/fs/reiser4/plugin/item/tail.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/tail.c 2007-05-06 14:50:43.823014469 +0400
-@@ -0,0 +1,812 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+#include "../../vfs_ops.h"
-+
-+#include <linux/quotaops.h>
-+#include <asm/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/writeback.h>
-+
-+/* plugin->u.item.b.max_key_inside */
-+reiser4_key *max_key_inside_tail(const coord_t *coord, reiser4_key *key)
-+{
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, get_key_offset(reiser4_max_key()));
-+ return key;
-+}
-+
-+/* plugin->u.item.b.can_contain_key */
-+int can_contain_key_tail(const coord_t *coord, const reiser4_key *key,
-+ const reiser4_item_data *data)
-+{
-+ reiser4_key item_key;
-+
-+ if (item_plugin_by_coord(coord) != data->iplug)
-+ return 0;
-+
-+ item_key_by_coord(coord, &item_key);
-+ if (get_key_locality(key) != get_key_locality(&item_key) ||
-+ get_key_objectid(key) != get_key_objectid(&item_key))
-+ return 0;
-+
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.mergeable
-+ first item is of tail type */
-+/* Audited by: green(2002.06.14) */
-+int mergeable_tail(const coord_t *p1, const coord_t *p2)
-+{
-+ reiser4_key key1, key2;
-+
-+ assert("vs-535", plugin_of_group(item_plugin_by_coord(p1),
-+ UNIX_FILE_METADATA_ITEM_TYPE));
-+ assert("vs-365", item_id_by_coord(p1) == FORMATTING_ID);
-+
-+ if (item_id_by_coord(p2) != FORMATTING_ID) {
-+ /* second item is of another type */
-+ return 0;
-+ }
-+
-+ item_key_by_coord(p1, &key1);
-+ item_key_by_coord(p2, &key2);
-+ if (get_key_locality(&key1) != get_key_locality(&key2) ||
-+ get_key_objectid(&key1) != get_key_objectid(&key2)
-+ || get_key_type(&key1) != get_key_type(&key2)) {
-+ /* items of different objects */
-+ return 0;
-+ }
-+ if (get_key_offset(&key1) + nr_units_tail(p1) != get_key_offset(&key2)) {
-+ /* not adjacent items */
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.print
-+ plugin->u.item.b.check */
-+
-+/* plugin->u.item.b.nr_units */
-+pos_in_node_t nr_units_tail(const coord_t * coord)
-+{
-+ return item_length_by_coord(coord);
-+}
-+
-+/* plugin->u.item.b.lookup */
-+lookup_result
-+lookup_tail(const reiser4_key * key, lookup_bias bias, coord_t * coord)
-+{
-+ reiser4_key item_key;
-+ __u64 lookuped, offset;
-+ unsigned nr_units;
-+
-+ item_key_by_coord(coord, &item_key);
-+ offset = get_key_offset(item_key_by_coord(coord, &item_key));
-+ nr_units = nr_units_tail(coord);
-+
-+ /* key we are looking for must be greater than key of item @coord */
-+ assert("vs-416", keygt(key, &item_key));
-+
-+ /* offset we are looking for */
-+ lookuped = get_key_offset(key);
-+
-+ if (lookuped >= offset && lookuped < offset + nr_units) {
-+ /* byte we are looking for is in this item */
-+ coord->unit_pos = lookuped - offset;
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ }
-+
-+ /* set coord after last unit */
-+ coord->unit_pos = nr_units - 1;
-+ coord->between = AFTER_UNIT;
-+ return bias ==
-+ FIND_MAX_NOT_MORE_THAN ? CBK_COORD_FOUND : CBK_COORD_NOTFOUND;
-+}
-+
-+/* plugin->u.item.b.paste */
-+int
-+paste_tail(coord_t *coord, reiser4_item_data *data,
-+ carry_plugin_info *info UNUSED_ARG)
-+{
-+ unsigned old_item_length;
-+ char *item;
-+
-+ /* length the item had before resizing has been performed */
-+ old_item_length = item_length_by_coord(coord) - data->length;
-+
-+ /* tail items never get pasted in the middle */
-+ assert("vs-363",
-+ (coord->unit_pos == 0 && coord->between == BEFORE_UNIT) ||
-+ (coord->unit_pos == old_item_length - 1 &&
-+ coord->between == AFTER_UNIT) ||
-+ (coord->unit_pos == 0 && old_item_length == 0
-+ && coord->between == AT_UNIT));
-+
-+ item = item_body_by_coord(coord);
-+ if (coord->unit_pos == 0)
-+ /* make space for pasted data when pasting at the beginning of
-+ the item */
-+ memmove(item + data->length, item, old_item_length);
-+
-+ if (coord->between == AFTER_UNIT)
-+ coord->unit_pos++;
-+
-+ if (data->data) {
-+ assert("vs-554", data->user == 0 || data->user == 1);
-+ if (data->user) {
-+ assert("nikita-3035", reiser4_schedulable());
-+ /* copy from user space */
-+ if (__copy_from_user(item + coord->unit_pos,
-+ (const char __user *)data->data,
-+ (unsigned)data->length))
-+ return RETERR(-EFAULT);
-+ } else
-+ /* copy from kernel space */
-+ memcpy(item + coord->unit_pos, data->data,
-+ (unsigned)data->length);
-+ } else {
-+ memset(item + coord->unit_pos, 0, (unsigned)data->length);
-+ }
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.fast_paste */
-+
-+/* plugin->u.item.b.can_shift
-+ number of units is returned via return value, number of bytes via @size. For
-+ tail items they coincide */
-+int
-+can_shift_tail(unsigned free_space, coord_t * source UNUSED_ARG,
-+ znode * target UNUSED_ARG, shift_direction direction UNUSED_ARG,
-+ unsigned *size, unsigned want)
-+{
-+ /* make sure that that we do not want to shift more than we have */
-+ assert("vs-364", want > 0
-+ && want <= (unsigned)item_length_by_coord(source));
-+
-+ *size = min(want, free_space);
-+ return *size;
-+}
-+
-+/* plugin->u.item.b.copy_units */
-+void
-+copy_units_tail(coord_t * target, coord_t * source,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space,
-+ unsigned free_space UNUSED_ARG)
-+{
-+ /* make sure that item @target is expanded already */
-+ assert("vs-366", (unsigned)item_length_by_coord(target) >= count);
-+ assert("vs-370", free_space >= count);
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ /* append item @target with @count first bytes of @source */
-+ assert("vs-365", from == 0);
-+
-+ memcpy((char *)item_body_by_coord(target) +
-+ item_length_by_coord(target) - count,
-+ (char *)item_body_by_coord(source), count);
-+ } else {
-+ /* target item is moved to right already */
-+ reiser4_key key;
-+
-+ assert("vs-367",
-+ (unsigned)item_length_by_coord(source) == from + count);
-+
-+ memcpy((char *)item_body_by_coord(target),
-+ (char *)item_body_by_coord(source) + from, count);
-+
-+ /* new units are inserted before first unit in an item,
-+ therefore, we have to update item key */
-+ item_key_by_coord(source, &key);
-+ set_key_offset(&key, get_key_offset(&key) + from);
-+
-+ node_plugin_by_node(target->node)->update_item_key(target, &key,
-+ NULL /*info */);
-+ }
-+}
-+
-+/* plugin->u.item.b.create_hook */
-+
-+/* item_plugin->b.kill_hook
-+ this is called when @count units starting from @from-th one are going to be removed
-+ */
-+int
-+kill_hook_tail(const coord_t * coord, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *kdata)
-+{
-+ reiser4_key key;
-+ loff_t start, end;
-+
-+ assert("vs-1577", kdata);
-+ assert("vs-1579", kdata->inode);
-+
-+ item_key_by_coord(coord, &key);
-+ start = get_key_offset(&key) + from;
-+ end = start + count;
-+ fake_kill_hook_tail(kdata->inode, start, end, kdata->params.truncate);
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.shift_hook */
-+
-+/* helper for kill_units_tail and cut_units_tail */
-+static int
-+do_cut_or_kill(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ pos_in_node_t count;
-+
-+ /* this method is only called to remove part of item */
-+ assert("vs-374", (to - from + 1) < item_length_by_coord(coord));
-+ /* tails items are never cut from the middle of an item */
-+ assert("vs-396", ergo(from != 0, to == coord_last_unit_pos(coord)));
-+ assert("vs-1558", ergo(from == 0, to < coord_last_unit_pos(coord)));
-+
-+ count = to - from + 1;
-+
-+ if (smallest_removed) {
-+ /* store smallest key removed */
-+ item_key_by_coord(coord, smallest_removed);
-+ set_key_offset(smallest_removed,
-+ get_key_offset(smallest_removed) + from);
-+ }
-+ if (new_first) {
-+ /* head of item is cut */
-+ assert("vs-1529", from == 0);
-+
-+ item_key_by_coord(coord, new_first);
-+ set_key_offset(new_first,
-+ get_key_offset(new_first) + from + count);
-+ }
-+
-+ if (REISER4_DEBUG)
-+ memset((char *)item_body_by_coord(coord) + from, 0, count);
-+ return count;
-+}
-+
-+/* plugin->u.item.b.cut_units */
-+int
-+cut_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *cdata UNUSED_ARG,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.kill_units */
-+int
-+kill_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ kill_hook_tail(coord, from, to - from + 1, kdata);
-+ return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.unit_key */
-+reiser4_key *unit_key_tail(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-375", coord_is_existing_unit(coord));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, (get_key_offset(key) + coord->unit_pos));
-+
-+ return key;
-+}
-+
-+/* plugin->u.item.b.estimate
-+ plugin->u.item.b.item_data_by_flow */
-+
-+/* tail redpage function. It is called from readpage_tail(). */
-+static int do_readpage_tail(uf_coord_t *uf_coord, struct page *page)
-+{
-+ tap_t tap;
-+ int result;
-+ coord_t coord;
-+ lock_handle lh;
-+ int count, mapped;
-+ struct inode *inode;
-+ char *pagedata;
-+
-+ /* saving passed coord in order to do not move it by tap. */
-+ init_lh(&lh);
-+ copy_lh(&lh, uf_coord->lh);
-+ inode = page->mapping->host;
-+ coord_dup(&coord, &uf_coord->coord);
-+
-+ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+ if ((result = reiser4_tap_load(&tap)))
-+ goto out_tap_done;
-+
-+ /* lookup until page is filled up. */
-+ for (mapped = 0; mapped < PAGE_CACHE_SIZE; ) {
-+ /* number of bytes to be copied to page */
-+ count = item_length_by_coord(&coord) - coord.unit_pos;
-+ if (count > PAGE_CACHE_SIZE - mapped)
-+ count = PAGE_CACHE_SIZE - mapped;
-+
-+ /* attach @page to address space and get data address */
-+ pagedata = kmap_atomic(page, KM_USER0);
-+
-+ /* copy tail item to page */
-+ memcpy(pagedata + mapped,
-+ ((char *)item_body_by_coord(&coord) + coord.unit_pos),
-+ count);
-+ mapped += count;
-+
-+ flush_dcache_page(page);
-+
-+ /* dettach page from address space */
-+ kunmap_atomic(pagedata, KM_USER0);
-+
-+ /* Getting next tail item. */
-+ if (mapped < PAGE_CACHE_SIZE) {
-+ /*
-+ * unlock page in order to avoid keep it locked
-+ * during tree lookup, which takes long term locks
-+ */
-+ unlock_page(page);
-+
-+ /* getting right neighbour. */
-+ result = go_dir_el(&tap, RIGHT_SIDE, 0);
-+
-+ /* lock page back */
-+ lock_page(page);
-+ if (PageUptodate(page)) {
-+ /*
-+ * another thread read the page, we have
-+ * nothing to do
-+ */
-+ result = 0;
-+ goto out_unlock_page;
-+ }
-+
-+ if (result) {
-+ if (result == -E_NO_NEIGHBOR) {
-+ /*
-+ * rigth neighbor is not a formatted
-+ * node
-+ */
-+ result = 0;
-+ goto done;
-+ } else {
-+ goto out_tap_relse;
-+ }
-+ } else {
-+ if (!inode_file_plugin(inode)->
-+ owns_item(inode, &coord)) {
-+ /* item of another file is found */
-+ result = 0;
-+ goto done;
-+ }
-+ }
-+ }
-+ }
-+
-+ done:
-+ if (mapped != PAGE_CACHE_SIZE) {
-+ pagedata = kmap_atomic(page, KM_USER0);
-+ memset(pagedata + mapped, 0, PAGE_CACHE_SIZE - mapped);
-+ flush_dcache_page(page);
-+ kunmap_atomic(pagedata, KM_USER0);
-+ }
-+ SetPageUptodate(page);
-+ out_unlock_page:
-+ unlock_page(page);
-+ out_tap_relse:
-+ reiser4_tap_relse(&tap);
-+ out_tap_done:
-+ reiser4_tap_done(&tap);
-+ return result;
-+}
-+
-+/*
-+ plugin->s.file.readpage
-+ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->readpage_tail
-+ or
-+ filemap_nopage->reiser4_readpage->readpage_unix_file->->readpage_tail
-+
-+ At the beginning: coord->node is read locked, zloaded, page is locked, coord is set to existing unit inside of tail
-+ item. */
-+int readpage_tail(void *vp, struct page *page)
-+{
-+ uf_coord_t *uf_coord = vp;
-+ ON_DEBUG(coord_t * coord = &uf_coord->coord);
-+ ON_DEBUG(reiser4_key key);
-+
-+ assert("umka-2515", PageLocked(page));
-+ assert("umka-2516", !PageUptodate(page));
-+ assert("umka-2517", !jprivate(page) && !PagePrivate(page));
-+ assert("umka-2518", page->mapping && page->mapping->host);
-+
-+ assert("umka-2519", znode_is_loaded(coord->node));
-+ assert("umka-2520", item_is_tail(coord));
-+ assert("umka-2521", coord_is_existing_unit(coord));
-+ assert("umka-2522", znode_is_rlocked(coord->node));
-+ assert("umka-2523",
-+ page->mapping->host->i_ino ==
-+ get_key_objectid(item_key_by_coord(coord, &key)));
-+
-+ return do_readpage_tail(uf_coord, page);
-+}
-+
-+/**
-+ * overwrite_tail
-+ * @flow:
-+ * @coord:
-+ *
-+ * Overwrites tail item or its part by user data. Returns number of bytes
-+ * written or error code.
-+ */
-+static int overwrite_tail(flow_t *flow, coord_t *coord)
-+{
-+ unsigned count;
-+
-+ assert("vs-570", flow->user == 1);
-+ assert("vs-946", flow->data);
-+ assert("vs-947", coord_is_existing_unit(coord));
-+ assert("vs-948", znode_is_write_locked(coord->node));
-+ assert("nikita-3036", reiser4_schedulable());
-+
-+ count = item_length_by_coord(coord) - coord->unit_pos;
-+ if (count > flow->length)
-+ count = flow->length;
-+
-+ if (__copy_from_user((char *)item_body_by_coord(coord) + coord->unit_pos,
-+ (const char __user *)flow->data, count))
-+ return RETERR(-EFAULT);
-+
-+ znode_make_dirty(coord->node);
-+ return count;
-+}
-+
-+/**
-+ * insert_first_tail
-+ * @inode:
-+ * @flow:
-+ * @coord:
-+ * @lh:
-+ *
-+ * Returns number of bytes written or error code.
-+ */
-+static ssize_t insert_first_tail(struct inode *inode, flow_t *flow,
-+ coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ loff_t to_write;
-+ unix_file_info_t *uf_info;
-+
-+ if (get_key_offset(&flow->key) != 0) {
-+ /*
-+ * file is empty and we have to write not to the beginning of
-+ * file. Create a hole at the beginning of file. On success
-+ * insert_flow returns 0 as number of written bytes which is
-+ * what we have to return on padding a file with holes
-+ */
-+ flow->data = NULL;
-+ flow->length = get_key_offset(&flow->key);
-+ set_key_offset(&flow->key, 0);
-+ /*
-+ * holes in files built of tails are stored just like if there
-+ * were real data which are all zeros. Therefore we have to
-+ * allocate quota here as well
-+ */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ /*
-+ * first item insertion is only possible when writing to empty
-+ * file or performing tail conversion
-+ */
-+ assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
-+ (reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED) &&
-+ reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV))));
-+ /* if file was empty - update its state */
-+ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ return result;
-+ }
-+
-+ /* check quota before appending data */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+
-+ to_write = flow->length;
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return (to_write - flow->length) ? (to_write - flow->length) : result;
-+}
-+
-+/**
-+ * append_tail
-+ * @inode:
-+ * @flow:
-+ * @coord:
-+ * @lh:
-+ *
-+ * Returns number of bytes written or error code.
-+ */
-+static ssize_t append_tail(struct inode *inode,
-+ flow_t *flow, coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ reiser4_key append_key;
-+ loff_t to_write;
-+
-+ if (!keyeq(&flow->key, append_key_tail(coord, &append_key))) {
-+ flow->data = NULL;
-+ flow->length = get_key_offset(&flow->key) - get_key_offset(&append_key);
-+ set_key_offset(&flow->key, get_key_offset(&append_key));
-+ /*
-+ * holes in files built of tails are stored just like if there
-+ * were real data which are all zeros. Therefore we have to
-+ * allocate quota here as well
-+ */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return result;
-+ }
-+
-+ /* check quota before appending data */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+
-+ to_write = flow->length;
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return (to_write - flow->length) ? (to_write - flow->length) : result;
-+}
-+
-+/**
-+ * write_tail_reserve_space - reserve space for tail write operation
-+ * @inode:
-+ *
-+ * Estimates and reserves space which may be required for writing one flow to a
-+ * file
-+ */
-+static int write_extent_reserve_space(struct inode *inode)
-+{
-+ __u64 count;
-+ reiser4_tree *tree;
-+
-+ /*
-+ * to write one flow to a file by tails we have to reserve disk space for:
-+
-+ * 1. find_file_item may have to insert empty node to the tree (empty
-+ * leaf node between two extent items). This requires 1 block and
-+ * number of blocks which are necessary to perform insertion of an
-+ * internal item into twig level.
-+ *
-+ * 2. flow insertion
-+ *
-+ * 3. stat data update
-+ */
-+ tree = reiser4_tree_by_inode(inode);
-+ count = estimate_one_insert_item(tree) +
-+ estimate_insert_flow(tree->height) +
-+ estimate_one_insert_item(tree);
-+ grab_space_enable();
-+ return reiser4_grab_space(count, 0 /* flags */);
-+}
-+
-+#define PAGE_PER_FLOW 4
-+
-+static loff_t faultin_user_pages(const char __user *buf, size_t count)
-+{
-+ loff_t faulted;
-+ int to_fault;
-+
-+ if (count > PAGE_PER_FLOW * PAGE_CACHE_SIZE)
-+ count = PAGE_PER_FLOW * PAGE_CACHE_SIZE;
-+ faulted = 0;
-+ while (count > 0) {
-+ to_fault = PAGE_CACHE_SIZE;
-+ if (count < to_fault)
-+ to_fault = count;
-+ fault_in_pages_readable(buf + faulted, to_fault);
-+ count -= to_fault;
-+ faulted += to_fault;
-+ }
-+ return faulted;
-+}
-+
-+/**
-+ * reiser4_write_extent - write method of tail item plugin
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @pos: position in file to write to
-+ *
-+ * Returns number of written bytes or error code.
-+ */
-+ssize_t reiser4_write_tail(struct file *file, const char __user *buf,
-+ size_t count, loff_t *pos)
-+{
-+ struct inode *inode;
-+ struct hint hint;
-+ int result;
-+ flow_t flow;
-+ coord_t *coord;
-+ lock_handle *lh;
-+ znode *loaded;
-+
-+ inode = file->f_dentry->d_inode;
-+
-+ if (write_extent_reserve_space(inode))
-+ return RETERR(-ENOSPC);
-+
-+ result = load_file_hint(file, &hint);
-+ BUG_ON(result != 0);
-+
-+ flow.length = faultin_user_pages(buf, count);
-+ flow.user = 1;
-+ memcpy(&flow.data, &buf, sizeof(buf));
-+ flow.op = WRITE_OP;
-+ key_by_inode_and_offset_common(inode, *pos, &flow.key);
-+
-+ result = find_file_item(&hint, &flow.key, ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result))
-+ return result;
-+
-+ coord = &hint.ext_coord.coord;
-+ lh = hint.ext_coord.lh;
-+
-+ result = zload(coord->node);
-+ BUG_ON(result != 0);
-+ loaded = coord->node;
-+
-+ if (coord->between == AFTER_UNIT) {
-+ /* append with data or hole */
-+ result = append_tail(inode, &flow, coord, lh);
-+ } else if (coord->between == AT_UNIT) {
-+ /* overwrite */
-+ result = overwrite_tail(&flow, coord);
-+ } else {
-+ /* no items of this file yet. insert data or hole */
-+ result = insert_first_tail(inode, &flow, coord, lh);
-+ }
-+ zrelse(loaded);
-+ if (result < 0) {
-+ done_lh(lh);
-+ return result;
-+ }
-+
-+ /* seal and unlock znode */
-+ hint.ext_coord.valid = 0;
-+ if (hint.ext_coord.valid)
-+ reiser4_set_hint(&hint, &flow.key, ZNODE_WRITE_LOCK);
-+ else
-+ reiser4_unset_hint(&hint);
-+
-+ save_file_hint(file, &hint);
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+
-+static int
-+coord_matches_key_tail(const coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key item_key;
-+
-+ assert("vs-1356", coord_is_existing_unit(coord));
-+ assert("vs-1354", keylt(key, append_key_tail(coord, &item_key)));
-+ assert("vs-1355", keyge(key, item_key_by_coord(coord, &item_key)));
-+ return get_key_offset(key) ==
-+ get_key_offset(&item_key) + coord->unit_pos;
-+
-+}
-+
-+#endif
-+
-+/* plugin->u.item.s.file.read */
-+int reiser4_read_tail(struct file *file UNUSED_ARG, flow_t *f, hint_t *hint)
-+{
-+ unsigned count;
-+ int item_length;
-+ coord_t *coord;
-+ uf_coord_t *uf_coord;
-+
-+ uf_coord = &hint->ext_coord;
-+ coord = &uf_coord->coord;
-+
-+ assert("vs-571", f->user == 1);
-+ assert("vs-571", f->data);
-+ assert("vs-967", coord && coord->node);
-+ assert("vs-1117", znode_is_rlocked(coord->node));
-+ assert("vs-1118", znode_is_loaded(coord->node));
-+
-+ assert("nikita-3037", reiser4_schedulable());
-+ assert("vs-1357", coord_matches_key_tail(coord, &f->key));
-+
-+ /* calculate number of bytes to read off the item */
-+ item_length = item_length_by_coord(coord);
-+ count = item_length_by_coord(coord) - coord->unit_pos;
-+ if (count > f->length)
-+ count = f->length;
-+
-+ /* user page has to be brought in so that major page fault does not
-+ * occur here when longtem lock is held */
-+ if (__copy_to_user((char __user *)f->data,
-+ ((char *)item_body_by_coord(coord) + coord->unit_pos),
-+ count))
-+ return RETERR(-EFAULT);
-+
-+ /* probably mark_page_accessed() should only be called if
-+ * coord->unit_pos is zero. */
-+ mark_page_accessed(znode_page(coord->node));
-+ move_flow_forward(f, count);
-+
-+ coord->unit_pos += count;
-+ if (item_length == coord->unit_pos) {
-+ coord->unit_pos--;
-+ coord->between = AFTER_UNIT;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ plugin->u.item.s.file.append_key
-+ key of first byte which is the next to last byte by addressed by this item
-+*/
-+reiser4_key *append_key_tail(const coord_t * coord, reiser4_key * key)
-+{
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, get_key_offset(key) + item_length_by_coord(coord));
-+ return key;
-+}
-+
-+/* plugin->u.item.s.file.init_coord_extension */
-+void init_coord_extension_tail(uf_coord_t * uf_coord, loff_t lookuped)
-+{
-+ uf_coord->valid = 1;
-+}
-+
-+/*
-+ plugin->u.item.s.file.get_block
-+*/
-+int
-+get_block_address_tail(const coord_t * coord, sector_t lblock, sector_t * block)
-+{
-+ assert("nikita-3252", znode_get_level(coord->node) == LEAF_LEVEL);
-+
-+ if (reiser4_blocknr_is_fake(znode_get_block(coord->node)))
-+ /* if node has'nt obtainet its block number yet, return 0.
-+ * Lets avoid upsetting users with some cosmic numbers beyond
-+ * the device capacity.*/
-+ *block = 0;
-+ else
-+ *block = *znode_get_block(coord->node);
-+ return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/item/tail.h linux-2.6.20/fs/reiser4/plugin/item/tail.h
---- linux-2.6.20.orig/fs/reiser4/plugin/item/tail.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/item/tail.h 2007-05-06 14:50:43.827015719 +0400
-@@ -0,0 +1,58 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined( __REISER4_TAIL_H__ )
-+#define __REISER4_TAIL_H__
-+
-+typedef struct {
-+ int not_used;
-+} tail_coord_extension_t;
-+
-+struct cut_list;
-+
-+/* plugin->u.item.b.* */
-+reiser4_key *max_key_inside_tail(const coord_t *, reiser4_key *);
-+int can_contain_key_tail(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data *);
-+int mergeable_tail(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_tail(const coord_t *);
-+lookup_result lookup_tail(const reiser4_key *, lookup_bias, coord_t *);
-+int paste_tail(coord_t *, reiser4_item_data *, carry_plugin_info *);
-+int can_shift_tail(unsigned free_space, coord_t * source,
-+ znode * target, shift_direction, unsigned *size,
-+ unsigned want);
-+void copy_units_tail(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction, unsigned free_space);
-+int kill_hook_tail(const coord_t *, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *);
-+int cut_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+reiser4_key *unit_key_tail(const coord_t *, reiser4_key *);
-+
-+/* plugin->u.item.s.* */
-+ssize_t reiser4_write_tail(struct file *file, const char __user *buf,
-+ size_t count, loff_t *pos);
-+int reiser4_read_tail(struct file *, flow_t *, hint_t *);
-+int readpage_tail(void *vp, struct page *page);
-+reiser4_key *append_key_tail(const coord_t *, reiser4_key *);
-+void init_coord_extension_tail(uf_coord_t *, loff_t offset);
-+int get_block_address_tail(const coord_t *, sector_t, sector_t *);
-+int item_balance_dirty_pages(struct address_space *, const flow_t *,
-+ hint_t *, int back_to_dirty, int set_hint);
-+
-+/* __REISER4_TAIL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/Makefile linux-2.6.20/fs/reiser4/plugin/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/Makefile 2007-05-06 14:50:43.827015719 +0400
-@@ -0,0 +1,26 @@
-+obj-$(CONFIG_REISER4_FS) += plugins.o
-+
-+plugins-objs := \
-+ plugin.o \
-+ plugin_set.o \
-+ object.o \
-+ inode_ops.o \
-+ inode_ops_rename.o \
-+ file_ops.o \
-+ file_ops_readdir.o \
-+ file_plugin_common.o \
-+ dir_plugin_common.o \
-+ digest.o \
-+ hash.o \
-+ fibration.o \
-+ tail_policy.o \
-+ regular.o
-+
-+obj-$(CONFIG_REISER4_FS) += item/
-+obj-$(CONFIG_REISER4_FS) += file/
-+obj-$(CONFIG_REISER4_FS) += dir/
-+obj-$(CONFIG_REISER4_FS) += node/
-+obj-$(CONFIG_REISER4_FS) += compress/
-+obj-$(CONFIG_REISER4_FS) += space/
-+obj-$(CONFIG_REISER4_FS) += disk_format/
-+obj-$(CONFIG_REISER4_FS) += security/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/node/Makefile linux-2.6.20/fs/reiser4/plugin/node/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/node/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/node/Makefile 2007-05-06 14:50:43.827015719 +0400
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += node_plugins.o
-+
-+node_plugins-objs := \
-+ node.o \
-+ node40.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/node/node40.c linux-2.6.20/fs/reiser4/plugin/node/node40.c
---- linux-2.6.20.orig/fs/reiser4/plugin/node/node40.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/node/node40.c 2007-05-06 14:50:43.831016969 +0400
-@@ -0,0 +1,2924 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "../item/item.h"
-+#include "node.h"
-+#include "node40.h"
-+#include "../plugin.h"
-+#include "../../jnode.h"
-+#include "../../znode.h"
-+#include "../../pool.h"
-+#include "../../carry.h"
-+#include "../../tap.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../reiser4.h"
-+
-+#include <asm/uaccess.h>
-+#include <linux/types.h>
-+#include <linux/prefetch.h>
-+
-+/* leaf 40 format:
-+
-+ [node header | item 0, item 1, .., item N-1 | free space | item_head N-1, .. item_head 1, item head 0 ]
-+ plugin_id (16) key
-+ free_space (16) pluginid (16)
-+ free_space_start (16) offset (16)
-+ level (8)
-+ num_items (16)
-+ magic (32)
-+ flush_time (32)
-+*/
-+/* NIKITA-FIXME-HANS: I told you guys not less than 10 times to not call it r4fs. Change to "ReIs". */
-+/* magic number that is stored in ->magic field of node header */
-+static const __u32 REISER4_NODE_MAGIC = 0x52344653; /* (*(__u32 *)"R4FS"); */
-+
-+static int prepare_for_update(znode * left, znode * right,
-+ carry_plugin_info * info);
-+
-+/* header of node of reiser40 format is at the beginning of node */
-+static inline node40_header *node40_node_header(const znode * node /* node to
-+ * query */ )
-+{
-+ assert("nikita-567", node != NULL);
-+ assert("nikita-568", znode_page(node) != NULL);
-+ assert("nikita-569", zdata(node) != NULL);
-+ return (node40_header *) zdata(node);
-+}
-+
-+/* functions to get/set fields of node40_header */
-+#define nh40_get_magic(nh) le32_to_cpu(get_unaligned(&(nh)->magic))
-+#define nh40_get_free_space(nh) le16_to_cpu(get_unaligned(&(nh)->free_space))
-+#define nh40_get_free_space_start(nh) le16_to_cpu(get_unaligned(&(nh)->free_space_start))
-+#define nh40_get_level(nh) get_unaligned(&(nh)->level)
-+#define nh40_get_num_items(nh) le16_to_cpu(get_unaligned(&(nh)->nr_items))
-+#define nh40_get_flush_id(nh) le64_to_cpu(get_unaligned(&(nh)->flush_id))
-+
-+#define nh40_set_magic(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->magic)
-+#define nh40_set_free_space(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space)
-+#define nh40_set_free_space_start(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space_start)
-+#define nh40_set_level(nh, value) put_unaligned(value, &(nh)->level)
-+#define nh40_set_num_items(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->nr_items)
-+#define nh40_set_mkfs_id(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->mkfs_id)
-+
-+/* plugin field of node header should be read/set by
-+ plugin_by_disk_id/save_disk_plugin */
-+
-+/* array of item headers is at the end of node */
-+static inline item_header40 *node40_ih_at(const znode * node, unsigned pos)
-+{
-+ return (item_header40 *) (zdata(node) + znode_size(node)) - pos - 1;
-+}
-+
-+/* ( page_address( node -> pg ) + PAGE_CACHE_SIZE ) - pos - 1
-+ */
-+static inline item_header40 *node40_ih_at_coord(const coord_t * coord)
-+{
-+ return (item_header40 *) (zdata(coord->node) +
-+ znode_size(coord->node)) - (coord->item_pos) -
-+ 1;
-+}
-+
-+/* functions to get/set fields of item_header40 */
-+#define ih40_get_offset(ih) le16_to_cpu(get_unaligned(&(ih)->offset))
-+
-+#define ih40_set_offset(ih, value) put_unaligned(cpu_to_le16(value), &(ih)->offset)
-+
-+/* plugin field of item header should be read/set by
-+ plugin_by_disk_id/save_disk_plugin */
-+
-+/* plugin methods */
-+
-+/* plugin->u.node.item_overhead
-+ look for description of this method in plugin/node/node.h */
-+size_t
-+item_overhead_node40(const znode * node UNUSED_ARG, flow_t * f UNUSED_ARG)
-+{
-+ return sizeof(item_header40);
-+}
-+
-+/* plugin->u.node.free_space
-+ look for description of this method in plugin/node/node.h */
-+size_t free_space_node40(znode * node)
-+{
-+ assert("nikita-577", node != NULL);
-+ assert("nikita-578", znode_is_loaded(node));
-+ assert("nikita-579", zdata(node) != NULL);
-+
-+ return nh40_get_free_space(node40_node_header(node));
-+}
-+
-+/* private inline version of node40_num_of_items() for use in this file. This
-+ is necessary, because address of node40_num_of_items() is taken and it is
-+ never inlined as a result. */
-+static inline short node40_num_of_items_internal(const znode * node)
-+{
-+ return nh40_get_num_items(node40_node_header(node));
-+}
-+
-+#if REISER4_DEBUG
-+static inline void check_num_items(const znode * node)
-+{
-+ assert("nikita-2749",
-+ node40_num_of_items_internal(node) == node->nr_items);
-+ assert("nikita-2746", znode_is_write_locked(node));
-+}
-+#else
-+#define check_num_items(node) noop
-+#endif
-+
-+/* plugin->u.node.num_of_items
-+ look for description of this method in plugin/node/node.h */
-+int num_of_items_node40(const znode * node)
-+{
-+ return node40_num_of_items_internal(node);
-+}
-+
-+static void
-+node40_set_num_items(znode * node, node40_header * nh, unsigned value)
-+{
-+ assert("nikita-2751", node != NULL);
-+ assert("nikita-2750", nh == node40_node_header(node));
-+
-+ check_num_items(node);
-+ nh40_set_num_items(nh, value);
-+ node->nr_items = value;
-+ check_num_items(node);
-+}
-+
-+/* plugin->u.node.item_by_coord
-+ look for description of this method in plugin/node/node.h */
-+char *item_by_coord_node40(const coord_t * coord)
-+{
-+ item_header40 *ih;
-+ char *p;
-+
-+ /* @coord is set to existing item */
-+ assert("nikita-596", coord != NULL);
-+ assert("vs-255", coord_is_existing_item(coord));
-+
-+ ih = node40_ih_at_coord(coord);
-+ p = zdata(coord->node) + ih40_get_offset(ih);
-+ return p;
-+}
-+
-+/* plugin->u.node.length_by_coord
-+ look for description of this method in plugin/node/node.h */
-+int length_by_coord_node40(const coord_t * coord)
-+{
-+ item_header40 *ih;
-+ int result;
-+
-+ /* @coord is set to existing item */
-+ assert("vs-256", coord != NULL);
-+ assert("vs-257", coord_is_existing_item(coord));
-+
-+ ih = node40_ih_at_coord(coord);
-+ if ((int)coord->item_pos ==
-+ node40_num_of_items_internal(coord->node) - 1)
-+ result =
-+ nh40_get_free_space_start(node40_node_header(coord->node)) -
-+ ih40_get_offset(ih);
-+ else
-+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih);
-+
-+ return result;
-+}
-+
-+static pos_in_node_t
-+node40_item_length(const znode * node, pos_in_node_t item_pos)
-+{
-+ item_header40 *ih;
-+ pos_in_node_t result;
-+
-+ /* @coord is set to existing item */
-+ assert("vs-256", node != NULL);
-+ assert("vs-257", node40_num_of_items_internal(node) > item_pos);
-+
-+ ih = node40_ih_at(node, item_pos);
-+ if (item_pos == node40_num_of_items_internal(node) - 1)
-+ result =
-+ nh40_get_free_space_start(node40_node_header(node)) -
-+ ih40_get_offset(ih);
-+ else
-+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih);
-+
-+ return result;
-+}
-+
-+/* plugin->u.node.plugin_by_coord
-+   Returns the item plugin found via the plugin id stored in the header of
-+   the item @coord points at.
-+   The method itself is described in plugin/node/node.h */
-+item_plugin *plugin_by_coord_node40(const coord_t * coord)
-+{
-+	item_header40 *header;
-+
-+	/* the caller must hand in a coord set to an existing item */
-+	assert("vs-258", coord != NULL);
-+	assert("vs-259", coord_is_existing_item(coord));
-+
-+	header = node40_ih_at_coord(coord);
-+
-+	/* this call is time critical, so NULL is passed in place of the
-+	   current tree */
-+	return item_plugin_by_disk_id(NULL, &header->plugin_id);
-+}
-+
-+/* plugin->u.node.key_at
-+   Copies the key stored in the header of the item @coord points at into
-+   @key and returns @key.
-+   The method itself is described in plugin/node/node.h */
-+reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key)
-+{
-+	item_header40 *header;
-+
-+	/* the caller must hand in a coord set to an existing item */
-+	assert("nikita-1765", coord_is_existing_item(coord));
-+
-+	header = node40_ih_at_coord(coord);
-+	memcpy(key, &header->key, sizeof(*key));
-+	return key;
-+}
-+
-+/* VS-FIXME-HANS: please review whether the below are properly disabled when debugging is disabled.
-+ NODE_INCSTAT forwards to reiser4_stat_inc_at_level() and NODE_ADDSTAT (with
-+ the extra @val) to reiser4_stat_add_at_level(), both for the counter
-+ node.lookup.<counter> at the tree level of znode @n.
-+ NOTE(review): nothing between these definitions and the matching #undef
-+ lines uses either macro - confirm they are still needed. */
-+
-+#define NODE_INCSTAT(n, counter) \
-+ reiser4_stat_inc_at_level(znode_get_level(n), node.lookup.counter)
-+
-+#define NODE_ADDSTAT(n, counter, val) \
-+ reiser4_stat_add_at_level(znode_get_level(n), node.lookup.counter, val)
-+
-+/* plugin->u.node.lookup
-+ look for description of this method in plugin/node/node.h
-+
-+ Sets @coord to the place in @node where @key is, or would be, stored.
-+ The range of candidate items [left, right] is first narrowed by binary
-+ search while it spans at least REISER4_SEQ_SEARCH_BREAK positions, then
-+ finished by a sequential scan. When the key of an item header matches
-+ @key exactly NS_FOUND is returned; otherwise the decision is left to the
-+ item plugin's ->b.max_key_inside and ->b.lookup methods when they exist.
-+
-+ Returns NS_FOUND or NS_NOT_FOUND (or whatever ->b.lookup returns), and
-+ RETERR(-EIO) when @key is smaller than the key of a non-first item or the
-+ item's plugin id cannot be resolved. An empty node yields NS_NOT_FOUND
-+ with @coord set by coord_init_first_unit(). */
-+node_search_result lookup_node40(znode * node /* node to query */ ,
-+ const reiser4_key * key /* key to look for */ ,
-+ lookup_bias bias /* search bias */ ,
-+ coord_t * coord /* resulting coord */ )
-+{
-+ int left; /* lowest item position still considered */
-+ int right; /* highest item position still considered */
-+ int found; /* set once an item header key equal to @key was seen */
-+ int items; /* number of items in @node */
-+
-+ item_header40 *lefth; /* item header at position @left */
-+ item_header40 *righth; /* item header at position @right */
-+
-+ item_plugin *iplug;
-+ item_header40 *bstop; /* header of the item the search stopped at */
-+ item_header40 *ih;
-+ cmp_t order;
-+
-+ assert("nikita-583", node != NULL);
-+ assert("nikita-584", key != NULL);
-+ assert("nikita-585", coord != NULL);
-+ assert("nikita-2693", znode_is_any_locked(node));
-+ cassert(REISER4_SEQ_SEARCH_BREAK > 2);
-+
-+ items = node_num_items(node);
-+
-+ if (unlikely(items == 0)) {
-+ coord_init_first_unit(coord, node);
-+ return NS_NOT_FOUND;
-+ }
-+
-+ /* binary search for item that can contain given key */
-+ left = 0;
-+ right = items - 1;
-+ coord->node = node;
-+ coord_clear_iplug(coord);
-+ found = 0;
-+
-+ lefth = node40_ih_at(node, left);
-+ righth = node40_ih_at(node, right);
-+
-+ /* It is known that for small arrays sequential search is on average
-+ more efficient than binary. This is because sequential search is
-+ coded as tight loop that can be better optimized by compilers and
-+ for small array size gain from this optimization makes sequential
-+ search the winner. Another, maybe more important, reason for this,
-+ is that sequential array is more CPU cache friendly, whereas binary
-+ search effectively destroys CPU caching.
-+
-+ Critical here is the notion of "smallness". Reasonable value of
-+ REISER4_SEQ_SEARCH_BREAK can be found by playing with code in
-+ fs/reiser4/ulevel/ulevel.c:test_search().
-+
-+ Don't try to further optimize sequential search by scanning from
-+ right to left in attempt to use more efficient loop termination
-+ condition (comparison with 0). This doesn't work.
-+
-+ */
-+
-+ while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
-+ int median;
-+ item_header40 *medianh;
-+
-+ median = (left + right) / 2;
-+ medianh = node40_ih_at(node, median);
-+
-+ assert("nikita-1084", median >= 0);
-+ assert("nikita-1085", median < items);
-+ switch (keycmp(key, &medianh->key)) {
-+ case LESS_THAN:
-+ right = median;
-+ righth = medianh;
-+ break;
-+ default:
-+ wrong_return_value("nikita-586", "keycmp"); /* NOTE(review): no break here - control continues into GREATER_THAN if this call returns */
-+ case GREATER_THAN:
-+ left = median;
-+ lefth = medianh;
-+ break;
-+ case EQUAL_TO:
-+ /* step towards lower positions while the key still equals
-+ @key, then settle on the lowest position that matched */
-+ do {
-+ --median;
-+ /* headers are ordered from right to left */
-+ ++medianh;
-+ } while (median >= 0 && keyeq(key, &medianh->key));
-+ right = left = median + 1; /* left == right also ends the enclosing while loop */
-+ ih = lefth = righth = medianh - 1;
-+ found = 1;
-+ break;
-+ }
-+ }
-+ /* sequential scan. Item headers, and, therefore, keys are stored at
-+ the rightmost part of a node from right to left. We are trying to
-+ access memory from left to right, and hence, scan in _descending_
-+ order of item numbers.
-+ */
-+ if (!found) {
-+ for (left = right, ih = righth; left >= 0; ++ih, --left) {
-+ cmp_t comparison;
-+
-+ prefetchkey(&(ih + 1)->key);
-+ comparison = keycmp(&ih->key, key);
-+ if (comparison == GREATER_THAN)
-+ continue;
-+ if (comparison == EQUAL_TO) {
-+ found = 1;
-+ /* as above: back up to the lowest position whose
-+ key equals @key */
-+ do {
-+ --left;
-+ ++ih;
-+ } while (left >= 0 && keyeq(&ih->key, key));
-+ ++left;
-+ --ih;
-+ } else {
-+ assert("nikita-1256", comparison == LESS_THAN);
-+ }
-+ break;
-+ }
-+ if (unlikely(left < 0)) /* every scanned key was greater than @key */
-+ left = 0;
-+ }
-+
-+ assert("nikita-3212", right >= left);
-+ assert("nikita-3214",
-+ equi(found, keyeq(&node40_ih_at(node, left)->key, key)));
-+
-+ coord_set_item_pos(coord, left);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+
-+ /* key < leftmost key in a node or node is corrupted and keys
-+ are not sorted */
-+ bstop = node40_ih_at(node, (unsigned)left);
-+ order = keycmp(&bstop->key, key);
-+ if (unlikely(order == GREATER_THAN)) {
-+ if (unlikely(left != 0)) {
-+ /* screw up */
-+ warning("nikita-587", "Key less than %i key in a node",
-+ left);
-+ reiser4_print_key("key", key);
-+ reiser4_print_key("min", &bstop->key);
-+ print_coord_content("coord", coord);
-+ return RETERR(-EIO);
-+ } else {
-+ coord->between = BEFORE_UNIT;
-+ return NS_NOT_FOUND;
-+ }
-+ }
-+ /* left <= key, ok */
-+ iplug = item_plugin_by_disk_id(znode_get_tree(node), &bstop->plugin_id);
-+
-+ if (unlikely(iplug == NULL)) {
-+ warning("nikita-588", "Unknown plugin %i",
-+ le16_to_cpu(get_unaligned(&bstop->plugin_id)));
-+ reiser4_print_key("key", key);
-+ print_coord_content("coord", coord);
-+ return RETERR(-EIO);
-+ }
-+
-+ coord_set_iplug(coord, iplug);
-+
-+ /* if exact key from item header was found by binary search, no
-+ further checks are necessary. */
-+ if (found) {
-+ assert("nikita-1259", order == EQUAL_TO);
-+ return NS_FOUND;
-+ }
-+ if (iplug->b.max_key_inside != NULL) {
-+ reiser4_key max_item_key;
-+
-+ /* key > max_item_key --- outside of an item */
-+ if (keygt(key, iplug->b.max_key_inside(coord, &max_item_key))) {
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+ /* FIXME-VS: key we are looking for does not fit into
-+ found item. Return NS_NOT_FOUND then. Without that
-+ the following case does not work: there is extent of
-+ file 10000, 10001. File 10000, 10002 has been just
-+ created. When writing to position 0 in that file -
-+ traverse_tree will stop here on twig level. When we
-+ want it to go down to leaf level
-+ */
-+ return NS_NOT_FOUND;
-+ }
-+ }
-+
-+ if (iplug->b.lookup != NULL) {
-+ return iplug->b.lookup(key, bias, coord);
-+ } else {
-+ /* no lookup method in the item plugin: report the position
-+ right after the unit, found only for a non-exact search */
-+ assert("nikita-1260", order == LESS_THAN);
-+ coord->between = AFTER_UNIT;
-+ return (bias == FIND_EXACT) ? NS_NOT_FOUND : NS_FOUND;
-+ }
-+}
-+
-+#undef NODE_ADDSTAT
-+#undef NODE_INCSTAT
-+
-+/* plugin->u.node.estimate
-+   Returns how many bytes of item body can still be added to @node: its
-+   free space less the room one more item header takes, or 0 when the
-+   header alone does not fit.
-+   The method itself is described in plugin/node/node.h */
-+size_t estimate_node40(znode * node)
-+{
-+	size_t free_space;
-+
-+	assert("nikita-597", node != NULL);
-+
-+	free_space = free_space_node40(node);
-+
-+	/* size_t is unsigned: subtracting first and testing "> 0" afterwards
-+	   would wrap around to a huge value instead of going negative, so
-+	   compare before subtracting */
-+	if (free_space <= sizeof(item_header40))
-+		return 0;
-+
-+	return free_space - sizeof(item_header40);
-+}
-+
-+/* plugin->u.node.check
-+ look for description of this method in plugin/node/node.h
-+
-+ Consistency check of a loaded node40 node. Walks all items verifying item
-+ offsets, ordering of item and unit keys, that item types fit the tree
-+ level of the node, and that neighboring items are not mergeable. With
-+ REISER4_NODE_DKEYS in @flags the keys are also compared against the
-+ delimiting keys of the node and of its connected left/right neighbors.
-+
-+ Returns 0 if no problem was found (or JNODE_HEARD_BANSHEE is set on the
-+ node), -1 otherwise with *@error set to a constant message string. */
-+int check_node40(const znode * node /* node to check */ ,
-+ __u32 flags /* check flags */ ,
-+ const char **error /* where to store error message */ )
-+{
-+ int nr_items;
-+ int i;
-+ reiser4_key prev; /* last key seen so far; each next key must not be smaller */
-+ unsigned old_offset; /* body offset of the previous item */
-+ tree_level level;
-+ coord_t coord;
-+ int result;
-+
-+ assert("nikita-580", node != NULL);
-+ assert("nikita-581", error != NULL);
-+ assert("nikita-2948", znode_is_loaded(node));
-+
-+ if (ZF_ISSET(node, JNODE_HEARD_BANSHEE))
-+ return 0;
-+
-+ assert("nikita-582", zdata(node) != NULL);
-+
-+ nr_items = node40_num_of_items_internal(node);
-+ if (nr_items < 0) {
-+ *error = "Negative number of items";
-+ return -1;
-+ }
-+
-+ if (flags & REISER4_NODE_DKEYS)
-+ prev = *znode_get_ld_key((znode *) node);
-+ else
-+ prev = *reiser4_min_key();
-+
-+ old_offset = 0;
-+ coord_init_zero(&coord);
-+ coord.node = (znode *) node;
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ level = znode_get_level(node);
-+ for (i = 0; i < nr_items; i++) {
-+ item_header40 *ih;
-+ reiser4_key unit_key;
-+ unsigned j;
-+
-+ ih = node40_ih_at(node, (unsigned)i);
-+ coord_set_item_pos(&coord, i);
-+ /* item body must start after the node header and before the
-+ area taken by the nr_items item headers at the node's end */
-+ if ((ih40_get_offset(ih) >=
-+ znode_size(node) - nr_items * sizeof(item_header40)) ||
-+ (ih40_get_offset(ih) < sizeof(node40_header))) {
-+ *error = "Offset is out of bounds";
-+ return -1;
-+ }
-+ if (ih40_get_offset(ih) <= old_offset) {
-+ *error = "Offsets are in wrong order";
-+ return -1;
-+ }
-+ if ((i == 0) && (ih40_get_offset(ih) != sizeof(node40_header))) {
-+ *error = "Wrong offset of first item";
-+ return -1;
-+ }
-+ old_offset = ih40_get_offset(ih);
-+
-+ if (keygt(&prev, &ih->key)) {
-+ *error = "Keys are in wrong order";
-+ return -1;
-+ }
-+ if (!keyeq(&ih->key, unit_key_by_coord(&coord, &unit_key))) {
-+ *error = "Wrong key of first unit";
-+ return -1;
-+ }
-+ prev = ih->key;
-+ for (j = 0; j < coord_num_units(&coord); ++j) {
-+ coord.unit_pos = j;
-+ unit_key_by_coord(&coord, &unit_key);
-+ if (keygt(&prev, &unit_key)) {
-+ *error = "Unit keys are in wrong order";
-+ return -1;
-+ }
-+ prev = unit_key;
-+ }
-+ coord.unit_pos = 0;
-+ if (level != TWIG_LEVEL && item_is_extent(&coord)) {
-+ *error = "extent on the wrong level";
-+ return -1;
-+ }
-+ if (level == LEAF_LEVEL && item_is_internal(&coord)) {
-+ *error = "internal item on the wrong level";
-+ return -1;
-+ }
-+ if (level != LEAF_LEVEL &&
-+ !item_is_internal(&coord) && !item_is_extent(&coord)) {
-+ *error = "wrong item on the internal level";
-+ return -1;
-+ }
-+ if (level > TWIG_LEVEL && !item_is_internal(&coord)) {
-+ *error = "non-internal item on the internal level";
-+ return -1;
-+ }
-+#if REISER4_DEBUG
-+ if (item_plugin_by_coord(&coord)->b.check
-+ && item_plugin_by_coord(&coord)->b.check(&coord, error))
-+ return -1;
-+#endif
-+ if (i) {
-+ coord_t prev_coord;
-+ /* two neighboring items can not be mergeable */
-+ coord_dup(&prev_coord, &coord);
-+ coord_prev_item(&prev_coord);
-+ if (are_items_mergeable(&prev_coord, &coord)) {
-+ *error = "mergeable items in one node";
-+ return -1;
-+ }
-+
-+ }
-+ }
-+
-+ if ((flags & REISER4_NODE_DKEYS) && !node_is_empty(node)) {
-+ coord_t coord; /* NOTE(review): shadows the function-level coord declared above */
-+ item_plugin *iplug;
-+
-+ coord_init_last_unit(&coord, node);
-+ iplug = item_plugin_by_coord(&coord);
-+ if ((item_is_extent(&coord) || item_is_tail(&coord)) &&
-+ iplug->s.file.append_key != NULL) {
-+ reiser4_key mkey;
-+
-+ /* key offset one below the append key of the last item is
-+ compared with the right delimiting key */
-+ iplug->s.file.append_key(&coord, &mkey);
-+ set_key_offset(&mkey, get_key_offset(&mkey) - 1);
-+ read_lock_dk(current_tree);
-+ result = keygt(&mkey, znode_get_rd_key((znode *) node));
-+ read_unlock_dk(current_tree);
-+ if (result) {
-+ *error = "key of rightmost item is too large";
-+ return -1;
-+ }
-+ }
-+ }
-+ if (flags & REISER4_NODE_DKEYS) {
-+ /* both locks are taken here and dropped again, in reverse
-+ order, on every exit path of this branch */
-+ read_lock_tree(current_tree);
-+ read_lock_dk(current_tree);
-+
-+ flags |= REISER4_NODE_TREE_STABLE; /* NOTE(review): forced on, so (assuming the flag is non-zero) every "flags & REISER4_NODE_TREE_STABLE" test below holds and every "!(flags & ...)" test does not */
-+
-+ if (keygt(&prev, znode_get_rd_key((znode *) node))) {
-+ if (flags & REISER4_NODE_TREE_STABLE) {
-+ *error = "Last key is greater than rdkey";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ }
-+ if (keygt
-+ (znode_get_ld_key((znode *) node),
-+ znode_get_rd_key((znode *) node))) {
-+ *error = "ldkey is greater than rdkey";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) &&
-+ (node->left != NULL) &&
-+ !ZF_ISSET(node->left, JNODE_HEARD_BANSHEE) &&
-+ ergo(flags & REISER4_NODE_TREE_STABLE,
-+ !keyeq(znode_get_rd_key(node->left),
-+ znode_get_ld_key((znode *) node)))
-+ && ergo(!(flags & REISER4_NODE_TREE_STABLE),
-+ keygt(znode_get_rd_key(node->left),
-+ znode_get_ld_key((znode *) node)))) {
-+ *error = "left rdkey or ldkey is wrong";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ (node->right != NULL) &&
-+ !ZF_ISSET(node->right, JNODE_HEARD_BANSHEE) &&
-+ ergo(flags & REISER4_NODE_TREE_STABLE,
-+ !keyeq(znode_get_rd_key((znode *) node),
-+ znode_get_ld_key(node->right)))
-+ && ergo(!(flags & REISER4_NODE_TREE_STABLE),
-+ keygt(znode_get_rd_key((znode *) node),
-+ znode_get_ld_key(node->right)))) {
-+ *error = "rdkey or right ldkey is wrong";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.node.parse
-+   Validates the on-disk header of @node: the level recorded in it has to
-+   match znode_get_level() and the magic has to be REISER4_NODE_MAGIC. On
-+   success the item count is cached in node->nr_items and 0 is returned;
-+   otherwise a warning is issued and -EIO is returned (through RETERR).
-+   The method itself is described in plugin/node/node.h */
-+int parse_node40(znode * node /* node to parse */ )
-+{
-+	node40_header *nh = node40_node_header(node);
-+	int result = -EIO;
-+	d8 disk_level = nh40_get_level(nh);
-+
-+	if (unlikely(((__u8) znode_get_level(node)) != disk_level)) {
-+		warning("nikita-494", "Wrong level found in node: %i != %i",
-+			znode_get_level(node), disk_level);
-+	} else if (unlikely(nh40_get_magic(nh) != REISER4_NODE_MAGIC)) {
-+		warning("nikita-495",
-+			"Wrong magic in tree node: want %x, got %x",
-+			REISER4_NODE_MAGIC, nh40_get_magic(nh));
-+	} else {
-+		/* header looks sane: remember how many items it holds */
-+		node->nr_items = node40_num_of_items_internal(node);
-+		result = 0;
-+	}
-+
-+	return RETERR(result);
-+}
-+
-+/* plugin->u.node.init
-+   Formats @node as an empty node40 node: the header is zeroed and then
-+   filled with free space size and start, node plugin id, level, magic and
-+   mkfs id; the cached item count is reset. Always returns 0.
-+   The method itself is described in plugin/node/node.h */
-+int init_node40(znode * node /* node to initialise */ )
-+{
-+	node40_header *nh;
-+
-+	assert("nikita-570", node != NULL);
-+	assert("nikita-572", zdata(node) != NULL);
-+
-+	nh = node40_node_header(node);
-+
-+	/* zeroing also covers the fields that are not set explicitly below
-+	   (number of items, flags), on the sane hypothesis that 0 in CPU
-+	   format is 0 in disk format */
-+	memset(nh, 0, sizeof(node40_header));
-+
-+	/* everything past the node header is free space */
-+	nh40_set_free_space(nh, znode_size(node) - sizeof(node40_header));
-+	nh40_set_free_space_start(nh, sizeof(node40_header));
-+
-+	save_plugin_id(node_plugin_to_plugin(node->nplug),
-+		       &nh->common_header.plugin_id);
-+	nh40_set_level(nh, znode_get_level(node));
-+	nh40_set_magic(nh, REISER4_NODE_MAGIC);
-+	node->nr_items = 0;
-+	nh40_set_mkfs_id(nh, reiser4_mkfs_id(reiser4_get_current_sb()));
-+
-+	return 0;
-+}
-+
-+#ifdef GUESS_EXISTS
-+/* Returns non-zero when the header of @node carries REISER4_NODE_MAGIC and
-+   the node plugin id stored in it resolves to the plugin with id NODE40_ID;
-+   returns 0 otherwise. */
-+int guess_node40(const znode * node /* node to guess plugin of */ )
-+{
-+	node40_header *hdr;
-+
-+	assert("nikita-1058", node != NULL);
-+	hdr = node40_node_header(node);
-+
-+	/* wrong magic: the plugin id is not even looked at */
-+	if (nh40_get_magic(hdr) != REISER4_NODE_MAGIC)
-+		return 0;
-+
-+	return plugin_by_disk_id(znode_get_tree(node),
-+				 REISER4_NODE_PLUGIN_TYPE,
-+				 &hdr->common_header.plugin_id)->h.id ==
-+	    NODE40_ID;
-+}
-+#endif
-+
-+/* plugin->u.node.change_item_size
-+ look for description of this method in plugin/node/node.h
-+
-+ Changes the room taken by the body of the item at @coord by @by bytes:
-+ everything stored between the end of that item and the start of free
-+ space is moved by @by, the offsets of all following items are adjusted
-+ by @by, and the node header is updated (free space reduced by @by, free
-+ space start advanced by @by). */
-+void change_item_size_node40(coord_t * coord, int by)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ char *item_data;
-+ int item_length;
-+ unsigned i;
-+
-+ /* make sure that @item is coord of existing item */
-+ assert("vs-210", coord_is_existing_item(coord));
-+
-+ nh = node40_node_header(coord->node);
-+
-+ item_data = item_by_coord_node40(coord);
-+ item_length = length_by_coord_node40(coord);
-+
-+ /* move item bodies */
-+ ih = node40_ih_at_coord(coord);
-+ memmove(item_data + item_length + by, item_data + item_length,
-+ nh40_get_free_space_start(node40_node_header(coord->node)) -
-+ (ih40_get_offset(ih) + item_length));
-+
-+ /* update offsets of moved items */
-+ for (i = coord->item_pos + 1; i < nh40_get_num_items(nh); i++) {
-+ ih = node40_ih_at(coord->node, i);
-+ ih40_set_offset(ih, ih40_get_offset(ih) + by);
-+ }
-+
-+ /* update node header */
-+ nh40_set_free_space(nh, nh40_get_free_space(nh) - by);
-+ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) + by);
-+}
-+
-+/* Returns 0 when the block of @node is the root_block of its tree and 1
-+   for any other node. */
-+static int should_notify_parent(const znode * node)
-+{
-+	/* FIXME_JMACD This looks equivalent to znode_is_root(), right? -josh */
-+	if (disk_addr_eq(znode_get_block(node),
-+			 &znode_get_tree(node)->root_block))
-+		return 0;
-+
-+	return 1;
-+}
-+
-+/* plugin->u.node.create_item
-+ look for description of this method in plugin/node/node.h
-+
-+ Inserts a new item with key @key, described by @data, at the position
-+ between items that @target points to. Bodies and headers of the items
-+ that follow the insertion point are moved to make room, a header for the
-+ new item is written, the node header is updated, and then the item
-+ plugin's ->b.init, ->b.paste (or a plain copy of data->data) and
-+ ->b.create_hook are run. On return @target is set to the first unit of
-+ the new item. The caller has to guarantee enough free space (asserted).
-+ Always returns 0. */
-+int
-+create_item_node40(coord_t *target, const reiser4_key *key,
-+ reiser4_item_data *data, carry_plugin_info *info)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ unsigned offset; /* offset in the node at which the new item body starts */
-+ unsigned i;
-+
-+ nh = node40_node_header(target->node);
-+
-+ assert("vs-212", coord_is_between_items(target));
-+ /* node must have enough free space */
-+ assert("vs-254",
-+ free_space_node40(target->node) >=
-+ data->length + sizeof(item_header40));
-+ assert("vs-1410", data->length >= 0);
-+
-+ if (coord_set_to_right(target))
-+ /* there are not items to the right of @target, so, new item
-+ will be inserted after last one */
-+ coord_set_item_pos(target, nh40_get_num_items(nh));
-+
-+ if (target->item_pos < nh40_get_num_items(nh)) {
-+ /* there are items to be moved to prepare space for new
-+ item */
-+ ih = node40_ih_at_coord(target);
-+ /* new item will start at this offset */
-+ offset = ih40_get_offset(ih);
-+
-+ memmove(zdata(target->node) + offset + data->length,
-+ zdata(target->node) + offset,
-+ nh40_get_free_space_start(nh) - offset);
-+ /* update headers of moved items */
-+ for (i = target->item_pos; i < nh40_get_num_items(nh); i++) {
-+ ih = node40_ih_at(target->node, i);
-+ ih40_set_offset(ih, ih40_get_offset(ih) + data->length);
-+ }
-+
-+ /* @ih is set to item header of the last item, move item headers */
-+ memmove(ih - 1, ih,
-+ sizeof(item_header40) * (nh40_get_num_items(nh) -
-+ target->item_pos));
-+ } else {
-+ /* new item will start at this offset */
-+ offset = nh40_get_free_space_start(nh);
-+ }
-+
-+ /* make item header for the new item */
-+ ih = node40_ih_at_coord(target);
-+ memcpy(&ih->key, key, sizeof(reiser4_key));
-+ ih40_set_offset(ih, offset);
-+ save_plugin_id(item_plugin_to_plugin(data->iplug), &ih->plugin_id);
-+
-+ /* update node header */
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) - data->length -
-+ sizeof(item_header40));
-+ nh40_set_free_space_start(nh,
-+ nh40_get_free_space_start(nh) + data->length);
-+ node40_set_num_items(target->node, nh, nh40_get_num_items(nh) + 1);
-+
-+ /* FIXME: check how does create_item work when between is set to BEFORE_UNIT */
-+ target->unit_pos = 0;
-+ target->between = AT_UNIT;
-+ coord_clear_iplug(target);
-+
-+ /* initialize item */
-+ if (data->iplug->b.init != NULL) {
-+ data->iplug->b.init(target, NULL, data);
-+ }
-+ /* copy item body */
-+ if (data->iplug->b.paste != NULL) {
-+ data->iplug->b.paste(target, data, info);
-+ } else if (data->data != NULL) {
-+ if (data->user) {
-+ /* AUDIT: Are we really should not check that pointer
-+ from userspace was valid and data bytes were
-+ available? How will we return -EFAULT of some kind
-+ without this check? */
-+ assert("nikita-3038", reiser4_schedulable());
-+ /* copy data from user space */
-+ __copy_from_user(zdata(target->node) + offset,
-+ (const char __user *)data->data,
-+ (unsigned)data->length); /* NOTE(review): return value is not checked, see AUDIT above */
-+ } else
-+ /* copy from kernel space */
-+ memcpy(zdata(target->node) + offset, data->data,
-+ (unsigned)data->length);
-+ }
-+
-+ if (target->item_pos == 0) {
-+ /* left delimiting key has to be updated */
-+ prepare_for_update(NULL, target->node, info);
-+ }
-+
-+ if (item_plugin_by_coord(target)->b.create_hook != NULL) {
-+ item_plugin_by_coord(target)->b.create_hook(target, data->arg);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.node.update_item_key
-+   Overwrites the key kept in the header of the item @target points at with
-+   @key. When that item is the first one in the node, prepare_for_update()
-+   is called for the node with @info.
-+   The method itself is described in plugin/node/node.h */
-+void
-+update_item_key_node40(coord_t * target, const reiser4_key * key,
-+		       carry_plugin_info * info)
-+{
-+	item_header40 *header = node40_ih_at_coord(target);
-+
-+	memcpy(&header->key, key, sizeof(*key));
-+
-+	/* the key of the first item in the node has changed */
-+	if (target->item_pos == 0)
-+		prepare_for_update(NULL, target->node, info);
-+}
-+
-+/* these bits encode cut mode; they are or-ed together in cut40_info.mode */
-+#define CMODE_TAIL 1
-+#define CMODE_WHOLE 2
-+#define CMODE_HEAD 4
-+
-+struct cut40_info {
-+ int mode; /* combination of CMODE_* bits */
-+ pos_in_node_t tail_removed; /* position of item which gets tail removed */
-+ pos_in_node_t first_removed; /* position of first the leftmost item among items removed completely */
-+ pos_in_node_t removed_count; /* number of items removed completely */
-+ pos_in_node_t head_removed; /* position of item which gets head removed */
-+
-+ pos_in_node_t freed_space_start; /* offset in the node where the freed gap begins; compact() moves data to here */
-+ pos_in_node_t freed_space_end; /* offset in the node where the freed gap ends; compact() moves data from here */
-+ pos_in_node_t first_moved; /* position of the first item whose header offset compact() rewrites */
-+ pos_in_node_t head_removed_location; /* if set, offset compact() stores for the item at first_moved */
-+};
-+
-+/* Resets @cinfo: no cut mode bits set and every position/offset field set
-+   to MAX_POS_IN_NODE, the value the code uses for "not set". */
-+static void init_cinfo(struct cut40_info *cinfo)
-+{
-+	const pos_in_node_t unset = MAX_POS_IN_NODE;
-+
-+	cinfo->mode = 0;
-+
-+	/* which items are affected */
-+	cinfo->tail_removed = unset;
-+	cinfo->first_removed = unset;
-+	cinfo->removed_count = unset;
-+	cinfo->head_removed = unset;
-+
-+	/* what has to be moved to close the gap */
-+	cinfo->freed_space_start = unset;
-+	cinfo->freed_space_end = unset;
-+	cinfo->first_moved = unset;
-+	cinfo->head_removed_location = unset;
-+}
-+
-+/* complete cut_node40/kill_node40 content by removing the gap created by
-+ the removal described in @cinfo: item bodies behind the gap are moved
-+ down over it, offsets in the headers of the moved items are corrected,
-+ headers of completely removed items are squeezed out, and free space
-+ start, free space size and the item count of @node are updated.
-+ @cinfo must have freed_space_start, freed_space_end and first_moved
-+ set (asserted). */
-+static void compact(znode * node, struct cut40_info *cinfo)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ pos_in_node_t freed; /* number of bytes given back to free space */
-+ pos_in_node_t pos, nr_items;
-+
-+ assert("vs-1526", (cinfo->freed_space_start != MAX_POS_IN_NODE &&
-+ cinfo->freed_space_end != MAX_POS_IN_NODE &&
-+ cinfo->first_moved != MAX_POS_IN_NODE));
-+ assert("vs-1523", cinfo->freed_space_end >= cinfo->freed_space_start);
-+
-+ nh = node40_node_header(node);
-+ nr_items = nh40_get_num_items(nh);
-+
-+ /* remove gap made up by removal */
-+ memmove(zdata(node) + cinfo->freed_space_start,
-+ zdata(node) + cinfo->freed_space_end,
-+ nh40_get_free_space_start(nh) - cinfo->freed_space_end);
-+
-+ /* update item headers of moved items - change their locations */
-+ pos = cinfo->first_moved;
-+ ih = node40_ih_at(node, pos);
-+ if (cinfo->head_removed_location != MAX_POS_IN_NODE) {
-+ /* the item that lost its head gets an explicit new offset
-+ instead of the generic "- freed" correction below */
-+ assert("vs-1580", pos == cinfo->head_removed);
-+ ih40_set_offset(ih, cinfo->head_removed_location);
-+ pos++;
-+ ih--;
-+ }
-+
-+ freed = cinfo->freed_space_end - cinfo->freed_space_start;
-+ for (; pos < nr_items; pos++, ih--) {
-+ assert("vs-1581", ih == node40_ih_at(node, pos));
-+ ih40_set_offset(ih, ih40_get_offset(ih) - freed);
-+ }
-+
-+ /* free space start moved to left */
-+ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - freed);
-+
-+ if (cinfo->removed_count != MAX_POS_IN_NODE) {
-+ /* number of items changed. Remove item headers of those items */
-+ ih = node40_ih_at(node, nr_items - 1);
-+ memmove(ih + cinfo->removed_count, ih,
-+ sizeof(item_header40) * (nr_items -
-+ cinfo->removed_count -
-+ cinfo->first_removed));
-+ freed += sizeof(item_header40) * cinfo->removed_count; /* the dropped headers become free space too */
-+ node40_set_num_items(node, nh, nr_items - cinfo->removed_count);
-+ }
-+
-+ /* total amount of free space increased */
-+ nh40_set_free_space(nh, nh40_get_free_space(nh) + freed);
-+}
-+
-+/* Makes the item @coord points at @delta bytes shorter by dropping @delta
-+   bytes at the end of its body: everything stored after the item is moved
-+   down by @delta, offsets of the following items are decreased by @delta,
-+   and the free space accounting in the node header is updated. @delta
-+   must be within 0 .. item length (asserted). Always returns 0. */
-+int shrink_item_node40(coord_t * coord, int delta)
-+{
-+	znode *node;
-+	node40_header *nh;
-+	item_header40 *ih;
-+	pos_in_node_t nr_items;
-+	pos_in_node_t i;
-+	int item_end;
-+	char *tail;
-+
-+	assert("nikita-3487", coord != NULL);
-+	assert("nikita-3488", delta >= 0);
-+
-+	node = coord->node;
-+	nh = node40_node_header(node);
-+	nr_items = nh40_get_num_items(nh);
-+
-+	ih = node40_ih_at_coord(coord);
-+	assert("nikita-3489", delta <= length_by_coord_node40(coord));
-+
-+	/* offset of, and pointer to, the first byte past the item body */
-+	item_end = ih40_get_offset(ih) + length_by_coord_node40(coord);
-+	tail = zdata(node) + item_end;
-+
-+	/* close the gap: pull the rest of the used area down by @delta */
-+	memmove(tail - delta, tail, nh40_get_free_space_start(nh) - item_end);
-+
-+	/* items behind @coord now live @delta bytes lower; headers of
-+	   consecutive items are at decreasing addresses */
-+	i = coord->item_pos + 1;
-+	ih = node40_ih_at(node, i);
-+	while (i < nr_items) {
-+		assert("nikita-3490", ih == node40_ih_at(node, i));
-+		ih40_set_offset(ih, ih40_get_offset(ih) - delta);
-+		i++;
-+		ih--;
-+	}
-+
-+	/* free space begins @delta bytes earlier and is @delta bytes larger */
-+	nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - delta);
-+	nh40_set_free_space(nh, nh40_get_free_space(nh) + delta);
-+
-+	/*
-+	 * The number of items is left untouched here, so the node cannot
-+	 * become empty, and since no item is removed no keys need updating.
-+	 */
-+	return 0;
-+}
-+
-+/* this is used by cut_node40 and kill_node40. It analyses input parameters and calculates cut mode. There are 2 types
-+ of cut. First is when a unit is removed from the middle of an item. In this case this function returns 1. All the
-+ rest fits into second case: 0 or 1 of items getting tail cut, 0 or more items removed completely and 0 or 1 item
-+ getting head cut. Function returns 0 in this case.
-+ @cinfo is re-initialized first; in the second case its mode, tail_removed, first_removed, removed_count and
-+ head_removed fields are filled in as applicable, the remaining fields are left at MAX_POS_IN_NODE */
-+static int
-+parse_cut(struct cut40_info *cinfo, const struct cut_kill_params *params)
-+{
-+ reiser4_key left_key, right_key;
-+ reiser4_key min_from_key, max_to_key;
-+ const reiser4_key *from_key, *to_key;
-+
-+ init_cinfo(cinfo);
-+
-+ /* calculate minimal key stored in first item of items to be cut (params->from) */
-+ item_key_by_coord(params->from, &min_from_key);
-+ /* and max key stored in last item of items to be cut (params->to) */
-+ max_item_key_by_coord(params->to, &max_to_key);
-+
-+ /* if cut key range is not defined in input parameters - define it using cut coord range */
-+ if (params->from_key == NULL) {
-+ assert("vs-1513", params->to_key == NULL);
-+ unit_key_by_coord(params->from, &left_key);
-+ from_key = &left_key;
-+ max_unit_key_by_coord(params->to, &right_key);
-+ to_key = &right_key;
-+ } else {
-+ from_key = params->from_key;
-+ to_key = params->to_key;
-+ }
-+
-+ if (params->from->item_pos == params->to->item_pos) {
-+ /* cut range lies within one item: strictly inside it means a
-+ cut from the middle */
-+ if (keylt(&min_from_key, from_key)
-+ && keylt(to_key, &max_to_key))
-+ return 1;
-+
-+ if (keygt(from_key, &min_from_key)) {
-+ /* tail of item is to be cut */
-+ cinfo->tail_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_TAIL;
-+ } else if (keylt(to_key, &max_to_key)) {
-+ /* head of item is to be cut */
-+ cinfo->head_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_HEAD;
-+ } else {
-+ /* item is removed completely */
-+ cinfo->first_removed = params->from->item_pos;
-+ cinfo->removed_count = 1;
-+ cinfo->mode |= CMODE_WHOLE;
-+ }
-+ } else {
-+ /* start from the items strictly between @from and @to, then
-+ extend the removed range over @from and/or @to if those are
-+ cut completely as well */
-+ cinfo->first_removed = params->from->item_pos + 1;
-+ cinfo->removed_count =
-+ params->to->item_pos - params->from->item_pos - 1;
-+
-+ if (keygt(from_key, &min_from_key)) {
-+ /* first item is not cut completely */
-+ cinfo->tail_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_TAIL;
-+ } else {
-+ cinfo->first_removed--;
-+ cinfo->removed_count++;
-+ }
-+ if (keylt(to_key, &max_to_key)) {
-+ /* last item is not cut completely */
-+ cinfo->head_removed = params->to->item_pos;
-+ cinfo->mode |= CMODE_HEAD;
-+ } else {
-+ cinfo->removed_count++;
-+ }
-+ if (cinfo->removed_count)
-+ cinfo->mode |= CMODE_WHOLE;
-+ }
-+
-+ return 0;
-+}
-+
-+/* For each of the @count items of @node starting at position @from, calls
-+   the ->b.kill_hook of the item's plugin, if the plugin has one, passing 0
-+   and coord_num_units() of the item together with @kdata. */
-+static void
-+call_kill_hooks(znode * node, pos_in_node_t from, pos_in_node_t count,
-+		carry_kill_data * kdata)
-+{
-+	coord_t item;
-+	item_plugin *plug;
-+	pos_in_node_t done;
-+
-+	item.node = node;
-+	item.unit_pos = 0;
-+	item.between = AT_UNIT;
-+
-+	done = 0;
-+	while (done < count) {
-+		/* aim the coord at the first unit of the next item */
-+		coord_set_item_pos(&item, from + done);
-+		item.unit_pos = 0;
-+		item.between = AT_UNIT;
-+
-+		plug = item_plugin_by_coord(&item);
-+		if (plug->b.kill_hook) {
-+			plug->b.kill_hook(&item, 0, coord_num_units(&item),
-+					  kdata);
-+		}
-+		done++;
-+	}
-+}
-+
-+/* this is used to kill item partially: hands units @from .. @to of the
-+   item at @coord to the ->b.kill_units method of its item plugin and
-+   returns whatever that method returns. @data is passed on as
-+   struct carry_kill_data. */
-+static pos_in_node_t
-+kill_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
-+	   reiser4_key * smallest_removed, reiser4_key * new_first_key)
-+{
-+	struct carry_kill_data *kill_data = data;
-+	item_plugin *plug = item_plugin_by_coord(coord);
-+
-+	/* the item plugin has to implement kill_units */
-+	assert("vs-1524", plug->b.kill_units);
-+
-+	return plug->b.kill_units(coord, from, to, kill_data,
-+				  smallest_removed, new_first_key);
-+}
-+
-+/* call item plugin to kill tail of item: units from @coord->unit_pos up to
-+   coord_last_unit_pos(@coord); no new first key is requested */
-+static pos_in_node_t
-+kill_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
-+{
-+	struct carry_kill_data *kill_data = data;
-+	pos_in_node_t last_unit = coord_last_unit_pos(coord);
-+
-+	return kill_units(coord, coord->unit_pos, last_unit, kill_data,
-+			  smallest_removed, NULL);
-+}
-+
-+/* call item plugin to kill head of item: units 0 up to @coord->unit_pos;
-+   @new_first_key is handed through to kill_units() */
-+static pos_in_node_t
-+kill_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
-+	  reiser4_key * new_first_key)
-+{
-+	const pos_in_node_t first_unit = 0;
-+
-+	return kill_units(coord, first_unit, coord->unit_pos, data,
-+			  smallest_removed, new_first_key);
-+}
-+
-+/* this is used to cut item partially: hands units @from .. @to of the item
-+   at @coord to the ->b.cut_units method of its item plugin and returns
-+   whatever that method returns. @data is passed on as carry_cut_data. */
-+static pos_in_node_t
-+cut_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
-+	  reiser4_key * smallest_removed, reiser4_key * new_first_key)
-+{
-+	carry_cut_data *cut_data = data;
-+	item_plugin *plug = item_plugin_by_coord(coord);
-+
-+	/* the item plugin has to implement cut_units */
-+	assert("vs-302", plug->b.cut_units);
-+
-+	return plug->b.cut_units(coord, from, to, cut_data,
-+				 smallest_removed, new_first_key);
-+}
-+
-+/* call item plugin to cut tail of item: units from @coord->unit_pos up to
-+   the last unit of the item @coord is set to; no new first key is
-+   requested. @data is the carry_cut_data that cut_units() hands to the
-+   item plugin. */
-+static pos_in_node_t
-+cut_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
-+{
-+	pos_in_node_t to;
-+
-+	/* Take the end of the range from @coord itself, exactly as
-+	   kill_tail() does, instead of from
-+	   ((carry_cut_data *)data)->params.from: the start of the range is
-+	   read from @coord, so the end has to describe the same item.
-+	   NOTE(review): the visible caller passes params->from as @coord,
-+	   so this is expected to give the same result - confirm @data's
-+	   params is that same structure. */
-+	to = coord_last_unit_pos(coord);
-+	return cut_units(coord, coord->unit_pos, to, data, smallest_removed,
-+			 NULL);
-+}
-+
-+/* call item plugin to cut head of item: units 0 up to @coord->unit_pos;
-+   @new_first_key is handed through to cut_units() */
-+static pos_in_node_t
-+cut_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
-+	 reiser4_key * new_first_key)
-+{
-+	const pos_in_node_t first_unit = 0;
-+
-+	return cut_units(coord, first_unit, coord->unit_pos, data,
-+			 smallest_removed, new_first_key);
-+}
-+
-+/* this returns 1 of key of first item changed, 0 - if it did not */
-+static int
-+prepare_for_compact(struct cut40_info *cinfo,
-+ const struct cut_kill_params *params, int is_cut,
-+ void *data, carry_plugin_info * info)
-+{
-+ znode *node;
-+ item_header40 *ih;
-+ pos_in_node_t freed;
-+ pos_in_node_t item_pos;
-+ coord_t coord;
-+ reiser4_key new_first_key;
-+ pos_in_node_t(*kill_units_f) (coord_t *, pos_in_node_t, pos_in_node_t,
-+ void *, reiser4_key *, reiser4_key *);
-+ pos_in_node_t(*kill_tail_f) (coord_t *, void *, reiser4_key *);
-+ pos_in_node_t(*kill_head_f) (coord_t *, void *, reiser4_key *,
-+ reiser4_key *);
-+ int retval;
-+
-+ retval = 0;
-+
-+ node = params->from->node;
-+
-+ assert("vs-184", node == params->to->node);
-+ assert("vs-312", !node_is_empty(node));
-+ assert("vs-297",
-+ coord_compare(params->from, params->to) != COORD_CMP_ON_RIGHT);
-+
-+ if (is_cut) {
-+ kill_units_f = cut_units;
-+ kill_tail_f = cut_tail;
-+ kill_head_f = cut_head;
-+ } else {
-+ kill_units_f = kill_units;
-+ kill_tail_f = kill_tail;
-+ kill_head_f = kill_head;
-+ }
-+
-+ if (parse_cut(cinfo, params) == 1) {
-+ /* cut from the middle of item */
-+ freed =
-+ kill_units_f(params->from, params->from->unit_pos,
-+ params->to->unit_pos, data,
-+ params->smallest_removed, NULL);
-+
-+ item_pos = params->from->item_pos;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) - freed;
-+ cinfo->freed_space_end = cinfo->freed_space_start + freed;
-+ cinfo->first_moved = item_pos + 1;
-+ } else {
-+ assert("vs-1521", (cinfo->tail_removed != MAX_POS_IN_NODE ||
-+ cinfo->first_removed != MAX_POS_IN_NODE ||
-+ cinfo->head_removed != MAX_POS_IN_NODE));
-+
-+ switch (cinfo->mode) {
-+ case CMODE_TAIL:
-+ /* one item gets cut partially from its end */
-+ assert("vs-1562",
-+ cinfo->tail_removed == params->from->item_pos);
-+
-+ freed =
-+ kill_tail_f(params->from, data,
-+ params->smallest_removed);
-+
-+ item_pos = cinfo->tail_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) -
-+ freed;
-+ cinfo->freed_space_end =
-+ cinfo->freed_space_start + freed;
-+ cinfo->first_moved = cinfo->tail_removed + 1;
-+ break;
-+
-+ case CMODE_WHOLE:
-+ /* one or more items get removed completely */
-+ assert("vs-1563",
-+ cinfo->first_removed == params->from->item_pos);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ item_pos = cinfo->first_removed;
-+ ih = node40_ih_at(node, item_pos);
-+
-+ if (params->smallest_removed)
-+ memcpy(params->smallest_removed, &ih->key,
-+ sizeof(reiser4_key));
-+
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+
-+ item_pos += (cinfo->removed_count - 1);
-+ ih -= (cinfo->removed_count - 1);
-+ cinfo->freed_space_end =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos);
-+ cinfo->first_moved = item_pos + 1;
-+ if (cinfo->first_removed == 0)
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_HEAD:
-+ /* one item gets cut partially from its head */
-+ assert("vs-1565",
-+ cinfo->head_removed == params->from->item_pos);
-+
-+ freed =
-+ kill_head_f(params->to, data,
-+ params->smallest_removed,
-+ &new_first_key);
-+
-+ item_pos = cinfo->head_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+ cinfo->freed_space_end = ih40_get_offset(ih) + freed;
-+ cinfo->first_moved = cinfo->head_removed + 1;
-+
-+ /* item head is removed, therefore, item key changed */
-+ coord.node = node;
-+ coord_set_item_pos(&coord, item_pos);
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ update_item_key_node40(&coord, &new_first_key, NULL);
-+ if (item_pos == 0)
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_TAIL | CMODE_WHOLE:
-+ /* one item gets cut from its end and one or more items get removed completely */
-+ assert("vs-1566",
-+ cinfo->tail_removed == params->from->item_pos);
-+ assert("vs-1567",
-+ cinfo->first_removed == cinfo->tail_removed + 1);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+
-+ freed =
-+ kill_tail_f(params->from, data,
-+ params->smallest_removed);
-+
-+ item_pos = cinfo->tail_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) -
-+ freed;
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ item_pos += cinfo->removed_count;
-+ ih -= cinfo->removed_count;
-+ cinfo->freed_space_end =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos);
-+ cinfo->first_moved = item_pos + 1;
-+ break;
-+
-+ case CMODE_WHOLE | CMODE_HEAD:
-+ /* one or more items get removed completely and one item gets cut partially from its head */
-+ assert("vs-1568",
-+ cinfo->first_removed == params->from->item_pos);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+ assert("vs-1569",
-+ cinfo->head_removed ==
-+ cinfo->first_removed + cinfo->removed_count);
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ item_pos = cinfo->first_removed;
-+ ih = node40_ih_at(node, item_pos);
-+
-+ if (params->smallest_removed)
-+ memcpy(params->smallest_removed, &ih->key,
-+ sizeof(reiser4_key));
-+
-+ freed =
-+ kill_head_f(params->to, data, NULL, &new_first_key);
-+
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+
-+ ih = node40_ih_at(node, cinfo->head_removed);
-+ /* this is the most complex case. Item which got head removed and items which are to be moved
-+ intact change their location differently. */
-+ cinfo->freed_space_end = ih40_get_offset(ih) + freed;
-+ cinfo->first_moved = cinfo->head_removed;
-+ cinfo->head_removed_location = cinfo->freed_space_start;
-+
-+ /* item head is removed, therefore, item key changed */
-+ coord.node = node;
-+ coord_set_item_pos(&coord, cinfo->head_removed);
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ update_item_key_node40(&coord, &new_first_key, NULL);
-+
-+ assert("vs-1579", cinfo->first_removed == 0);
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_TAIL | CMODE_HEAD:
-+ /* one item get cut from its end and its neighbor gets cut from its tail */
-+ impossible("vs-1576", "this can not happen currently");
-+ break;
-+
-+ case CMODE_TAIL | CMODE_WHOLE | CMODE_HEAD:
-+ impossible("vs-1577", "this can not happen currently");
-+ break;
-+ default:
-+ impossible("vs-1578", "unexpected cut mode");
-+ break;
-+ }
-+ }
-+ return retval;
-+}
-+
-+/* plugin->u.node.kill
-+ return value is number of items removed completely */
-+int kill_node40(struct carry_kill_data *kdata, carry_plugin_info * info)
-+{
-+ znode *node;
-+ struct cut40_info cinfo;
-+ int first_key_changed;
-+
-+ node = kdata->params.from->node;
-+
-+ first_key_changed =
-+ prepare_for_compact(&cinfo, &kdata->params, 0 /* not cut */ , kdata,
-+ info);
-+ compact(node, &cinfo);
-+
-+ if (info) {
-+ /* it is not called by node40_shift, so we have to take care
-+ of changes on upper levels */
-+ if (node_is_empty(node)
-+ && !(kdata->flags & DELETE_RETAIN_EMPTY))
-+ /* all contents of node is deleted */
-+ prepare_removal_node40(node, info);
-+ else if (first_key_changed) {
-+ prepare_for_update(NULL, node, info);
-+ }
-+ }
-+
-+ coord_clear_iplug(kdata->params.from);
-+ coord_clear_iplug(kdata->params.to);
-+
-+ znode_make_dirty(node);
-+ return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
-+}
-+
-+/* plugin->u.node.cut
-+ return value is number of items removed completely */
-+int cut_node40(struct carry_cut_data *cdata, carry_plugin_info * info)
-+{
-+ znode *node;
-+ struct cut40_info cinfo;
-+ int first_key_changed;
-+
-+ node = cdata->params.from->node;
-+
-+ first_key_changed =
-+ prepare_for_compact(&cinfo, &cdata->params, 1 /* not cut */ , cdata,
-+ info);
-+ compact(node, &cinfo);
-+
-+ if (info) {
-+ /* it is not called by node40_shift, so we have to take care
-+ of changes on upper levels */
-+ if (node_is_empty(node))
-+ /* all contents of node is deleted */
-+ prepare_removal_node40(node, info);
-+ else if (first_key_changed) {
-+ prepare_for_update(NULL, node, info);
-+ }
-+ }
-+
-+ coord_clear_iplug(cdata->params.from);
-+ coord_clear_iplug(cdata->params.to);
-+
-+ znode_make_dirty(node);
-+ return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
-+}
-+
-+/* this structure is used by shift method of node40 plugin */
-+struct shift_params {
-+ shift_direction pend; /* when @pend == append - we are shifting to
-+ left, when @pend == prepend - to right */
-+ coord_t wish_stop; /* when shifting to left this is last unit we
-+ want shifted, when shifting to right - this
-+ is set to unit we want to start shifting
-+ from */
-+ znode *target;
-+ int everything; /* it is set to 1 if everything we have to shift is
-+ shifted, 0 - otherwise */
-+
-+ /* FIXME-VS: get rid of read_stop */
-+
-+ /* these are set by estimate_shift */
-+ coord_t real_stop; /* this will be set to last unit which will be
-+ really shifted */
-+
-+ /* coordinate in source node before operation of unit which becomes
-+ first after shift to left of last after shift to right */
-+ union {
-+ coord_t future_first;
-+ coord_t future_last;
-+ } u;
-+
-+ unsigned merging_units; /* number of units of first item which have to
-+ be merged with last item of target node */
-+ unsigned merging_bytes; /* number of bytes in those units */
-+
-+ unsigned entire; /* items shifted in their entirety */
-+ unsigned entire_bytes; /* number of bytes in those items */
-+
-+ unsigned part_units; /* number of units of partially copied item */
-+ unsigned part_bytes; /* number of bytes in those units */
-+
-+ unsigned shift_bytes; /* total number of bytes in items shifted (item
-+ headers not included) */
-+
-+};
-+
-+static int item_creation_overhead(coord_t *item)
-+{
-+ return node_plugin_by_coord(item)->item_overhead(item->node, NULL);
-+}
-+
-+/* how many units are there in @source starting from source->unit_pos
-+ but not further than @stop_coord */
-+static int
-+wanted_units(coord_t *source, coord_t *stop_coord, shift_direction pend)
-+{
-+ if (pend == SHIFT_LEFT) {
-+ assert("vs-181", source->unit_pos == 0);
-+ } else {
-+ assert("vs-182",
-+ source->unit_pos == coord_last_unit_pos(source));
-+ }
-+
-+ if (source->item_pos != stop_coord->item_pos) {
-+ /* @source and @stop_coord are different items */
-+ return coord_last_unit_pos(source) + 1;
-+ }
-+
-+ if (pend == SHIFT_LEFT) {
-+ return stop_coord->unit_pos + 1;
-+ } else {
-+ return source->unit_pos - stop_coord->unit_pos + 1;
-+ }
-+}
-+
-+/* this calculates what can be copied from @shift->wish_stop.node to
-+ @shift->target */
-+static void
-+estimate_shift(struct shift_params *shift, const reiser4_context * ctx)
-+{
-+ unsigned target_free_space, size;
-+ pos_in_node_t stop_item; /* item which estimating should not consider */
-+ unsigned want; /* number of units of item we want shifted */
-+ coord_t source; /* item being estimated */
-+ item_plugin *iplug;
-+
-+ /* shifting to left/right starts from first/last units of
-+ @shift->wish_stop.node */
-+ if (shift->pend == SHIFT_LEFT) {
-+ coord_init_first_unit(&source, shift->wish_stop.node);
-+ } else {
-+ coord_init_last_unit(&source, shift->wish_stop.node);
-+ }
-+ shift->real_stop = source;
-+
-+ /* free space in target node and number of items in source */
-+ target_free_space = znode_free_space(shift->target);
-+
-+ shift->everything = 0;
-+ if (!node_is_empty(shift->target)) {
-+ /* target node is not empty, check for boundary items
-+ mergeability */
-+ coord_t to;
-+
-+ /* item we try to merge @source with */
-+ if (shift->pend == SHIFT_LEFT) {
-+ coord_init_last_unit(&to, shift->target);
-+ } else {
-+ coord_init_first_unit(&to, shift->target);
-+ }
-+
-+ if ((shift->pend == SHIFT_LEFT) ? are_items_mergeable(&to,
-+ &source) :
-+ are_items_mergeable(&source, &to)) {
-+ /* how many units of @source do we want to merge to
-+ item @to */
-+ want =
-+ wanted_units(&source, &shift->wish_stop,
-+ shift->pend);
-+
-+ /* how many units of @source we can merge to item
-+ @to */
-+ iplug = item_plugin_by_coord(&source);
-+ if (iplug->b.can_shift != NULL)
-+ shift->merging_units =
-+ iplug->b.can_shift(target_free_space,
-+ &source, shift->target,
-+ shift->pend, &size,
-+ want);
-+ else {
-+ shift->merging_units = 0;
-+ size = 0;
-+ }
-+ shift->merging_bytes = size;
-+ shift->shift_bytes += size;
-+ /* update stop coord to be set to last unit of @source
-+ we can merge to @target */
-+ if (shift->merging_units)
-+ /* at least one unit can be shifted */
-+ shift->real_stop.unit_pos =
-+ (shift->merging_units - source.unit_pos -
-+ 1) * shift->pend;
-+ else {
-+ /* nothing can be shifted */
-+ if (shift->pend == SHIFT_LEFT)
-+ coord_init_before_first_item(&shift->
-+ real_stop,
-+ source.
-+ node);
-+ else
-+ coord_init_after_last_item(&shift->
-+ real_stop,
-+ source.node);
-+ }
-+ assert("nikita-2081", shift->real_stop.unit_pos + 1);
-+
-+ if (shift->merging_units != want) {
-+ /* we could not copy as many as we want, so,
-+ there is no reason for estimating any
-+ longer */
-+ return;
-+ }
-+
-+ target_free_space -= size;
-+ coord_add_item_pos(&source, shift->pend);
-+ }
-+ }
-+
-+ /* number of item nothing of which we want to shift */
-+ stop_item = shift->wish_stop.item_pos + shift->pend;
-+
-+ /* calculate how many items can be copied into given free
-+ space as whole */
-+ for (; source.item_pos != stop_item;
-+ coord_add_item_pos(&source, shift->pend)) {
-+ if (shift->pend == SHIFT_RIGHT)
-+ source.unit_pos = coord_last_unit_pos(&source);
-+
-+ /* how many units of @source do we want to copy */
-+ want = wanted_units(&source, &shift->wish_stop, shift->pend);
-+
-+ if (want == coord_last_unit_pos(&source) + 1) {
-+ /* we want this item to be copied entirely */
-+ size =
-+ item_length_by_coord(&source) +
-+ item_creation_overhead(&source);
-+ if (size <= target_free_space) {
-+ /* item fits into target node as whole */
-+ target_free_space -= size;
-+ shift->shift_bytes +=
-+ size - item_creation_overhead(&source);
-+ shift->entire_bytes +=
-+ size - item_creation_overhead(&source);
-+ shift->entire++;
-+
-+ /* update shift->real_stop coord to be set to
-+ last unit of @source we can merge to
-+ @target */
-+ shift->real_stop = source;
-+ if (shift->pend == SHIFT_LEFT)
-+ shift->real_stop.unit_pos =
-+ coord_last_unit_pos(&shift->
-+ real_stop);
-+ else
-+ shift->real_stop.unit_pos = 0;
-+ continue;
-+ }
-+ }
-+
-+ /* we reach here only for an item which does not fit into
-+ target node in its entirety. This item may be either
-+ partially shifted, or not shifted at all. We will have to
-+ create new item in target node, so decrease amout of free
-+ space by an item creation overhead. We can reach here also
-+ if stop coord is in this item */
-+ if (target_free_space >=
-+ (unsigned)item_creation_overhead(&source)) {
-+ target_free_space -= item_creation_overhead(&source);
-+ iplug = item_plugin_by_coord(&source);
-+ if (iplug->b.can_shift) {
-+ shift->part_units = iplug->b.can_shift(target_free_space,
-+ &source,
-+ NULL, /* target */
-+ shift->pend,
-+ &size,
-+ want);
-+ } else {
-+ target_free_space = 0;
-+ shift->part_units = 0;
-+ size = 0;
-+ }
-+ } else {
-+ target_free_space = 0;
-+ shift->part_units = 0;
-+ size = 0;
-+ }
-+ shift->part_bytes = size;
-+ shift->shift_bytes += size;
-+
-+ /* set @shift->real_stop to last unit of @source we can merge
-+ to @shift->target */
-+ if (shift->part_units) {
-+ shift->real_stop = source;
-+ shift->real_stop.unit_pos =
-+ (shift->part_units - source.unit_pos -
-+ 1) * shift->pend;
-+ assert("nikita-2082", shift->real_stop.unit_pos + 1);
-+ }
-+
-+ if (want != shift->part_units)
-+ /* not everything wanted were shifted */
-+ return;
-+ break;
-+ }
-+
-+ shift->everything = 1;
-+}
-+
-+static void
-+copy_units(coord_t * target, coord_t * source, unsigned from, unsigned count,
-+ shift_direction dir, unsigned free_space)
-+{
-+ item_plugin *iplug;
-+
-+ assert("nikita-1463", target != NULL);
-+ assert("nikita-1464", source != NULL);
-+ assert("nikita-1465", from + count <= coord_num_units(source));
-+
-+ iplug = item_plugin_by_coord(source);
-+ assert("nikita-1468", iplug == item_plugin_by_coord(target));
-+ iplug->b.copy_units(target, source, from, count, dir, free_space);
-+
-+ if (dir == SHIFT_RIGHT) {
-+ /* FIXME-VS: this looks not necessary. update_item_key was
-+ called already by copy_units method */
-+ reiser4_key split_key;
-+
-+ assert("nikita-1469", target->unit_pos == 0);
-+
-+ unit_key_by_coord(target, &split_key);
-+ node_plugin_by_coord(target)->update_item_key(target,
-+ &split_key, NULL);
-+ }
-+}
-+
-+/* copy part of @shift->real_stop.node starting either from its beginning or
-+ from its end and ending at @shift->real_stop to either the end or the
-+ beginning of @shift->target */
-+static void copy(struct shift_params *shift)
-+{
-+ node40_header *nh;
-+ coord_t from;
-+ coord_t to;
-+ item_header40 *from_ih, *to_ih;
-+ int free_space_start;
-+ int new_items;
-+ unsigned old_items;
-+ int old_offset;
-+ unsigned i;
-+
-+ nh = node40_node_header(shift->target);
-+ free_space_start = nh40_get_free_space_start(nh);
-+ old_items = nh40_get_num_items(nh);
-+ new_items = shift->entire + (shift->part_units ? 1 : 0);
-+ assert("vs-185",
-+ shift->shift_bytes ==
-+ shift->merging_bytes + shift->entire_bytes + shift->part_bytes);
-+
-+ from = shift->wish_stop;
-+
-+ coord_init_first_unit(&to, shift->target);
-+
-+ /* NOTE:NIKITA->VS not sure what I am doing: shift->target is empty,
-+ hence to.between is set to EMPTY_NODE above. Looks like we want it
-+ to be AT_UNIT.
-+
-+ Oh, wonders of ->betweeness...
-+
-+ */
-+ to.between = AT_UNIT;
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* copying to left */
-+
-+ coord_set_item_pos(&from, 0);
-+ from_ih = node40_ih_at(from.node, 0);
-+
-+ coord_set_item_pos(&to,
-+ node40_num_of_items_internal(to.node) - 1);
-+ if (shift->merging_units) {
-+ /* expand last item, so that plugin methods will see
-+ correct data */
-+ free_space_start += shift->merging_bytes;
-+ nh40_set_free_space_start(nh,
-+ (unsigned)free_space_start);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ shift->merging_bytes);
-+
-+ /* appending last item of @target */
-+ copy_units(&to, &from, 0, /* starting from 0-th unit */
-+ shift->merging_units, SHIFT_LEFT,
-+ shift->merging_bytes);
-+ coord_inc_item_pos(&from);
-+ from_ih--;
-+ coord_inc_item_pos(&to);
-+ }
-+
-+ to_ih = node40_ih_at(shift->target, old_items);
-+ if (shift->entire) {
-+ /* copy @entire items entirely */
-+
-+ /* copy item headers */
-+ memcpy(to_ih - shift->entire + 1,
-+ from_ih - shift->entire + 1,
-+ shift->entire * sizeof(item_header40));
-+ /* update item header offset */
-+ old_offset = ih40_get_offset(from_ih);
-+ /* AUDIT: Looks like if we calculate old_offset + free_space_start here instead of just old_offset, we can perform one "add" operation less per each iteration */
-+ for (i = 0; i < shift->entire; i++, to_ih--, from_ih--)
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(from_ih) -
-+ old_offset + free_space_start);
-+
-+ /* copy item bodies */
-+ memcpy(zdata(shift->target) + free_space_start, zdata(from.node) + old_offset, /*ih40_get_offset (from_ih), */
-+ shift->entire_bytes);
-+
-+ coord_add_item_pos(&from, (int)shift->entire);
-+ coord_add_item_pos(&to, (int)shift->entire);
-+ }
-+
-+ nh40_set_free_space_start(nh,
-+ free_space_start +
-+ shift->shift_bytes -
-+ shift->merging_bytes);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ (shift->shift_bytes - shift->merging_bytes +
-+ sizeof(item_header40) * new_items));
-+
-+ /* update node header */
-+ node40_set_num_items(shift->target, nh, old_items + new_items);
-+ assert("vs-170",
-+ nh40_get_free_space(nh) < znode_size(shift->target));
-+
-+ if (shift->part_units) {
-+ /* copy heading part (@part units) of @source item as
-+ a new item into @target->node */
-+
-+ /* copy item header of partially copied item */
-+ coord_set_item_pos(&to,
-+ node40_num_of_items_internal(to.node)
-+ - 1);
-+ memcpy(to_ih, from_ih, sizeof(item_header40));
-+ ih40_set_offset(to_ih,
-+ nh40_get_free_space_start(nh) -
-+ shift->part_bytes);
-+ if (item_plugin_by_coord(&to)->b.init)
-+ item_plugin_by_coord(&to)->b.init(&to, &from,
-+ NULL);
-+ copy_units(&to, &from, 0, shift->part_units, SHIFT_LEFT,
-+ shift->part_bytes);
-+ }
-+
-+ } else {
-+ /* copying to right */
-+
-+ coord_set_item_pos(&from,
-+ node40_num_of_items_internal(from.node) - 1);
-+ from_ih = node40_ih_at_coord(&from);
-+
-+ coord_set_item_pos(&to, 0);
-+
-+ /* prepare space for new items */
-+ memmove(zdata(to.node) + sizeof(node40_header) +
-+ shift->shift_bytes,
-+ zdata(to.node) + sizeof(node40_header),
-+ free_space_start - sizeof(node40_header));
-+ /* update item headers of moved items */
-+ to_ih = node40_ih_at(to.node, 0);
-+ /* first item gets @merging_bytes longer. free space appears
-+ at its beginning */
-+ if (!node_is_empty(to.node))
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(to_ih) +
-+ shift->shift_bytes -
-+ shift->merging_bytes);
-+
-+ for (i = 1; i < old_items; i++)
-+ ih40_set_offset(to_ih - i,
-+ ih40_get_offset(to_ih - i) +
-+ shift->shift_bytes);
-+
-+ /* move item headers to make space for new items */
-+ memmove(to_ih - old_items + 1 - new_items,
-+ to_ih - old_items + 1,
-+ sizeof(item_header40) * old_items);
-+ to_ih -= (new_items - 1);
-+
-+ nh40_set_free_space_start(nh,
-+ free_space_start +
-+ shift->shift_bytes);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ (shift->shift_bytes +
-+ sizeof(item_header40) * new_items));
-+
-+ /* update node header */
-+ node40_set_num_items(shift->target, nh, old_items + new_items);
-+ assert("vs-170",
-+ nh40_get_free_space(nh) < znode_size(shift->target));
-+
-+ if (shift->merging_units) {
-+ coord_add_item_pos(&to, new_items);
-+ to.unit_pos = 0;
-+ to.between = AT_UNIT;
-+ /* prepend first item of @to */
-+ copy_units(&to, &from,
-+ coord_last_unit_pos(&from) -
-+ shift->merging_units + 1,
-+ shift->merging_units, SHIFT_RIGHT,
-+ shift->merging_bytes);
-+ coord_dec_item_pos(&from);
-+ from_ih++;
-+ }
-+
-+ if (shift->entire) {
-+ /* copy @entire items entirely */
-+
-+ /* copy item headers */
-+ memcpy(to_ih, from_ih,
-+ shift->entire * sizeof(item_header40));
-+
-+ /* update item header offset */
-+ old_offset =
-+ ih40_get_offset(from_ih + shift->entire - 1);
-+ /* AUDIT: old_offset + sizeof (node40_header) + shift->part_bytes calculation can be taken off the loop. */
-+ for (i = 0; i < shift->entire; i++, to_ih++, from_ih++)
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(from_ih) -
-+ old_offset +
-+ sizeof(node40_header) +
-+ shift->part_bytes);
-+ /* copy item bodies */
-+ coord_add_item_pos(&from, -(int)(shift->entire - 1));
-+ memcpy(zdata(to.node) + sizeof(node40_header) +
-+ shift->part_bytes, item_by_coord_node40(&from),
-+ shift->entire_bytes);
-+ coord_dec_item_pos(&from);
-+ }
-+
-+ if (shift->part_units) {
-+ coord_set_item_pos(&to, 0);
-+ to.unit_pos = 0;
-+ to.between = AT_UNIT;
-+ /* copy heading part (@part units) of @source item as
-+ a new item into @target->node */
-+
-+ /* copy item header of partially copied item */
-+ memcpy(to_ih, from_ih, sizeof(item_header40));
-+ ih40_set_offset(to_ih, sizeof(node40_header));
-+ if (item_plugin_by_coord(&to)->b.init)
-+ item_plugin_by_coord(&to)->b.init(&to, &from,
-+ NULL);
-+ copy_units(&to, &from,
-+ coord_last_unit_pos(&from) -
-+ shift->part_units + 1, shift->part_units,
-+ SHIFT_RIGHT, shift->part_bytes);
-+ }
-+ }
-+}
-+
-+/* remove everything either before or after @fact_stop. Number of items
-+ removed completely is returned */
-+static int delete_copied(struct shift_params *shift)
-+{
-+ coord_t from;
-+ coord_t to;
-+ struct carry_cut_data cdata;
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* we were shifting to left, remove everything from the
-+ beginning of @shift->wish_stop->node upto
-+ @shift->wish_stop */
-+ coord_init_first_unit(&from, shift->real_stop.node);
-+ to = shift->real_stop;
-+
-+ /* store old coordinate of unit which will be first after
-+ shift to left */
-+ shift->u.future_first = to;
-+ coord_next_unit(&shift->u.future_first);
-+ } else {
-+ /* we were shifting to right, remove everything from
-+ @shift->stop_coord upto to end of
-+ @shift->stop_coord->node */
-+ from = shift->real_stop;
-+ coord_init_last_unit(&to, from.node);
-+
-+ /* store old coordinate of unit which will be last after
-+ shift to right */
-+ shift->u.future_last = from;
-+ coord_prev_unit(&shift->u.future_last);
-+ }
-+
-+ cdata.params.from = &from;
-+ cdata.params.to = &to;
-+ cdata.params.from_key = NULL;
-+ cdata.params.to_key = NULL;
-+ cdata.params.smallest_removed = NULL;
-+ return cut_node40(&cdata, NULL);
-+}
-+
-+/* something was moved between @left and @right. Add carry operation to @info
-+ list to have carry to update delimiting key between them */
-+static int
-+prepare_for_update(znode * left, znode * right, carry_plugin_info * info)
-+{
-+ carry_op *op;
-+ carry_node *cn;
-+
-+ if (info == NULL)
-+ /* nowhere to send operation to. */
-+ return 0;
-+
-+ if (!should_notify_parent(right))
-+ return 0;
-+
-+ op = node_post_carry(info, COP_UPDATE, right, 1);
-+ if (IS_ERR(op) || op == NULL)
-+ return op ? PTR_ERR(op) : -EIO;
-+
-+ if (left != NULL) {
-+ carry_node *reference;
-+
-+ if (info->doing)
-+ reference = insert_carry_node(info->doing,
-+ info->todo, left);
-+ else
-+ reference = op->node;
-+ assert("nikita-2992", reference != NULL);
-+ cn = reiser4_add_carry(info->todo, POOLO_BEFORE, reference);
-+ if (IS_ERR(cn))
-+ return PTR_ERR(cn);
-+ cn->parent = 1;
-+ cn->node = left;
-+ if (ZF_ISSET(left, JNODE_ORPHAN))
-+ cn->left_before = 1;
-+ op->u.update.left = cn;
-+ } else
-+ op->u.update.left = NULL;
-+ return 0;
-+}
-+
-+/* plugin->u.node.prepare_removal
-+ to delete a pointer to @empty from the tree add corresponding carry
-+ operation (delete) to @info list */
-+int prepare_removal_node40(znode * empty, carry_plugin_info * info)
-+{
-+ carry_op *op;
-+ reiser4_tree *tree;
-+
-+ if (!should_notify_parent(empty))
-+ return 0;
-+ /* already on a road to Styx */
-+ if (ZF_ISSET(empty, JNODE_HEARD_BANSHEE))
-+ return 0;
-+ op = node_post_carry(info, COP_DELETE, empty, 1);
-+ if (IS_ERR(op) || op == NULL)
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+
-+ op->u.delete.child = NULL;
-+ op->u.delete.flags = 0;
-+
-+ /* fare thee well */
-+ tree = znode_get_tree(empty);
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ znode_set_ld_key(empty, znode_get_rd_key(empty));
-+ if (znode_is_left_connected(empty) && empty->left)
-+ znode_set_rd_key(empty->left, znode_get_rd_key(empty));
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+
-+ ZF_SET(empty, JNODE_HEARD_BANSHEE);
-+ return 0;
-+}
-+
-+/* something were shifted from @insert_coord->node to @shift->target, update
-+ @insert_coord correspondingly */
-+static void
-+adjust_coord(coord_t * insert_coord, struct shift_params *shift, int removed,
-+ int including_insert_coord)
-+{
-+ /* item plugin was invalidated by shifting */
-+ coord_clear_iplug(insert_coord);
-+
-+ if (node_is_empty(shift->wish_stop.node)) {
-+ assert("vs-242", shift->everything);
-+ if (including_insert_coord) {
-+ if (shift->pend == SHIFT_RIGHT) {
-+ /* set @insert_coord before first unit of
-+ @shift->target node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->target);
-+ } else {
-+ /* set @insert_coord after last in target node */
-+ coord_init_after_last_item(insert_coord,
-+ shift->target);
-+ }
-+ } else {
-+ /* set @insert_coord inside of empty node. There is
-+ only one possible coord within an empty
-+ node. init_first_unit will set that coord */
-+ coord_init_first_unit(insert_coord,
-+ shift->wish_stop.node);
-+ }
-+ return;
-+ }
-+
-+ if (shift->pend == SHIFT_RIGHT) {
-+ /* there was shifting to right */
-+ if (shift->everything) {
-+ /* everything wanted was shifted */
-+ if (including_insert_coord) {
-+ /* @insert_coord is set before first unit of
-+ @to node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->target);
-+ insert_coord->between = BEFORE_UNIT;
-+ } else {
-+ /* @insert_coord is set after last unit of
-+ @insert->node */
-+ coord_init_last_unit(insert_coord,
-+ shift->wish_stop.node);
-+ insert_coord->between = AFTER_UNIT;
-+ }
-+ }
-+ return;
-+ }
-+
-+ /* there was shifting to left */
-+ if (shift->everything) {
-+ /* everything wanted was shifted */
-+ if (including_insert_coord) {
-+ /* @insert_coord is set after last unit in @to node */
-+ coord_init_after_last_item(insert_coord, shift->target);
-+ } else {
-+ /* @insert_coord is set before first unit in the same
-+ node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->wish_stop.node);
-+ }
-+ return;
-+ }
-+
-+ /* FIXME-VS: the code below is complicated because with between ==
-+ AFTER_ITEM unit_pos is set to 0 */
-+
-+ if (!removed) {
-+ /* no items were shifted entirely */
-+ assert("vs-195", shift->merging_units == 0
-+ || shift->part_units == 0);
-+
-+ if (shift->real_stop.item_pos == insert_coord->item_pos) {
-+ if (shift->merging_units) {
-+ if (insert_coord->between == AFTER_UNIT) {
-+ assert("nikita-1441",
-+ insert_coord->unit_pos >=
-+ shift->merging_units);
-+ insert_coord->unit_pos -=
-+ shift->merging_units;
-+ } else if (insert_coord->between == BEFORE_UNIT) {
-+ assert("nikita-2090",
-+ insert_coord->unit_pos >
-+ shift->merging_units);
-+ insert_coord->unit_pos -=
-+ shift->merging_units;
-+ }
-+
-+ assert("nikita-2083",
-+ insert_coord->unit_pos + 1);
-+ } else {
-+ if (insert_coord->between == AFTER_UNIT) {
-+ assert("nikita-1442",
-+ insert_coord->unit_pos >=
-+ shift->part_units);
-+ insert_coord->unit_pos -=
-+ shift->part_units;
-+ } else if (insert_coord->between == BEFORE_UNIT) {
-+ assert("nikita-2089",
-+ insert_coord->unit_pos >
-+ shift->part_units);
-+ insert_coord->unit_pos -=
-+ shift->part_units;
-+ }
-+
-+ assert("nikita-2084",
-+ insert_coord->unit_pos + 1);
-+ }
-+ }
-+ return;
-+ }
-+
-+ /* we shifted to left and there was no enough space for everything */
-+ switch (insert_coord->between) {
-+ case AFTER_UNIT:
-+ case BEFORE_UNIT:
-+ if (shift->real_stop.item_pos == insert_coord->item_pos)
-+ insert_coord->unit_pos -= shift->part_units;
-+ case AFTER_ITEM:
-+ coord_add_item_pos(insert_coord, -removed);
-+ break;
-+ default:
-+ impossible("nikita-2087", "not ready");
-+ }
-+ assert("nikita-2085", insert_coord->unit_pos + 1);
-+}
-+
-+static int call_shift_hooks(struct shift_params *shift)
-+{
-+ unsigned i, shifted;
-+ coord_t coord;
-+ item_plugin *iplug;
-+
-+ assert("vs-275", !node_is_empty(shift->target));
-+
-+ /* number of items shift touches */
-+ shifted =
-+ shift->entire + (shift->merging_units ? 1 : 0) +
-+ (shift->part_units ? 1 : 0);
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* moved items are at the end */
-+ coord_init_last_unit(&coord, shift->target);
-+ coord.unit_pos = 0;
-+
-+ assert("vs-279", shift->pend == 1);
-+ for (i = 0; i < shifted; i++) {
-+ unsigned from, count;
-+
-+ iplug = item_plugin_by_coord(&coord);
-+ if (i == 0 && shift->part_units) {
-+ assert("vs-277",
-+ coord_num_units(&coord) ==
-+ shift->part_units);
-+ count = shift->part_units;
-+ from = 0;
-+ } else if (i == shifted - 1 && shift->merging_units) {
-+ count = shift->merging_units;
-+ from = coord_num_units(&coord) - count;
-+ } else {
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ }
-+
-+ if (iplug->b.shift_hook) {
-+ iplug->b.shift_hook(&coord, from, count,
-+ shift->wish_stop.node);
-+ }
-+ coord_add_item_pos(&coord, -shift->pend);
-+ }
-+ } else {
-+ /* moved items are at the beginning */
-+ coord_init_first_unit(&coord, shift->target);
-+
-+ assert("vs-278", shift->pend == -1);
-+ for (i = 0; i < shifted; i++) {
-+ unsigned from, count;
-+
-+ iplug = item_plugin_by_coord(&coord);
-+ if (i == 0 && shift->part_units) {
-+ assert("vs-277",
-+ coord_num_units(&coord) ==
-+ shift->part_units);
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ } else if (i == shifted - 1 && shift->merging_units) {
-+ count = shift->merging_units;
-+ from = 0;
-+ } else {
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ }
-+
-+ if (iplug->b.shift_hook) {
-+ iplug->b.shift_hook(&coord, from, count,
-+ shift->wish_stop.node);
-+ }
-+ coord_add_item_pos(&coord, -shift->pend);
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* shift to left is completed. Return 1 if unit @old was moved to left neighbor */
-+static int
-+unit_moved_left(const struct shift_params *shift, const coord_t * old)
-+{
-+ assert("vs-944", shift->real_stop.node == old->node);
-+
-+ if (shift->real_stop.item_pos < old->item_pos)
-+ return 0;
-+ if (shift->real_stop.item_pos == old->item_pos) {
-+ if (shift->real_stop.unit_pos < old->unit_pos)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* shift to right is completed. Return 1 if unit @old was moved to right
-+ neighbor */
-+static int
-+unit_moved_right(const struct shift_params *shift, const coord_t * old)
-+{
-+ assert("vs-944", shift->real_stop.node == old->node);
-+
-+ if (shift->real_stop.item_pos > old->item_pos)
-+ return 0;
-+ if (shift->real_stop.item_pos == old->item_pos) {
-+ if (shift->real_stop.unit_pos > old->unit_pos)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* coord @old was set in one of the two nodes which took part in the shift
-+   described by @shift: either the node the shift was performed from
-+   (@shift->wish_stop.node) or the node it was performed to (@shift->target).
-+   Store in @new the coord which corresponds to @old after the shift and
-+   return @new.
-+
-+   @new->between is always copied from @old. The item plugin of @new is
-+   re-resolved at the end, except on the two early returns taken when @old is
-+   set in the target node */
-+static coord_t *adjust_coord2(const struct shift_params *shift,
-+			      const coord_t * old, coord_t * new)
-+{
-+	coord_clear_iplug(new);
-+	new->between = old->between;
-+
-+	if (old->node == shift->target) {
-+		if (shift->pend == SHIFT_LEFT) {
-+			/* coord which is set inside of left neighbor does not
-+			   change during shift to left */
-+			coord_dup(new, old);
-+			return new;
-+		}
-+		/* shift to right: the item position is advanced by
-+		   @shift->entire, plus one when @shift->part_units is set */
-+		new->node = old->node;
-+		coord_set_item_pos(new,
-+				   old->item_pos + shift->entire +
-+				   (shift->part_units ? 1 : 0));
-+		new->unit_pos = old->unit_pos;
-+		if (old->item_pos == 0 && shift->merging_units)
-+			new->unit_pos += shift->merging_units;
-+		return new;
-+	}
-+
-+	assert("vs-977", old->node == shift->wish_stop.node);
-+	if (shift->pend == SHIFT_LEFT) {
-+		if (unit_moved_left(shift, old)) {
-+			/* unit @old moved to left neighbor. Calculate its
-+			   coordinate there */
-+			new->node = shift->target;
-+			coord_set_item_pos(new,
-+					   node_num_items(shift->target) -
-+					   shift->entire -
-+					   (shift->part_units ? 1 : 0) +
-+					   old->item_pos);
-+
-+			new->unit_pos = old->unit_pos;
-+			if (shift->merging_units) {
-+				coord_dec_item_pos(new);
-+				if (old->item_pos == 0) {
-+					/* unit_pos only changes if item got
-+					   merged */
-+					new->unit_pos =
-+					    coord_num_units(new) -
-+					    (shift->merging_units -
-+					     old->unit_pos);
-+				}
-+			}
-+		} else {
-+			/* unit @old did not move to left neighbor.
-+
-+			   Use _nocheck, because @old is outside of its node.
-+			 */
-+			coord_dup_nocheck(new, old);
-+			coord_add_item_pos(new,
-+					   -shift->u.future_first.item_pos);
-+			if (new->item_pos == 0)
-+				new->unit_pos -= shift->u.future_first.unit_pos;
-+		}
-+	} else {
-+		if (unit_moved_right(shift, old)) {
-+			/* unit @old moved to right neighbor */
-+			new->node = shift->target;
-+			coord_set_item_pos(new,
-+					   old->item_pos -
-+					   shift->real_stop.item_pos);
-+			/* Set the unit position explicitly: @new may be an
-+			   uninitialized coord. Previously this branch never
-+			   assigned unit_pos and instead overwrote the item
-+			   position with a difference of unit positions */
-+			new->unit_pos = old->unit_pos;
-+			if (new->item_pos == 0) {
-+				/* @old is in the item shifting stopped in,
-+				   so its unit position is counted from
-+				   real_stop */
-+				new->unit_pos -= shift->real_stop.unit_pos;
-+			}
-+		} else {
-+			/* unit @old did not move to right neighbor, therefore
-+			   it did not change */
-+			coord_dup(new, old);
-+		}
-+	}
-+	coord_set_iplug(new, item_plugin_by_coord(new));
-+	return new;
-+}
-+
-+/* Called when a shift is completed (part of the source node has been copied
-+   to the target node and deleted from the source). Walks all taps of the
-+   current context and re-points those set in either node taking part in the
-+   shift to their adjusted coord */
-+static void update_taps(const struct shift_params *shift)
-+{
-+	tap_t *tap;
-+	coord_t adjusted;
-+
-+	for_all_taps(tap) {
-+		const znode *tapped = tap->coord->node;
-+
-+		/* taps set to nodes not participating in the shift are left
-+		   alone */
-+		if (tapped == shift->wish_stop.node || tapped == shift->target)
-+			tap_to_coord(tap,
-+				     adjust_coord2(shift, tap->coord,
-+						   &adjusted));
-+	}
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* Debug-only snapshot of one item, recorded by shift_check_prepare() before
-+   a shift and compared against the nodes by shift_check() afterwards */
-+struct shift_check {
-+ /* key copied from the item header */
-+ reiser4_key key;
-+ /* plugin id read from the item header, converted to cpu byte order */
-+ __u16 plugin_id;
-+ /* size of the item; which member is used depends on plugin_id */
-+ union {
-+ /* CTAIL_ID, FORMATTING_ID: number of units;
-+ EXTENT_POINTER_ID: result of reiser4_extent_size() */
-+ __u64 bytes;
-+ /* COMPOUND_DIR_ID: number of units */
-+ __u64 entries;
-+ /* any other plugin id: set to NULL, never compared */
-+ void *unused;
-+ } u;
-+};
-+
-+/* Debug helper: record the items of @left followed by the items of @right
-+ before a shift, so that shift_check() can verify afterwards that nothing
-+ was lost or altered.
-+
-+ If the last item of @left and the first item of @right are mergeable they
-+ are recorded as a single entry whose size is the sum of both.
-+
-+ Returns an array of struct shift_check allocated with kmalloc (released by
-+ shift_check()), or NULL if the allocation failed */
-+void *shift_check_prepare(const znode * left, const znode * right)
-+{
-+ pos_in_node_t i, nr_items;
-+ int mergeable;
-+ struct shift_check *data;
-+ item_header40 *ih;
-+
-+ /* boundary items can only merge if both nodes have items */
-+ if (node_is_empty(left) || node_is_empty(right))
-+ mergeable = 0;
-+ else {
-+ coord_t l, r;
-+
-+ coord_init_last_unit(&l, left);
-+ coord_init_first_unit(&r, right);
-+ mergeable = are_items_mergeable(&l, &r);
-+ }
-+ /* a mergeable boundary pair occupies one entry, not two */
-+ nr_items =
-+ node40_num_of_items_internal(left) +
-+ node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
-+ data =
-+ kmalloc(sizeof(struct shift_check) * nr_items,
-+ reiser4_ctx_gfp_mask_get());
-+ if (data != NULL) {
-+ coord_t coord;
-+ pos_in_node_t item_pos;
-+
-+ coord_init_first_unit(&coord, left);
-+ /* i indexes data[]; item_pos indexes items within a node */
-+ i = 0;
-+
-+ /* record every item of @left */
-+ for (item_pos = 0;
-+ item_pos < node40_num_of_items_internal(left);
-+ item_pos++) {
-+
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ data[i].key = ih->key;
-+ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i].u.bytes = coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i].u.bytes =
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i].u.entries = coord_num_units(&coord);
-+ break;
-+ default:
-+ data[i].u.unused = NULL;
-+ break;
-+ }
-+ i++;
-+ }
-+
-+ coord_init_first_unit(&coord, right);
-+
-+ if (mergeable) {
-+ assert("vs-1609", i != 0);
-+
-+ /* fold the first item of @right into the entry recorded
-+ for the last item of @left */
-+ ih = node40_ih_at_coord(&coord);
-+
-+ assert("vs-1589",
-+ data[i - 1].plugin_id ==
-+ le16_to_cpu(get_unaligned(&ih->plugin_id)));
-+ switch (data[i - 1].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i - 1].u.bytes += coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i - 1].u.bytes +=
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i - 1].u.entries +=
-+ coord_num_units(&coord);
-+ break;
-+ default:
-+ impossible("vs-1605", "wrong mergeable item");
-+ break;
-+ }
-+ /* first item of @right is already accounted for */
-+ item_pos = 1;
-+ } else
-+ item_pos = 0;
-+ /* record the remaining items of @right */
-+ for (; item_pos < node40_num_of_items_internal(right);
-+ item_pos++) {
-+
-+ assert("vs-1604", i < nr_items);
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ data[i].key = ih->key;
-+ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i].u.bytes = coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i].u.bytes =
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i].u.entries = coord_num_units(&coord);
-+ break;
-+ default:
-+ data[i].u.unused = NULL;
-+ break;
-+ }
-+ i++;
-+ }
-+ assert("vs-1606", i == nr_items);
-+ }
-+ return data;
-+}
-+
-+/* Debug helper, counterpart of shift_check_prepare(). @vp is the snapshot
-+   taken before the shift (may be NULL, in which case nothing is checked).
-+   Asserts that the items now found in @left followed by @right have the same
-+   keys, plugin ids and sizes as recorded; if the last item of @left and the
-+   first item of @right are mergeable, their sizes are checked as a sum
-+   against a single recorded entry. Frees @vp */
-+void shift_check(void *vp, const znode * left, const znode * right)
-+{
-+	pos_in_node_t i, nr_items;
-+	coord_t coord;
-+	__u64 last_bytes;
-+	int mergeable;
-+	item_header40 *ih;
-+	pos_in_node_t item_pos;
-+	struct shift_check *data;
-+
-+	data = (struct shift_check *)vp;
-+
-+	if (data == NULL)
-+		return;
-+
-+	if (node_is_empty(left) || node_is_empty(right))
-+		mergeable = 0;
-+	else {
-+		coord_t l, r;
-+
-+		coord_init_last_unit(&l, left);
-+		coord_init_first_unit(&r, right);
-+		mergeable = are_items_mergeable(&l, &r);
-+	}
-+
-+	nr_items =
-+	    node40_num_of_items_internal(left) +
-+	    node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
-+
-+	/* i indexes data[]; item_pos indexes items within a node */
-+	i = 0;
-+	last_bytes = 0;
-+
-+	coord_init_first_unit(&coord, left);
-+
-+	for (item_pos = 0; item_pos < node40_num_of_items_internal(left);
-+	     item_pos++) {
-+
-+		coord_set_item_pos(&coord, item_pos);
-+		ih = node40_ih_at_coord(&coord);
-+
-+		assert("vs-1611", i == item_pos);
-+		assert("vs-1590", keyeq(&ih->key, &data[i].key));
-+		assert("vs-1591",
-+		       le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
-+		/* every item except a mergeable last one must match its
-+		   recorded size exactly */
-+		if ((i < (node40_num_of_items_internal(left) - 1))
-+		    || !mergeable) {
-+			switch (data[i].plugin_id) {
-+			case CTAIL_ID:
-+			case FORMATTING_ID:
-+				assert("vs-1592",
-+				       data[i].u.bytes ==
-+				       coord_num_units(&coord));
-+				break;
-+			case EXTENT_POINTER_ID:
-+				assert("vs-1593",
-+				       data[i].u.bytes ==
-+				       reiser4_extent_size(&coord,
-+							   coord_num_units
-+							   (&coord)));
-+				break;
-+			case COMPOUND_DIR_ID:
-+				assert("vs-1594",
-+				       data[i].u.entries ==
-+				       coord_num_units(&coord));
-+				break;
-+			default:
-+				break;
-+			}
-+		}
-+		/* remember the size of a mergeable last item: it is checked
-+		   below together with the first item of @right */
-+		if (item_pos == (node40_num_of_items_internal(left) - 1)
-+		    && mergeable) {
-+			switch (data[i].plugin_id) {
-+			case CTAIL_ID:
-+			case FORMATTING_ID:
-+				last_bytes = coord_num_units(&coord);
-+				break;
-+			case EXTENT_POINTER_ID:
-+				last_bytes =
-+				    reiser4_extent_size(&coord,
-+							coord_num_units(&coord));
-+				break;
-+			case COMPOUND_DIR_ID:
-+				last_bytes = coord_num_units(&coord);
-+				break;
-+			default:
-+				impossible("vs-1595", "wrong mergeable item");
-+				break;
-+			}
-+		}
-+		i++;
-+	}
-+
-+	coord_init_first_unit(&coord, right);
-+	if (mergeable) {
-+		ih = node40_ih_at_coord(&coord);
-+
-+		assert("vs-1589",
-+		       data[i - 1].plugin_id == le16_to_cpu(get_unaligned(&ih->plugin_id)));
-+		assert("vs-1608", last_bytes != 0);
-+		switch (data[i - 1].plugin_id) {
-+		case CTAIL_ID:
-+		case FORMATTING_ID:
-+			assert("vs-1596",
-+			       data[i - 1].u.bytes ==
-+			       last_bytes + coord_num_units(&coord));
-+			break;
-+
-+		case EXTENT_POINTER_ID:
-+			assert("vs-1597",
-+			       data[i - 1].u.bytes ==
-+			       last_bytes + reiser4_extent_size(&coord,
-+								coord_num_units
-+								(&coord)));
-+			break;
-+
-+		case COMPOUND_DIR_ID:
-+			/* directory items are recorded in u.entries, so read
-+			   the same union member here */
-+			assert("vs-1598",
-+			       data[i - 1].u.entries ==
-+			       last_bytes + coord_num_units(&coord));
-+			break;
-+		default:
-+			impossible("vs-1599", "wrong mergeable item");
-+			break;
-+		}
-+		/* first item of @right is already checked */
-+		item_pos = 1;
-+	} else
-+		item_pos = 0;
-+
-+	for (; item_pos < node40_num_of_items_internal(right); item_pos++) {
-+
-+		coord_set_item_pos(&coord, item_pos);
-+		ih = node40_ih_at_coord(&coord);
-+
-+		assert("vs-1612", keyeq(&ih->key, &data[i].key));
-+		assert("vs-1613",
-+		       le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
-+		switch (data[i].plugin_id) {
-+		case CTAIL_ID:
-+		case FORMATTING_ID:
-+			assert("vs-1600",
-+			       data[i].u.bytes == coord_num_units(&coord));
-+			break;
-+		case EXTENT_POINTER_ID:
-+			assert("vs-1601",
-+			       data[i].u.bytes ==
-+			       reiser4_extent_size(&coord,
-+						   coord_num_units
-+						   (&coord)));
-+			break;
-+		case COMPOUND_DIR_ID:
-+			assert("vs-1602",
-+			       data[i].u.entries == coord_num_units(&coord));
-+			break;
-+		default:
-+			break;
-+		}
-+		i++;
-+	}
-+
-+	assert("vs-1603", i == nr_items);
-+	kfree(data);
-+}
-+
-+#endif
-+
-+/* plugin->u.node.shift
-+   look for description of this method in plugin/node/node.h
-+
-+   @from: coord in the source node; both @from->node and @to must be write
-+	locked. It is updated to follow the shift
-+   @to: target node
-+   @pend: SHIFT_LEFT or shift to the right
-+   @delete_child: if @from->node becomes empty - it will be deleted from the
-+	tree if this is set to 1
-+   @including_stop_coord: passed on to adjust_coord(); when there is nothing
-+	to shift it makes @from move into @to nevertheless
-+   @info: carry operations for the upper level are added here
-+
-+   Returns a non-zero error code of prepare_for_update() or
-+   prepare_removal_node40() if one of them fails; otherwise the number of
-+   bytes shifted (0 if nothing could be shifted) */
-+int shift_node40(coord_t * from, znode * to, shift_direction pend,
-+		 int delete_child, int including_stop_coord,
-+		 carry_plugin_info * info)
-+{
-+	struct shift_params shift;
-+	int result;
-+	znode *left, *right;
-+	znode *source;
-+	int target_empty;
-+
-+	assert("nikita-2161", coord_check(from));
-+
-+	memset(&shift, 0, sizeof(shift));
-+	shift.pend = pend;
-+	shift.wish_stop = *from;
-+	shift.target = to;
-+
-+	assert("nikita-1473", znode_is_write_locked(from->node));
-+	assert("nikita-1474", znode_is_write_locked(to));
-+
-+	source = from->node;
-+
-+	/* set @shift.wish_stop to rightmost/leftmost unit among units we want
-+	   shifted */
-+	if (pend == SHIFT_LEFT) {
-+		result = coord_set_to_left(&shift.wish_stop);
-+		left = to;
-+		right = from->node;
-+	} else {
-+		result = coord_set_to_right(&shift.wish_stop);
-+		left = from->node;
-+		right = to;
-+	}
-+
-+	if (result) {
-+		/* move insertion coord even if there is nothing to move */
-+		if (including_stop_coord) {
-+			/* move insertion coord (@from) */
-+			if (pend == SHIFT_LEFT) {
-+				/* after last item in target node */
-+				coord_init_after_last_item(from, to);
-+			} else {
-+				/* before first item in target node */
-+				coord_init_before_first_item(from, to);
-+			}
-+		}
-+
-+		if (delete_child && node_is_empty(shift.wish_stop.node))
-+			result =
-+			    prepare_removal_node40(shift.wish_stop.node, info);
-+		else
-+			result = 0;
-+		/* there is nothing to shift */
-+		assert("nikita-2078", coord_check(from));
-+		return result;
-+	}
-+
-+	target_empty = node_is_empty(to);
-+
-+	/* when first node plugin with item body compression is implemented,
-+	   this must be changed to call node specific plugin */
-+
-+	/* shift->stop_coord is updated to last unit which really will be
-+	   shifted */
-+	estimate_shift(&shift, get_current_context());
-+	if (!shift.shift_bytes) {
-+		/* we could not shift anything */
-+		assert("nikita-2079", coord_check(from));
-+		return 0;
-+	}
-+
-+	copy(&shift);
-+
-+	/* result value of this is important. It is used by adjust_coord below */
-+	result = delete_copied(&shift);
-+
-+	assert("vs-1610", result >= 0);
-+	assert("vs-1471",
-+	       ((reiser4_context *) current->journal_info)->magic ==
-+	       context_magic);
-+
-+	/* item which has been moved from one node to another might want to do
-+	   something on that event. This can be done by item's shift_hook
-+	   method, which will be now called for every moved items */
-+	call_shift_hooks(&shift);
-+
-+	assert("vs-1472",
-+	       ((reiser4_context *) current->journal_info)->magic ==
-+	       context_magic);
-+
-+	update_taps(&shift);
-+
-+	assert("vs-1473",
-+	       ((reiser4_context *) current->journal_info)->magic ==
-+	       context_magic);
-+
-+	/* adjust @from pointer in accordance with @including_stop_coord flag
-+	   and amount of data which was really shifted */
-+	adjust_coord(from, &shift, result, including_stop_coord);
-+
-+	/*
-+	 * if items were shifted into empty node, update its delimiting key
-+	 * first. A failure here must not be overwritten by the result of the
-+	 * second update below, so the second one is only attempted on success.
-+	 */
-+	result = target_empty ? prepare_for_update(NULL, left, info) : 0;
-+
-+	/* add update operation to @info, which is the list of operations to
-+	   be performed on a higher level */
-+	if (!result)
-+		result = prepare_for_update(left, right, info);
-+	if (!result && node_is_empty(source) && delete_child) {
-+		/* all contents of @from->node is moved to @to and @from->node
-+		   has to be removed from the tree, so, on higher level we
-+		   will be removing the pointer to node @from->node */
-+		result = prepare_removal_node40(source, info);
-+	}
-+	assert("nikita-2080", coord_check(from));
-+	return result ? result : (int)shift.shift_bytes;
-+}
-+
-+/* plugin->u.node.fast_insert()
-+ look for description of this method in plugin/node/node.h
-+
-+ This implementation does not look at @coord and always returns 1 */
-+int fast_insert_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.fast_paste()
-+ look for description of this method in plugin/node/node.h
-+
-+ This implementation does not look at @coord and always returns 1 */
-+int fast_paste_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.fast_cut()
-+ look for description of this method in plugin/node/node.h
-+
-+ This implementation does not look at @coord and always returns 1 */
-+int fast_cut_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.modify - not defined */
-+
-+/* plugin->u.node.max_item_size
-+
-+   Block size of the current super block less the space taken by one node
-+   header and one item header */
-+int max_item_size_node40(void)
-+{
-+	const size_t headers = sizeof(node40_header) + sizeof(item_header40);
-+
-+	return reiser4_get_current_sb()->s_blocksize - headers;
-+}
-+
-+/* plugin->u.node.set_item_plugin
-+
-+   Stores @id as little-endian 16 bit plugin id in the header of the item at
-+   @coord and caches it in @coord->iplugid. Always returns 0 */
-+int set_item_plugin_node40(coord_t *coord, item_id id)
-+{
-+	item_header40 *header = node40_ih_at_coord(coord);
-+
-+	/* the on-disk field may be unaligned */
-+	put_unaligned(cpu_to_le16(id), &header->plugin_id);
-+	coord->iplugid = id;
-+
-+	return 0;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/node/node40.h linux-2.6.20/fs/reiser4/plugin/node/node40.h
---- linux-2.6.20.orig/fs/reiser4/plugin/node/node40.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/node/node40.h 2007-05-06 14:50:43.835018219 +0400
-@@ -0,0 +1,125 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined( __REISER4_NODE40_H__ )
-+#define __REISER4_NODE40_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "node.h"
-+
-+#include <linux/types.h>
-+
-+/* format of node header for 40 node layouts. Keep bloat out of this struct. */
-+typedef struct node40_header {
-+ /* identifier of node plugin. Must be located at the very beginning
-+ of a node. */
-+ common_node_header common_header; /* this is 16 bits */
-+ /* number of items. Should be first element in the node header,
-+ because we haven't yet finally decided whether it shouldn't go into
-+ common_header.
-+ */
-+/* NIKITA-FIXME-HANS: Create a macro such that if there is only one
-+ * node format at compile time, and it is this one, accesses to these
-+ * fields do not go through a function dereference (and otherwise they
-+ * do). Probably 80% of users will only have one node format at a time
-+ * throughout the life of reiser4. */
-+ d16 nr_items;
-+ /* free space in node measured in bytes */
-+ d16 free_space;
-+ /* offset to start of free space in node */
-+ d16 free_space_start;
-+ /* for reiser4_fsck. When information about what is a free
-+ block is corrupted, and we try to recover everything even
-+ if marked as freed, then old versions of data may
-+ duplicate newer versions, and this field allows us to
-+ restore the newer version. Also useful for when users
-+ who don't have the new trashcan installed on their linux distro
-+ delete the wrong files and send us desperate emails
-+ offering $25 for them back. */
-+ /* NOTE(review): no field directly follows the comment above; it
-+ presumably describes the flush stamp fields (mkfs_id, flush_id)
-+ below - confirm */
-+
-+ /* magic field we need to tell formatted nodes
-+ NIKITA-FIXME-HANS: improve this comment */
-+ d32 magic;
-+ /* flushstamp is made of mk_id and write_counter. mk_id is an
-+ id generated randomly at mkreiserfs time. So we can just
-+ skip all nodes with different mk_id. write_counter is d64
-+ incrementing counter of writes on disk. It is used for
-+ choosing the newest data at fsck time.
-+ NIKITA-FIXME-HANS: why was field name changed but not comment?
-+ NOTE(review): mk_id and write_counter presumably are the mkfs_id and
-+ flush_id fields below - confirm */
-+
-+ d32 mkfs_id;
-+ d64 flush_id;
-+ /* node flags to be used by fsck (reiser4ck or reiser4fsck?)
-+ and repacker
-+ NIKITA-FIXME-HANS: say more or reference elsewhere that says more */
-+ d16 flags;
-+
-+ /* 1 is leaf level, 2 is twig level, root is the numerically
-+ largest level */
-+ d8 level;
-+
-+ /* NOTE(review): presumably padding only; not described in this file */
-+ d8 pad;
-+} PACKED node40_header;
-+
-+/* item headers are not standard across all node layouts, pass
-+ pos_in_node to functions instead.
-+
-+ The number in front of each field is presumably the byte offset of the
-+ field within the packed header - confirm against sizeof(reiser4_key) */
-+typedef struct item_header40 {
-+ /* key of item */
-+ /* 0 */ reiser4_key key;
-+ /* offset from start of a node measured in 8-byte chunks */
-+ /* 24 */ d16 offset;
-+ /* 26 */ d16 flags;
-+ /* id of the item plugin, stored little-endian (see
-+ set_item_plugin_node40()) */
-+ /* 28 */ d16 plugin_id;
-+} PACKED item_header40;
-+
-+size_t item_overhead_node40(const znode * node, flow_t * aflow);
-+size_t free_space_node40(znode * node);
-+node_search_result lookup_node40(znode * node, const reiser4_key * key,
-+ lookup_bias bias, coord_t * coord);
-+int num_of_items_node40(const znode * node);
-+char *item_by_coord_node40(const coord_t * coord);
-+int length_by_coord_node40(const coord_t * coord);
-+item_plugin *plugin_by_coord_node40(const coord_t * coord);
-+reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key);
-+size_t estimate_node40(znode * node);
-+int check_node40(const znode * node, __u32 flags, const char **error);
-+int parse_node40(znode * node);
-+int init_node40(znode * node);
-+#ifdef GUESS_EXISTS
-+int guess_node40(const znode * node);
-+#endif
-+void change_item_size_node40(coord_t * coord, int by);
-+int create_item_node40(coord_t * target, const reiser4_key * key,
-+ reiser4_item_data * data, carry_plugin_info * info);
-+void update_item_key_node40(coord_t * target, const reiser4_key * key,
-+ carry_plugin_info * info);
-+int kill_node40(struct carry_kill_data *, carry_plugin_info *);
-+int cut_node40(struct carry_cut_data *, carry_plugin_info *);
-+int shift_node40(coord_t * from, znode * to, shift_direction pend,
-+ /* if @from->node becomes
-+ empty - it will be deleted from
-+ the tree if this is set to 1
-+ */
-+ int delete_child, int including_stop_coord,
-+ carry_plugin_info * info);
-+
-+int fast_insert_node40(const coord_t * coord);
-+int fast_paste_node40(const coord_t * coord);
-+int fast_cut_node40(const coord_t * coord);
-+int max_item_size_node40(void);
-+int prepare_removal_node40(znode * empty, carry_plugin_info * info);
-+int set_item_plugin_node40(coord_t * coord, item_id id);
-+int shrink_item_node40(coord_t * coord, int delta);
-+
-+#if REISER4_DEBUG
-+void *shift_check_prepare(const znode *left, const znode *right);
-+void shift_check(void *vp, const znode *left, const znode *right);
-+#endif
-+
-+/* __REISER4_NODE40_H__ */
-+#endif
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/node/node.c linux-2.6.20/fs/reiser4/plugin/node/node.c
---- linux-2.6.20.orig/fs/reiser4/plugin/node/node.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/node/node.c 2007-05-06 14:50:43.835018219 +0400
-@@ -0,0 +1,131 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Node plugin interface.
-+
-+ Description: The tree provides the abstraction of flows, which it
-+ internally fragments into items which it stores in nodes.
-+
-+ A key_atom is a piece of data bound to a single key.
-+
-+ For reasonable space efficiency to be achieved it is often
-+ necessary to store key_atoms in the nodes in the form of items, where
-+ an item is a sequence of key_atoms of the same or similar type. It is
-+ more space-efficient, because the item can implement (very)
-+ efficient compression of key_atom's bodies using internal knowledge
-+ about their semantics, and it can often avoid having a key for each
-+ key_atom. Each type of item has specific operations implemented by its
-+ item handler (see balance.c).
-+
-+ Rationale: the rest of the code (specifically balancing routines)
-+ accesses leaf level nodes through this interface. This way we can
-+ implement various block layouts and even combine various layouts
-+ within the same tree. Balancing/allocating algorithms should not
-+ care about peculiarities of splitting/merging specific item types,
-+ but rather should leave that to the item's item handler.
-+
-+ Items, including those that provide the abstraction of flows, have
-+ the property that if you move them in part or in whole to another
-+ node, the balancing code invokes their is_left_mergeable()
-+ item_operation to determine if they are mergeable with their new
-+ neighbor in the node you have moved them to. For some items the
-+ is_left_mergeable() function always returns null.
-+
-+ When moving the bodies of items from one node to another:
-+
-+ if a partial item is shifted to another node the balancing code invokes
-+ an item handler method to handle the item splitting.
-+
-+ if the balancing code needs to merge with an item in the node it
-+ is shifting to, it will invoke an item handler method to handle
-+ the item merging.
-+
-+ if it needs to move whole item bodies unchanged, the balancing code uses xmemcpy()
-+ adjusting the item headers after the move is done using the node handler.
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "../item/item.h"
-+#include "node.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../reiser4.h"
-+
-+/**
-+ * leftmost_key_in_node - get the smallest key in node
-+ * @node: node to take the key from
-+ * @key: store result here
-+ *
-+ * Stores the key of the first item of @node in @key. If @node is empty,
-+ * *reiser4_max_key() is stored instead. Returns @key.
-+ */
-+reiser4_key *leftmost_key_in_node(const znode *node, reiser4_key *key)
-+{
-+	coord_t first;
-+
-+	assert("nikita-1634", node != NULL);
-+	assert("nikita-1635", key != NULL);
-+
-+	/* an empty node has no item to take a key from */
-+	if (node_is_empty(node)) {
-+		*key = *reiser4_max_key();
-+		return key;
-+	}
-+
-+	coord_init_first_unit(&first, (znode *) node);
-+	item_key_by_coord(&first, key);
-+	return key;
-+}
-+
-+/* Table of node plugins, indexed by node plugin id. The only entry filled in
-+ here is NODE40_ID, which binds every node_plugin method to its *_node40
-+ implementation; .guess is only set when GUESS_EXISTS is defined */
-+node_plugin node_plugins[LAST_NODE_ID] = {
-+ [NODE40_ID] = {
-+ .h = {
-+ .type_id = REISER4_NODE_PLUGIN_TYPE,
-+ .id = NODE40_ID,
-+ .pops = NULL,
-+ .label = "unified",
-+ .desc = "unified node layout",
-+ .linkage = {NULL, NULL}
-+ },
-+ .item_overhead = item_overhead_node40,
-+ .free_space = free_space_node40,
-+ .lookup = lookup_node40,
-+ .num_of_items = num_of_items_node40,
-+ .item_by_coord = item_by_coord_node40,
-+ .length_by_coord = length_by_coord_node40,
-+ .plugin_by_coord = plugin_by_coord_node40,
-+ .key_at = key_at_node40,
-+ .estimate = estimate_node40,
-+ .check = check_node40,
-+ .parse = parse_node40,
-+ .init = init_node40,
-+#ifdef GUESS_EXISTS
-+ .guess = guess_node40,
-+#endif
-+ .change_item_size = change_item_size_node40,
-+ .create_item = create_item_node40,
-+ .update_item_key = update_item_key_node40,
-+ .cut_and_kill = kill_node40,
-+ .cut = cut_node40,
-+ .shift = shift_node40,
-+ .shrink_item = shrink_item_node40,
-+ .fast_insert = fast_insert_node40,
-+ .fast_paste = fast_paste_node40,
-+ .fast_cut = fast_cut_node40,
-+ .max_item_size = max_item_size_node40,
-+ .prepare_removal = prepare_removal_node40,
-+ .set_item_plugin = set_item_plugin_node40
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/node/node.h linux-2.6.20/fs/reiser4/plugin/node/node.h
---- linux-2.6.20.orig/fs/reiser4/plugin/node/node.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/node/node.h 2007-05-06 14:50:43.835018219 +0400
-@@ -0,0 +1,272 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* We need a definition of the default node layout here. */
-+
-+/* Generally speaking, it is best to have free space in the middle of the
-+ node so that two sets of things can grow towards it, and to have the
-+ item bodies on the left so that the last one of them grows into free
-+ space. We optimize for the case where we append new items to the end
-+ of the node, or grow the last item, because it hurts nothing to so
-+ optimize and it is a common special case to do massive insertions in
-+ increasing key order (and one of the cases in which a real user is
-+ most likely to notice the delay).
-+
-+ formatted leaf default layout: (leaf1)
-+
-+ |node header:item bodies:free space:key + pluginid + item offset|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys, and item offsets plus pluginids for the items
-+ corresponding to them are in increasing key order, and are fixed
-+ length. Item offsets are relative to start of node (16 bits creating
-+ a node size limit of 64k, 12 bits might be a better choice....). Item
-+ bodies are in decreasing key order. Item bodies have a variable size.
-+ There is a one to one to one mapping of keys to item offsets to item
-+ bodies. Item offsets consist of pointers to the zeroth byte of the
-+ item body. Item length equals the start of the next item minus the
-+ start of this item, except the zeroth item whose length equals the end
-+ of the node minus the start of that item (plus a byte). In other
-+ words, the item length is not recorded anywhere, and it does not need
-+ to be since it is computable.
-+
-+ Leaf variable length items and keys layout : (lvar)
-+
-+ |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys and item offsets for the items corresponding to them are
-+ in increasing key order, and keys are variable length. Item offsets
-+ are relative to start of node (16 bits). Item bodies are in
-+ decreasing key order. Item bodies have a variable size. There is a
-+ one to one to one mapping of keys to item offsets to item bodies.
-+ Item offsets consist of pointers to the zeroth byte of the item body.
-+ Item length equals the start of the next item's key minus the start of
-+ this item, except the zeroth item whose length equals the end of the
-+ node minus the start of that item (plus a byte).
-+
-+ leaf compressed keys layout: (lcomp)
-+
-+ |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys and item offsets for the items corresponding to them are
-+ in increasing key order, and keys are variable length. The "key
-+ inherit" field indicates how much of the key prefix is identical to
-+ the previous key (stem compression as described in "Managing
-+ Gigabytes" is used). key_inherit is a one byte integer. The
-+ intra-node searches performed through this layout are linear searches,
-+ and this is theorized to not hurt performance much due to the high
-+ cost of processor stalls on modern CPUs, and the small number of keys
-+ in a single node. Item offsets are relative to start of node (16
-+ bits). Item bodies are in decreasing key order. Item bodies have a
-+ variable size. There is a one to one to one mapping of keys to item
-+ offsets to item bodies. Item offsets consist of pointers to the
-+ zeroth byte of the item body. Item length equals the start of the
-+ next item minus the start of this item, except the zeroth item whose
-+ length equals the end of the node minus the start of that item (plus a
-+ byte). In other words, item length and key length is not recorded
-+ anywhere, and it does not need to be since it is computable.
-+
-+ internal node default layout: (idef1)
-+
-+ just like ldef1 except that item bodies are either blocknrs of
-+ children or extents, and moving them may require updating parent
-+ pointers in the nodes that they point to.
-+*/
-+
-+/* There is an inherent 3-way tradeoff between optimizing and
-+ exchanging disks between different architectures and code
-+ complexity. This is optimal and simple and inexchangeable.
-+ Someone else can do the code for exchanging disks and make it
-+ complex. It would not be that hard. Using other than the PAGE_SIZE
-+ might be suboptimal.
-+*/
-+
-+#if !defined( __REISER4_NODE_H__ )
-+#define __REISER4_NODE_H__
-+
-+#define LEAF40_NODE_SIZE PAGE_CACHE_SIZE
-+
-+#include "../../dformat.h"
-+#include "../plugin_header.h"
-+
-+#include <linux/types.h>
-+
-+typedef enum {
-+ NS_FOUND = 0,
-+ NS_NOT_FOUND = -ENOENT
-+} node_search_result;
-+
-+/* Maximal possible space overhead for creation of new item in a node */
-+#define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 )
-+
-+typedef enum {
-+ REISER4_NODE_DKEYS = (1 << 0),
-+ REISER4_NODE_TREE_STABLE = (1 << 1)
-+} reiser4_node_check_flag;
-+
-+/* cut and cut_and_kill have too long a list of parameters. This structure is
-+ just to save some space on the stack */
-+struct cut_list {
-+ coord_t *from;
-+ coord_t *to;
-+ const reiser4_key *from_key;
-+ const reiser4_key *to_key;
-+ reiser4_key *smallest_removed;
-+ carry_plugin_info *info;
-+ __u32 flags;
-+ struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */
-+ lock_handle *left;
-+ lock_handle *right;
-+};
-+
-+struct carry_cut_data;
-+struct carry_kill_data;
-+
-+/* The responsibility of the node plugin is to store and give access
-+ to the sequence of items within the node. */
-+typedef struct node_plugin {
-+ /* generic plugin fields */
-+ plugin_header h;
-+
-+ /* calculates the amount of space that will be required to store an
-+ item which is in addition to the space consumed by the item body.
-+ (the space consumed by the item body can be gotten by calling
-+ item->estimate) */
-+ size_t(*item_overhead) (const znode * node, flow_t * f);
-+
-+ /* returns free space by looking into node (i.e., without using
-+ znode->free_space). */
-+ size_t(*free_space) (znode * node);
-+ /* search within the node for the one item which might
-+ contain the key, invoking item->search_within to search within
-+ that item to see if it is in there */
-+ node_search_result(*lookup) (znode * node, const reiser4_key * key,
-+ lookup_bias bias, coord_t * coord);
-+ /* number of items in node */
-+ int (*num_of_items) (const znode * node);
-+
-+ /* store information about item in @coord in @data */
-+ /* break into several node ops, don't add any more uses of this before doing so */
-+ /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */
-+ char *(*item_by_coord) (const coord_t * coord);
-+ int (*length_by_coord) (const coord_t * coord);
-+ item_plugin *(*plugin_by_coord) (const coord_t * coord);
-+
-+ /* store item key in @key */
-+ reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key);
-+ /* conservatively estimate whether unit of what size can fit
-+ into node. This estimation should be performed without
-+ actually looking into the node's content (free space is saved in
-+ znode). */
-+ size_t(*estimate) (znode * node);
-+
-+ /* performs every consistency check the node plugin author could
-+ imagine. Optional. */
-+ int (*check) (const znode * node, __u32 flags, const char **error);
-+
-+ /* Called when node is read into memory and node plugin is
-+ already detected. This should read some data into znode (like free
-+ space counter) and, optionally, check data consistency.
-+ */
-+ int (*parse) (znode * node);
-+ /* This method is called on a new node to initialise plugin specific
-+ data (header, etc.) */
-+ int (*init) (znode * node);
-+ /* Check whether @node content conforms to this plugin format.
-+ Probably only useful after support for old V3.x formats is added.
-+ Uncomment after 4.0 only.
-+ */
-+ /* int ( *guess )( const znode *node ); */
-+#if REISER4_DEBUG
-+ void (*print) (const char *prefix, const znode * node, __u32 flags);
-+#endif
-+ /* change size of @item by @by bytes. @item->node has enough free
-+ space. When @by > 0 - free space is appended to end of item. When
-+ @by < 0 - item is truncated - it is assumed that last @by bytes if
-+ the item are freed already */
-+ void (*change_item_size) (coord_t * item, int by);
-+
-+ /* create new item @length bytes long in coord @target */
-+ int (*create_item) (coord_t * target, const reiser4_key * key,
-+ reiser4_item_data * data, carry_plugin_info * info);
-+
-+ /* update key of item. */
-+ void (*update_item_key) (coord_t * target, const reiser4_key * key,
-+ carry_plugin_info * info);
-+
-+ int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *);
-+ int (*cut) (struct carry_cut_data *, carry_plugin_info *);
-+
-+ /*
-+ * shrink item pointed to by @coord by @delta bytes.
-+ */
-+ int (*shrink_item) (coord_t * coord, int delta);
-+
-+ /* copy as much as possible but not more than up to @stop from
-+ @stop->node to @target. If (pend == append) then data from beginning of
-+ @stop->node are copied to the end of @target. If (pend == prepend) then
-+ data from the end of @stop->node are copied to the beginning of
-+ @target. Copied data are removed from @stop->node. Information
-+ about what to do on upper level is stored in @todo */
-+ int (*shift) (coord_t * stop, znode * target, shift_direction pend,
-+ int delete_node, int including_insert_coord,
-+ carry_plugin_info * info);
-+ /* return true if this node allows skip carry() in some situations
-+ (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format
-+ emulation doesn't.
-+
-+ This will speedup insertions that doesn't require updates to the
-+ parent, by bypassing initialisation of carry() structures. It's
-+ believed that majority of insertions will fit there.
-+
-+ */
-+ int (*fast_insert) (const coord_t * coord);
-+ int (*fast_paste) (const coord_t * coord);
-+ int (*fast_cut) (const coord_t * coord);
-+ /* this limits max size of item which can be inserted into a node and
-+ number of bytes item in a node may be appended with */
-+ int (*max_item_size) (void);
-+ int (*prepare_removal) (znode * empty, carry_plugin_info * info);
-+ /* change plugin id of items which are in a node already. Currently it is Used in tail conversion for regular
-+ * files */
-+ int (*set_item_plugin) (coord_t * coord, item_id);
-+} node_plugin;
-+
-+typedef enum {
-+ /* standard unified node layout used for both leaf and internal
-+ nodes */
-+ NODE40_ID,
-+ LAST_NODE_ID
-+} reiser4_node_id;
-+
-+extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key);
-+#if REISER4_DEBUG
-+extern void print_node_content(const char *prefix, const znode * node,
-+ __u32 flags);
-+#endif
-+
-+extern void indent_znode(const znode * node);
-+
-+typedef struct common_node_header {
-+ /*
-+ * identifier of node plugin. Must be located at the very beginning of
-+ * a node.
-+ */
-+ __le16 plugin_id;
-+} common_node_header;
-+
-+/* __REISER4_NODE_H__ */
-+#endif
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/object.c linux-2.6.20/fs/reiser4/plugin/object.c
---- linux-2.6.20.orig/fs/reiser4/plugin/object.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/object.c 2007-05-06 14:50:43.835018219 +0400
-@@ -0,0 +1,516 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * Examples of object plugins: file, directory, symlink, special file.
-+ *
-+ * Plugins associated with inode:
-+ *
-+ * Plugin of inode is plugin referenced by plugin-id field of on-disk
-+ * stat-data. How we store this plugin in in-core inode is not
-+ * important. Currently pointers are used, another variant is to store offsets
-+ * and do array lookup on each access.
-+ *
-+ * Now, each inode has one selected plugin: object plugin that
-+ * determines what type of file this object is: directory, regular etc.
-+ *
-+ * This main plugin can use other plugins that are thus subordinated to
-+ * it. Directory instance of object plugin uses hash; regular file
-+ * instance uses tail policy plugin.
-+ *
-+ * Object plugin is either taken from id in stat-data or guessed from
-+ * i_mode bits. Once it is established we ask it to install its
-+ * subordinate plugins, by looking again in stat-data or inheriting them
-+ * from parent.
-+ *
-+ * How new inode is initialized during ->read_inode():
-+ * 1 read stat-data and initialize inode fields: i_size, i_mode,
-+ * i_generation, capabilities etc.
-+ * 2 read plugin id from stat data or try to guess plugin id
-+ * from inode->i_mode bits if plugin id is missing.
-+ * 3 Call ->init_inode() method of stat-data plugin to initialise inode fields.
-+ *
-+ * NIKITA-FIXME-HANS: can you say a little about 1 being done before 3? What
-+ * if stat data does contain i_size, etc., due to it being an unusual plugin?
-+ *
-+ * 4 Call ->activate() method of object's plugin. Plugin is either read from
-+ * from stat-data or guessed from mode bits
-+ * 5 Call ->inherit() method of object plugin to inherit as yet un initialized
-+ * plugins from parent.
-+ *
-+ * Easy induction proves that on last step all plugins of inode would be
-+ * initialized.
-+ *
-+ * When creating new object:
-+ * 1 obtain object plugin id (see next period)
-+ * NIKITA-FIXME-HANS: period?
-+ * 2 ->install() this plugin
-+ * 3 ->inherit() the rest from the parent
-+ *
-+ * We need some examples of creating an object with default and non-default
-+ * plugin ids. Nikita, please create them.
-+ */
-+
-+#include "../inode.h"
-+
-+static int _bugop(void)
-+{
-+ BUG_ON(1);
-+ return 0;
-+}
-+
-+#define bugop ((void *)_bugop)
-+
-+static int _dummyop(void)
-+{
-+ return 0;
-+}
-+
-+#define dummyop ((void *)_dummyop)
-+
-+static int change_file(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ /* cannot change object plugin of already existing object */
-+ if (memb == PSET_FILE)
-+ return RETERR(-EINVAL);
-+
-+ /* Change PSET_CREATE */
-+ return aset_set_unsafe(&reiser4_inode_data(inode)->pset, memb, plugin);
-+}
-+
-+static reiser4_plugin_ops file_plugin_ops = {
-+ .change = change_file
-+};
-+
-+/*
-+ * Definitions of object plugins.
-+ */
-+
-+file_plugin file_plugins[LAST_FILE_PLUGIN_ID] = {
-+ [UNIX_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_REGULAR_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "reg",
-+ .desc = "regular file",
-+ .linkage = {NULL, NULL},
-+ },
-+ .inode_ops = {
-+ .permission = reiser4_permission_common,
-+ .setattr = setattr_unix_file,
-+ .getattr = reiser4_getattr_common
-+ },
-+ .file_ops = {
-+ .llseek = generic_file_llseek,
-+ .read = read_unix_file,
-+ .write = write_unix_file,
-+ .aio_read = generic_file_aio_read,
-+ .ioctl = ioctl_unix_file,
-+ .mmap = mmap_unix_file,
-+ .open = open_unix_file,
-+ .release = release_unix_file,
-+ .fsync = sync_unix_file,
-+ .sendfile = sendfile_unix_file
-+ },
-+ .as_ops = {
-+ .writepage = reiser4_writepage,
-+ .readpage = readpage_unix_file,
-+ .sync_page = block_sync_page,
-+ .writepages = writepages_unix_file,
-+ .set_page_dirty = reiser4_set_page_dirty,
-+ .readpages = readpages_unix_file,
-+ .prepare_write = prepare_write_unix_file,
-+ .commit_write = commit_write_unix_file,
-+ .bmap = bmap_unix_file,
-+ .invalidatepage = reiser4_invalidatepage,
-+ .releasepage = reiser4_releasepage
-+ },
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = flow_by_inode_unix_file,
-+ .key_by_inode = key_by_inode_and_offset_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = delete_object_unix_file,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_unix_file,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_data_unix_file,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [DIRECTORY_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = DIRECTORY_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_DIRECTORY_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "dir",
-+ .desc = "directory",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = {.create = NULL},
-+ .file_ops = {.owner = NULL},
-+ .as_ops = {.writepage = NULL},
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = bugop,
-+ .key_by_inode = bugop,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common_dir,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = reiser4_delete_dir_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = rem_link_common_dir,
-+ .owns_item = owns_item_common_dir,
-+ .can_add_link = can_add_link_common,
-+ .can_rem_link = can_rem_link_common_dir,
-+ .detach = reiser4_detach_common_dir,
-+ .bind = reiser4_bind_common_dir,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common_dir,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common_dir
-+ },
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ },
-+ [SYMLINK_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = SYMLINK_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_SYMLINK_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "symlink",
-+ .desc = "symbolic link",
-+ .linkage = {NULL,NULL}
-+ },
-+ .inode_ops = {
-+ .readlink = generic_readlink,
-+ .follow_link = reiser4_follow_link_common,
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+ },
-+ /* inode->i_fop of symlink is initialized by NULL in setup_inode_ops */
-+ .file_ops = {.owner = NULL},
-+ .as_ops = {.writepage = NULL},
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_symlink,
-+ .delete_object = reiser4_delete_object_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .destroy_inode = destroy_inode_symlink,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [SPECIAL_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = SPECIAL_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_SPECIAL_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "special",
-+ .desc =
-+ "special: fifo, device or socket",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = {
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+ },
-+ /* file_ops of special files (sockets, block, char, fifo) are
-+ initialized by init_special_inode. */
-+ .file_ops = {.owner = NULL},
-+ .as_ops = {.writepage = NULL},
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = reiser4_delete_object_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [CRYPTCOMPRESS_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = CRYPTCOMPRESS_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_REGULAR_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "cryptcompress",
-+ .desc = "cryptcompress file",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = {
-+ .permission = reiser4_permission_common,
-+ .setattr = prot_setattr_cryptcompress,
-+ .getattr = reiser4_getattr_common
-+ },
-+ .file_ops = {
-+ .llseek = generic_file_llseek,
-+ .read = prot_read_cryptcompress,
-+ .write = prot_write_cryptcompress,
-+ .aio_read = generic_file_aio_read,
-+ .mmap = prot_mmap_cryptcompress,
-+ .release = prot_release_cryptcompress,
-+ .fsync = reiser4_sync_common,
-+ .sendfile = prot_sendfile_cryptcompress
-+ },
-+ .as_ops = {
-+ .writepage = reiser4_writepage,
-+ .readpage = readpage_cryptcompress,
-+ .sync_page = block_sync_page,
-+ .writepages = writepages_cryptcompress,
-+ .set_page_dirty = reiser4_set_page_dirty,
-+ .readpages = readpages_cryptcompress,
-+ .prepare_write = prepare_write_common,
-+ .invalidatepage = reiser4_invalidatepage,
-+ .releasepage = reiser4_releasepage
-+ },
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = flow_by_inode_cryptcompress,
-+ .key_by_inode = key_by_inode_cryptcompress,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_cryptcompress,
-+ .create_object = create_cryptcompress,
-+ .open_object = open_object_cryptcompress,
-+ .delete_object = delete_object_cryptcompress,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_data_cryptcompress,
-+ .cut_tree_worker = cut_tree_worker_cryptcompress,
-+ .destroy_inode = destroy_inode_cryptcompress,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ }
-+};
-+
-+static int change_dir(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ /* cannot change dir plugin of already existing object */
-+ return RETERR(-EINVAL);
-+}
-+
-+static reiser4_plugin_ops dir_plugin_ops = {
-+ .change = change_dir
-+};
-+
-+/*
-+ * definition of directory plugins
-+ */
-+
-+dir_plugin dir_plugins[LAST_DIR_ID] = {
-+ /* standard hashed directory plugin */
-+ [HASHED_DIR_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .id = HASHED_DIR_PLUGIN_ID,
-+ .pops = &dir_plugin_ops,
-+ .label = "dir",
-+ .desc = "hashed directory",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = {
-+ .create = reiser4_create_common,
-+ .lookup = reiser4_lookup_common,
-+ .link = reiser4_link_common,
-+ .unlink = reiser4_unlink_common,
-+ .symlink = reiser4_symlink_common,
-+ .mkdir = reiser4_mkdir_common,
-+ .rmdir = reiser4_unlink_common,
-+ .mknod = reiser4_mknod_common,
-+ .rename = reiser4_rename_common,
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+ },
-+ .file_ops = {
-+ .llseek = reiser4_llseek_dir_common,
-+ .read = generic_read_dir,
-+ .readdir = reiser4_readdir_common,
-+ .release = reiser4_release_dir_common,
-+ .fsync = reiser4_sync_common
-+ },
-+ .as_ops = {
-+ .writepage = bugop,
-+ .sync_page = bugop,
-+ .writepages = dummyop,
-+ .set_page_dirty = bugop,
-+ .readpages = bugop,
-+ .prepare_write = bugop,
-+ .commit_write = bugop,
-+ .bmap = bugop,
-+ .invalidatepage = bugop,
-+ .releasepage = bugop
-+ },
-+ .get_parent = get_parent_common,
-+ .is_name_acceptable = is_name_acceptable_common,
-+ .build_entry_key = build_entry_key_hashed,
-+ .build_readdir_key = build_readdir_key_common,
-+ .add_entry = reiser4_add_entry_common,
-+ .rem_entry = reiser4_rem_entry_common,
-+ .init = reiser4_dir_init_common,
-+ .done = reiser4_dir_done_common,
-+ .attach = reiser4_attach_common,
-+ .detach = reiser4_detach_common,
-+ .estimate = {
-+ .add_entry = estimate_add_entry_common,
-+ .rem_entry = estimate_rem_entry_common,
-+ .unlink = dir_estimate_unlink_common
-+ }
-+ },
-+ /* hashed directory for which seekdir/telldir are guaranteed to
-+ * work. Brain-damage. */
-+ [SEEKABLE_HASHED_DIR_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .id = SEEKABLE_HASHED_DIR_PLUGIN_ID,
-+ .pops = &dir_plugin_ops,
-+ .label = "dir32",
-+ .desc = "directory hashed with 31 bit hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = {
-+ .create = reiser4_create_common,
-+ .lookup = reiser4_lookup_common,
-+ .link = reiser4_link_common,
-+ .unlink = reiser4_unlink_common,
-+ .symlink = reiser4_symlink_common,
-+ .mkdir = reiser4_mkdir_common,
-+ .rmdir = reiser4_unlink_common,
-+ .mknod = reiser4_mknod_common,
-+ .rename = reiser4_rename_common,
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+ },
-+ .file_ops = {
-+ .llseek = reiser4_llseek_dir_common,
-+ .read = generic_read_dir,
-+ .readdir = reiser4_readdir_common,
-+ .release = reiser4_release_dir_common,
-+ .fsync = reiser4_sync_common
-+ },
-+ .as_ops = {
-+ .writepage = bugop,
-+ .sync_page = bugop,
-+ .writepages = dummyop,
-+ .set_page_dirty = bugop,
-+ .readpages = bugop,
-+ .prepare_write = bugop,
-+ .commit_write = bugop,
-+ .bmap = bugop,
-+ .invalidatepage = bugop,
-+ .releasepage = bugop
-+ },
-+ .get_parent = get_parent_common,
-+ .is_name_acceptable = is_name_acceptable_common,
-+ .build_entry_key = build_entry_key_seekable,
-+ .build_readdir_key = build_readdir_key_common,
-+ .add_entry = reiser4_add_entry_common,
-+ .rem_entry = reiser4_rem_entry_common,
-+ .init = reiser4_dir_init_common,
-+ .done = reiser4_dir_done_common,
-+ .attach = reiser4_attach_common,
-+ .detach = reiser4_detach_common,
-+ .estimate = {
-+ .add_entry = estimate_add_entry_common,
-+ .rem_entry = estimate_rem_entry_common,
-+ .unlink = dir_estimate_unlink_common
-+ }
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/object.h linux-2.6.20/fs/reiser4/plugin/object.h
---- linux-2.6.20.orig/fs/reiser4/plugin/object.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/object.h 2007-05-06 14:50:43.839019469 +0400
-@@ -0,0 +1,121 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of object plugin functions. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_OBJECT_H__ )
-+#define __FS_REISER4_PLUGIN_OBJECT_H__
-+
-+#include "../type_safe_hash.h"
-+
-+/* common implementations of inode operations */
-+int reiser4_create_common(struct inode *parent, struct dentry *dentry,
-+ int mode, struct nameidata *);
-+struct dentry * reiser4_lookup_common(struct inode *parent,
-+ struct dentry *dentry,
-+ struct nameidata *nameidata);
-+int reiser4_link_common(struct dentry *existing, struct inode *parent,
-+ struct dentry *newname);
-+int reiser4_unlink_common(struct inode *parent, struct dentry *victim);
-+int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode);
-+int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
-+ const char *linkname);
-+int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
-+ int mode, dev_t rdev);
-+int reiser4_rename_common(struct inode *old_dir, struct dentry *old_name,
-+ struct inode *new_dir, struct dentry *new_name);
-+void *reiser4_follow_link_common(struct dentry *, struct nameidata *data);
-+int reiser4_permission_common(struct inode *, int mask,
-+ struct nameidata *nameidata);
-+int reiser4_setattr_common(struct dentry *, struct iattr *);
-+int reiser4_getattr_common(struct vfsmount *mnt, struct dentry *,
-+ struct kstat *);
-+
-+/* common implementations of file operations */
-+loff_t reiser4_llseek_dir_common(struct file *, loff_t off, int origin);
-+int reiser4_readdir_common(struct file *, void *dirent, filldir_t);
-+int reiser4_release_dir_common(struct inode *, struct file *);
-+int reiser4_sync_common(struct file *, struct dentry *, int datasync);
-+
-+/* common implementations of address space operations */
-+int prepare_write_common(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+
-+/* file plugin operations: common implementations */
-+int write_sd_by_inode_common(struct inode *);
-+int key_by_inode_and_offset_common(struct inode *, loff_t, reiser4_key *);
-+int set_plug_in_inode_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int adjust_to_parent_common(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int adjust_to_parent_common_dir(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int reiser4_create_object_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int reiser4_delete_object_common(struct inode *);
-+int reiser4_delete_dir_common(struct inode *);
-+int reiser4_add_link_common(struct inode *object, struct inode *parent);
-+int reiser4_rem_link_common(struct inode *object, struct inode *parent);
-+int rem_link_common_dir(struct inode *object, struct inode *parent);
-+int owns_item_common(const struct inode *, const coord_t *);
-+int owns_item_common_dir(const struct inode *, const coord_t *);
-+int can_add_link_common(const struct inode *);
-+int can_rem_link_common_dir(const struct inode *);
-+int reiser4_detach_common_dir(struct inode *child, struct inode *parent);
-+int reiser4_bind_common_dir(struct inode *child, struct inode *parent);
-+int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value);
-+reiser4_block_nr estimate_create_common(const struct inode *);
-+reiser4_block_nr estimate_create_common_dir(const struct inode *);
-+reiser4_block_nr estimate_update_common(const struct inode *);
-+reiser4_block_nr estimate_unlink_common(const struct inode *,
-+ const struct inode *);
-+reiser4_block_nr estimate_unlink_common_dir(const struct inode *,
-+ const struct inode *);
-+char *wire_write_common(struct inode *, char *start);
-+char *wire_read_common(char *addr, reiser4_object_on_wire *);
-+struct dentry *wire_get_common(struct super_block *, reiser4_object_on_wire *);
-+int wire_size_common(struct inode *);
-+void wire_done_common(reiser4_object_on_wire *);
-+
-+/* dir plugin operations: common implementations */
-+struct dentry *get_parent_common(struct inode *child);
-+int is_name_acceptable_common(const struct inode *, const char *name, int len);
-+void build_entry_key_common(const struct inode *,
-+ const struct qstr *qname, reiser4_key *);
-+int build_readdir_key_common(struct file *dir, reiser4_key *);
-+int reiser4_add_entry_common(struct inode *object, struct dentry *where,
-+ reiser4_object_create_data *, reiser4_dir_entry_desc *);
-+int reiser4_rem_entry_common(struct inode *object, struct dentry *where,
-+ reiser4_dir_entry_desc *);
-+int reiser4_dir_init_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int reiser4_dir_done_common(struct inode *);
-+int reiser4_attach_common(struct inode *child, struct inode *parent);
-+int reiser4_detach_common(struct inode *object, struct inode *parent);
-+reiser4_block_nr estimate_add_entry_common(const struct inode *);
-+reiser4_block_nr estimate_rem_entry_common(const struct inode *);
-+reiser4_block_nr dir_estimate_unlink_common(const struct inode *,
-+ const struct inode *);
-+
-+/* these are essential parts of common implementations, they are to make
-+ customized implementations easier */
-+int do_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-+
-+/* merely useful functions */
-+int lookup_sd(struct inode *, znode_lock_mode, coord_t *, lock_handle *,
-+ const reiser4_key *, int silent);
-+
-+/* __FS_REISER4_PLUGIN_OBJECT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/plugin.c linux-2.6.20/fs/reiser4/plugin/plugin.c
---- linux-2.6.20.orig/fs/reiser4/plugin/plugin.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/plugin.c 2007-05-06 14:50:43.839019469 +0400
-@@ -0,0 +1,578 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Basic plugin infrastructure, lookup etc. */
-+
-+/* PLUGINS:
-+
-+ Plugins are internal Reiser4 "modules" or "objects" used to increase
-+ extensibility and allow external users to easily adapt reiser4 to
-+ their needs.
-+
-+ Plugins are classified into several disjoint "types". Plugins
-+ belonging to the particular plugin type are termed "instances" of
-+ this type. Currently the following types are present:
-+
-+ . object plugin
-+ . hash plugin
-+ . tail plugin
-+ . perm plugin
-+ . item plugin
-+ . node layout plugin
-+
-+NIKITA-FIXME-HANS: update this list, and review this entire comment for currency
-+
-+ Object (file) plugin determines how given file-system object serves
-+ standard VFS requests for read, write, seek, mmap etc. Instances of
-+ file plugins are: regular file, directory, symlink. Another example
-+ of file plugin is audit plugin, that optionally records accesses to
-+ underlying object and forwards requests to it.
-+
-+ Hash plugins compute hashes used by reiser4 to store and locate
-+ files within directories. Instances of hash plugin type are: r5,
-+ tea, rupasov.
-+
-+ Tail plugins (or, more precisely, tail policy plugins) determine
-+ when last part of the file should be stored in a formatted item.
-+
-+ Perm plugins control permissions granted for a process accessing a file.
-+
-+ Scope and lookup:
-+
-+ label such that pair ( type_label, plugin_label ) is unique. This
-+ pair is a globally persistent and user-visible plugin
-+ identifier. Internally kernel maintains plugins and plugin types in
-+ arrays using an index into those arrays as plugin and plugin type
-+ identifiers. File-system in turn, also maintains persistent
-+ "dictionary" which is mapping from plugin label to numerical
-+ identifier which is stored in file-system objects. That is, we
-+ store the offset into the plugin array for that plugin type as the
-+ plugin id in the stat data of the filesystem object.
-+
-+ plugin_labels have meaning for the user interface that assigns
-+ plugins to files, and may someday have meaning for dynamic loading of
-+ plugins and for copying of plugins from one fs instance to
-+ another by utilities like cp and tar.
-+
-+ Internal kernel plugin type identifier (index in plugins[] array) is
-+ of type reiser4_plugin_type. Set of available plugin types is
-+ currently static, but dynamic loading doesn't seem to pose
-+ insurmountable problems.
-+
-+ Within each type plugins are addressed by the identifiers of type
-+ reiser4_plugin_id (indices in
-+ reiser4_plugin_type_data.builtin[]). Such identifiers are only
-+ required to be unique within one type, not globally.
-+
-+ Thus, plugin in memory is uniquely identified by the pair (type_id,
-+ id).
-+
-+ Usage:
-+
-+ There exists only one instance of each plugin instance, but this
-+ single instance can be associated with many entities (file-system
-+ objects, items, nodes, transactions, file-descriptors etc.). Entity
-+ to which plugin of given type is termed (due to the lack of
-+ imagination) "subject" of this plugin type and, by abuse of
-+ terminology, subject of particular instance of this type to which
-+ it's attached currently. For example, inode is subject of object
-+ plugin type. Inode representing directory is subject of directory
-+ plugin, hash plugin type and some particular instance of hash plugin
-+ type. Inode, representing regular file is subject of "regular file"
-+ plugin, tail-policy plugin type etc.
-+
-+ With each subject the plugin possibly stores some state. For example,
-+ the state of a directory plugin (instance of object plugin type) is pointer
-+ to hash plugin (if directories always use hashing that is). State of
-+ audit plugin is file descriptor (struct file) of log file or some
-+ magic value to do logging through printk().
-+
-+ Interface:
-+
-+ In addition to a scalar identifier, each plugin type and plugin
-+ proper has a "label": short string and a "description"---longer
-+ descriptive string. Labels and descriptions of plugin types are
-+ hard-coded into plugins[] array, declared and defined in
-+ plugin.c. Label and description of plugin are stored in .label and
-+ .desc fields of reiser4_plugin_header respectively. It's possible to
-+ locate plugin by the pair of labels.
-+
-+ Features:
-+
-+ . user-level plugin manipulations:
-+ + reiser4("filename/..file_plugin<='audit'");
-+ + write(open("filename/..file_plugin"), "audit", 8);
-+
-+ . user level utilities lsplug and chplug to manipulate plugins.
-+ Utilities are not of primary priority. Possibly they will be not
-+ working on v4.0
-+
-+NIKITA-FIXME-HANS: this should be a mkreiserfs option not a mount option, do you agree? I don't think that specifying it at mount time, and then changing it with each mount, is a good model for usage.
-+
-+ . mount option "plug" to set-up plugins of root-directory.
-+ "plug=foo:bar" will set "bar" as default plugin of type "foo".
-+
-+ Limitations:
-+
-+ . each plugin type has to provide at least one builtin
-+ plugin. This is technical limitation and it can be lifted in the
-+ future.
-+
-+ TODO:
-+
-+ New plugin types/plugings:
-+ Things we should be able to separately choose to inherit:
-+
-+ security plugins
-+
-+ stat data
-+
-+ file bodies
-+
-+ file plugins
-+
-+ dir plugins
-+
-+ . perm:acl
-+
-+ d audi---audit plugin intercepting and possibly logging all
-+ accesses to object. Requires to put stub functions in file_operations
-+ in stead of generic_file_*.
-+
-+NIKITA-FIXME-HANS: why make overflows a plugin?
-+ . over---handle hash overflows
-+
-+ . sqnt---handle different access patterns and instruments read-ahead
-+
-+NIKITA-FIXME-HANS: describe the line below in more detail.
-+
-+ . hier---handle inheritance of plugins along file-system hierarchy
-+
-+ Different kinds of inheritance: on creation vs. on access.
-+ Compatible/incompatible plugins.
-+ Inheritance for multi-linked files.
-+ Layered plugins.
-+ Notion of plugin context is abandoned.
-+
-+Each file is associated
-+ with one plugin, and dependent plugins (hash, etc.) are stored as
-+ main plugin state. Now, if we have plugins used for regular files
-+ but not for directories, how would such plugins be inherited?
-+ . always store them with directories also
-+
-+NIKITA-FIXME-HANS: Do the line above. It is not exclusive of doing the line below which is also useful.
-+
-+ . use inheritance hierarchy, independent of file-system namespace
-+
-+*/
-+
-+#include "../debug.h"
-+#include "../dformat.h"
-+#include "plugin_header.h"
-+#include "item/static_stat.h"
-+#include "node/node.h"
-+#include "security/perm.h"
-+#include "space/space_allocator.h"
-+#include "disk_format/disk_format.h"
-+#include "plugin.h"
-+#include "../reiser4.h"
-+#include "../jnode.h"
-+#include "../inode.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+/* public interface */
-+
-+/* initialise plugin sub-system. Just call this once on reiser4 startup. */
-+int init_plugins(void);
-+int setup_plugins(struct super_block *super, reiser4_plugin ** area);
-+int locate_plugin(struct inode *inode, plugin_locator * loc);
-+
-+/**
-+ * init_plugins - initialize plugins
-+ *
-+ * Initializes plugin sub-system. It is part of reiser4 module
-+ * initialization. For each plugin of each type init method is called and each
-+ * plugin is put into list of plugins.
-+ */
-+int init_plugins(void)
-+{
-+ reiser4_plugin_type type_id;
-+
-+ for (type_id = 0; type_id < REISER4_PLUGIN_TYPES; ++type_id) {
-+ reiser4_plugin_type_data *ptype;
-+ int i;
-+
-+ ptype = &plugins[type_id];
-+ assert("nikita-3508", ptype->label != NULL);
-+ assert("nikita-3509", ptype->type_id == type_id);
-+
-+ INIT_LIST_HEAD(&ptype->plugins_list);
-+/* NIKITA-FIXME-HANS: change builtin_num to some other name lacking the term builtin. */
-+ for (i = 0; i < ptype->builtin_num; ++i) {
-+ reiser4_plugin *plugin;
-+
-+ plugin = plugin_at(ptype, i);
-+
-+ if (plugin->h.label == NULL)
-+ /* uninitialized slot encountered */
-+ continue;
-+ assert("nikita-3445", plugin->h.type_id == type_id);
-+ plugin->h.id = i;
-+ if (plugin->h.pops != NULL &&
-+ plugin->h.pops->init != NULL) {
-+ int result;
-+
-+ result = plugin->h.pops->init(plugin);
-+ if (result != 0)
-+ return result;
-+ }
-+ INIT_LIST_HEAD(&plugin->h.linkage);
-+ list_add_tail(&plugin->h.linkage, &ptype->plugins_list);
-+ }
-+ }
-+ return 0;
-+}
-+
-+/* true if plugin type id is valid */
-+int is_plugin_type_valid(reiser4_plugin_type type)
-+{
-+ /* "type" is unsigned, so no comparison with 0 is
-+ necessary */
-+ return (type < REISER4_PLUGIN_TYPES);
-+}
-+
-+/* true if plugin id is valid */
-+int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id)
-+{
-+ assert("nikita-1653", is_plugin_type_valid(type));
-+ return id < plugins[type].builtin_num;
-+}
-+
-+/* return plugin by its @type and @id.
-+
-+ Both arguments are checked for validity: this is supposed to be called
-+ from user-level.
-+
-+NIKITA-FIXME-HANS: Do you instead mean that this checks ids created in
-+user space, and passed to the filesystem by use of method files? Your
-+comment really confused me on the first reading....
-+
-+*/
-+reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type /* plugin type
-+ * unchecked */,
-+ reiser4_plugin_id id /* plugin id,
-+ * unchecked */)
-+{
-+ if (is_plugin_type_valid(type)) {
-+ if (is_plugin_id_valid(type, id))
-+ return plugin_at(&plugins[type], id);
-+ else
-+ /* id out of bounds */
-+ warning("nikita-2913",
-+ "Invalid plugin id: [%i:%i]", type, id);
-+ } else
-+ /* type_id out of bounds */
-+ warning("nikita-2914", "Invalid type_id: %i", type);
-+ return NULL;
-+}
-+
-+/**
-+ * save_plugin_id - store plugin id in disk format
-+ * @plugin: plugin to convert
-+ * @area: where to store result
-+ *
-+ * Puts id of @plugin in little endian format to address @area.
-+ */
-+int save_plugin_id(reiser4_plugin *plugin /* plugin to convert */ ,
-+ d16 *area /* where to store result */ )
-+{
-+ assert("nikita-1261", plugin != NULL);
-+ assert("nikita-1262", area != NULL);
-+
-+ put_unaligned(cpu_to_le16(plugin->h.id), area);
-+ return 0;
-+}
-+
-+/* list of all plugins of given type */
-+struct list_head *get_plugin_list(reiser4_plugin_type type)
-+{
-+ assert("nikita-1056", is_plugin_type_valid(type));
-+ return &plugins[type].plugins_list;
-+}
-+
-+static void update_pset_mask(reiser4_inode * info, pset_member memb)
-+{
-+ struct dentry *rootdir;
-+ reiser4_inode *root;
-+
-+ assert("edward-1443", memb != PSET_FILE);
-+
-+ rootdir = inode_by_reiser4_inode(info)->i_sb->s_root;
-+ if (rootdir != NULL) {
-+ root = reiser4_inode_data(rootdir->d_inode);
-+ /*
-+ * if inode is different from the default one, or we are
-+ * changing plugin of root directory, update plugin_mask
-+ */
-+ if (aset_get(info->pset, memb) !=
-+ aset_get(root->pset, memb) ||
-+ info == root)
-+ info->plugin_mask |= (1 << memb);
-+ else
-+ info->plugin_mask &= ~(1 << memb);
-+ }
-+}
-+
-+/* Get specified plugin set member from parent,
-+ or from fs-defaults (if no parent is given) and
-+ install the result to pset of @self */
-+int grab_plugin_pset(struct inode *self,
-+ struct inode *ancestor,
-+ pset_member memb)
-+{
-+ reiser4_plugin *plug;
-+ reiser4_inode *info;
-+ int result = 0;
-+
-+ /* Do not grab if initialised already. */
-+ info = reiser4_inode_data(self);
-+ if (aset_get(info->pset, memb) != NULL)
-+ return 0;
-+ if (ancestor) {
-+ reiser4_inode *parent;
-+
-+ parent = reiser4_inode_data(ancestor);
-+ plug = aset_get(parent->hset, memb) ? :
-+ aset_get(parent->pset, memb);
-+ }
-+ else
-+ plug = get_default_plugin(memb);
-+
-+ result = set_plugin(&info->pset, memb, plug);
-+ if (result == 0) {
-+ if (!ancestor || self->i_sb->s_root->d_inode != self)
-+ update_pset_mask(info, memb);
-+ }
-+ return result;
-+}
-+
-+/* Take missing pset members from root inode */
-+int finish_pset(struct inode *inode)
-+{
-+ reiser4_plugin *plug;
-+ reiser4_inode *root;
-+ reiser4_inode *info;
-+ pset_member memb;
-+ int result = 0;
-+
-+ root = reiser4_inode_data(inode->i_sb->s_root->d_inode);
-+ info = reiser4_inode_data(inode);
-+
-+ assert("edward-1455", root != NULL);
-+ assert("edward-1456", info != NULL);
-+
-+ /* file and directory plugins are already initialized. */
-+ for (memb = PSET_DIR + 1; memb < PSET_LAST; ++memb) {
-+
-+ /* Do not grab if initialised already. */
-+ if (aset_get(info->pset, memb) != NULL)
-+ continue;
-+
-+ plug = aset_get(root->pset, memb);
-+ result = set_plugin(&info->pset, memb, plug);
-+ if (result != 0)
-+ break;
-+ }
-+ if (result != 0) {
-+ warning("nikita-3447",
-+ "Cannot set up plugins for %lli",
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+ }
-+ return result;
-+}
-+
-+int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin * plug)
-+{
-+ reiser4_inode *info;
-+ int result = 0;
-+
-+ if (!self->i_sb->s_root || self->i_sb->s_root->d_inode == self) {
-+ /* Changing pset in the root object. */
-+ return RETERR(-EINVAL);
-+ }
-+
-+ info = reiser4_inode_data(self);
-+ if (plug->h.pops != NULL && plug->h.pops->change != NULL)
-+ result = plug->h.pops->change(self, plug, memb);
-+ else
-+ result = aset_set_unsafe(&info->pset, memb, plug);
-+ if (result == 0) {
-+ __u16 oldmask = info->plugin_mask;
-+
-+ update_pset_mask(info, memb);
-+ if (oldmask != info->plugin_mask)
-+ reiser4_inode_clr_flag(self, REISER4_SDLEN_KNOWN);
-+ }
-+ return result;
-+}
-+
-+reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES] = {
-+ /* C99 designated initializers */
-+ [REISER4_FILE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .label = "file",
-+ .desc = "Object plugins",
-+ .builtin_num = sizeof_array(file_plugins),
-+ .builtin = file_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(file_plugin)
-+ },
-+ [REISER4_DIR_PLUGIN_TYPE] = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .label = "dir",
-+ .desc = "Directory plugins",
-+ .builtin_num = sizeof_array(dir_plugins),
-+ .builtin = dir_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(dir_plugin)
-+ },
-+ [REISER4_HASH_PLUGIN_TYPE] = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .label = "hash",
-+ .desc = "Directory hashes",
-+ .builtin_num = sizeof_array(hash_plugins),
-+ .builtin = hash_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(hash_plugin)
-+ },
-+ [REISER4_FIBRATION_PLUGIN_TYPE] = {
-+ .type_id =
-+ REISER4_FIBRATION_PLUGIN_TYPE,
-+ .label = "fibration",
-+ .desc = "Directory fibrations",
-+ .builtin_num = sizeof_array(fibration_plugins),
-+ .builtin = fibration_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(fibration_plugin)
-+ },
-+ [REISER4_CIPHER_PLUGIN_TYPE] = {
-+ .type_id = REISER4_CIPHER_PLUGIN_TYPE,
-+ .label = "cipher",
-+ .desc = "Cipher plugins",
-+ .builtin_num = sizeof_array(cipher_plugins),
-+ .builtin = cipher_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(cipher_plugin)
-+ },
-+ [REISER4_DIGEST_PLUGIN_TYPE] = {
-+ .type_id = REISER4_DIGEST_PLUGIN_TYPE,
-+ .label = "digest",
-+ .desc = "Digest plugins",
-+ .builtin_num = sizeof_array(digest_plugins),
-+ .builtin = digest_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(digest_plugin)
-+ },
-+ [REISER4_COMPRESSION_PLUGIN_TYPE] = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .label = "compression",
-+ .desc = "Compression plugins",
-+ .builtin_num = sizeof_array(compression_plugins),
-+ .builtin = compression_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(compression_plugin)
-+ },
-+ [REISER4_FORMATTING_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .label = "formatting",
-+ .desc = "Tail inlining policies",
-+ .builtin_num = sizeof_array(formatting_plugins),
-+ .builtin = formatting_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(formatting_plugin)
-+ },
-+ [REISER4_PERM_PLUGIN_TYPE] = {
-+ .type_id = REISER4_PERM_PLUGIN_TYPE,
-+ .label = "perm",
-+ .desc = "Permission checks",
-+ .builtin_num = sizeof_array(perm_plugins),
-+ .builtin = perm_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(perm_plugin)
-+ },
-+ [REISER4_ITEM_PLUGIN_TYPE] = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .label = "item",
-+ .desc = "Item handlers",
-+ .builtin_num = sizeof_array(item_plugins),
-+ .builtin = item_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(item_plugin)
-+ },
-+ [REISER4_NODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_NODE_PLUGIN_TYPE,
-+ .label = "node",
-+ .desc = "node layout handlers",
-+ .builtin_num = sizeof_array(node_plugins),
-+ .builtin = node_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(node_plugin)
-+ },
-+ [REISER4_SD_EXT_PLUGIN_TYPE] = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .label = "sd_ext",
-+ .desc = "Parts of stat-data",
-+ .builtin_num = sizeof_array(sd_ext_plugins),
-+ .builtin = sd_ext_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(sd_ext_plugin)
-+ },
-+ [REISER4_FORMAT_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FORMAT_PLUGIN_TYPE,
-+ .label = "disk_layout",
-+ .desc = "defines filesystem on disk layout",
-+ .builtin_num = sizeof_array(format_plugins),
-+ .builtin = format_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(disk_format_plugin)
-+ },
-+ [REISER4_JNODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .label = "jnode",
-+ .desc = "defines kind of jnode",
-+ .builtin_num = sizeof_array(jnode_plugins),
-+ .builtin = jnode_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(jnode_plugin)
-+ },
-+ [REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .label = "compression_mode",
-+ .desc = "Defines compression mode",
-+ .builtin_num = sizeof_array(compression_mode_plugins),
-+ .builtin = compression_mode_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(compression_mode_plugin)
-+ },
-+ [REISER4_CLUSTER_PLUGIN_TYPE] = {
-+ .type_id = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .label = "cluster",
-+ .desc = "Defines cluster size",
-+ .builtin_num = sizeof_array(cluster_plugins),
-+ .builtin = cluster_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(cluster_plugin)
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/plugin.h linux-2.6.20/fs/reiser4/plugin/plugin.h
---- linux-2.6.20.orig/fs/reiser4/plugin/plugin.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/plugin.h 2007-05-06 14:50:43.855024468 +0400
-@@ -0,0 +1,920 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Basic plugin data-types.
-+ see fs/reiser4/plugin/plugin.c for details */
-+
-+#if !defined( __FS_REISER4_PLUGIN_TYPES_H__ )
-+#define __FS_REISER4_PLUGIN_TYPES_H__
-+
-+#include "../forward.h"
-+#include "../debug.h"
-+#include "../dformat.h"
-+#include "../key.h"
-+#include "compress/compress.h"
-+#include "crypto/cipher.h"
-+#include "plugin_header.h"
-+#include "item/static_stat.h"
-+#include "item/internal.h"
-+#include "item/sde.h"
-+#include "item/cde.h"
-+#include "item/item.h"
-+#include "node/node.h"
-+#include "node/node40.h"
-+#include "security/perm.h"
-+#include "fibration.h"
-+
-+#include "space/bitmap.h"
-+#include "space/space_allocator.h"
-+
-+#include "disk_format/disk_format40.h"
-+#include "disk_format/disk_format.h"
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/buffer_head.h> /* for struct buffer_head */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/types.h>
-+#include <linux/crypto.h>
-+
-+typedef struct reiser4_object_on_wire reiser4_object_on_wire;
-+
-+/*
-+ * File plugin. Defines the set of methods that file plugins implement, some
-+ * of which are optional.
-+ *
-+ * A file plugin offers to the caller an interface for IO (writing to and/or
-+ * reading from) to what the caller sees as one sequence of bytes. An IO to it
-+ * may affect more than one physical sequence of bytes, or no physical sequence
-+ * of bytes, it may affect sequences of bytes offered by other file plugins to
-+ * the semantic layer, and the file plugin may invoke other plugins and
-+ * delegate work to them, but its interface is structured for offering the
-+ * caller the ability to read and/or write what the caller sees as being a
-+ * single sequence of bytes.
-+ *
-+ * The file plugin must present a sequence of bytes to the caller, but it does
-+ * not necessarily have to store a sequence of bytes, it does not necessarily
-+ * have to support efficient tree traversal to any offset in the sequence of
-+ * bytes (tail and extent items, whose keys contain offsets, do however provide
-+ * efficient non-sequential lookup of any offset in the sequence of bytes).
-+ *
-+ * Directory plugins provide methods for selecting file plugins by resolving a
-+ * name for them.
-+ *
-+ * The functionality other filesystems call an attribute, and rigidly tie
-+ * together, we decompose into orthogonal selectable features of files. Using
-+ * the terminology we will define next, an attribute is a perhaps constrained,
-+ * perhaps static length, file whose parent has a uni-count-intra-link to it,
-+ * which might be grandparent-major-packed, and whose parent has a deletion
-+ * method that deletes it.
-+ *
-+ * File plugins can implement constraints.
-+ *
-+ * Files can be of variable length (e.g. regular unix files), or of static
-+ * length (e.g. static sized attributes).
-+ *
-+ * An object may have many sequences of bytes, and many file plugins, but, it
-+ * has exactly one objectid. It is usually desirable that an object has a
-+ * deletion method which deletes every item with that objectid. Items cannot
-+ * in general be found by just their objectids. This means that an object must
-+ * have either a method built into its deletion plugin method for knowing what
-+ * items need to be deleted, or links stored with the object that provide the
-+ * plugin with a method for finding those items. Deleting a file within an
-+ * object may or may not have the effect of deleting the entire object,
-+ * depending on the file plugin's deletion method.
-+ *
-+ * LINK TAXONOMY:
-+ *
-+ * Many objects have a reference count, and when the reference count reaches 0
-+ * the object's deletion method is invoked. Some links embody a reference
-+ * count increase ("countlinks"), and others do not ("nocountlinks").
-+ *
-+ * Some links are bi-directional links ("bilinks"), and some are
-+ * uni-directional("unilinks").
-+ *
-+ * Some links are between parts of the same object ("intralinks"), and some are
-+ * between different objects ("interlinks").
-+ *
-+ * PACKING TAXONOMY:
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their object's objectid (e.g. unix directory items in plan A), and these are
-+ * called "self-major-packed".
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their semantic parent object's objectid (e.g. unix file bodies in plan A),
-+ * and these are called "parent-major-packed".
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their semantic grandparent, and these are called "grandparent-major-packed".
-+ * Now carefully notice that we run into trouble with key length if we have to
-+ * store an 8 byte major+minor grandparent based packing locality, an 8 byte
-+ * parent objectid, an 8 byte attribute objectid, and an 8 byte offset, all in
-+ * a 24 byte key. One of these fields must be sacrificed if an item is to be
-+ * grandparent-major-packed, and which to sacrifice is left to the item author
-+ * choosing to make the item grandparent-major-packed. You cannot make tail
-+ * items and extent items grandparent-major-packed, though you could make them
-+ * self-major-packed (usually they are parent-major-packed).
-+ *
-+ * In the case of ACLs (which are composed of fixed length ACEs which consist
-+ * of {subject-type, subject, and permission bitmask} triples), it makes sense
-+ * to not have an offset field in the ACE item key, and to allow duplicate keys
-+ * for ACEs. Thus, the set of ACEs for a given file is found by looking for a
-+ * key consisting of the objectid of the grandparent (thus grouping all ACLs in
-+ * a directory together), the minor packing locality of ACE, the objectid of
-+ * the file, and 0.
-+ *
-+ * IO involves moving data from one location to another, which means that two
-+ * locations must be specified, source and destination.
-+ *
-+ * This source and destination can be in the filesystem, or they can be a
-+ * pointer in the user process address space plus a byte count.
-+ *
-+ * If both source and destination are in the filesystem, then at least one of
-+ * them must be representable as a pure stream of bytes (which we call a flow,
-+ * and define as a struct containing a key, a data pointer, and a length).
-+ * This may mean converting one of them into a flow. We provide a generic
-+ * cast_into_flow() method, which will work for any plugin supporting
-+ * read_flow(), though it is inefficiently implemented in that it temporarily
-+ * stores the flow in a buffer (Question: what to do with huge flows that
-+ * cannot fit into memory? Answer: we must not convert them all at once. )
-+ *
-+ * Performing a write requires resolving the write request into a flow defining
-+ * the source, and a method that performs the write, and a key that defines
-+ * where in the tree the write is to go.
-+ *
-+ * Performing a read requires resolving the read request into a flow defining
-+ * the target, and a method that performs the read, and a key that defines
-+ * where in the tree the read is to come from.
-+ *
-+ * There will exist file plugins which have no pluginid stored on the disk for
-+ * them, and which are only invoked by other plugins.
-+ */
-+
-+/* This should be incremented with each new contributed
-+ pair (plugin type, plugin id).
-+ NOTE: Make sure there is a release of reiser4progs
-+ with the corresponding version number */
-+#define PLUGIN_LIBRARY_VERSION 0
-+
-+ /* enumeration of fields within plugin_set */
-+typedef enum {
-+ PSET_FILE,
-+ PSET_DIR, /* PSET_FILE and PSET_DIR should be first elements:
-+ * inode.c:read_inode() depends on this. */
-+ PSET_PERM,
-+ PSET_FORMATTING,
-+ PSET_HASH,
-+ PSET_FIBRATION,
-+ PSET_SD,
-+ PSET_DIR_ITEM,
-+ PSET_CIPHER,
-+ PSET_DIGEST,
-+ PSET_COMPRESSION,
-+ PSET_COMPRESSION_MODE,
-+ PSET_CLUSTER,
-+ PSET_CREATE,
-+ PSET_LAST
-+} pset_member;
-+
-+/* builtin file-plugins */
-+typedef enum {
-+ /* regular file */
-+ UNIX_FILE_PLUGIN_ID,
-+ /* directory */
-+ DIRECTORY_FILE_PLUGIN_ID,
-+ /* symlink */
-+ SYMLINK_FILE_PLUGIN_ID,
-+ /* for objects completely handled by the VFS: fifos, devices,
-+ sockets */
-+ SPECIAL_FILE_PLUGIN_ID,
-+ /* regular cryptcompress file */
-+ CRYPTCOMPRESS_FILE_PLUGIN_ID,
-+ /* number of file plugins. Used as size of arrays to hold
-+ file plugins. */
-+ LAST_FILE_PLUGIN_ID
-+} reiser4_file_id;
-+
-+typedef struct file_plugin {
-+
-+ /* generic fields */
-+ plugin_header h;
-+
-+ struct inode_operations inode_ops;
-+ struct file_operations file_ops;
-+ struct address_space_operations as_ops;
-+
-+ /* save inode cached stat-data onto disk. It was called
-+ reiserfs_update_sd() in 3.x */
-+ int (*write_sd_by_inode) (struct inode *);
-+
-+ /*
-+ * private methods: These are optional. If used they will allow you to
-+ * minimize the amount of code needed to implement a deviation from
-+ * some other method that also uses them.
-+ */
-+
-+ /*
-+ * Construct flow into @flow according to user-supplied data.
-+ *
-+ * This is used by read/write methods to construct a flow to
-+ * write/read. ->flow_by_inode() is plugin method, rather than single
-+ * global implementation, because key in a flow used by plugin may
-+ * depend on data in a @buf.
-+ *
-+ * NIKITA-FIXME-HANS: please create statistics on what functions are
-+ * dereferenced how often for the mongo benchmark. You can supervise
-+ * Elena doing this for you if that helps. Email me the list of the
-+ * top 10, with their counts, and an estimate of the total number of
-+ * CPU cycles spent dereferencing as a percentage of CPU cycles spent
-+ * processing (non-idle processing). If the total percent is, say,
-+ * less than 1%, it will make our coding discussions much easier, and
-+ * keep me from questioning whether functions like the below are too
-+ * frequently called to be dereferenced. If the total percent is more
-+ * than 1%, perhaps private methods should be listed in a "required"
-+ * comment at the top of each plugin (with stern language about how if
-+ * the comment is missing it will not be accepted by the maintainer),
-+ * and implemented using macros not dereferenced functions. How about
-+ * replacing this whole private methods part of the struct with a
-+ * thorough documentation of what the standard helper functions are for
-+ * use in constructing plugins? I think users have been asking for
-+ * that, though not in so many words.
-+ */
-+ int (*flow_by_inode) (struct inode *, const char __user *buf,
-+ int user, loff_t size,
-+ loff_t off, rw_op op, flow_t *);
-+
-+ /*
-+ * Return the key used to retrieve an offset of a file. It is used by
-+ * default implementation of ->flow_by_inode() method
-+ * (common_build_flow()) and, among other things, to get to the extent
-+ * from jnode of unformatted node.
-+ */
-+ int (*key_by_inode) (struct inode *, loff_t off, reiser4_key *);
-+
-+ /* NIKITA-FIXME-HANS: this comment is not as clear to others as you think.... */
-+ /*
-+ * set the plugin for a file. Called during file creation in creat()
-+ * but not reiser4() unless an inode already exists for the file.
-+ */
-+ int (*set_plug_in_inode) (struct inode *inode, struct inode *parent,
-+ reiser4_object_create_data *);
-+
-+ /* NIKITA-FIXME-HANS: comment and name seem to say different things,
-+ * are you setting up the object itself also or just adjusting the
-+ * parent?.... */
-+ /* set up plugins for new @object created in @parent. @root is root
-+ directory. */
-+ int (*adjust_to_parent) (struct inode *object, struct inode *parent,
-+ struct inode *root);
-+ /*
-+ * this does whatever is necessary to do when object is created. For
-+ * instance, for unix files stat data is inserted. It is supposed to be
-+ * called by create of struct inode_operations.
-+ */
-+ int (*create_object) (struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+
-+ /* this does whatever is necessary to do when object is opened */
-+ int (*open_object) (struct inode * inode, struct file * file);
-+ /*
-+ * this method should check REISER4_NO_SD and set REISER4_NO_SD on
-+ * success. Deletion of an object usually includes removal of items
-+ * building file body (for directories this is removal of "." and "..")
-+ * and removal of stat-data item.
-+ */
-+ int (*delete_object) (struct inode *);
-+
-+ /* add link from @parent to @object */
-+ int (*add_link) (struct inode *object, struct inode *parent);
-+
-+ /* remove link from @parent to @object */
-+ int (*rem_link) (struct inode *object, struct inode *parent);
-+
-+ /*
-+ * return true if item addressed by @coord belongs to @inode. This is
-+ * used by read/write to properly slice flow into items in presence of
-+ * multiple key assignment policies, because items of a file are not
-+ * necessarily contiguous in a key space, for example, in a plan-b.
-+ */
-+ int (*owns_item) (const struct inode *, const coord_t *);
-+
-+ /* checks whether yet another hard link to this object can be
-+ added */
-+ int (*can_add_link) (const struct inode *);
-+
-+ /* checks whether hard links to this object can be removed */
-+ int (*can_rem_link) (const struct inode *);
-+
-+ /* currently non-empty only for DIRECTORY_FILE_PLUGIN_ID. It calls
-+ detach of directory plugin to remove ".." */
-+ int (*detach) (struct inode * child, struct inode * parent);
-+
-+ /* called when @child was just looked up in the @parent. It is
-+ non-empty only for DIRECTORY_FILE_PLUGIN_ID, where it calls attach of
-+ directory plugin */
-+ int (*bind) (struct inode * child, struct inode * parent);
-+
-+ /* process safe-link during mount */
-+ int (*safelink) (struct inode * object, reiser4_safe_link_t link,
-+ __u64 value);
-+
-+ /* The couple of estimate methods for all file operations */
-+ struct {
-+ reiser4_block_nr(*create) (const struct inode *);
-+ reiser4_block_nr(*update) (const struct inode *);
-+ reiser4_block_nr(*unlink) (const struct inode *,
-+ const struct inode *);
-+ } estimate;
-+
-+ /*
-+ * reiser4 specific part of inode has a union of structures which are
-+ * specific to a plugin. This method is called when inode is read
-+ * (read_inode) and when file is created (common_create_child) so that
-+ * file plugin could initialize its inode data
-+ */
-+ void (*init_inode_data) (struct inode *, reiser4_object_create_data *,
-+ int);
-+
-+ /*
-+ * This method performs progressive deletion of items and whole nodes
-+ * from right to left.
-+ *
-+ * @tap: the point deletion process begins from,
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ *
-+ * @return: 0 on success, error code otherwise; -E_REPEAT means that a long cut_tree
-+ * operation was interrupted to allow atom commit.
-+ */
-+ int (*cut_tree_worker) (tap_t *, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed, struct inode *,
-+ int, int *);
-+
-+ /* called from ->destroy_inode() */
-+ void (*destroy_inode) (struct inode *);
-+
-+ /*
-+ * methods to serialize object identity. This is used, for example, by
-+ * reiser4_{en,de}code_fh().
-+ */
-+ struct {
-+ /* store object's identity at @area */
-+ char *(*write) (struct inode * inode, char *area);
-+ /* parse object from wire to the @obj */
-+ char *(*read) (char *area, reiser4_object_on_wire * obj);
-+ /* given object identity in @obj, find or create its dentry */
-+ struct dentry *(*get) (struct super_block * s,
-+ reiser4_object_on_wire * obj);
-+ /* how many bytes ->wire.write() consumes */
-+ int (*size) (struct inode * inode);
-+ /* finish with object identity */
-+ void (*done) (reiser4_object_on_wire * obj);
-+ } wire;
-+} file_plugin;
-+
-+extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
-+
-+struct reiser4_object_on_wire {
-+ file_plugin *plugin;
-+ union {
-+ struct {
-+ obj_key_id key_id;
-+ } std;
-+ void *generic;
-+ } u;
-+};
-+
-+/* builtin dir-plugins */
-+typedef enum {
-+ HASHED_DIR_PLUGIN_ID,
-+ SEEKABLE_HASHED_DIR_PLUGIN_ID,
-+ LAST_DIR_ID
-+} reiser4_dir_id;
-+
-+typedef struct dir_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+
-+ struct inode_operations inode_ops;
-+ struct file_operations file_ops;
-+ struct address_space_operations as_ops;
-+
-+ /*
-+ * private methods: These are optional. If used they will allow you to
-+ * minimize the amount of code needed to implement a deviation from
-+ * some other method that uses them. You could logically argue that
-+ * they should be a separate type of plugin.
-+ */
-+
-+ struct dentry *(*get_parent) (struct inode * childdir);
-+
-+ /*
-+ * check whether "name" is acceptable name to be inserted into this
-+ * object. Optionally implemented by directory-like objects. Can check
-+ * for maximal length, reserved symbols etc
-+ */
-+ int (*is_name_acceptable) (const struct inode * inode, const char *name,
-+ int len);
-+
-+ void (*build_entry_key) (const struct inode * dir /* directory where
-+ * entry is (or will
-+ * be) in.*/ ,
-+ const struct qstr * name /* name of file
-+ * referenced by this
-+ * entry */ ,
-+ reiser4_key * result /* resulting key of
-+ * directory entry */ );
-+ int (*build_readdir_key) (struct file * dir, reiser4_key * result);
-+ int (*add_entry) (struct inode * object, struct dentry * where,
-+ reiser4_object_create_data * data,
-+ reiser4_dir_entry_desc * entry);
-+ int (*rem_entry) (struct inode * object, struct dentry * where,
-+ reiser4_dir_entry_desc * entry);
-+
-+ /*
-+ * initialize directory structure for newly created object. For normal
-+ * unix directories, insert dot and dotdot.
-+ */
-+ int (*init) (struct inode * object, struct inode * parent,
-+ reiser4_object_create_data * data);
-+
-+ /* destroy directory */
-+ int (*done) (struct inode * child);
-+
-+ /* called when @subdir was just looked up in the @dir */
-+ int (*attach) (struct inode * subdir, struct inode * dir);
-+ int (*detach) (struct inode * subdir, struct inode * dir);
-+
-+ struct {
-+ reiser4_block_nr(*add_entry) (const struct inode *);
-+ reiser4_block_nr(*rem_entry) (const struct inode *);
-+ reiser4_block_nr(*unlink) (const struct inode *,
-+ const struct inode *);
-+ } estimate;
-+} dir_plugin;
-+
-+extern dir_plugin dir_plugins[LAST_DIR_ID];
-+
-+typedef struct formatting_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* returns non-zero iff file's tail has to be stored
-+ in a direct item. */
-+ int (*have_tail) (const struct inode * inode, loff_t size);
-+} formatting_plugin;
-+
-+typedef struct hash_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* computes hash of the given name */
-+ __u64(*hash) (const unsigned char *name, int len);
-+} hash_plugin;
-+
-+typedef struct cipher_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ struct crypto_blkcipher * (*alloc) (void);
-+ void (*free) (struct crypto_blkcipher * tfm);
-+ /* Offset translator. For each offset this returns (k * offset), where
-+ k (k >= 1) is an expansion factor of the cipher algorithm.
-+ For all symmetric algorithms k == 1. For asymmetric algorithms (which
-+ inflate data) offset translation guarantees that all disk cluster's
-+ units will have keys smaller then next cluster's one.
-+ */
-+ loff_t(*scale) (struct inode * inode, size_t blocksize, loff_t src);
-+ /* Cipher algorithms can accept data only by chunks of cipher block
-+ size. This method is to align any flow up to cipher block size when
-+ we pass it to cipher algorithm. To align means to append padding of
-+ special format specific to the cipher algorithm */
-+ int (*align_stream) (__u8 * tail, int clust_size, int blocksize);
-+ /* low-level key manager (check, install, etc..) */
-+ int (*setkey) (struct crypto_tfm * tfm, const __u8 * key,
-+ unsigned int keylen);
-+ /* main text processing procedures */
-+ void (*encrypt) (__u32 * expkey, __u8 * dst, const __u8 * src);
-+ void (*decrypt) (__u32 * expkey, __u8 * dst, const __u8 * src);
-+} cipher_plugin;
-+
-+typedef struct digest_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* fingerprint size in bytes */
-+ int fipsize;
-+ struct crypto_hash * (*alloc) (void);
-+ void (*free) (struct crypto_hash * tfm);
-+} digest_plugin;
-+
-+typedef struct compression_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init) (void);
-+ /* the maximum number of bytes the size of the "compressed" data can
-+ * exceed the uncompressed data. */
-+ int (*overrun) (unsigned src_len);
-+ coa_t(*alloc) (tfm_action act);
-+ void (*free) (coa_t coa, tfm_action act);
-+ /* minimal size of the flow we still try to compress */
-+ int (*min_size_deflate) (void);
-+ __u32(*checksum) (char *data, __u32 length);
-+ /* main transform procedures */
-+ void (*compress) (coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len);
-+ void (*decompress) (coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len);
-+} compression_plugin;
-+
-+typedef struct compression_mode_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* this is called when estimating compressibility
-+ of a logical cluster by its content */
-+ int (*should_deflate) (struct inode * inode, cloff_t index);
-+ /* this is called when results of compression should be saved */
-+ int (*accept_hook) (struct inode * inode, cloff_t index);
-+ /* this is called when results of compression should be discarded */
-+ int (*discard_hook) (struct inode * inode, cloff_t index);
-+} compression_mode_plugin;
-+
-+typedef struct cluster_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int shift;
-+} cluster_plugin;
-+
-+typedef struct sd_ext_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*present) (struct inode * inode, char **area, int *len);
-+ int (*absent) (struct inode * inode);
-+ int (*save_len) (struct inode * inode);
-+ int (*save) (struct inode * inode, char **area);
-+ /* alignment requirement for this stat-data part */
-+ int alignment;
-+} sd_ext_plugin;
-+
-+/* this plugin contains methods to allocate objectid for newly created files,
-+ to deallocate objectid when file gets removed, to report number of used and
-+ free objectids */
-+typedef struct oid_allocator_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init_oid_allocator) (reiser4_oid_allocator * map, __u64 nr_files,
-+ __u64 oids);
-+ /* used to report statfs->f_files */
-+ __u64(*oids_used) (reiser4_oid_allocator * map);
-+ /* get next oid to use */
-+ __u64(*next_oid) (reiser4_oid_allocator * map);
-+ /* used to report statfs->f_ffree */
-+ __u64(*oids_free) (reiser4_oid_allocator * map);
-+ /* allocate new objectid */
-+ int (*allocate_oid) (reiser4_oid_allocator * map, oid_t *);
-+ /* release objectid */
-+ int (*release_oid) (reiser4_oid_allocator * map, oid_t);
-+ /* how many pages to reserve in transaction for allocation of new
-+ objectid */
-+ int (*oid_reserve_allocate) (reiser4_oid_allocator * map);
-+ /* how many pages to reserve in transaction for freeing of an
-+ objectid */
-+ int (*oid_reserve_release) (reiser4_oid_allocator * map);
-+ void (*print_info) (const char *, reiser4_oid_allocator *);
-+} oid_allocator_plugin;
-+
-+/* disk layout plugin: this specifies super block, journal, bitmap (if there
-+ are any) locations, etc */
-+typedef struct disk_format_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* replay journal, initialize super_info_data, etc */
-+ int (*init_format) (struct super_block *, void *data);
-+
-+ /* key of root directory stat data */
-+ const reiser4_key *(*root_dir_key) (const struct super_block *);
-+
-+ int (*release) (struct super_block *);
-+ jnode *(*log_super) (struct super_block *);
-+ int (*check_open) (const struct inode * object);
-+ int (*version_update) (struct super_block *);
-+} disk_format_plugin;
-+
-+struct jnode_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init) (jnode * node);
-+ int (*parse) (jnode * node);
-+ struct address_space *(*mapping) (const jnode * node);
-+ unsigned long (*index) (const jnode * node);
-+ jnode *(*clone) (jnode * node);
-+};
-+
-+/* plugin instance. */
-+/* */
-+/* This is "wrapper" union for all types of plugins. Most of the code uses */
-+/* plugins of particular type (file_plugin, dir_plugin, etc.) rather than */
-+/* operates with pointers to reiser4_plugin. This union is only used in */
-+/* some generic code in plugin/plugin.c that operates on all */
-+/* plugins. Technically speaking purpose of this union is to add type */
-+/* safety to said generic code: each plugin type (file_plugin, for */
-+/* example), contains plugin_header as its first memeber. This first member */
-+/* is located at the same place in memory as .h member of */
-+/* reiser4_plugin. Generic code, obtains pointer to reiser4_plugin and */
-+/* looks in the .h which is header of plugin type located in union. This */
-+/* allows to avoid type-casts. */
-+union reiser4_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* file plugin */
-+ file_plugin file;
-+ /* directory plugin */
-+ dir_plugin dir;
-+ /* hash plugin, used by directory plugin */
-+ hash_plugin hash;
-+ /* fibration plugin used by directory plugin */
-+ fibration_plugin fibration;
-+ /* cipher transform plugin, used by file plugin */
-+ cipher_plugin cipher;
-+ /* digest transform plugin, used by file plugin */
-+ digest_plugin digest;
-+ /* compression transform plugin, used by file plugin */
-+ compression_plugin compression;
-+ /* tail plugin, used by file plugin */
-+ formatting_plugin formatting;
-+ /* permission plugin */
-+ perm_plugin perm;
-+ /* node plugin */
-+ node_plugin node;
-+ /* item plugin */
-+ item_plugin item;
-+ /* stat-data extension plugin */
-+ sd_ext_plugin sd_ext;
-+ /* disk layout plugin */
-+ disk_format_plugin format;
-+ /* object id allocator plugin */
-+ oid_allocator_plugin oid_allocator;
-+ /* plugin for different jnode types */
-+ jnode_plugin jnode;
-+ /* compression mode plugin, used by object plugin */
-+ compression_mode_plugin compression_mode;
-+ /* cluster plugin, used by object plugin */
-+ cluster_plugin clust;
-+ /* place-holder for new plugin types that can be registered
-+ dynamically, and used by other dynamically loaded plugins. */
-+ void *generic;
-+};
-+
-+struct reiser4_plugin_ops {
-+ /* called when plugin is initialized */
-+ int (*init) (reiser4_plugin * plugin);
-+ /* called when plugin is unloaded */
-+ int (*done) (reiser4_plugin * plugin);
-+ /* load given plugin from disk */
-+ int (*load) (struct inode * inode,
-+ reiser4_plugin * plugin, char **area, int *len);
-+ /* how many space is required to store this plugin's state
-+ in stat-data */
-+ int (*save_len) (struct inode * inode, reiser4_plugin * plugin);
-+ /* save persistent plugin-data to disk */
-+ int (*save) (struct inode * inode, reiser4_plugin * plugin,
-+ char **area);
-+ /* alignment requirement for on-disk state of this plugin
-+ in number of bytes */
-+ int alignment;
-+ /* install itself into given inode. This can return error
-+ (e.g., you cannot change hash of non-empty directory). */
-+ int (*change) (struct inode * inode, reiser4_plugin * plugin,
-+ pset_member memb);
-+ /* install itself into given inode. This can return error
-+ (e.g., you cannot change hash of non-empty directory). */
-+ int (*inherit) (struct inode * inode, struct inode * parent,
-+ reiser4_plugin * plugin);
-+};
-+
-+/* functions implemented in fs/reiser4/plugin/plugin.c */
-+
-+/* stores plugin reference in reiser4-specific part of inode */
-+extern int set_object_plugin(struct inode *inode, reiser4_plugin_id id);
-+extern int setup_plugins(struct super_block *super, reiser4_plugin ** area);
-+extern int init_plugins(void);
-+
-+/* builtin plugins */
-+
-+/* builtin hash-plugins */
-+
-+typedef enum {
-+ RUPASOV_HASH_ID,
-+ R5_HASH_ID,
-+ TEA_HASH_ID,
-+ FNV1_HASH_ID,
-+ DEGENERATE_HASH_ID,
-+ LAST_HASH_ID
-+} reiser4_hash_id;
-+
-+/* builtin cipher plugins */
-+
-+typedef enum {
-+ NONE_CIPHER_ID,
-+ LAST_CIPHER_ID
-+} reiser4_cipher_id;
-+
-+/* builtin digest plugins */
-+
-+typedef enum {
-+ SHA256_32_DIGEST_ID,
-+ LAST_DIGEST_ID
-+} reiser4_digest_id;
-+
-+/* builtin compression mode plugins */
-+typedef enum {
-+ NONE_COMPRESSION_MODE_ID,
-+ LATTD_COMPRESSION_MODE_ID,
-+ ULTIM_COMPRESSION_MODE_ID,
-+ FORCE_COMPRESSION_MODE_ID,
-+ CONVX_COMPRESSION_MODE_ID,
-+ LAST_COMPRESSION_MODE_ID
-+} reiser4_compression_mode_id;
-+
-+/* builtin cluster plugins */
-+typedef enum {
-+ CLUSTER_64K_ID,
-+ CLUSTER_32K_ID,
-+ CLUSTER_16K_ID,
-+ CLUSTER_8K_ID,
-+ CLUSTER_4K_ID,
-+ LAST_CLUSTER_ID
-+} reiser4_cluster_id;
-+
-+/* builtin tail-plugins */
-+
-+typedef enum {
-+ NEVER_TAILS_FORMATTING_ID,
-+ ALWAYS_TAILS_FORMATTING_ID,
-+ SMALL_FILE_FORMATTING_ID,
-+ LAST_TAIL_FORMATTING_ID
-+} reiser4_formatting_id;
-+
-+/* compression/clustering specific data */
-+typedef struct compression_data {
-+ reiser4_compression_id coa; /* id of the compression algorithm */
-+} compression_data_t;
-+
-+typedef __u8 cluster_data_t; /* cluster info */
-+
-+/* data type used to pack parameters that we pass to vfs object creation
-+ function create_object() */
-+struct reiser4_object_create_data {
-+ /* plugin to control created object */
-+ reiser4_file_id id;
-+ /* mode of regular file, directory or special file */
-+/* what happens if some other sort of perm plugin is in use? */
-+ int mode;
-+ /* rdev of special file */
-+ dev_t rdev;
-+ /* symlink target */
-+ const char *name;
-+ /* add here something for non-standard objects you invent, like
-+ query for interpolation file etc. */
-+
-+ crypto_stat_t * crypto;
-+ compression_data_t *compression;
-+ cluster_data_t *cluster;
-+
-+ struct inode *parent;
-+ struct dentry *dentry;
-+};
-+
-+/* description of directory entry being created/destroyed/sought for
-+
-+ It is passed down to the directory plugin and farther to the
-+ directory item plugin methods. Creation of new directory is done in
-+ several stages: first we search for an entry with the same name, then
-+ create new one. reiser4_dir_entry_desc is used to store some information
-+ collected at some stage of this process and required later: key of
-+ item that we want to insert/delete and pointer to an object that will
-+ be bound by the new directory entry. Probably some more fields will
-+ be added there.
-+
-+*/
-+struct reiser4_dir_entry_desc {
-+ /* key of directory entry */
-+ reiser4_key key;
-+ /* object bound by this entry. */
-+ struct inode *obj;
-+};
-+
-+#define MAX_PLUGIN_TYPE_LABEL_LEN 32
-+#define MAX_PLUGIN_PLUG_LABEL_LEN 32
-+
-+/* used for interface with user-land: table-driven parsing in
-+ reiser4(). */
-+typedef struct plugin_locator {
-+ reiser4_plugin_type type_id;
-+ reiser4_plugin_id id;
-+ char type_label[MAX_PLUGIN_TYPE_LABEL_LEN];
-+ char plug_label[MAX_PLUGIN_PLUG_LABEL_LEN];
-+} plugin_locator;
-+
-+extern int locate_plugin(struct inode *inode, plugin_locator * loc);
-+
-+#define PLUGIN_BY_ID(TYPE,ID,FIELD) \
-+static inline TYPE *TYPE ## _by_id( reiser4_plugin_id id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_id ( ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline TYPE *TYPE ## _by_disk_id( reiser4_tree *tree, d16 *id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_disk_id ( tree, ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline TYPE *TYPE ## _by_unsafe_id( reiser4_plugin_id id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_unsafe_id ( ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline reiser4_plugin* TYPE ## _to_plugin( TYPE* plugin ) \
-+{ \
-+ return ( reiser4_plugin * ) plugin; \
-+} \
-+static inline reiser4_plugin_id TYPE ## _id( TYPE* plugin ) \
-+{ \
-+ return TYPE ## _to_plugin (plugin) -> h.id; \
-+} \
-+typedef struct { int foo; } TYPE ## _plugin_dummy
-+
-+PLUGIN_BY_ID(item_plugin, REISER4_ITEM_PLUGIN_TYPE, item);
-+PLUGIN_BY_ID(file_plugin, REISER4_FILE_PLUGIN_TYPE, file);
-+PLUGIN_BY_ID(dir_plugin, REISER4_DIR_PLUGIN_TYPE, dir);
-+PLUGIN_BY_ID(node_plugin, REISER4_NODE_PLUGIN_TYPE, node);
-+PLUGIN_BY_ID(sd_ext_plugin, REISER4_SD_EXT_PLUGIN_TYPE, sd_ext);
-+PLUGIN_BY_ID(perm_plugin, REISER4_PERM_PLUGIN_TYPE, perm);
-+PLUGIN_BY_ID(hash_plugin, REISER4_HASH_PLUGIN_TYPE, hash);
-+PLUGIN_BY_ID(fibration_plugin, REISER4_FIBRATION_PLUGIN_TYPE, fibration);
-+PLUGIN_BY_ID(cipher_plugin, REISER4_CIPHER_PLUGIN_TYPE, cipher);
-+PLUGIN_BY_ID(digest_plugin, REISER4_DIGEST_PLUGIN_TYPE, digest);
-+PLUGIN_BY_ID(compression_plugin, REISER4_COMPRESSION_PLUGIN_TYPE, compression);
-+PLUGIN_BY_ID(formatting_plugin, REISER4_FORMATTING_PLUGIN_TYPE, formatting);
-+PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format);
-+PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode);
-+PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ compression_mode);
-+PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust);
-+
-+extern int save_plugin_id(reiser4_plugin * plugin, d16 * area);
-+
-+extern struct list_head *get_plugin_list(reiser4_plugin_type type_id);
-+
-+#define for_all_plugins(ptype, plugin) \
-+for (plugin = list_entry(get_plugin_list(ptype)->next, reiser4_plugin, h.linkage); \
-+ get_plugin_list(ptype) != &plugin->h.linkage; \
-+ plugin = list_entry(plugin->h.linkage.next, reiser4_plugin, h.linkage))
-+
-+
-+extern int grab_plugin_pset(struct inode *self, struct inode *ancestor, pset_member memb);
-+extern int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin *plug);
-+extern int finish_pset(struct inode *inode);
-+
-+/* defined in fs/reiser4/plugin/object.c */
-+extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
-+/* defined in fs/reiser4/plugin/object.c */
-+extern dir_plugin dir_plugins[LAST_DIR_ID];
-+/* defined in fs/reiser4/plugin/item/static_stat.c */
-+extern sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION];
-+/* defined in fs/reiser4/plugin/hash.c */
-+extern hash_plugin hash_plugins[LAST_HASH_ID];
-+/* defined in fs/reiser4/plugin/fibration.c */
-+extern fibration_plugin fibration_plugins[LAST_FIBRATION_ID];
-+/* defined in fs/reiser4/plugin/crypt.c */
-+extern cipher_plugin cipher_plugins[LAST_CIPHER_ID];
-+/* defined in fs/reiser4/plugin/digest.c */
-+extern digest_plugin digest_plugins[LAST_DIGEST_ID];
-+/* defined in fs/reiser4/plugin/compress/compress.c */
-+extern compression_plugin compression_plugins[LAST_COMPRESSION_ID];
-+/* defined in fs/reiser4/plugin/compress/compression_mode.c */
-+extern compression_mode_plugin
-+compression_mode_plugins[LAST_COMPRESSION_MODE_ID];
-+/* defined in fs/reiser4/plugin/cluster.c */
-+extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID];
-+/* defined in fs/reiser4/plugin/tail.c */
-+extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID];
-+/* defined in fs/reiser4/plugin/security/security.c */
-+extern perm_plugin perm_plugins[LAST_PERM_ID];
-+/* defined in fs/reiser4/plugin/item/item.c */
-+extern item_plugin item_plugins[LAST_ITEM_ID];
-+/* defined in fs/reiser4/plugin/node/node.c */
-+extern node_plugin node_plugins[LAST_NODE_ID];
-+/* defined in fs/reiser4/plugin/disk_format/disk_format.c */
-+extern disk_format_plugin format_plugins[LAST_FORMAT_ID];
-+
-+/* __FS_REISER4_PLUGIN_TYPES_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/plugin_header.h linux-2.6.20/fs/reiser4/plugin/plugin_header.h
---- linux-2.6.20.orig/fs/reiser4/plugin/plugin_header.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/plugin_header.h 2007-05-06 14:50:43.855024468 +0400
-@@ -0,0 +1,144 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* plugin header. Data structures required by all plugin types. */
-+
-+#if !defined( __PLUGIN_HEADER_H__ )
-+#define __PLUGIN_HEADER_H__
-+
-+/* plugin data-types and constants */
-+
-+#include "../debug.h"
-+#include "../dformat.h"
-+
-+typedef enum {
-+ REISER4_FILE_PLUGIN_TYPE,
-+ REISER4_DIR_PLUGIN_TYPE,
-+ REISER4_ITEM_PLUGIN_TYPE,
-+ REISER4_NODE_PLUGIN_TYPE,
-+ REISER4_HASH_PLUGIN_TYPE,
-+ REISER4_FIBRATION_PLUGIN_TYPE,
-+ REISER4_FORMATTING_PLUGIN_TYPE,
-+ REISER4_PERM_PLUGIN_TYPE,
-+ REISER4_SD_EXT_PLUGIN_TYPE,
-+ REISER4_FORMAT_PLUGIN_TYPE,
-+ REISER4_JNODE_PLUGIN_TYPE,
-+ REISER4_CIPHER_PLUGIN_TYPE,
-+ REISER4_DIGEST_PLUGIN_TYPE,
-+ REISER4_COMPRESSION_PLUGIN_TYPE,
-+ REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ REISER4_CLUSTER_PLUGIN_TYPE,
-+ REISER4_PLUGIN_TYPES
-+} reiser4_plugin_type;
-+
-+typedef enum {
-+ REISER4_DIRECTORY_FILE,
-+ REISER4_REGULAR_FILE,
-+ REISER4_SYMLINK_FILE,
-+ REISER4_SPECIAL_FILE,
-+} reiser4_plugin_group;
-+
-+struct reiser4_plugin_ops;
-+/* generic plugin operations, supported by each
-+ plugin type. */
-+typedef struct reiser4_plugin_ops reiser4_plugin_ops;
-+
-+/* the common part of all plugin instances. */
-+typedef struct plugin_header {
-+ /* plugin type */
-+ reiser4_plugin_type type_id;
-+ /* id of this plugin */
-+ reiser4_plugin_id id;
-+ /* bitmask of groups the plugin belongs to. */
-+ reiser4_plugin_groups groups;
-+ /* plugin operations */
-+ reiser4_plugin_ops *pops;
-+/* NIKITA-FIXME-HANS: usage of and access to label and desc is not commented and defined. */
-+ /* short label of this plugin */
-+ const char *label;
-+ /* descriptive string.. */
-+ const char *desc;
-+ /* list linkage */
-+ struct list_head linkage;
-+} plugin_header;
-+
-+#define plugin_of_group(plug, group) (plug->h.groups & (1 << group))
-+
-+/* PRIVATE INTERFACES */
-+/* NIKITA-FIXME-HANS: what is this for and why does it duplicate what is in plugin_header? */
-+/* plugin type representation. */
-+typedef struct reiser4_plugin_type_data {
-+ /* internal plugin type identifier. Should coincide with
-+ index of this item in plugins[] array. */
-+ reiser4_plugin_type type_id;
-+ /* short symbolic label of this plugin type. Should be no longer
-+ than MAX_PLUGIN_TYPE_LABEL_LEN characters including '\0'. */
-+ const char *label;
-+ /* plugin type description longer than .label */
-+ const char *desc;
-+
-+/* NIKITA-FIXME-HANS: define built-in */
-+ /* number of built-in plugin instances of this type */
-+ int builtin_num;
-+ /* array of built-in plugins */
-+ void *builtin;
-+ struct list_head plugins_list;
-+ size_t size;
-+} reiser4_plugin_type_data;
-+
-+extern reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES];
-+
-+int is_plugin_type_valid(reiser4_plugin_type type);
-+int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id);
-+
-+static inline reiser4_plugin *plugin_at(reiser4_plugin_type_data * ptype, int i)
-+{
-+ char *builtin;
-+
-+ builtin = ptype->builtin;
-+ return (reiser4_plugin *) (builtin + i * ptype->size);
-+}
-+
-+/* return plugin by its @type_id and @id */
-+static inline reiser4_plugin *plugin_by_id(reiser4_plugin_type type,
-+ reiser4_plugin_id id)
-+{
-+ assert("nikita-1651", is_plugin_type_valid(type));
-+ assert("nikita-1652", is_plugin_id_valid(type, id));
-+ return plugin_at(&plugins[type], id);
-+}
-+
-+extern reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type_id,
-+ reiser4_plugin_id id);
-+
-+/**
-+ * plugin_by_disk_id - get reiser4_plugin
-+ * @type_id: plugin type id
-+ * @did: plugin id in disk format
-+ *
-+ * Returns reiser4_plugin by plugin type id an dplugin_id.
-+ */
-+static inline reiser4_plugin *plugin_by_disk_id(reiser4_tree * tree UNUSED_ARG,
-+ reiser4_plugin_type type_id,
-+ __le16 *plugin_id)
-+{
-+ /*
-+ * what we should do properly is to maintain within each file-system a
-+ * dictionary that maps on-disk plugin ids to "universal" ids. This
-+ * dictionary will be resolved on mount time, so that this function
-+ * will perform just one additional array lookup.
-+ */
-+ return plugin_by_unsafe_id(type_id, le16_to_cpu(*plugin_id));
-+}
-+
-+/* __PLUGIN_HEADER_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/plugin_set.c linux-2.6.20/fs/reiser4/plugin/plugin_set.c
---- linux-2.6.20.orig/fs/reiser4/plugin/plugin_set.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/plugin_set.c 2007-05-06 14:50:43.855024468 +0400
-@@ -0,0 +1,379 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* This file contains Reiser4 plugin set operations */
-+
-+/* plugin sets
-+ *
-+ * Each file in reiser4 is controlled by a whole set of plugins (file plugin,
-+ * directory plugin, hash plugin, tail policy plugin, security plugin, etc.)
-+ * assigned (inherited, deduced from mode bits, etc.) at creation time. This
-+ * set of plugins (so called pset) is described by structure plugin_set (see
-+ * plugin/plugin_set.h), which contains pointers to all required plugins.
-+ *
-+ * Children can inherit some pset members from their parent, however sometimes
-+ * it is useful to specify members different from parent ones. Since object's
-+ * pset can not be easily changed without fatal consequences, we use for this
-+ * purpose another special plugin table (so called hset, or heir set) described
-+ * by the same structure.
-+ *
-+ * Inode only stores a pointers to pset and hset. Different inodes with the
-+ * same set of pset (hset) members point to the same pset (hset). This is
-+ * archived by storing psets and hsets in global hash table. Races are avoided
-+ * by simple (and efficient so far) solution of never recycling psets, even
-+ * when last inode pointing to it is destroyed.
-+ */
-+
-+#include "../debug.h"
-+#include "../super.h"
-+#include "plugin_set.h"
-+
-+#include <linux/slab.h>
-+#include <linux/stddef.h>
-+
-+/* slab for plugin sets */
-+static struct kmem_cache *plugin_set_slab;
-+
-+static spinlock_t plugin_set_lock[8] __cacheline_aligned_in_smp = {
-+ [0 ... 7] = SPIN_LOCK_UNLOCKED
-+};
-+
-+/* hash table support */
-+
-+#define PS_TABLE_SIZE (32)
-+
-+static inline plugin_set *cast_to(const unsigned long *a)
-+{
-+ return container_of(a, plugin_set, hashval);
-+}
-+
-+static inline int pseq(const unsigned long *a1, const unsigned long *a2)
-+{
-+ plugin_set *set1;
-+ plugin_set *set2;
-+
-+ /* make sure fields are not missed in the code below */
-+ cassert(sizeof *set1 ==
-+ sizeof set1->hashval +
-+ sizeof set1->link +
-+ sizeof set1->file +
-+ sizeof set1->dir +
-+ sizeof set1->perm +
-+ sizeof set1->formatting +
-+ sizeof set1->hash +
-+ sizeof set1->fibration +
-+ sizeof set1->sd +
-+ sizeof set1->dir_item +
-+ sizeof set1->cipher +
-+ sizeof set1->digest +
-+ sizeof set1->compression +
-+ sizeof set1->compression_mode +
-+ sizeof set1->cluster +
-+ sizeof set1->create);
-+
-+ set1 = cast_to(a1);
-+ set2 = cast_to(a2);
-+ return
-+ set1->hashval == set2->hashval &&
-+ set1->file == set2->file &&
-+ set1->dir == set2->dir &&
-+ set1->perm == set2->perm &&
-+ set1->formatting == set2->formatting &&
-+ set1->hash == set2->hash &&
-+ set1->fibration == set2->fibration &&
-+ set1->sd == set2->sd &&
-+ set1->dir_item == set2->dir_item &&
-+ set1->cipher == set2->cipher &&
-+ set1->digest == set2->digest &&
-+ set1->compression == set2->compression &&
-+ set1->compression_mode == set2->compression_mode &&
-+ set1->cluster == set2->cluster &&
-+ set1->create == set2->create;
-+}
-+
-+#define HASH_FIELD(hash, set, field) \
-+({ \
-+ (hash) += (unsigned long)(set)->field >> 2; \
-+})
-+
-+static inline unsigned long calculate_hash(const plugin_set * set)
-+{
-+ unsigned long result;
-+
-+ result = 0;
-+ HASH_FIELD(result, set, file);
-+ HASH_FIELD(result, set, dir);
-+ HASH_FIELD(result, set, perm);
-+ HASH_FIELD(result, set, formatting);
-+ HASH_FIELD(result, set, hash);
-+ HASH_FIELD(result, set, fibration);
-+ HASH_FIELD(result, set, sd);
-+ HASH_FIELD(result, set, dir_item);
-+ HASH_FIELD(result, set, cipher);
-+ HASH_FIELD(result, set, digest);
-+ HASH_FIELD(result, set, compression);
-+ HASH_FIELD(result, set, compression_mode);
-+ HASH_FIELD(result, set, cluster);
-+ HASH_FIELD(result, set, create);
-+ return result & (PS_TABLE_SIZE - 1);
-+}
-+
-+static inline unsigned long
-+pshash(ps_hash_table * table, const unsigned long *a)
-+{
-+ return *a;
-+}
-+
-+/* The hash table definition */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(ps, plugin_set, unsigned long, hashval, link, pshash,
-+ pseq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+static ps_hash_table ps_table;
-+static plugin_set empty_set = {
-+ .hashval = 0,
-+ .file = NULL,
-+ .dir = NULL,
-+ .perm = NULL,
-+ .formatting = NULL,
-+ .hash = NULL,
-+ .fibration = NULL,
-+ .sd = NULL,
-+ .dir_item = NULL,
-+ .cipher = NULL,
-+ .digest = NULL,
-+ .compression = NULL,
-+ .compression_mode = NULL,
-+ .cluster = NULL,
-+ .create = NULL,
-+ .link = {NULL}
-+};
-+
-+plugin_set *plugin_set_get_empty(void)
-+{
-+ return &empty_set;
-+}
-+
-+void plugin_set_put(plugin_set * set)
-+{
-+}
-+
-+static inline unsigned long *pset_field(plugin_set * set, int offset)
-+{
-+ return (unsigned long *)(((char *)set) + offset);
-+}
-+
-+static int plugin_set_field(plugin_set ** set, const unsigned long val,
-+ const int offset)
-+{
-+ unsigned long *spot;
-+ spinlock_t *lock;
-+ plugin_set replica;
-+ plugin_set *twin;
-+ plugin_set *psal;
-+ plugin_set *orig;
-+
-+ assert("nikita-2902", set != NULL);
-+ assert("nikita-2904", *set != NULL);
-+
-+ spot = pset_field(*set, offset);
-+ if (unlikely(*spot == val))
-+ return 0;
-+
-+ replica = *(orig = *set);
-+ *pset_field(&replica, offset) = val;
-+ replica.hashval = calculate_hash(&replica);
-+ rcu_read_lock();
-+ twin = ps_hash_find(&ps_table, &replica.hashval);
-+ if (unlikely(twin == NULL)) {
-+ rcu_read_unlock();
-+ psal = kmem_cache_alloc(plugin_set_slab,
-+ reiser4_ctx_gfp_mask_get());
-+ if (psal == NULL)
-+ return RETERR(-ENOMEM);
-+ *psal = replica;
-+ lock = &plugin_set_lock[replica.hashval & 7];
-+ spin_lock(lock);
-+ twin = ps_hash_find(&ps_table, &replica.hashval);
-+ if (likely(twin == NULL)) {
-+ *set = psal;
-+ ps_hash_insert_rcu(&ps_table, psal);
-+ } else {
-+ *set = twin;
-+ kmem_cache_free(plugin_set_slab, psal);
-+ }
-+ spin_unlock(lock);
-+ } else {
-+ rcu_read_unlock();
-+ *set = twin;
-+ }
-+ return 0;
-+}
-+
-+static struct {
-+ int offset;
-+ reiser4_plugin_groups groups;
-+ reiser4_plugin_type type;
-+} pset_descr[PSET_LAST] = {
-+ [PSET_FILE] = {
-+ .offset = offsetof(plugin_set, file),
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_DIR] = {
-+ .offset = offsetof(plugin_set, dir),
-+ .type = REISER4_DIR_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_PERM] = {
-+ .offset = offsetof(plugin_set, perm),
-+ .type = REISER4_PERM_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_FORMATTING] = {
-+ .offset = offsetof(plugin_set, formatting),
-+ .type = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_HASH] = {
-+ .offset = offsetof(plugin_set, hash),
-+ .type = REISER4_HASH_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_FIBRATION] = {
-+ .offset = offsetof(plugin_set, fibration),
-+ .type = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_SD] = {
-+ .offset = offsetof(plugin_set, sd),
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .groups = (1 << STAT_DATA_ITEM_TYPE)
-+ },
-+ [PSET_DIR_ITEM] = {
-+ .offset = offsetof(plugin_set, dir_item),
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE)
-+ },
-+ [PSET_CIPHER] = {
-+ .offset = offsetof(plugin_set, cipher),
-+ .type = REISER4_CIPHER_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_DIGEST] = {
-+ .offset = offsetof(plugin_set, digest),
-+ .type = REISER4_DIGEST_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_COMPRESSION] = {
-+ .offset = offsetof(plugin_set, compression),
-+ .type = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_COMPRESSION_MODE] = {
-+ .offset = offsetof(plugin_set, compression_mode),
-+ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_CLUSTER] = {
-+ .offset = offsetof(plugin_set, cluster),
-+ .type = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_CREATE] = {
-+ .offset = offsetof(plugin_set, create),
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .groups = (1 << REISER4_REGULAR_FILE)
-+ }
-+};
-+
-+#define DEFINE_PSET_OPS(PREFIX) \
-+ reiser4_plugin_type PREFIX##_member_to_type_unsafe(pset_member memb) \
-+{ \
-+ if (memb > PSET_LAST) \
-+ return REISER4_PLUGIN_TYPES; \
-+ return pset_descr[memb].type; \
-+} \
-+ \
-+int PREFIX##_set_unsafe(plugin_set ** set, pset_member memb, \
-+ reiser4_plugin * plugin) \
-+{ \
-+ assert("nikita-3492", set != NULL); \
-+ assert("nikita-3493", *set != NULL); \
-+ assert("nikita-3494", plugin != NULL); \
-+ assert("nikita-3495", 0 <= memb && memb < PSET_LAST); \
-+ assert("nikita-3496", plugin->h.type_id == pset_descr[memb].type); \
-+ \
-+ if (pset_descr[memb].groups) \
-+ if (!(pset_descr[memb].groups & plugin->h.groups)) \
-+ return -EINVAL; \
-+ \
-+ return plugin_set_field(set, \
-+ (unsigned long)plugin, pset_descr[memb].offset); \
-+} \
-+ \
-+reiser4_plugin *PREFIX##_get(plugin_set * set, pset_member memb) \
-+{ \
-+ assert("nikita-3497", set != NULL); \
-+ assert("nikita-3498", 0 <= memb && memb < PSET_LAST); \
-+ \
-+ return *(reiser4_plugin **) (((char *)set) + pset_descr[memb].offset); \
-+}
-+
-+DEFINE_PSET_OPS(aset);
-+
-+int set_plugin(plugin_set ** set, pset_member memb, reiser4_plugin * plugin) {
-+ return plugin_set_field(set,
-+ (unsigned long)plugin, pset_descr[memb].offset);
-+}
-+
-+/**
-+ * init_plugin_set - create plugin set cache and hash table
-+ *
-+ * Initializes slab cache of plugin_set-s and their hash table. It is part of
-+ * reiser4 module initialization.
-+ */
-+int init_plugin_set(void)
-+{
-+ int result;
-+
-+ result = ps_hash_init(&ps_table, PS_TABLE_SIZE);
-+ if (result == 0) {
-+ plugin_set_slab = kmem_cache_create("plugin_set",
-+ sizeof(plugin_set), 0,
-+ SLAB_HWCACHE_ALIGN,
-+ NULL, NULL);
-+ if (plugin_set_slab == NULL)
-+ result = RETERR(-ENOMEM);
-+ }
-+ return result;
-+}
-+
-+/**
-+ * done_plugin_set - delete plugin_set cache and plugin_set hash table
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_plugin_set(void)
-+{
-+ plugin_set *cur, *next;
-+
-+ for_all_in_htable(&ps_table, ps, cur, next) {
-+ ps_hash_remove(&ps_table, cur);
-+ kmem_cache_free(plugin_set_slab, cur);
-+ }
-+ destroy_reiser4_cache(&plugin_set_slab);
-+ ps_hash_done(&ps_table);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/plugin_set.h linux-2.6.20/fs/reiser4/plugin/plugin_set.h
---- linux-2.6.20.orig/fs/reiser4/plugin/plugin_set.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/plugin_set.h 2007-05-06 14:50:43.855024468 +0400
-@@ -0,0 +1,77 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Reiser4 plugin set definition.
-+ See fs/reiser4/plugin/plugin_set.c for details */
-+
-+#if !defined( __PLUGIN_SET_H__ )
-+#define __PLUGIN_SET_H__
-+
-+#include "../type_safe_hash.h"
-+#include "plugin.h"
-+
-+#include <linux/rcupdate.h>
-+
-+struct plugin_set;
-+typedef struct plugin_set plugin_set;
-+
-+TYPE_SAFE_HASH_DECLARE(ps, plugin_set);
-+
-+struct plugin_set {
-+ unsigned long hashval;
-+ /* plugin of file */
-+ file_plugin *file;
-+ /* plugin of dir */
-+ dir_plugin *dir;
-+ /* perm plugin for this file */
-+ perm_plugin *perm;
-+ /* tail policy plugin. Only meaningful for regular files */
-+ formatting_plugin *formatting;
-+ /* hash plugin. Only meaningful for directories. */
-+ hash_plugin *hash;
-+ /* fibration plugin. Only meaningful for directories. */
-+ fibration_plugin *fibration;
-+ /* plugin of stat-data */
-+ item_plugin *sd;
-+ /* plugin of items a directory is built of */
-+ item_plugin *dir_item;
-+ /* cipher plugin */
-+ cipher_plugin *cipher;
-+ /* digest plugin */
-+ digest_plugin *digest;
-+ /* compression plugin */
-+ compression_plugin *compression;
-+ /* compression mode plugin */
-+ compression_mode_plugin *compression_mode;
-+ /* cluster plugin */
-+ cluster_plugin *cluster;
-+ /* this specifies file plugin of regular children.
-+ only meaningful for directories */
-+ file_plugin *create;
-+ ps_hash_link link;
-+};
-+
-+extern plugin_set *plugin_set_get_empty(void);
-+extern void plugin_set_put(plugin_set * set);
-+
-+extern int init_plugin_set(void);
-+extern void done_plugin_set(void);
-+
-+extern reiser4_plugin *aset_get(plugin_set * set, pset_member memb);
-+extern int set_plugin(plugin_set ** set, pset_member memb,
-+ reiser4_plugin * plugin);
-+extern int aset_set_unsafe(plugin_set ** set, pset_member memb,
-+ reiser4_plugin * plugin);
-+extern reiser4_plugin_type aset_member_to_type_unsafe(pset_member memb);
-+
-+/* __PLUGIN_SET_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/security/Makefile linux-2.6.20/fs/reiser4/plugin/security/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/security/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/security/Makefile 2007-05-06 14:50:43.855024468 +0400
-@@ -0,0 +1,4 @@
-+obj-$(CONFIG_REISER4_FS) += security_plugins.o
-+
-+security_plugins-objs := \
-+ perm.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/security/perm.c linux-2.6.20/fs/reiser4/plugin/security/perm.c
---- linux-2.6.20.orig/fs/reiser4/plugin/security/perm.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/security/perm.c 2007-05-06 14:50:43.859025718 +0400
-@@ -0,0 +1,44 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/*
-+ * this file contains implementation of permission plugins. Currently, only
-+ * RWX_PERM_ID is implemented
-+ */
-+
-+#include "../plugin.h"
-+#include "../plugin_header.h"
-+#include "../../debug.h"
-+
-+perm_plugin perm_plugins[LAST_PERM_ID] = {
-+ [NULL_PERM_ID] = {
-+ .h = {
-+ .type_id = REISER4_PERM_PLUGIN_TYPE,
-+ .id = NULL_PERM_ID,
-+ .pops = NULL,
-+ .label = "null",
-+ .desc = "stub permission plugin",
-+ .linkage = {NULL, NULL}
-+ },
-+ .read_ok = NULL,
-+ .write_ok = NULL,
-+ .lookup_ok = NULL,
-+ .create_ok = NULL,
-+ .link_ok = NULL,
-+ .unlink_ok = NULL,
-+ .delete_ok = NULL,
-+ .mask_ok = NULL,
-+ .setattr_ok = NULL,
-+ .getattr_ok = NULL,
-+ .rename_ok = NULL,
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/security/perm.h linux-2.6.20/fs/reiser4/plugin/security/perm.h
---- linux-2.6.20.orig/fs/reiser4/plugin/security/perm.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/security/perm.h 2007-05-06 14:50:43.859025718 +0400
-@@ -0,0 +1,82 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Perm (short for "permissions") plugins common stuff. */
-+
-+#if !defined( __REISER4_PERM_H__ )
-+#define __REISER4_PERM_H__
-+
-+#include "../../forward.h"
-+#include "../plugin_header.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct file */
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+/* interface for perm plugin.
-+
-+ Perm plugin method can be implemented through:
-+
-+ 1. consulting ->i_mode bits in stat data
-+
-+ 2. obtaining acl from the tree and inspecting it
-+
-+ 3. asking some kernel module or user-level program to authorize access.
-+
-+ This allows for integration with things like capabilities, SELinux-style
-+ secutiry contexts, etc.
-+
-+*/
-+/* NIKITA-FIXME-HANS: define what this is targeted for. It does not seem to be intended for use with sys_reiser4. Explain. */
-+typedef struct perm_plugin {
-+ /* generic plugin fields */
-+ plugin_header h;
-+
-+ /* check permissions for read/write */
-+ int (*read_ok) (struct file *file, const char __user *buf,
-+ size_t size, loff_t *off);
-+ int (*write_ok) (struct file *file, const char __user *buf,
-+ size_t size, loff_t *off);
-+
-+ /* check permissions for lookup */
-+ int (*lookup_ok) (struct inode * parent, struct dentry * dentry);
-+
-+ /* check permissions for create */
-+ int (*create_ok) (struct inode * parent, struct dentry * dentry,
-+ reiser4_object_create_data * data);
-+
-+ /* check permissions for linking @where to @existing */
-+ int (*link_ok) (struct dentry * existing, struct inode * parent,
-+ struct dentry * where);
-+
-+ /* check permissions for unlinking @victim from @parent */
-+ int (*unlink_ok) (struct inode * parent, struct dentry * victim);
-+
-+ /* check permissions for deletion of @object whose last reference is
-+ by @parent */
-+ int (*delete_ok) (struct inode * parent, struct dentry * victim);
-+ int (*mask_ok) (struct inode * inode, int mask);
-+ /* check whether attribute change is acceptable */
-+ int (*setattr_ok) (struct dentry * dentry, struct iattr * attr);
-+
-+ /* check whether stat(2) is allowed */
-+ int (*getattr_ok) (struct vfsmount * mnt UNUSED_ARG,
-+ struct dentry * dentry, struct kstat * stat);
-+ /* check whether rename(2) is allowed */
-+ int (*rename_ok) (struct inode * old_dir, struct dentry * old,
-+ struct inode * new_dir, struct dentry * new);
-+} perm_plugin;
-+
-+typedef enum { NULL_PERM_ID, LAST_PERM_ID } reiser4_perm_id;
-+
-+/* __REISER4_PERM_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/space/bitmap.c linux-2.6.20/fs/reiser4/plugin/space/bitmap.c
---- linux-2.6.20.orig/fs/reiser4/plugin/space/bitmap.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/space/bitmap.c 2007-05-06 14:50:43.859025718 +0400
-@@ -0,0 +1,1585 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../txnmgr.h"
-+#include "../../jnode.h"
-+#include "../../block_alloc.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../plugin.h"
-+#include "space_allocator.h"
-+#include "bitmap.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mutex.h>
-+#include <asm/div64.h>
-+
-+/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap
-+ * blocks
-+
-+ A useful optimization of reiser4 bitmap handling would be dynamic bitmap
-+ blocks loading/unloading which is different from v3.x where all bitmap
-+ blocks are loaded at mount time.
-+
-+ To implement bitmap blocks unloading we need to count bitmap block usage
-+ and detect currently unused blocks allowing them to be unloaded. It is not
-+ a simple task since we allow several threads to modify one bitmap block
-+ simultaneously.
-+
-+ Briefly speaking, the following schema is proposed: we count in special
-+ variable associated with each bitmap block. That is for counting of block
-+ alloc/dealloc operations on that bitmap block. With a deferred block
-+ deallocation feature of reiser4 all those operation will be represented in
-+ atom dirty/deleted lists as jnodes for freshly allocated or deleted
-+ nodes.
-+
-+ So, we increment usage counter for each new node allocated or deleted, and
-+ decrement it at atom commit one time for each node from the dirty/deleted
-+ atom's list. Of course, freshly allocated node deletion and node reusing
-+ from atom deleted (if we do so) list should decrement bitmap usage counter
-+ also.
-+
-+ This schema seems to be working but that reference counting is
-+ not easy to debug. I think we should agree with Hans and do not implement
-+ it in v4.0. Current code implements "on-demand" bitmap blocks loading only.
-+
-+ For simplicity all bitmap nodes (both commit and working bitmap blocks) are
-+ loaded into memory on fs mount time or each bitmap nodes are loaded at the
-+ first access to it, the "dont_load_bitmap" mount option controls whether
-+ bimtap nodes should be loaded at mount time. Dynamic unloading of bitmap
-+ nodes currently is not supported. */
-+
-+#define CHECKSUM_SIZE 4
-+
-+#define BYTES_PER_LONG (sizeof(long))
-+
-+#if BITS_PER_LONG == 64
-+# define LONG_INT_SHIFT (6)
-+#else
-+# define LONG_INT_SHIFT (5)
-+#endif
-+
-+#define LONG_INT_MASK (BITS_PER_LONG - 1UL)
-+
-+typedef unsigned long ulong_t;
-+
-+#define bmap_size(blocksize) ((blocksize) - CHECKSUM_SIZE)
-+#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3)
-+
-+/* Block allocation/deallocation are done through special bitmap objects which
-+ are allocated in an array at fs mount. */
-+struct bitmap_node {
-+ struct mutex mutex; /* long term lock object */
-+
-+ jnode *wjnode; /* j-nodes for WORKING ... */
-+ jnode *cjnode; /* ... and COMMIT bitmap blocks */
-+
-+ bmap_off_t first_zero_bit; /* for skip_busy option implementation */
-+
-+ atomic_t loaded; /* a flag which shows that bnode is loaded
-+ * already */
-+};
-+
-+static inline char *bnode_working_data(struct bitmap_node *bnode)
-+{
-+ char *data;
-+
-+ data = jdata(bnode->wjnode);
-+ assert("zam-429", data != NULL);
-+
-+ return data + CHECKSUM_SIZE;
-+}
-+
-+static inline char *bnode_commit_data(const struct bitmap_node *bnode)
-+{
-+ char *data;
-+
-+ data = jdata(bnode->cjnode);
-+ assert("zam-430", data != NULL);
-+
-+ return data + CHECKSUM_SIZE;
-+}
-+
-+static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode)
-+{
-+ char *data;
-+
-+ data = jdata(bnode->cjnode);
-+ assert("vpf-261", data != NULL);
-+
-+ return le32_to_cpu(get_unaligned((d32 *)data));
-+}
-+
-+static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc)
-+{
-+ char *data;
-+
-+ data = jdata(bnode->cjnode);
-+ assert("vpf-261", data != NULL);
-+
-+ put_unaligned(cpu_to_le32(crc), (d32 *)data);
-+}
-+
-+/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having
-+ * written the code, does this added abstraction still have */
-+/* ANSWER(Zam): No, the abstractions is in the level above (exact place is the
-+ * reiser4_space_allocator structure) */
-+/* ZAM-FIXME-HANS: I don't understand your english in comment above. */
-+/* FIXME-HANS(Zam): I don't understand the questions like "might be a union
-+ * someday?". What they about? If there is a reason to have a union, it should
-+ * be a union, if not, it should not be a union. "..might be someday" means no
-+ * reason. */
-+struct bitmap_allocator_data {
-+ /* an array for bitmap blocks direct access */
-+ struct bitmap_node *bitmap;
-+};
-+
-+#define get_barray(super) \
-+(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic)) -> bitmap)
-+
-+#define get_bnode(super, i) (get_barray(super) + i)
-+
-+/* allocate and initialize jnode with JNODE_BITMAP type */
-+static jnode *bnew(void)
-+{
-+ jnode *jal = jalloc();
-+
-+ if (jal)
-+ jnode_init(jal, current_tree, JNODE_BITMAP);
-+
-+ return jal;
-+}
-+
-+/* this file contains:
-+ - bitmap based implementation of space allocation plugin
-+ - all the helper functions like set bit, find_first_zero_bit, etc */
-+
-+/* Audited by: green(2002.06.12) */
-+static int find_next_zero_bit_in_word(ulong_t word, int start_bit)
-+{
-+ ulong_t mask = 1UL << start_bit;
-+ int i = start_bit;
-+
-+ while ((word & mask) != 0) {
-+ mask <<= 1;
-+ if (++i >= BITS_PER_LONG)
-+ break;
-+ }
-+
-+ return i;
-+}
-+
-+#include <asm/bitops.h>
-+
-+#if BITS_PER_LONG == 64
-+
-+#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3)
-+#define BASE(addr) ((ulong_t*) ((ulong_t)(addr) & ~(BYTES_PER_LONG - 1)))
-+
-+static inline void reiser4_set_bit(int nr, void *addr)
-+{
-+ ext2_set_bit(nr + OFF(addr), BASE(addr));
-+}
-+
-+static inline void reiser4_clear_bit(int nr, void *addr)
-+{
-+ ext2_clear_bit(nr + OFF(addr), BASE(addr));
-+}
-+
-+static inline int reiser4_test_bit(int nr, void *addr)
-+{
-+ return ext2_test_bit(nr + OFF(addr), BASE(addr));
-+}
-+static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset,
-+ int offset)
-+{
-+ int off = OFF(addr);
-+
-+ return ext2_find_next_zero_bit(BASE(addr), maxoffset + off,
-+ offset + off) - off;
-+}
-+
-+#else
-+
-+#define reiser4_set_bit(nr, addr) ext2_set_bit(nr, addr)
-+#define reiser4_clear_bit(nr, addr) ext2_clear_bit(nr, addr)
-+#define reiser4_test_bit(nr, addr) ext2_test_bit(nr, addr)
-+
-+#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \
-+ext2_find_next_zero_bit(addr, maxoffset, offset)
-+#endif
-+
-+/* Search for a set bit in the bit array [@start_offset, @max_offset[, offsets
-+ * are counted from @addr, return the offset of the first bit if it is found,
-+ * @maxoffset otherwise. */
-+static bmap_off_t __reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
-+ bmap_off_t start_offset)
-+{
-+ ulong_t *base = addr;
-+ /* start_offset is in bits, convert it to byte offset within bitmap. */
-+ int word_nr = start_offset >> LONG_INT_SHIFT;
-+ /* bit number within the byte. */
-+ int bit_nr = start_offset & LONG_INT_MASK;
-+ int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT;
-+
-+ assert("zam-387", max_offset != 0);
-+
-+ /* Unaligned @start_offset case. */
-+ if (bit_nr != 0) {
-+ bmap_nr_t nr;
-+
-+ nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr);
-+
-+ if (nr < BITS_PER_LONG)
-+ return (word_nr << LONG_INT_SHIFT) + nr;
-+
-+ ++word_nr;
-+ }
-+
-+ /* Fast scan trough aligned words. */
-+ while (word_nr <= max_word_nr) {
-+ if (base[word_nr] != 0) {
-+ return (word_nr << LONG_INT_SHIFT)
-+ + find_next_zero_bit_in_word(~(base[word_nr]), 0);
-+ }
-+
-+ ++word_nr;
-+ }
-+
-+ return max_offset;
-+}
-+
-+#if BITS_PER_LONG == 64
-+
-+static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
-+ bmap_off_t start_offset)
-+{
-+ bmap_off_t off = OFF(addr);
-+
-+ return __reiser4_find_next_set_bit(BASE(addr), max_offset + off,
-+ start_offset + off) - off;
-+}
-+
-+#else
-+#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \
-+ __reiser4_find_next_set_bit(addr, max_offset, start_offset)
-+#endif
-+
-+/* search for the first set bit in single word. */
-+static int find_last_set_bit_in_word(ulong_t word, int start_bit)
-+{
-+ ulong_t bit_mask;
-+ int nr = start_bit;
-+
-+ assert("zam-965", start_bit < BITS_PER_LONG);
-+ assert("zam-966", start_bit >= 0);
-+
-+ bit_mask = (1UL << nr);
-+
-+ while (bit_mask != 0) {
-+ if (bit_mask & word)
-+ return nr;
-+ bit_mask >>= 1;
-+ nr--;
-+ }
-+ return BITS_PER_LONG;
-+}
-+
-+/* Search bitmap for a set bit in backward direction from the end to the
-+ * beginning of given region
-+ *
-+ * @result: result offset of the last set bit
-+ * @addr: base memory address,
-+ * @low_off: low end of the search region, edge bit included into the region,
-+ * @high_off: high end of the search region, edge bit included into the region,
-+ *
-+ * @return: 0 - set bit was found, -1 otherwise.
-+ */
-+static int
-+reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
-+ bmap_off_t high_off)
-+{
-+ ulong_t *base = addr;
-+ int last_word;
-+ int first_word;
-+ int last_bit;
-+ int nr;
-+
-+ assert("zam-962", high_off >= low_off);
-+
-+ last_word = high_off >> LONG_INT_SHIFT;
-+ last_bit = high_off & LONG_INT_MASK;
-+ first_word = low_off >> LONG_INT_SHIFT;
-+
-+ if (last_bit < BITS_PER_LONG) {
-+ nr = find_last_set_bit_in_word(base[last_word], last_bit);
-+ if (nr < BITS_PER_LONG) {
-+ *result = (last_word << LONG_INT_SHIFT) + nr;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+ while (last_word >= first_word) {
-+ if (base[last_word] != 0x0) {
-+ last_bit =
-+ find_last_set_bit_in_word(base[last_word],
-+ BITS_PER_LONG - 1);
-+ assert("zam-972", last_bit < BITS_PER_LONG);
-+ *result = (last_word << LONG_INT_SHIFT) + last_bit;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+
-+ return -1; /* set bit not found */
-+}
-+
-+/* Search bitmap for a clear bit in backward direction from the end to the
-+ * beginning of given region */
-+static int
-+reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
-+ bmap_off_t high_off)
-+{
-+ ulong_t *base = addr;
-+ int last_word;
-+ int first_word;
-+ int last_bit;
-+ int nr;
-+
-+ last_word = high_off >> LONG_INT_SHIFT;
-+ last_bit = high_off & LONG_INT_MASK;
-+ first_word = low_off >> LONG_INT_SHIFT;
-+
-+ if (last_bit < BITS_PER_LONG) {
-+ nr = find_last_set_bit_in_word(~base[last_word], last_bit);
-+ if (nr < BITS_PER_LONG) {
-+ *result = (last_word << LONG_INT_SHIFT) + nr;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+ while (last_word >= first_word) {
-+ if (base[last_word] != (ulong_t) (-1)) {
-+ *result = (last_word << LONG_INT_SHIFT) +
-+ find_last_set_bit_in_word(~base[last_word],
-+ BITS_PER_LONG - 1);
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+
-+ return -1; /* zero bit not found */
-+}
-+
-+/* Audited by: green(2002.06.12) */
-+static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end)
-+{
-+ int first_byte;
-+ int last_byte;
-+
-+ unsigned char first_byte_mask = 0xFF;
-+ unsigned char last_byte_mask = 0xFF;
-+
-+ assert("zam-410", start < end);
-+
-+ first_byte = start >> 3;
-+ last_byte = (end - 1) >> 3;
-+
-+ if (last_byte > first_byte + 1)
-+ memset(addr + first_byte + 1, 0,
-+ (size_t) (last_byte - first_byte - 1));
-+
-+ first_byte_mask >>= 8 - (start & 0x7);
-+ last_byte_mask <<= ((end - 1) & 0x7) + 1;
-+
-+ if (first_byte == last_byte) {
-+ addr[first_byte] &= (first_byte_mask | last_byte_mask);
-+ } else {
-+ addr[first_byte] &= first_byte_mask;
-+ addr[last_byte] &= last_byte_mask;
-+ }
-+}
-+
-+/* Audited by: green(2002.06.12) */
-+/* ZAM-FIXME-HANS: comment this */
-+static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end)
-+{
-+ int first_byte;
-+ int last_byte;
-+
-+ unsigned char first_byte_mask = 0xFF;
-+ unsigned char last_byte_mask = 0xFF;
-+
-+ assert("zam-386", start < end);
-+
-+ first_byte = start >> 3;
-+ last_byte = (end - 1) >> 3;
-+
-+ if (last_byte > first_byte + 1)
-+ memset(addr + first_byte + 1, 0xFF,
-+ (size_t) (last_byte - first_byte - 1));
-+
-+ first_byte_mask <<= start & 0x7;
-+ last_byte_mask >>= 7 - ((end - 1) & 0x7);
-+
-+ if (first_byte == last_byte) {
-+ addr[first_byte] |= (first_byte_mask & last_byte_mask);
-+ } else {
-+ addr[first_byte] |= first_byte_mask;
-+ addr[last_byte] |= last_byte_mask;
-+ }
-+}
-+
-+#define ADLER_BASE 65521
-+#define ADLER_NMAX 5552
-+
-+/* Calculates the adler32 checksum for the data pointed by `data` of the
-+ length `len`. This function was originally taken from zlib, version 1.1.3,
-+ July 9th, 1998.
-+
-+ Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
-+
-+ This software is provided 'as-is', without any express or implied
-+ warranty. In no event will the authors be held liable for any damages
-+ arising from the use of this software.
-+
-+ Permission is granted to anyone to use this software for any purpose,
-+ including commercial applications, and to alter it and redistribute it
-+ freely, subject to the following restrictions:
-+
-+ 1. The origin of this software must not be misrepresented; you must not
-+ claim that you wrote the original software. If you use this software
-+ in a product, an acknowledgment in the product documentation would be
-+ appreciated but is not required.
-+ 2. Altered source versions must be plainly marked as such, and must not be
-+ misrepresented as being the original software.
-+ 3. This notice may not be removed or altered from any source distribution.
-+
-+ Jean-loup Gailly Mark Adler
-+ jloup@gzip.org madler@alumni.caltech.edu
-+
-+ The above comment applies only to the reiser4_adler32 function.
-+*/
-+
-+__u32 reiser4_adler32(char *data, __u32 len)
-+{
-+ unsigned char *t = data;
-+ __u32 s1 = 1;
-+ __u32 s2 = 0;
-+ int k;
-+
-+ while (len > 0) {
-+ k = len < ADLER_NMAX ? len : ADLER_NMAX;
-+ len -= k;
-+
-+ while (k--) {
-+ s1 += *t++;
-+ s2 += s1;
-+ }
-+
-+ s1 %= ADLER_BASE;
-+ s2 %= ADLER_BASE;
-+ }
-+ return (s2 << 16) | s1;
-+}
-+
-+#define sb_by_bnode(bnode) \
-+ ((struct super_block *)jnode_get_tree(bnode->wjnode)->super)
-+
-+static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size)
-+{
-+ return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size));
-+}
-+
-+static int
-+bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size)
-+{
-+ if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) {
-+ bmap_nr_t bmap;
-+
-+ bmap = bnode - get_bnode(sb_by_bnode(bnode), 0);
-+
-+ warning("vpf-263",
-+ "Checksum for the bitmap block %llu is incorrect",
-+ bmap);
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ return 0;
-+}
-+
-+#define REISER4_CHECK_BMAP_CRC (0)
-+
-+#if REISER4_CHECK_BMAP_CRC
-+static int bnode_check_crc(const struct bitmap_node *bnode)
-+{
-+ return bnode_check_adler32(bnode,
-+ bmap_size(sb_by_bnode(bnode)->s_blocksize));
-+}
-+
-+/* REISER4_CHECK_BMAP_CRC */
-+#else
-+
-+#define bnode_check_crc(bnode) (0)
-+
-+/* REISER4_CHECK_BMAP_CRC */
-+#endif
-+
-+/* Recalculates the adler32 checksum for only 1 byte change.
-+ adler - previous adler checksum
-+ old_data, data - old, new byte values.
-+ tail == (chunk - offset) : length, checksum was calculated for, - offset of
-+ the changed byte within this chunk.
-+ This function can be used for checksum calculation optimisation.
-+*/
-+
-+static __u32
-+adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data,
-+ __u32 tail)
-+{
-+ __u32 delta = data - old_data + 2 * ADLER_BASE;
-+ __u32 s1 = adler & 0xffff;
-+ __u32 s2 = (adler >> 16) & 0xffff;
-+
-+ s1 = (delta + s1) % ADLER_BASE;
-+ s2 = (delta * tail + s2) % ADLER_BASE;
-+
-+ return (s2 << 16) | s1;
-+}
-+
-+#define LIMIT(val, boundary) ((val) > (boundary) ? (boundary) : (val))
-+
-+/**
-+ * get_nr_bitmap - calculate number of bitmap blocks
-+ * @super: super block with initialized blocksize and block count
-+ *
-+ * Calculates number of bitmap blocks of a filesystem which uses bitmaps to
-+ * maintain free disk space. It assumes that each bitmap addresses the same
-+ * number of blocks which is calculated by bmap_block_count macro defined in
-+ * above. Number of blocks in the filesystem has to be initialized in reiser4
-+ * private data of super block already so that it can be obtained via
-+ * reiser4_block_count(). Unfortunately, number of blocks addressed by a bitmap
-+ * is not power of 2 because 4 bytes are used for checksum. Therefore, we have
-+ * to use special function to divide and modulo 64bits filesystem block
-+ * counters.
-+ *
-+ * Example: suppose filesystem have 32768 blocks. Blocksize is 4096. Each bitmap
-+ * block addresses (4096 - 4) * 8 = 32736 blocks. Number of bitmaps to address
-+ * all 32768 blocks is calculated as (32768 - 1) / 32736 + 1 = 2.
-+ */
-+static bmap_nr_t get_nr_bmap(const struct super_block *super)
-+{
-+ u64 quotient;
-+
-+ assert("zam-393", reiser4_block_count(super) != 0);
-+
-+ quotient = reiser4_block_count(super) - 1;
-+ do_div(quotient, bmap_bit_count(super->s_blocksize));
-+ return quotient + 1;
-+}
-+
-+/**
-+ * parse_blocknr - calculate bitmap number and offset in it by block number
-+ * @block: pointer to block number to calculate location in bitmap of
-+ * @bmap: pointer where to store bitmap block number
-+ * @offset: pointer where to store offset within bitmap block
-+ *
-+ * Calculates location of bit which is responsible for allocation/freeing of
-+ * block @*block. That location is represented by bitmap block number and offset
-+ * within that bitmap block.
-+ */
-+static void
-+parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap,
-+ bmap_off_t *offset)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ u64 quotient = *block;
-+
-+ *offset = do_div(quotient, bmap_bit_count(super->s_blocksize));
-+ *bmap = quotient;
-+
-+ assert("zam-433", *bmap < get_nr_bmap(super));
-+ assert("", *offset < bmap_bit_count(super->s_blocksize));
-+}
-+
-+#if REISER4_DEBUG
-+/* Audited by: green(2002.06.12) */
-+static void
-+check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+
-+ assert("zam-436", sb != NULL);
-+
-+ assert("zam-455", start != NULL);
-+ assert("zam-437", *start != 0);
-+ assert("zam-541", !reiser4_blocknr_is_fake(start));
-+ assert("zam-441", *start < reiser4_block_count(sb));
-+
-+ if (len != NULL) {
-+ assert("zam-438", *len != 0);
-+ assert("zam-442", *start + *len <= reiser4_block_count(sb));
-+ }
-+}
-+
-+static void check_bnode_loaded(const struct bitmap_node *bnode)
-+{
-+ assert("zam-485", bnode != NULL);
-+ assert("zam-483", jnode_page(bnode->wjnode) != NULL);
-+ assert("zam-484", jnode_page(bnode->cjnode) != NULL);
-+ assert("nikita-2820", jnode_is_loaded(bnode->wjnode));
-+ assert("nikita-2821", jnode_is_loaded(bnode->cjnode));
-+}
-+
-+#else
-+
-+# define check_block_range(start, len) do { /* nothing */} while(0)
-+# define check_bnode_loaded(bnode) do { /* nothing */} while(0)
-+
-+#endif
-+
-+/* modify bnode->first_zero_bit (if we free bits before); bnode should be
-+ spin-locked */
-+static inline void
-+adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset)
-+{
-+ if (offset < bnode->first_zero_bit)
-+ bnode->first_zero_bit = offset;
-+}
-+
-+/* return a physical disk address for logical bitmap number @bmap */
-+/* FIXME-VS: this is somehow related to disk layout? */
-+/* ZAM-FIXME-HANS: your answer is? Use not more than one function dereference
-+ * per block allocation so that performance is not affected. Probably this
-+ * whole file should be considered part of the disk layout plugin, and other
-+ * disk layouts can use other defines and efficiency will not be significantly
-+ * affected. */
-+
-+#define REISER4_FIRST_BITMAP_BLOCK \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 2)
-+
-+/* Audited by: green(2002.06.12) */
-+static void
-+get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap,
-+ reiser4_block_nr * bnr)
-+{
-+
-+ assert("zam-390", bmap < get_nr_bmap(super));
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff))
-+ /* Check if the diskmap have this already, first. */
-+ if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0)
-+ return; /* Found it in diskmap */
-+#endif
-+ /* FIXME_ZAM: before discussing of disk layouts and disk format
-+ plugins I implement bitmap location scheme which is close to scheme
-+ used in reiser 3.6 */
-+ if (bmap == 0) {
-+ *bnr = REISER4_FIRST_BITMAP_BLOCK;
-+ } else {
-+ *bnr = bmap * bmap_bit_count(super->s_blocksize);
-+ }
-+}
-+
-+/* construct a fake block number for shadow bitmap (WORKING BITMAP) block */
-+/* Audited by: green(2002.06.12) */
-+static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr)
-+{
-+ *bnr =
-+ (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) |
-+ REISER4_BITMAP_BLOCKS_STATUS_VALUE);
-+}
-+
-+/* bnode structure initialization */
-+static void
-+init_bnode(struct bitmap_node *bnode,
-+ struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG)
-+{
-+ memset(bnode, 0, sizeof(struct bitmap_node));
-+
-+ mutex_init(&bnode->mutex);
-+ atomic_set(&bnode->loaded, 0);
-+}
-+
-+static void release(jnode * node)
-+{
-+ jrelse(node);
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ jput(node);
-+}
-+
-+/* This function is for internal bitmap.c use because it assumes that jnode is
-+ in under full control of this thread */
-+static void done_bnode(struct bitmap_node *bnode)
-+{
-+ if (bnode) {
-+ atomic_set(&bnode->loaded, 0);
-+ if (bnode->wjnode != NULL)
-+ release(bnode->wjnode);
-+ if (bnode->cjnode != NULL)
-+ release(bnode->cjnode);
-+ bnode->wjnode = bnode->cjnode = NULL;
-+ }
-+}
-+
-+/* ZAM-FIXME-HANS: comment this. Called only by load_and_lock_bnode()*/
-+static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret,
-+ jnode **wjnode_ret)
-+{
-+ struct super_block *super;
-+ jnode *cjnode;
-+ jnode *wjnode;
-+ bmap_nr_t bmap;
-+ int ret;
-+
-+ super = reiser4_get_current_sb();
-+
-+ *wjnode_ret = wjnode = bnew();
-+ if (wjnode == NULL) {
-+ *cjnode_ret = NULL;
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ *cjnode_ret = cjnode = bnew();
-+ if (cjnode == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ bmap = bnode - get_bnode(super, 0);
-+
-+ get_working_bitmap_blocknr(bmap, &wjnode->blocknr);
-+ get_bitmap_blocknr(super, bmap, &cjnode->blocknr);
-+
-+ jref(cjnode);
-+ jref(wjnode);
-+
-+ /* load commit bitmap */
-+ ret = jload_gfp(cjnode, GFP_NOFS, 1);
-+
-+ if (ret)
-+ goto error;
-+
-+ /* allocate memory for working bitmap block. Note that for
-+ * bitmaps jinit_new() doesn't actually modifies node content,
-+ * so parallel calls to this are ok. */
-+ ret = jinit_new(wjnode, GFP_NOFS);
-+
-+ if (ret != 0) {
-+ jrelse(cjnode);
-+ goto error;
-+ }
-+
-+ return 0;
-+
-+ error:
-+ jput(cjnode);
-+ jput(wjnode);
-+ *wjnode_ret = *cjnode_ret = NULL;
-+ return ret;
-+
-+}
-+
-+/* Check the bnode data on read. */
-+static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize)
-+{
-+ void *data;
-+ int ret;
-+
-+ /* Check CRC */
-+ ret = bnode_check_adler32(bnode, blksize);
-+
-+ if (ret) {
-+ return ret;
-+ }
-+
-+ data = jdata(bnode->cjnode) + CHECKSUM_SIZE;
-+
-+ /* Check the very first bit -- it must be busy. */
-+ if (!reiser4_test_bit(0, data)) {
-+ warning("vpf-1362", "The allocator block %llu is not marked "
-+ "as used.", (unsigned long long)bnode->cjnode->blocknr);
-+
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* load bitmap blocks "on-demand" */
-+static int load_and_lock_bnode(struct bitmap_node *bnode)
-+{
-+ int ret;
-+
-+ jnode *cjnode;
-+ jnode *wjnode;
-+
-+ assert("nikita-3040", reiser4_schedulable());
-+
-+/* ZAM-FIXME-HANS: since bitmaps are never unloaded, this does not
-+ * need to be atomic, right? Just leave a comment that if bitmaps were
-+ * unloadable, this would need to be atomic. */
-+ if (atomic_read(&bnode->loaded)) {
-+ /* bitmap is already loaded, nothing to do */
-+ check_bnode_loaded(bnode);
-+ mutex_lock(&bnode->mutex);
-+ assert("nikita-2827", atomic_read(&bnode->loaded));
-+ return 0;
-+ }
-+
-+ ret = prepare_bnode(bnode, &cjnode, &wjnode);
-+ if (ret == 0) {
-+ mutex_lock(&bnode->mutex);
-+
-+ if (!atomic_read(&bnode->loaded)) {
-+ assert("nikita-2822", cjnode != NULL);
-+ assert("nikita-2823", wjnode != NULL);
-+ assert("nikita-2824", jnode_is_loaded(cjnode));
-+ assert("nikita-2825", jnode_is_loaded(wjnode));
-+
-+ bnode->wjnode = wjnode;
-+ bnode->cjnode = cjnode;
-+
-+ ret = check_struct_bnode(bnode, current_blocksize);
-+ if (!ret) {
-+ cjnode = wjnode = NULL;
-+ atomic_set(&bnode->loaded, 1);
-+ /* working bitmap is initialized by on-disk
-+ * commit bitmap. This should be performed
-+ * under mutex. */
-+ memcpy(bnode_working_data(bnode),
-+ bnode_commit_data(bnode),
-+ bmap_size(current_blocksize));
-+ } else
-+ mutex_unlock(&bnode->mutex);
-+ } else
-+ /* race: someone already loaded bitmap while we were
-+ * busy initializing data. */
-+ check_bnode_loaded(bnode);
-+ }
-+
-+ if (wjnode != NULL) {
-+ release(wjnode);
-+ bnode->wjnode = NULL;
-+ }
-+ if (cjnode != NULL) {
-+ release(cjnode);
-+ bnode->cjnode = NULL;
-+ }
-+
-+ return ret;
-+}
-+
-+static void release_and_unlock_bnode(struct bitmap_node *bnode)
-+{
-+ check_bnode_loaded(bnode);
-+ mutex_unlock(&bnode->mutex);
-+}
-+
-+/* This function does all block allocation work but only for one bitmap
-+ block.*/
-+/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap
-+ block responsibility zone boundaries. This had no sense in v3.6 but may
-+ have it in v4.x */
-+/* ZAM-FIXME-HANS: do you mean search one bitmap block forward? */
-+static int
-+search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset,
-+ bmap_off_t max_offset, int min_len, int max_len)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ struct bitmap_node *bnode = get_bnode(super, bmap);
-+
-+ char *data;
-+
-+ bmap_off_t search_end;
-+ bmap_off_t start;
-+ bmap_off_t end;
-+
-+ int set_first_zero_bit = 0;
-+
-+ int ret;
-+
-+ assert("zam-364", min_len > 0);
-+ assert("zam-365", max_len >= min_len);
-+ assert("zam-366", *offset <= max_offset);
-+
-+ ret = load_and_lock_bnode(bnode);
-+
-+ if (ret)
-+ return ret;
-+
-+ data = bnode_working_data(bnode);
-+
-+ start = *offset;
-+
-+ if (bnode->first_zero_bit >= start) {
-+ start = bnode->first_zero_bit;
-+ set_first_zero_bit = 1;
-+ }
-+
-+ while (start + min_len < max_offset) {
-+
-+ start =
-+ reiser4_find_next_zero_bit((long *)data, max_offset, start);
-+ if (set_first_zero_bit) {
-+ bnode->first_zero_bit = start;
-+ set_first_zero_bit = 0;
-+ }
-+ if (start >= max_offset)
-+ break;
-+
-+ search_end = LIMIT(start + max_len, max_offset);
-+ end =
-+ reiser4_find_next_set_bit((long *)data, search_end, start);
-+ if (end >= start + min_len) {
-+ /* we can't trust find_next_set_bit result if set bit
-+ was not fount, result may be bigger than
-+ max_offset */
-+ if (end > search_end)
-+ end = search_end;
-+
-+ ret = end - start;
-+ *offset = start;
-+
-+ reiser4_set_bits(data, start, end);
-+
-+ /* FIXME: we may advance first_zero_bit if [start,
-+ end] region overlaps the first_zero_bit point */
-+
-+ break;
-+ }
-+
-+ start = end + 1;
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+
-+ return ret;
-+}
-+
-+static int
-+search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset,
-+ bmap_off_t end_offset, int min_len, int max_len)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ struct bitmap_node *bnode = get_bnode(super, bmap);
-+ char *data;
-+ bmap_off_t start;
-+ int ret;
-+
-+ assert("zam-958", min_len > 0);
-+ assert("zam-959", max_len >= min_len);
-+ assert("zam-960", *start_offset >= end_offset);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ if (ret)
-+ return ret;
-+
-+ data = bnode_working_data(bnode);
-+ start = *start_offset;
-+
-+ while (1) {
-+ bmap_off_t end, search_end;
-+
-+ /* Find the beginning of the zero filled region */
-+ if (reiser4_find_last_zero_bit(&start, data, end_offset, start))
-+ break;
-+ /* Is there more than `min_len' bits from `start' to
-+ * `end_offset'? */
-+ if (start < end_offset + min_len - 1)
-+ break;
-+
-+ /* Do not search to `end_offset' if we need to find less than
-+ * `max_len' zero bits. */
-+ if (end_offset + max_len - 1 < start)
-+ search_end = start - max_len + 1;
-+ else
-+ search_end = end_offset;
-+
-+ if (reiser4_find_last_set_bit(&end, data, search_end, start))
-+ end = search_end;
-+ else
-+ end++;
-+
-+ if (end + min_len <= start + 1) {
-+ if (end < search_end)
-+ end = search_end;
-+ ret = start - end + 1;
-+ *start_offset = end; /* `end' is lowest offset */
-+ assert("zam-987",
-+ reiser4_find_next_set_bit(data, start + 1,
-+ end) >= start + 1);
-+ reiser4_set_bits(data, end, start + 1);
-+ break;
-+ }
-+
-+ if (end <= end_offset)
-+ /* left search boundary reached. */
-+ break;
-+ start = end - 1;
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+ return ret;
-+}
-+
-+/* allocate contiguous range of blocks in bitmap */
-+static int bitmap_alloc_forward(reiser4_block_nr * start,
-+ const reiser4_block_nr * end, int min_len,
-+ int max_len)
-+{
-+ bmap_nr_t bmap, end_bmap;
-+ bmap_off_t offset, end_offset;
-+ int len;
-+
-+ reiser4_block_nr tmp;
-+
-+ struct super_block *super = get_current_context()->super;
-+ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
-+
-+ parse_blocknr(start, &bmap, &offset);
-+
-+ tmp = *end - 1;
-+ parse_blocknr(&tmp, &end_bmap, &end_offset);
-+ ++end_offset;
-+
-+ assert("zam-358", end_bmap >= bmap);
-+ assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset));
-+
-+ for (; bmap < end_bmap; bmap++, offset = 0) {
-+ len =
-+ search_one_bitmap_forward(bmap, &offset, max_offset,
-+ min_len, max_len);
-+ if (len != 0)
-+ goto out;
-+ }
-+
-+ len =
-+ search_one_bitmap_forward(bmap, &offset, end_offset, min_len,
-+ max_len);
-+ out:
-+ *start = bmap * max_offset + offset;
-+ return len;
-+}
-+
-+/* allocate contiguous range of blocks in bitmap (from @start to @end in
-+ * backward direction) */
-+static int bitmap_alloc_backward(reiser4_block_nr * start,
-+ const reiser4_block_nr * end, int min_len,
-+ int max_len)
-+{
-+ bmap_nr_t bmap, end_bmap;
-+ bmap_off_t offset, end_offset;
-+ int len;
-+ struct super_block *super = get_current_context()->super;
-+ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
-+
-+ parse_blocknr(start, &bmap, &offset);
-+ parse_blocknr(end, &end_bmap, &end_offset);
-+
-+ assert("zam-961", end_bmap <= bmap);
-+ assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset));
-+
-+ for (; bmap > end_bmap; bmap--, offset = max_offset - 1) {
-+ len =
-+ search_one_bitmap_backward(bmap, &offset, 0, min_len,
-+ max_len);
-+ if (len != 0)
-+ goto out;
-+ }
-+
-+ len =
-+ search_one_bitmap_backward(bmap, &offset, end_offset, min_len,
-+ max_len);
-+ out:
-+ *start = bmap * max_offset + offset;
-+ return len;
-+}
-+
-+/* plugin->u.space_allocator.alloc_blocks() */
-+static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed,
-+ reiser4_block_nr *start, reiser4_block_nr *len)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ int actual_len;
-+
-+ reiser4_block_nr search_start;
-+ reiser4_block_nr search_end;
-+
-+ assert("zam-398", super != NULL);
-+ assert("zam-412", hint != NULL);
-+ assert("zam-397", hint->blk <= reiser4_block_count(super));
-+
-+ if (hint->max_dist == 0)
-+ search_end = reiser4_block_count(super);
-+ else
-+ search_end =
-+ LIMIT(hint->blk + hint->max_dist,
-+ reiser4_block_count(super));
-+
-+ /* We use @hint -> blk as a search start and search from it to the end
-+ of the disk or in given region if @hint -> max_dist is not zero */
-+ search_start = hint->blk;
-+
-+ actual_len =
-+ bitmap_alloc_forward(&search_start, &search_end, 1, needed);
-+
-+ /* There is only one bitmap search if max_dist was specified or first
-+ pass was from the beginning of the bitmap. We also do one pass for
-+ scanning bitmap in backward direction. */
-+ if (!(actual_len != 0 || hint->max_dist != 0 || search_start == 0)) {
-+ /* next step is a scanning from 0 to search_start */
-+ search_end = search_start;
-+ search_start = 0;
-+ actual_len =
-+ bitmap_alloc_forward(&search_start, &search_end, 1, needed);
-+ }
-+ if (actual_len == 0)
-+ return RETERR(-ENOSPC);
-+ if (actual_len < 0)
-+ return RETERR(actual_len);
-+ *len = actual_len;
-+ *start = search_start;
-+ return 0;
-+}
-+
-+static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len)
-+{
-+ reiser4_block_nr search_start;
-+ reiser4_block_nr search_end;
-+ int actual_len;
-+
-+ ON_DEBUG(struct super_block *super = reiser4_get_current_sb());
-+
-+ assert("zam-969", super != NULL);
-+ assert("zam-970", hint != NULL);
-+ assert("zam-971", hint->blk <= reiser4_block_count(super));
-+
-+ search_start = hint->blk;
-+ if (hint->max_dist == 0 || search_start <= hint->max_dist)
-+ search_end = 0;
-+ else
-+ search_end = search_start - hint->max_dist;
-+
-+ actual_len =
-+ bitmap_alloc_backward(&search_start, &search_end, 1, needed);
-+ if (actual_len == 0)
-+ return RETERR(-ENOSPC);
-+ if (actual_len < 0)
-+ return RETERR(actual_len);
-+ *len = actual_len;
-+ *start = search_start;
-+ return 0;
-+}
-+
-+/* plugin->u.space_allocator.alloc_blocks() */
-+int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator,
-+ reiser4_blocknr_hint * hint, int needed,
-+ reiser4_block_nr * start, reiser4_block_nr * len)
-+{
-+ if (hint->backward)
-+ return alloc_blocks_backward(hint, needed, start, len);
-+ return alloc_blocks_forward(hint, needed, start, len);
-+}
-+
-+/* plugin->u.space_allocator.dealloc_blocks(). */
-+/* It just frees blocks in WORKING BITMAP. Usually formatted an unformatted
-+ nodes deletion is deferred until transaction commit. However, deallocation
-+ of temporary objects like wandered blocks and transaction commit records
-+ requires immediate node deletion from WORKING BITMAP.*/
-+void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
-+ reiser4_block_nr start, reiser4_block_nr len)
-+{
-+ struct super_block *super = reiser4_get_current_sb();
-+
-+ bmap_nr_t bmap;
-+ bmap_off_t offset;
-+
-+ struct bitmap_node *bnode;
-+ int ret;
-+
-+ assert("zam-468", len != 0);
-+ check_block_range(&start, &len);
-+
-+ parse_blocknr(&start, &bmap, &offset);
-+
-+ assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize));
-+
-+ bnode = get_bnode(super, bmap);
-+
-+ assert("zam-470", bnode != NULL);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ assert("zam-481", ret == 0);
-+
-+ reiser4_clear_bits(bnode_working_data(bnode), offset,
-+ (bmap_off_t) (offset + len));
-+
-+ adjust_first_zero_bit(bnode, offset);
-+
-+ release_and_unlock_bnode(bnode);
-+}
-+
-+/* plugin->u.space_allocator.check_blocks(). */
-+void reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len, int desired)
-+{
-+#if REISER4_DEBUG
-+ struct super_block *super = reiser4_get_current_sb();
-+
-+ bmap_nr_t bmap;
-+ bmap_off_t start_offset;
-+ bmap_off_t end_offset;
-+
-+ struct bitmap_node *bnode;
-+ int ret;
-+
-+ assert("zam-622", len != NULL);
-+ check_block_range(start, len);
-+ parse_blocknr(start, &bmap, &start_offset);
-+
-+ end_offset = start_offset + *len;
-+ assert("nikita-2214", end_offset <= bmap_bit_count(super->s_blocksize));
-+
-+ bnode = get_bnode(super, bmap);
-+
-+ assert("nikita-2215", bnode != NULL);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ assert("zam-626", ret == 0);
-+
-+ assert("nikita-2216", jnode_is_loaded(bnode->wjnode));
-+
-+ if (desired) {
-+ assert("zam-623",
-+ reiser4_find_next_zero_bit(bnode_working_data(bnode),
-+ end_offset, start_offset)
-+ >= end_offset);
-+ } else {
-+ assert("zam-624",
-+ reiser4_find_next_set_bit(bnode_working_data(bnode),
-+ end_offset, start_offset)
-+ >= end_offset);
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+#endif
-+}
-+
-+/* conditional insertion of @node into atom's overwrite set if it was not there */
-+static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node)
-+{
-+ assert("zam-546", atom != NULL);
-+ assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT);
-+ assert("zam-548", node != NULL);
-+
-+ spin_lock_atom(atom);
-+ spin_lock_jnode(node);
-+
-+ if (node->atom == NULL) {
-+ JF_SET(node, JNODE_OVRWR);
-+ insert_into_atom_ovrwr_list(atom, node);
-+ } else {
-+ assert("zam-549", node->atom == atom);
-+ }
-+
-+ spin_unlock_jnode(node);
-+ spin_unlock_atom(atom);
-+}
-+
-+/* an actor which applies delete set to COMMIT bitmap pages and link modified
-+ pages in a single-linked list */
-+static int
-+apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start,
-+ const reiser4_block_nr * len, void *data)
-+{
-+
-+ bmap_nr_t bmap;
-+ bmap_off_t offset;
-+ int ret;
-+
-+ long long *blocks_freed_p = data;
-+
-+ struct bitmap_node *bnode;
-+
-+ struct super_block *sb = reiser4_get_current_sb();
-+
-+ check_block_range(start, len);
-+
-+ parse_blocknr(start, &bmap, &offset);
-+
-+ /* FIXME-ZAM: we assume that all block ranges are allocated by this
-+ bitmap-based allocator and each block range can't go over a zone of
-+ responsibility of one bitmap block; same assumption is used in
-+ other journal hooks in bitmap code. */
-+ bnode = get_bnode(sb, bmap);
-+ assert("zam-448", bnode != NULL);
-+
-+ /* it is safe to unlock atom with is in ASTAGE_PRE_COMMIT */
-+ assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT);
-+ ret = load_and_lock_bnode(bnode);
-+ if (ret)
-+ return ret;
-+
-+ /* put bnode into atom's overwrite set */
-+ cond_add_to_overwrite_set(atom, bnode->cjnode);
-+
-+ data = bnode_commit_data(bnode);
-+
-+ ret = bnode_check_crc(bnode);
-+ if (ret != 0)
-+ return ret;
-+
-+ if (len != NULL) {
-+ /* FIXME-ZAM: a check that all bits are set should be there */
-+ assert("zam-443",
-+ offset + *len <= bmap_bit_count(sb->s_blocksize));
-+ reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len));
-+
-+ (*blocks_freed_p) += *len;
-+ } else {
-+ reiser4_clear_bit(offset, data);
-+ (*blocks_freed_p)++;
-+ }
-+
-+ bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize));
-+
-+ release_and_unlock_bnode(bnode);
-+
-+ return 0;
-+}
-+
-+/* plugin->u.space_allocator.pre_commit_hook(). */
-+/* It just applies transaction changes to fs-wide COMMIT BITMAP, hoping the
-+ rest is done by transaction manager (allocate wandered locations for COMMIT
-+ BITMAP blocks, copy COMMIT BITMAP blocks data). */
-+/* Only one instance of this function can be running at one given time, because
-+ only one transaction can be committed a time, therefore it is safe to access
-+ some global variables without any locking */
-+
-+int reiser4_pre_commit_hook_bitmap(void)
-+{
-+ struct super_block *super = reiser4_get_current_sb();
-+ txn_atom *atom;
-+
-+ long long blocks_freed = 0;
-+
-+ atom = get_current_atom_locked();
-+ assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT);
-+ spin_unlock_atom(atom);
-+
-+ { /* scan atom's captured list and find all freshly allocated nodes,
-+ * mark corresponded bits in COMMIT BITMAP as used */
-+ struct list_head *head = ATOM_CLEAN_LIST(atom);
-+ jnode *node = list_entry(head->next, jnode, capture_link);
-+
-+ while (head != &node->capture_link) {
-+ /* we detect freshly allocated jnodes */
-+ if (JF_ISSET(node, JNODE_RELOC)) {
-+ int ret;
-+ bmap_nr_t bmap;
-+
-+ bmap_off_t offset;
-+ bmap_off_t index;
-+ struct bitmap_node *bn;
-+ __u32 size = bmap_size(super->s_blocksize);
-+ __u32 crc;
-+ char byte;
-+
-+ assert("zam-559", !JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-460",
-+ !reiser4_blocknr_is_fake(&node->blocknr));
-+
-+ parse_blocknr(&node->blocknr, &bmap, &offset);
-+ bn = get_bnode(super, bmap);
-+
-+ index = offset >> 3;
-+ assert("vpf-276", index < size);
-+
-+ ret = bnode_check_crc(bnode);
-+ if (ret != 0)
-+ return ret;
-+
-+ check_bnode_loaded(bn);
-+ load_and_lock_bnode(bn);
-+
-+ byte = *(bnode_commit_data(bn) + index);
-+ reiser4_set_bit(offset, bnode_commit_data(bn));
-+
-+ crc = adler32_recalc(bnode_commit_crc(bn), byte,
-+ *(bnode_commit_data(bn) +
-+ index),
-+ size - index),
-+ bnode_set_commit_crc(bn, crc);
-+
-+ release_and_unlock_bnode(bn);
-+
-+ ret = bnode_check_crc(bn);
-+ if (ret != 0)
-+ return ret;
-+
-+ /* working of this depends on how it inserts
-+ new j-node into clean list, because we are
-+ scanning the same list now. It is OK, if
-+ insertion is done to the list front */
-+ cond_add_to_overwrite_set(atom, bn->cjnode);
-+ }
-+
-+ node = list_entry(node->capture_link.next, jnode, capture_link);
-+ }
-+ }
-+
-+ blocknr_set_iterator(atom, &atom->delete_set, apply_dset_to_commit_bmap,
-+ &blocks_freed, 0);
-+
-+ blocks_freed -= atom->nr_blocks_allocated;
-+
-+ {
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ sbinfo->blocks_free_committed += blocks_freed;
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.space_allocator.init_allocator
-+ constructor of reiser4_space_allocator object. It is called on fs mount */
-+int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator,
-+ struct super_block *super, void *arg)
-+{
-+ struct bitmap_allocator_data *data = NULL;
-+ bmap_nr_t bitmap_blocks_nr;
-+ bmap_nr_t i;
-+
-+ assert("nikita-3039", reiser4_schedulable());
-+
-+ /* getting memory for bitmap allocator private data holder */
-+ data =
-+ kmalloc(sizeof(struct bitmap_allocator_data),
-+ reiser4_ctx_gfp_mask_get());
-+
-+ if (data == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /* allocation and initialization for the array of bnodes */
-+ bitmap_blocks_nr = get_nr_bmap(super);
-+
-+ /* FIXME-ZAM: it is not clear what to do with huge number of bitmaps
-+ which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17,
-+ may I never meet someone who still uses the ia32 architecture when
-+ storage devices of that size enter the market, and wants to use ia32
-+ with that storage device, much less reiser4. ;-) -Hans). Kmalloc is not possible and,
-+ probably, another dynamic data structure should replace a static
-+ array of bnodes. */
-+ /*data->bitmap = reiser4_kmalloc((size_t) (sizeof (struct bitmap_node) * bitmap_blocks_nr), GFP_KERNEL); */
-+ data->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) * bitmap_blocks_nr);
-+ if (data->bitmap == NULL) {
-+ kfree(data);
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ for (i = 0; i < bitmap_blocks_nr; i++)
-+ init_bnode(data->bitmap + i, super, i);
-+
-+ allocator->u.generic = data;
-+
-+#if REISER4_DEBUG
-+ get_super_private(super)->min_blocks_used += bitmap_blocks_nr;
-+#endif
-+
-+ /* Load all bitmap blocks at mount time. */
-+ if (!test_bit
-+ (REISER4_DONT_LOAD_BITMAP, &get_super_private(super)->fs_flags)) {
-+ __u64 start_time, elapsed_time;
-+ struct bitmap_node *bnode;
-+ int ret;
-+
-+ if (REISER4_DEBUG)
-+ printk(KERN_INFO "loading reiser4 bitmap...");
-+ start_time = jiffies;
-+
-+ for (i = 0; i < bitmap_blocks_nr; i++) {
-+ bnode = data->bitmap + i;
-+ ret = load_and_lock_bnode(bnode);
-+ if (ret) {
-+ reiser4_destroy_allocator_bitmap(allocator,
-+ super);
-+ return ret;
-+ }
-+ release_and_unlock_bnode(bnode);
-+ }
-+
-+ elapsed_time = jiffies - start_time;
-+ if (REISER4_DEBUG)
-+ printk("...done (%llu jiffies)\n",
-+ (unsigned long long)elapsed_time);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.space_allocator.destroy_allocator
-+ destructor. It is called on fs unmount */
-+int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator,
-+ struct super_block *super)
-+{
-+ bmap_nr_t bitmap_blocks_nr;
-+ bmap_nr_t i;
-+
-+ struct bitmap_allocator_data *data = allocator->u.generic;
-+
-+ assert("zam-414", data != NULL);
-+ assert("zam-376", data->bitmap != NULL);
-+
-+ bitmap_blocks_nr = get_nr_bmap(super);
-+
-+ for (i = 0; i < bitmap_blocks_nr; i++) {
-+ struct bitmap_node *bnode = data->bitmap + i;
-+
-+ mutex_lock(&bnode->mutex);
-+
-+#if REISER4_DEBUG
-+ if (atomic_read(&bnode->loaded)) {
-+ jnode *wj = bnode->wjnode;
-+ jnode *cj = bnode->cjnode;
-+
-+ assert("zam-480", jnode_page(cj) != NULL);
-+ assert("zam-633", jnode_page(wj) != NULL);
-+
-+ assert("zam-634",
-+ memcmp(jdata(wj), jdata(wj),
-+ bmap_size(super->s_blocksize)) == 0);
-+
-+ }
-+#endif
-+ done_bnode(bnode);
-+ mutex_unlock(&bnode->mutex);
-+ }
-+
-+ vfree(data->bitmap);
-+ kfree(data);
-+
-+ allocator->u.generic = NULL;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/space/bitmap.h linux-2.6.20/fs/reiser4/plugin/space/bitmap.h
---- linux-2.6.20.orig/fs/reiser4/plugin/space/bitmap.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/space/bitmap.h 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,47 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__REISER4_PLUGIN_SPACE_BITMAP_H__)
-+#define __REISER4_PLUGIN_SPACE_BITMAP_H__
-+
-+#include "../../dformat.h"
-+#include "../../block_alloc.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+/* EDWARD-FIXME-HANS: write something as informative as the below for every .h file lacking it. */
-+/* declarations of functions implementing methods of space allocator plugin for
-+ bitmap based allocator. The functions themselves are in bitmap.c */
-+extern int reiser4_init_allocator_bitmap(reiser4_space_allocator *,
-+ struct super_block *, void *);
-+extern int reiser4_destroy_allocator_bitmap(reiser4_space_allocator *,
-+ struct super_block *);
-+extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *,
-+ reiser4_blocknr_hint *, int needed,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len);
-+extern void reiser4_check_blocks_bitmap(const reiser4_block_nr *,
-+ const reiser4_block_nr *, int);
-+extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *,
-+ reiser4_block_nr,
-+ reiser4_block_nr);
-+extern int reiser4_pre_commit_hook_bitmap(void);
-+
-+#define reiser4_post_commit_hook_bitmap() do{}while(0)
-+#define reiser4_post_write_back_hook_bitmap() do{}while(0)
-+#define reiser4_print_info_bitmap(pref, al) do{}while(0)
-+
-+typedef __u64 bmap_nr_t;
-+typedef __u32 bmap_off_t;
-+
-+#endif /* __REISER4_PLUGIN_SPACE_BITMAP_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/space/Makefile linux-2.6.20/fs/reiser4/plugin/space/Makefile
---- linux-2.6.20.orig/fs/reiser4/plugin/space/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/space/Makefile 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,4 @@
-+obj-$(CONFIG_REISER4_FS) += space_plugins.o
-+
-+space_plugins-objs := \
-+ bitmap.o
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/space/space_allocator.h linux-2.6.20/fs/reiser4/plugin/space/space_allocator.h
---- linux-2.6.20.orig/fs/reiser4/plugin/space/space_allocator.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/space/space_allocator.h 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,80 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __SPACE_ALLOCATOR_H__
-+#define __SPACE_ALLOCATOR_H__
-+
-+#include "../../forward.h"
-+#include "bitmap.h"
-+/* NIKITA-FIXME-HANS: surely this could use a comment. Something about how bitmap is the only space allocator for now,
-+ * but... */
-+#define DEF_SPACE_ALLOCATOR(allocator) \
-+ \
-+static inline int sa_init_allocator (reiser4_space_allocator * al, struct super_block *s, void * opaque) \
-+{ \
-+ return reiser4_init_allocator_##allocator (al, s, opaque); \
-+} \
-+ \
-+static inline void sa_destroy_allocator (reiser4_space_allocator *al, struct super_block *s) \
-+{ \
-+ reiser4_destroy_allocator_##allocator (al, s); \
-+} \
-+ \
-+static inline int sa_alloc_blocks (reiser4_space_allocator *al, reiser4_blocknr_hint * hint, \
-+ int needed, reiser4_block_nr * start, reiser4_block_nr * len) \
-+{ \
-+ return reiser4_alloc_blocks_##allocator (al, hint, needed, start, len); \
-+} \
-+static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_block_nr start, reiser4_block_nr len) \
-+{ \
-+ reiser4_dealloc_blocks_##allocator (al, start, len); \
-+} \
-+ \
-+static inline void sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
-+{ \
-+ reiser4_check_blocks_##allocator (start, end, desired); \
-+} \
-+ \
-+static inline void sa_pre_commit_hook (void) \
-+{ \
-+ reiser4_pre_commit_hook_##allocator (); \
-+} \
-+ \
-+static inline void sa_post_commit_hook (void) \
-+{ \
-+ reiser4_post_commit_hook_##allocator (); \
-+} \
-+ \
-+static inline void sa_post_write_back_hook (void) \
-+{ \
-+ reiser4_post_write_back_hook_##allocator(); \
-+} \
-+ \
-+static inline void sa_print_info(const char * prefix, reiser4_space_allocator * al) \
-+{ \
-+ reiser4_print_info_##allocator (prefix, al); \
-+}
-+
-+DEF_SPACE_ALLOCATOR(bitmap)
-+
-+/* this object is part of reiser4 private in-core super block */
-+struct reiser4_space_allocator {
-+ union {
-+ /* space allocators might use this pointer to reference their
-+ * data. */
-+ void *generic;
-+ } u;
-+};
-+
-+/* __SPACE_ALLOCATOR_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/plugin/tail_policy.c linux-2.6.20/fs/reiser4/plugin/tail_policy.c
---- linux-2.6.20.orig/fs/reiser4/plugin/tail_policy.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/plugin/tail_policy.c 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,113 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Formatting policy plugins */
-+
-+/*
-+ * Formatting policy plugin is used by object plugin (of regular file) to
-+ * convert file between two representations.
-+ *
-+ * Currently following policies are implemented:
-+ * never store file in formatted nodes
-+ * always store file in formatted nodes
-+ * store file in formatted nodes if file is smaller than 4 blocks (default)
-+ */
-+
-+#include "../tree.h"
-+#include "../inode.h"
-+#include "../super.h"
-+#include "object.h"
-+#include "plugin.h"
-+#include "node/node.h"
-+#include "plugin_header.h"
-+
-+#include <linux/pagemap.h>
-+#include <linux/fs.h> /* For struct inode */
-+
-+/**
-+ * have_formatting_never -
-+ * @inode:
-+ * @size:
-+ *
-+ *
-+ */
-+/* Never store file's tail as direct item */
-+/* Audited by: green(2002.06.12) */
-+static int have_formatting_never(const struct inode *inode UNUSED_ARG
-+ /* inode to operate on */ ,
-+ loff_t size UNUSED_ARG /* new object size */ )
-+{
-+ return 0;
-+}
-+
-+/* Always store file's tail as direct item */
-+/* Audited by: green(2002.06.12) */
-+static int
-+have_formatting_always(const struct inode *inode UNUSED_ARG
-+ /* inode to operate on */ ,
-+ loff_t size UNUSED_ARG /* new object size */ )
-+{
-+ return 1;
-+}
-+
-+/* This function makes test if we should store file denoted @inode as tails only or
-+ as extents only. */
-+static int
-+have_formatting_default(const struct inode *inode UNUSED_ARG
-+ /* inode to operate on */ ,
-+ loff_t size /* new object size */ )
-+{
-+ assert("umka-1253", inode != NULL);
-+
-+ if (size > inode->i_sb->s_blocksize * 4)
-+ return 0;
-+
-+ return 1;
-+}
-+
-+/* tail plugins */
-+formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID] = {
-+ [NEVER_TAILS_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = NEVER_TAILS_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "never",
-+ .desc = "Never store file's tail",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_never
-+ },
-+ [ALWAYS_TAILS_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = ALWAYS_TAILS_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "always",
-+ .desc = "Always store file's tail",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_always
-+ },
-+ [SMALL_FILE_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = SMALL_FILE_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "4blocks",
-+ .desc = "store files shorter than 4 blocks in tail items",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_default
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/pool.c linux-2.6.20/fs/reiser4/pool.c
---- linux-2.6.20.orig/fs/reiser4/pool.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/pool.c 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,234 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Fast pool allocation.
-+
-+ There are situations when some sub-system normally asks memory allocator
-+ for only few objects, but under some circumstances could require much
-+ more. Typical and actually motivating example is tree balancing. It needs
-+ to keep track of nodes that were involved into it, and it is well-known
-+ that in reasonable packed balanced tree most (92.938121%) percent of all
-+ balancings end up after working with only few nodes (3.141592 on
-+ average). But in rare cases balancing can involve much more nodes
-+ (3*tree_height+1 in extremal situation).
-+
-+ On the one hand, we don't want to resort to dynamic allocation (slab,
-+ malloc(), etc.) to allocate data structures required to keep track of
-+ nodes during balancing. On the other hand, we cannot statically allocate
-+ required amount of space on the stack, because first: it is useless wastage
-+ of precious resource, and second: this amount is unknown in advance (tree
-+ height can change).
-+
-+ Pools, implemented in this file are solution for this problem:
-+
-+ - some configurable amount of objects is statically preallocated on the
-+ stack
-+
-+ - if this preallocated pool is exhausted and more objects is requested
-+ they are allocated dynamically.
-+
-+ Pools encapsulate distinction between statically and dynamically allocated
-+ objects. Both allocation and recycling look exactly the same.
-+
-+ To keep track of dynamically allocated objects, pool adds its own linkage
-+ to each object.
-+
-+ NOTE-NIKITA This linkage also contains some balancing-specific data. This
-+ is not perfect. On the other hand, balancing is currently the only client
-+ of pool code.
-+
-+ NOTE-NIKITA Another desirable feature is to rewrite all pool manipulation
-+ functions in the style of tslist/tshash, i.e., make them unreadable, but
-+ type-safe.
-+
-+*/
-+
-+#include "debug.h"
-+#include "pool.h"
-+#include "super.h"
-+
-+#include <linux/types.h>
-+#include <linux/err.h>
-+
-+/* initialize new pool object */
-+static void reiser4_init_pool_obj(reiser4_pool_header * h /* pool object to
-+ * initialize */ )
-+{
-+ INIT_LIST_HEAD(&h->usage_linkage);
-+ INIT_LIST_HEAD(&h->level_linkage);
-+ INIT_LIST_HEAD(&h->extra_linkage);
-+}
-+
-+/* initialize new pool */
-+void reiser4_init_pool(reiser4_pool * pool /* pool to initialize */ ,
-+ size_t obj_size /* size of objects in @pool */ ,
-+ int num_of_objs /* number of preallocated objects */ ,
-+ char *data /* area for preallocated objects */ )
-+{
-+ reiser4_pool_header *h;
-+ int i;
-+
-+ assert("nikita-955", pool != NULL);
-+ assert("nikita-1044", obj_size > 0);
-+ assert("nikita-956", num_of_objs >= 0);
-+ assert("nikita-957", data != NULL);
-+
-+ memset(pool, 0, sizeof *pool);
-+ pool->obj_size = obj_size;
-+ pool->data = data;
-+ INIT_LIST_HEAD(&pool->free);
-+ INIT_LIST_HEAD(&pool->used);
-+ INIT_LIST_HEAD(&pool->extra);
-+ memset(data, 0, obj_size * num_of_objs);
-+ for (i = 0; i < num_of_objs; ++i) {
-+ h = (reiser4_pool_header *) (data + i * obj_size);
-+ reiser4_init_pool_obj(h);
-+ /* add pool header to the end of pool's free list */
-+ list_add_tail(&h->usage_linkage, &pool->free);
-+ }
-+}
-+
-+/* release pool resources
-+
-+ Release all resources acquired by this pool, specifically, dynamically
-+ allocated objects.
-+
-+*/
-+void reiser4_done_pool(reiser4_pool * pool UNUSED_ARG /* pool to destroy */ )
-+{
-+}
-+
-+/* allocate carry object from pool
-+
-+ First, try to get preallocated object. If this fails, resort to dynamic
-+ allocation.
-+
-+*/
-+static void *reiser4_pool_alloc(reiser4_pool * pool /* pool to allocate object
-+ * from */ )
-+{
-+ reiser4_pool_header *result;
-+
-+ assert("nikita-959", pool != NULL);
-+
-+ if (!list_empty(&pool->free)) {
-+ struct list_head *linkage;
-+
-+ linkage = pool->free.next;
-+ list_del(linkage);
-+ INIT_LIST_HEAD(linkage);
-+ result = list_entry(linkage, reiser4_pool_header, usage_linkage);
-+ BUG_ON(!list_empty(&result->level_linkage) ||
-+ !list_empty(&result->extra_linkage));
-+ } else {
-+ /* pool is empty. Extra allocations don't deserve dedicated
-+ slab to be served from, as they are expected to be rare. */
-+ result = kmalloc(pool->obj_size, reiser4_ctx_gfp_mask_get());
-+ if (result != 0) {
-+ reiser4_init_pool_obj(result);
-+ list_add(&result->extra_linkage, &pool->extra);
-+ } else
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ BUG_ON(!list_empty(&result->usage_linkage) ||
-+ !list_empty(&result->level_linkage));
-+ }
-+ ++pool->objs;
-+ list_add(&result->usage_linkage, &pool->used);
-+ memset(result + 1, 0, pool->obj_size - sizeof *result);
-+ return result;
-+}
-+
-+/* return object back to the pool */
-+void reiser4_pool_free(reiser4_pool * pool, reiser4_pool_header * h /* pool to return object back
-+ * into */ )
-+{
-+ assert("nikita-961", h != NULL);
-+ assert("nikita-962", pool != NULL);
-+
-+ --pool->objs;
-+ assert("nikita-963", pool->objs >= 0);
-+
-+ list_del_init(&h->usage_linkage);
-+ list_del_init(&h->level_linkage);
-+
-+ if (list_empty(&h->extra_linkage))
-+ /*
-+ * pool header is not an extra one. Push it onto free list
-+ * using usage_linkage
-+ */
-+ list_add(&h->usage_linkage, &pool->free);
-+ else {
-+ /* remove pool header from pool's extra list and kfree it */
-+ list_del(&h->extra_linkage);
-+ kfree(h);
-+ }
-+}
-+
-+/* add new object to the carry level list
-+
-+ Carry level is FIFO most of the time, but not always. Complications arise
-+ when make_space() function tries to go to the left neighbor and thus adds
-+ carry node before existing nodes, and also, when updating delimiting keys
-+ after moving data between two nodes, we want left node to be locked before
-+ right node.
-+
-+ Latter case is confusing at the first glance. Problem is that COP_UPDATE
-+ opration that updates delimiting keys is sometimes called with two nodes
-+ (when data are moved between two nodes) and sometimes with only one node
-+ (when leftmost item is deleted in a node). In any case operation is
-+ supplied with at least node whose left delimiting key is to be updated
-+ (that is "right" node).
-+
-+*/
-+reiser4_pool_header *reiser4_add_obj(reiser4_pool * pool /* pool from which to
-+ * allocate new object
-+ */,
-+ struct list_head *list /* list where to add
-+ * object */,
-+ pool_ordering order /* where to add */,
-+ reiser4_pool_header * reference
-+ /* after (or before) which existing object
-+ to add */)
-+{
-+ reiser4_pool_header *result;
-+
-+ assert("nikita-972", pool != NULL);
-+
-+ result = reiser4_pool_alloc(pool);
-+ if (IS_ERR(result))
-+ return result;
-+
-+ assert("nikita-973", result != NULL);
-+
-+ switch (order) {
-+ case POOLO_BEFORE:
-+ __list_add(&result->level_linkage,
-+ reference->level_linkage.prev,
-+ &reference->level_linkage);
-+ break;
-+ case POOLO_AFTER:
-+ __list_add(&result->level_linkage,
-+ &reference->level_linkage,
-+ reference->level_linkage.next);
-+ break;
-+ case POOLO_LAST:
-+ list_add_tail(&result->level_linkage, list);
-+ break;
-+ case POOLO_FIRST:
-+ list_add(&result->level_linkage, list);
-+ break;
-+ default:
-+ wrong_return_value("nikita-927", "order");
-+ }
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/pool.h linux-2.6.20/fs/reiser4/pool.h
---- linux-2.6.20.orig/fs/reiser4/pool.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/pool.h 2007-05-06 14:50:43.863026968 +0400
-@@ -0,0 +1,55 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Fast pool allocation */
-+
-+#ifndef __REISER4_POOL_H__
-+#define __REISER4_POOL_H__
-+
-+#include <linux/types.h>
-+
-+typedef struct reiser4_pool {
-+ size_t obj_size;
-+ int objs;
-+ char *data;
-+ struct list_head free;
-+ struct list_head used;
-+ struct list_head extra;
-+} reiser4_pool;
-+
-+typedef struct reiser4_pool_header {
-+ /* object is either on free or "used" lists */
-+ struct list_head usage_linkage;
-+ struct list_head level_linkage;
-+ struct list_head extra_linkage;
-+} reiser4_pool_header;
-+
-+typedef enum {
-+ POOLO_BEFORE,
-+ POOLO_AFTER,
-+ POOLO_LAST,
-+ POOLO_FIRST
-+} pool_ordering;
-+
-+/* pool manipulation functions */
-+
-+extern void reiser4_init_pool(reiser4_pool * pool, size_t obj_size,
-+ int num_of_objs, char *data);
-+extern void reiser4_done_pool(reiser4_pool * pool);
-+extern void reiser4_pool_free(reiser4_pool * pool, reiser4_pool_header * h);
-+reiser4_pool_header *reiser4_add_obj(reiser4_pool * pool,
-+ struct list_head * list,
-+ pool_ordering order,
-+ reiser4_pool_header * reference);
-+
-+/* __REISER4_POOL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/readahead.c linux-2.6.20/fs/reiser4/readahead.c
---- linux-2.6.20.orig/fs/reiser4/readahead.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/readahead.c 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,138 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "forward.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "inode.h"
-+#include "key.h"
-+#include "znode.h"
-+
-+#include <linux/swap.h> /* for totalram_pages */
-+
-+void reiser4_init_ra_info(ra_info_t * rai)
-+{
-+ rai->key_to_stop = *reiser4_min_key();
-+}
-+
-+/* global formatted node readahead parameter. It can be set by mount option -o readahead:NUM:1 */
-+static inline int ra_adjacent_only(int flags)
-+{
-+ return flags & RA_ADJACENT_ONLY;
-+}
-+
-+/* this is used by formatted_readahead to decide whether read for right neighbor of node is to be issued. It returns 1
-+ if right neighbor's first key is less or equal to readahead's stop key */
-+static int should_readahead_neighbor(znode * node, ra_info_t * info)
-+{
-+ int result;
-+
-+ read_lock_dk(znode_get_tree(node));
-+ result = keyle(znode_get_rd_key(node), &info->key_to_stop);
-+ read_unlock_dk(znode_get_tree(node));
-+ return result;
-+}
-+
-+#define LOW_MEM_PERCENTAGE (5)
-+
-+static int low_on_memory(void)
-+{
-+ unsigned int freepages;
-+
-+ freepages = nr_free_pages();
-+ return freepages < (totalram_pages * LOW_MEM_PERCENTAGE / 100);
-+}
-+
-+/* start read for @node and for a few of its right neighbors */
-+void formatted_readahead(znode * node, ra_info_t * info)
-+{
-+ ra_params_t *ra_params;
-+ znode *cur;
-+ int i;
-+ int grn_flags;
-+ lock_handle next_lh;
-+
-+ /* do nothing if node block number has not been assigned to node (which means it is still in cache). */
-+ if (reiser4_blocknr_is_fake(znode_get_block(node)))
-+ return;
-+
-+ ra_params = get_current_super_ra_params();
-+
-+ if (znode_page(node) == NULL)
-+ jstartio(ZJNODE(node));
-+
-+ if (znode_get_level(node) != LEAF_LEVEL)
-+ return;
-+
-+ /* don't waste memory for read-ahead when low on memory */
-+ if (low_on_memory())
-+ return;
-+
-+ /* We can have locked nodes on upper tree levels, in this situation lock
-+ priorities do not help to resolve deadlocks, we have to use TRY_LOCK
-+ here. */
-+ grn_flags = (GN_CAN_USE_UPPER_LEVELS | GN_TRY_LOCK);
-+
-+ i = 0;
-+ cur = zref(node);
-+ init_lh(&next_lh);
-+ while (i < ra_params->max) {
-+ const reiser4_block_nr *nextblk;
-+
-+ if (!should_readahead_neighbor(cur, info))
-+ break;
-+
-+ if (reiser4_get_right_neighbor
-+ (&next_lh, cur, ZNODE_READ_LOCK, grn_flags))
-+ break;
-+
-+ nextblk = znode_get_block(next_lh.node);
-+ if (reiser4_blocknr_is_fake(nextblk) ||
-+ (ra_adjacent_only(ra_params->flags)
-+ && *nextblk != *znode_get_block(cur) + 1)) {
-+ break;
-+ }
-+
-+ zput(cur);
-+ cur = zref(next_lh.node);
-+ done_lh(&next_lh);
-+ if (znode_page(cur) == NULL)
-+ jstartio(ZJNODE(cur));
-+ else
-+ /* Do not scan read-ahead window if pages already
-+ * allocated (and i/o already started). */
-+ break;
-+
-+ i++;
-+ }
-+ zput(cur);
-+ done_lh(&next_lh);
-+}
-+
-+void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap)
-+{
-+ reiser4_key *stop_key;
-+
-+ assert("nikita-3542", dir != NULL);
-+ assert("nikita-3543", tap != NULL);
-+
-+ stop_key = &tap->ra_info.key_to_stop;
-+ /* initialize readdir readahead information: include into readahead
-+ * stat data of all files of the directory */
-+ set_key_locality(stop_key, get_inode_oid(dir));
-+ set_key_type(stop_key, KEY_SD_MINOR);
-+ set_key_ordering(stop_key, get_key_ordering(reiser4_max_key()));
-+ set_key_objectid(stop_key, get_key_objectid(reiser4_max_key()));
-+ set_key_offset(stop_key, get_key_offset(reiser4_max_key()));
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/readahead.h linux-2.6.20/fs/reiser4/readahead.h
---- linux-2.6.20.orig/fs/reiser4/readahead.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/readahead.h 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,48 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __READAHEAD_H__
-+#define __READAHEAD_H__
-+
-+#include "key.h"
-+
-+typedef enum {
-+ RA_ADJACENT_ONLY = 1, /* only requests nodes which are adjacent. Default is NO (not only adjacent) */
-+} ra_global_flags;
-+
-+/* reiser4 super block has a field of this type. It controls readahead during tree traversals */
-+typedef struct formatted_read_ahead_params {
-+ unsigned long max; /* request not more than this amount of nodes. Default is totalram_pages / 4 */
-+ int flags;
-+} ra_params_t;
-+
-+typedef struct {
-+ reiser4_key key_to_stop;
-+} ra_info_t;
-+
-+void formatted_readahead(znode *, ra_info_t *);
-+void reiser4_init_ra_info(ra_info_t * rai);
-+
-+struct reiser4_file_ra_state {
-+ loff_t start; /* Current window */
-+ loff_t size;
-+ loff_t next_size; /* Next window size */
-+ loff_t ahead_start; /* Ahead window */
-+ loff_t ahead_size;
-+ loff_t max_window_size; /* Maximum readahead window */
-+ loff_t slow_start; /* enlarging r/a size algorithm. */
-+};
-+
-+extern void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap);
-+
-+/* __READAHEAD_H__ */
-+#endif
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/README linux-2.6.20/fs/reiser4/README
---- linux-2.6.20.orig/fs/reiser4/README 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/README 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,125 @@
-+[LICENSING]
-+
-+Reiser4 is hereby licensed under the GNU General
-+Public License version 2.
-+
-+Source code files that contain the phrase "licensing governed by
-+reiser4/README" are "governed files" throughout this file. Governed
-+files are licensed under the GPL. The portions of them owned by Hans
-+Reiser, or authorized to be licensed by him, have been in the past,
-+and likely will be in the future, licensed to other parties under
-+other licenses. If you add your code to governed files, and don't
-+want it to be owned by Hans Reiser, put your copyright label on that
-+code so the poor blight and his customers can keep things straight.
-+All portions of governed files not labeled otherwise are owned by Hans
-+Reiser, and by adding your code to it, widely distributing it to
-+others or sending us a patch, and leaving the sentence in stating that
-+licensing is governed by the statement in this file, you accept this.
-+It will be a kindness if you identify whether Hans Reiser is allowed
-+to license code labeled as owned by you on your behalf other than
-+under the GPL, because he wants to know if it is okay to do so and put
-+a check in the mail to you (for non-trivial improvements) when he
-+makes his next sale. He makes no guarantees as to the amount if any,
-+though he feels motivated to motivate contributors, and you can surely
-+discuss this with him before or after contributing. You have the
-+right to decline to allow him to license your code contribution other
-+than under the GPL.
-+
-+Further licensing options are available for commercial and/or other
-+interests directly from Hans Reiser: reiser@namesys.com. If you interpret
-+the GPL as not allowing those additional licensing options, you read
-+it wrongly, and Richard Stallman agrees with me, when carefully read
-+you can see that those restrictions on additional terms do not apply
-+to the owner of the copyright, and my interpretation of this shall
-+govern for this license.
-+
-+[END LICENSING]
-+
-+Reiser4 is a file system based on dancing tree algorithms, and is
-+described at http://www.namesys.com
-+
-+mkfs.reiser4 and other utilities are on our webpage or wherever your
-+Linux provider put them. You really want to be running the latest
-+version off the website if you use fsck.
-+
-+Yes, if you update your reiser4 kernel module you do have to
-+recompile your kernel, most of the time. The errors you get will be
-+quite cryptic if your forget to do so.
-+
-+Hideous Commercial Pitch: Spread your development costs across other OS
-+vendors. Select from the best in the world, not the best in your
-+building, by buying from third party OS component suppliers. Leverage
-+the software component development power of the internet. Be the most
-+aggressive in taking advantage of the commercial possibilities of
-+decentralized internet development, and add value through your branded
-+integration that you sell as an operating system. Let your competitors
-+be the ones to compete against the entire internet by themselves. Be
-+hip, get with the new economic trend, before your competitors do. Send
-+email to reiser@namesys.com
-+
-+Hans Reiser was the primary architect of Reiser4, but a whole team
-+chipped their ideas in. He invested everything he had into Namesys
-+for 5.5 dark years of no money before Reiser3 finally started to work well
-+enough to bring in money. He owns the copyright.
-+
-+DARPA was the primary sponsor of Reiser4. DARPA does not endorse
-+Reiser4, it merely sponsors it. DARPA is, in solely Hans's personal
-+opinion, unique in its willingness to invest into things more
-+theoretical than the VC community can readily understand, and more
-+longterm than allows them to be sure that they will be the ones to
-+extract the economic benefits from. DARPA also integrated us into a
-+security community that transformed our security worldview.
-+
-+Vladimir Saveliev is our lead programmer, with us from the beginning,
-+and he worked long hours writing the cleanest code. This is why he is
-+now the lead programmer after years of commitment to our work. He
-+always made the effort to be the best he could be, and to make his
-+code the best that it could be. What resulted was quite remarkable. I
-+don't think that money can ever motivate someone to work the way he
-+did, he is one of the most selfless men I know.
-+
-+Alexander Lyamin was our sysadmin, and helped to educate us in
-+security issues. Moscow State University and IMT were very generous
-+in the internet access they provided us, and in lots of other little
-+ways that a generous institution can be.
-+
-+Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the
-+locking code, the block allocator, and finished the flushing code.
-+His code is always crystal clean and well structured.
-+
-+Nikita Danilov wrote the core of the balancing code, the core of the
-+plugins code, and the directory code. He worked a steady pace of long
-+hours that produced a whole lot of well abstracted code. He is our
-+senior computer scientist.
-+
-+Vladimir Demidov wrote the parser. Writing an in kernel parser is
-+something very few persons have the skills for, and it is thanks to
-+him that we can say that the parser is really not so big compared to
-+various bits of our other code, and making a parser work in the kernel
-+was not so complicated as everyone would imagine mainly because it was
-+him doing it...
-+
-+Joshua McDonald wrote the transaction manager, and the flush code.
-+The flush code unexpectedly turned out be extremely hairy for reasons
-+you can read about on our web page, and he did a great job on an
-+extremely difficult task.
-+
-+Nina Reiser handled our accounting, government relations, and much
-+more.
-+
-+Ramon Reiser developed our website.
-+
-+Beverly Palmer drew our graphics.
-+
-+Vitaly Fertman developed librepair, userspace plugins repair code, fsck
-+and worked with Umka on developing libreiser4 and userspace plugins.
-+
-+Yury Umanets (aka Umka) developed libreiser4, userspace plugins and
-+userspace tools (reiser4progs).
-+
-+Oleg Drokin (aka Green) is the release manager who fixes everything.
-+It is so nice to have someone like that on the team. He (plus Chris
-+and Jeff) make it possible for the entire rest of the Namesys team to
-+focus on Reiser4, and he fixed a whole lot of Reiser4 bugs also. It
-+is just amazing to watch his talent for spotting bugs in action.
-+
-diff -urN linux-2.6.20.orig/fs/reiser4/reiser4.h linux-2.6.20/fs/reiser4/reiser4.h
---- linux-2.6.20.orig/fs/reiser4/reiser4.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/reiser4.h 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,269 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* definitions of common constants used by reiser4 */
-+
-+#if !defined( __REISER4_H__ )
-+#define __REISER4_H__
-+
-+#include <asm/param.h> /* for HZ */
-+#include <linux/errno.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/hardirq.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * reiser4 compilation options.
-+ */
-+
-+#if defined(CONFIG_REISER4_DEBUG)
-+/* turn on assertion checks */
-+#define REISER4_DEBUG (1)
-+#else
-+#define REISER4_DEBUG (0)
-+#endif
-+
-+#if defined(CONFIG_ZLIB_INFLATE)
-+/* turn on zlib */
-+#define REISER4_ZLIB (1)
-+#else
-+#define REISER4_ZLIB (0)
-+#endif
-+
-+#if defined(CONFIG_CRYPTO_SHA256)
-+#define REISER4_SHA256 (1)
-+#else
-+#define REISER4_SHA256 (0)
-+#endif
-+
-+/*
-+ * Turn on large keys mode. In his mode (which is default), reiser4 key has 4
-+ * 8-byte components. In the old "small key" mode, it's 3 8-byte
-+ * components. Additional component, referred to as "ordering" is used to
-+ * order items from which given object is composed of. As such, ordering is
-+ * placed between locality and objectid. For directory item ordering contains
-+ * initial prefix of the file name this item is for. This sorts all directory
-+ * items within given directory lexicographically (but see
-+ * fibration.[ch]). For file body and stat-data, ordering contains initial
-+ * prefix of the name file was initially created with. In the common case
-+ * (files with single name) this allows to order file bodies and stat-datas in
-+ * the same order as their respective directory entries, thus speeding up
-+ * readdir.
-+ *
-+ * Note, that kernel can only mount file system with the same key size as one
-+ * it is compiled for, so flipping this option may render your data
-+ * inaccessible.
-+ */
-+#define REISER4_LARGE_KEY (1)
-+/*#define REISER4_LARGE_KEY (0)*/
-+
-+/*#define GUESS_EXISTS 1*/
-+
-+/*
-+ * PLEASE update fs/reiser4/kattr.c:show_options() when adding new compilation
-+ * option
-+ */
-+
-+extern const char *REISER4_SUPER_MAGIC_STRING;
-+extern const int REISER4_MAGIC_OFFSET; /* offset to magic string from the
-+ * beginning of device */
-+
-+/* here go tunable parameters that are not worth special entry in kernel
-+ configuration */
-+
-+/* default number of slots in coord-by-key caches */
-+#define CBK_CACHE_SLOTS (16)
-+/* how many elementary tree operation to carry on the next level */
-+#define CARRIES_POOL_SIZE (5)
-+/* size of pool of preallocated nodes for carry process. */
-+#define NODES_LOCKED_POOL_SIZE (5)
-+
-+#define REISER4_NEW_NODE_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
-+#define REISER4_NEW_EXTENT_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
-+#define REISER4_PASTE_FLAGS (COPI_GO_LEFT)
-+#define REISER4_INSERT_FLAGS (COPI_GO_LEFT)
-+
-+/* we are supporting reservation of disk space on uid basis */
-+#define REISER4_SUPPORT_UID_SPACE_RESERVATION (0)
-+/* we are supporting reservation of disk space for groups */
-+#define REISER4_SUPPORT_GID_SPACE_RESERVATION (0)
-+/* we are supporting reservation of disk space for root */
-+#define REISER4_SUPPORT_ROOT_SPACE_RESERVATION (0)
-+/* we use rapid flush mode, see flush.c for comments. */
-+#define REISER4_USE_RAPID_FLUSH (1)
-+
-+/*
-+ * set this to 0 if you don't want to use wait-for-flush in ->writepage().
-+ */
-+#define REISER4_USE_ENTD (1)
-+
-+/* key allocation is Plan-A */
-+#define REISER4_PLANA_KEY_ALLOCATION (1)
-+/* key allocation follows good old 3.x scheme */
-+#define REISER4_3_5_KEY_ALLOCATION (0)
-+
-+/* size of hash-table for znodes */
-+#define REISER4_ZNODE_HASH_TABLE_SIZE (1 << 13)
-+
-+/* number of buckets in lnode hash-table */
-+#define LNODE_HTABLE_BUCKETS (1024)
-+
-+/* some ridiculously high maximal limit on height of znode tree. This
-+ is used in declaration of various per level arrays and
-+ to allocate stattistics gathering array for per-level stats. */
-+#define REISER4_MAX_ZTREE_HEIGHT (8)
-+
-+#define REISER4_PANIC_MSG_BUFFER_SIZE (1024)
-+
-+/* If array contains less than REISER4_SEQ_SEARCH_BREAK elements then,
-+ sequential search is on average faster than binary. This is because
-+ of better optimization and because sequential search is more CPU
-+ cache friendly. This number (25) was found by experiments on dual AMD
-+ Athlon(tm), 1400MHz.
-+
-+ NOTE: testing in kernel has shown that binary search is more effective than
-+ implied by results of the user level benchmarking. Probably because in the
-+ node keys are separated by other data. So value was adjusted after few
-+ tests. More thorough tuning is needed.
-+*/
-+#define REISER4_SEQ_SEARCH_BREAK (3)
-+
-+/* don't allow tree to be lower than this */
-+#define REISER4_MIN_TREE_HEIGHT (TWIG_LEVEL)
-+
-+/* NOTE NIKITA this is no longer used: maximal atom size is auto-adjusted to
-+ * available memory. */
-+/* Default value of maximal atom size. Can be ovewritten by
-+ tmgr.atom_max_size mount option. By default infinity. */
-+#define REISER4_ATOM_MAX_SIZE ((unsigned)(~0))
-+
-+/* Default value of maximal atom age (in jiffies). After reaching this age
-+ atom will be forced to commit, either synchronously or asynchronously. Can
-+ be overwritten by tmgr.atom_max_age mount option. */
-+#define REISER4_ATOM_MAX_AGE (600 * HZ)
-+
-+/* sleeping period for ktxnmrgd */
-+#define REISER4_TXNMGR_TIMEOUT (5 * HZ)
-+
-+/* timeout to wait for ent thread in writepage. Default: 3 milliseconds. */
-+#define REISER4_ENTD_TIMEOUT (3 * HZ / 1000)
-+
-+/* start complaining after that many restarts in coord_by_key().
-+
-+ This either means incredibly heavy contention for this part of a tree, or
-+ some corruption or bug.
-+*/
-+#define REISER4_CBK_ITERATIONS_LIMIT (100)
-+
-+/* return -EIO after that many iterations in coord_by_key().
-+
-+ I have witnessed more than 800 iterations (in 30 thread test) before cbk
-+ finished. --nikita
-+*/
-+#define REISER4_MAX_CBK_ITERATIONS 500000
-+
-+/* put a per-inode limit on maximal number of directory entries with identical
-+ keys in hashed directory.
-+
-+ Disable this until inheritance interfaces stabilize: we need some way to
-+ set per directory limit.
-+*/
-+#define REISER4_USE_COLLISION_LIMIT (0)
-+
-+/* If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty leaf-level blocks it
-+ will force them to be relocated. */
-+#define FLUSH_RELOCATE_THRESHOLD 64
-+/* If flush finds can find a block allocation closer than at most FLUSH_RELOCATE_DISTANCE
-+ from the preceder it will relocate to that position. */
-+#define FLUSH_RELOCATE_DISTANCE 64
-+
-+/* If we have written this much or more blocks before encountering busy jnode
-+ in flush list - abort flushing hoping that next time we get called
-+ this jnode will be clean already, and we will save some seeks. */
-+#define FLUSH_WRITTEN_THRESHOLD 50
-+
-+/* The maximum number of nodes to scan left on a level during flush. */
-+#define FLUSH_SCAN_MAXNODES 10000
-+
-+/* per-atom limit of flushers */
-+#define ATOM_MAX_FLUSHERS (1)
-+
-+/* default tracing buffer size */
-+#define REISER4_TRACE_BUF_SIZE (1 << 15)
-+
-+/* what size units of IO we would like cp, etc., to use, in writing to
-+ reiser4. In bytes.
-+
-+ Can be overwritten by optimal_io_size mount option.
-+*/
-+#define REISER4_OPTIMAL_IO_SIZE (64 * 1024)
-+
-+/* see comments in inode.c:oid_to_uino() */
-+#define REISER4_UINO_SHIFT (1 << 30)
-+
-+/* Mark function argument as unused to avoid compiler warnings. */
-+#define UNUSED_ARG __attribute__((unused))
-+
-+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
-+#define NONNULL __attribute__((nonnull))
-+#else
-+#define NONNULL
-+#endif
-+
-+/* master super block offset in bytes.*/
-+#define REISER4_MASTER_OFFSET 65536
-+
-+/* size of VFS block */
-+#define VFS_BLKSIZE 512
-+/* number of bits in size of VFS block (512==2^9) */
-+#define VFS_BLKSIZE_BITS 9
-+
-+#define REISER4_I reiser4_inode_data
-+
-+/* implication */
-+#define ergo( antecedent, consequent ) ( !( antecedent ) || ( consequent ) )
-+/* logical equivalence */
-+#define equi( p1, p2 ) ( ergo( ( p1 ), ( p2 ) ) && ergo( ( p2 ), ( p1 ) ) )
-+
-+#define sizeof_array(x) ((int) (sizeof(x) / sizeof(x[0])))
-+
-+#define NOT_YET (0)
-+
-+/** Reiser4 specific error codes **/
-+
-+#define REISER4_ERROR_CODE_BASE 500
-+
-+/* Neighbor is not available (side neighbor or parent) */
-+#define E_NO_NEIGHBOR (REISER4_ERROR_CODE_BASE)
-+
-+/* Node was not found in cache */
-+#define E_NOT_IN_CACHE (REISER4_ERROR_CODE_BASE + 1)
-+
-+/* node has no free space enough for completion of balancing operation */
-+#define E_NODE_FULL (REISER4_ERROR_CODE_BASE + 2)
-+
-+/* repeat operation */
-+#define E_REPEAT (REISER4_ERROR_CODE_BASE + 3)
-+
-+/* deadlock happens */
-+#define E_DEADLOCK (REISER4_ERROR_CODE_BASE + 4)
-+
-+/* operation cannot be performed, because it would block and non-blocking mode
-+ * was requested. */
-+#define E_BLOCK (REISER4_ERROR_CODE_BASE + 5)
-+
-+/* wait some event (depends on context), then repeat */
-+#define E_WAIT (REISER4_ERROR_CODE_BASE + 6)
-+
-+#endif /* __REISER4_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/safe_link.c linux-2.6.20/fs/reiser4/safe_link.c
---- linux-2.6.20.orig/fs/reiser4/safe_link.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/safe_link.c 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,351 @@
-+/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Safe-links. */
-+
-+/*
-+ * Safe-links are used to maintain file system consistency during operations
-+ * that spawns multiple transactions. For example:
-+ *
-+ * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files
-+ * without user-visible names in the file system, but still opened by some
-+ * active process. What happens here is that unlink proper (i.e., removal
-+ * of the last file name) and file deletion (truncate of file body to zero
-+ * and deletion of stat-data, that happens when last file descriptor is
-+ * closed), may belong to different transactions T1 and T2. If a crash
-+ * happens after T1 commit, but before T2 commit, on-disk file system has
-+ * a file without name, that is, disk space leak.
-+ *
-+ * 2. Truncate. Truncate of large file may spawn multiple transactions. If
-+ * system crashes while truncate was in-progress, file is left partially
-+ * truncated, which violates "atomicity guarantees" of reiser4, viz. that
-+ * every system is atomic.
-+ *
-+ * Safe-links address both above cases. Basically, safe-link is a way post
-+ * some operation to be executed during commit of some other transaction than
-+ * current one. (Another way to look at the safe-link is to interpret it as a
-+ * logical logging.)
-+ *
-+ * Specifically, at the beginning of unlink safe-link in inserted in the
-+ * tree. This safe-link is normally removed by file deletion code (during
-+ * transaction T2 in the above terms). Truncate also inserts safe-link that is
-+ * normally removed when truncate operation is finished.
-+ *
-+ * This means, that in the case of "clean umount" there are no safe-links in
-+ * the tree. If safe-links are observed during mount, it means that (a) system
-+ * was terminated abnormally, and (b) safe-link correspond to the "pending"
-+ * (i.e., not finished) operations that were in-progress during system
-+ * termination. Each safe-link record enough information to complete
-+ * corresponding operation, and mount simply "replays" them (hence, the
-+ * analogy with the logical logging).
-+ *
-+ * Safe-links are implemented as blackbox items (see
-+ * plugin/item/blackbox.[ch]).
-+ *
-+ * For the reference: ext3 also has similar mechanism, it's called "an orphan
-+ * list" there.
-+ */
-+
-+#include "safe_link.h"
-+#include "debug.h"
-+#include "inode.h"
-+
-+#include "plugin/item/blackbox.h"
-+
-+#include <linux/fs.h>
-+
-+/*
-+ * On-disk format of safe-link.
-+ */
-+typedef struct safelink {
-+ reiser4_key sdkey; /* key of stat-data for the file safe-link is
-+ * for */
-+ d64 size; /* size to which file should be truncated */
-+} safelink_t;
-+
-+/*
-+ * locality where safe-link items are stored. Next to the objectid of root
-+ * directory.
-+ */
-+static oid_t safe_link_locality(reiser4_tree * tree)
-+{
-+ return get_key_objectid(get_super_private(tree->super)->df_plug->
-+ root_dir_key(tree->super)) + 1;
-+}
-+
-+/*
-+ Construct a key for the safe-link. Key has the following format:
-+
-+| 60 | 4 | 64 | 4 | 60 | 64 |
-++---------------+---+------------------+---+---------------+------------------+
-+| locality | 0 | 0 | 0 | objectid | link type |
-++---------------+---+------------------+---+---------------+------------------+
-+| | | | |
-+| 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+
-+ This is in large keys format. In small keys format second 8 byte chunk is
-+ out. Locality is a constant returned by safe_link_locality(). objectid is
-+ an oid of a file on which operation protected by this safe-link is
-+ performed. link-type is used to distinguish safe-links for different
-+ operations.
-+
-+ */
-+static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid,
-+ reiser4_safe_link_t link, reiser4_key * key)
-+{
-+ reiser4_key_init(key);
-+ set_key_locality(key, safe_link_locality(tree));
-+ set_key_objectid(key, oid);
-+ set_key_offset(key, link);
-+ return key;
-+}
-+
-+/*
-+ * how much disk space is necessary to insert and remove (in the
-+ * error-handling path) safe-link.
-+ */
-+static __u64 safe_link_tograb(reiser4_tree * tree)
-+{
-+ return
-+ /* insert safe link */
-+ estimate_one_insert_item(tree) +
-+ /* remove safe link */
-+ estimate_one_item_removal(tree) +
-+ /* drill to the leaf level during insertion */
-+ 1 + estimate_one_insert_item(tree) +
-+ /*
-+ * possible update of existing safe-link. Actually, if
-+ * safe-link existed already (we failed to remove it), then no
-+ * insertion is necessary, so this term is already "covered",
-+ * but for simplicity let's leave it.
-+ */
-+ 1;
-+}
-+
-+/*
-+ * grab enough disk space (see safe_link_tograb()) to insert and remove (in
-+ * the error-handling path) safe-link. Returns reiser4_grab_reserved() result.
-+ */
-+int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags)
-+{
-+ int result;
-+
-+ grab_space_enable();
-+ /* The sbinfo->delete_mutex can be taken here, so safe_link_release()
-+ * should be called before leaving reiser4 context. Grabbing is
-+ * (re-)enabled both before and after the reservation attempt. */
-+ result =
-+ reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags);
-+ grab_space_enable();
-+ return result;
-+}
-+
-+/*
-+ * release unused disk space reserved by safe_link_grab() (thin wrapper around reiser4_release_reserved()).
-+ */
-+void safe_link_release(reiser4_tree * tree)
-+{
-+ reiser4_release_reserved(tree->super);
-+}
-+
-+/* insert into tree safe-link for operation @link on inode @inode. If such
-+ * a safe-link already exists (-EEXIST from store_black_box()) its body is
-+ * overwritten through update_black_box() instead. */
-+int safe_link_add(struct inode *inode, reiser4_safe_link_t link)
-+{
-+ reiser4_key key;
-+ safelink_t sl;
-+ int length;
-+ int result;
-+ reiser4_tree *tree;
-+
-+ build_sd_key(inode, &sl.sdkey);
-+ length = sizeof sl.sdkey;
-+
-+ if (link == SAFE_TRUNCATE) {
-+ /*
-+ * for truncate we have to store final file length also,
-+ * expand item (only the first @length bytes of sl are stored).
-+ */
-+ length += sizeof(sl.size);
-+ put_unaligned(cpu_to_le64(inode->i_size), &sl.size);
-+ }
-+ tree = reiser4_tree_by_inode(inode);
-+ build_link_key(tree, get_inode_oid(inode), link, &key);
-+
-+ result = store_black_box(tree, &key, &sl, length);
-+ if (result == -EEXIST)
-+ result = update_black_box(tree, &key, &sl, length);
-+ return result;
-+}
-+
-+/*
-+ * remove safe-link corresponding to the operation @link on the object with
-+ * objectid @oid from @tree. Returns the result of kill_black_box().
-+ */
-+int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link)
-+{
-+ reiser4_key key;
-+
-+ return kill_black_box(tree, build_link_key(tree, oid, link, &key));
-+}
-+
-+/*
-+ * in-memory structure to keep information extracted from safe-link. This is
-+ * used to iterate over all safe-links (see safe_link_iter_*() below).
-+ */
-+typedef struct {
-+ reiser4_tree *tree; /* internal tree */
-+ reiser4_key key; /* safe-link key; doubles as iteration cursor */
-+ reiser4_key sdkey; /* key of object stat-data */
-+ reiser4_safe_link_t link; /* safe-link type (taken from key offset) */
-+ oid_t oid; /* object oid (taken from key objectid) */
-+ __u64 size; /* final size for truncate */
-+} safe_link_context;
-+
-+/*
-+ * start iterating over all safe-links: aim @ctx->key at the maximal objectid/offset within the safe-link locality.
-+ */
-+static void safe_link_iter_begin(reiser4_tree * tree, safe_link_context * ctx)
-+{
-+ ctx->tree = tree;
-+ reiser4_key_init(&ctx->key);
-+ set_key_locality(&ctx->key, safe_link_locality(tree));
-+ set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key()));
-+ set_key_offset(&ctx->key, get_key_offset(reiser4_max_key()));
-+}
-+
-+/*
-+ * return next safe-link, decoded into @ctx (0 on success, else error code).
-+ */
-+static int safe_link_iter_next(safe_link_context * ctx)
-+{
-+	int result;
-+	safelink_t sl;
-+
-+	result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0);
-+	if (result == 0) {
-+		ctx->oid = get_key_objectid(&ctx->key);
-+		ctx->link = get_key_offset(&ctx->key);
-+		ctx->sdkey = sl.sdkey;
-+		ctx->size = (ctx->link != SAFE_TRUNCATE) ? 0 : /* never stale */
-+			le64_to_cpu(get_unaligned(&sl.size));
-+	}
-+	return result;
-+}
-+
-+/*
-+ * non-zero once the cursor key has left the safe-link locality, i.e. no safe-links are left in the tree.
-+ */
-+static int safe_link_iter_finished(safe_link_context * ctx)
-+{
-+ return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree);
-+}
-+
-+/*
-+ * finish safe-link iteration (counterpart of safe_link_iter_begin()).
-+ */
-+static void safe_link_iter_end(safe_link_context * ctx)
-+{
-+ /* nothing special: the context holds no resources to release */
-+}
-+
-+/* process single safe-link: replay it via the file plugin, then delete it.
-+ * Returns 0 on success (also when the plugin cannot handle safe-links, in
-+ * which case the link is only reported and dropped), error code otherwise. */
-+static int process_safelink(struct super_block *super, reiser4_safe_link_t link,
-+			    reiser4_key * sdkey, oid_t oid, __u64 size)
-+{
-+	struct inode *inode;
-+	int result;
-+
-+	/*
-+	 * obtain object inode by reiser4_iget(), then call object plugin
-+	 * ->safelink() method to do actual work, then delete safe-link on
-+	 * success.
-+	 */
-+	inode = reiser4_iget(super, sdkey, 1);
-+	if (!IS_ERR(inode)) {
-+		file_plugin *fplug;
-+
-+		fplug = inode_file_plugin(inode);
-+		assert("nikita-3428", fplug != NULL);
-+		assert("", oid == get_inode_oid(inode));
-+		if (fplug->safelink != NULL) {
-+			/* reiser4_txn_restart_current is not necessary because
-+			 * mounting is single thread. However, without it
-+			 * deadlock detection code will complain (see
-+			 * nikita-3361). */
-+			reiser4_txn_restart_current();
-+			result = fplug->safelink(inode, link, size);
-+		} else {
-+			warning("nikita-3430",
-+				"Cannot handle safelink for %llu",
-+				(unsigned long long)oid);
-+			reiser4_print_key("key", sdkey);
-+			result = 0;
-+		}
-+		if (result != 0) {
-+			warning("nikita-3431",
-+				"Error processing safelink for %llu: %i",
-+				(unsigned long long)oid, result);
-+		}
-+		reiser4_iget_complete(inode);
-+		iput(inode);
-+		if (result == 0) {
-+			result = safe_link_grab(reiser4_get_tree(super), BA_CAN_COMMIT);
-+			if (result == 0)
-+				result =
-+				    safe_link_del(reiser4_get_tree(super), oid, link);
-+			safe_link_release(reiser4_get_tree(super));
-+			/*
-+			 * restart transaction: if there was large number of
-+			 * safe-links, their processing may fail to fit into
-+			 * single transaction.
-+			 */
-+			if (result == 0)
-+				reiser4_txn_restart_current();
-+		}
-+	} else
-+		result = PTR_ERR(inode);
-+	return result;
-+}
-+
-+/*
-+ * replay every safe-link found in the file system, stopping at first error.
-+ */
-+int process_safelinks(struct super_block *super)
-+{
-+	int result;
-+	safe_link_context ctx;
-+
-+	/* do nothing on the read-only file system */
-+	if (rofs_super(super))
-+		return 0;
-+	safe_link_iter_begin(&get_super_private(super)->tree, &ctx);
-+	for (;;) {
-+		result = safe_link_iter_next(&ctx);
-+		if (safe_link_iter_finished(&ctx) || result == -ENOENT) {
-+			result = 0;
-+			break;
-+		}
-+		if (result == 0)
-+			result = process_safelink(super, ctx.link, &ctx.sdkey,
-+						  ctx.oid, ctx.size);
-+		if (result != 0)
-+			break;
-+	}
-+	safe_link_iter_end(&ctx);
-+	return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/safe_link.h linux-2.6.20/fs/reiser4/safe_link.h
---- linux-2.6.20.orig/fs/reiser4/safe_link.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/safe_link.h 2007-05-06 14:50:43.867028218 +0400
-@@ -0,0 +1,29 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Safe-links. See safe_link.c for details. */
-+
-+#if !defined( __FS_SAFE_LINK_H__ )
-+#define __FS_SAFE_LINK_H__
-+
-+#include "tree.h"
-+
-+int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags);
-+void safe_link_release(reiser4_tree * tree);
-+int safe_link_add(struct inode *inode, reiser4_safe_link_t link);
-+int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link);
-+
-+int process_safelinks(struct super_block *super);
-+
-+/* __FS_SAFE_LINK_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/seal.c linux-2.6.20/fs/reiser4/seal.c
---- linux-2.6.20.orig/fs/reiser4/seal.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/seal.c 2007-05-06 14:50:43.871029467 +0400
-@@ -0,0 +1,218 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Seals implementation. */
-+/* Seals are "weak" tree pointers. They are analogous to tree coords in
-+ allowing to bypass tree traversal. But normal usage of coords implies that
-+ node pointed to by coord is locked, whereas seals don't keep a lock (or
-+ even a reference) to znode. Instead, each znode contains a version number,
-+ increased on each znode modification. This version number is copied into a
-+ seal when seal is created. Later, one can "validate" seal by calling
-+ reiser4_seal_validate(). If znode is in cache and its version number is
-+ still the same, seal is "pristine" and coord associated with it can be
-+ re-used immediately.
-+
-+ If, on the other hand, znode is out of cache, or it is obviously different
-+ one from the znode seal was initially attached to (for example, it is on
-+ the different level, or is being removed from the tree), seal is
-+ irreparably invalid ("burned") and tree traversal has to be repeated.
-+
-+ Otherwise, there is some hope, that while znode was modified (and seal was
-+ "broken" as a result), key attached to the seal is still in the node. This
-+ is checked by first comparing this key with delimiting keys of node and, if
-+ key is ok, doing intra-node lookup.
-+
-+ Znode version is maintained in the following way:
-+
-+ there is reiser4_tree.znode_epoch counter. Whenever new znode is created,
-+ znode_epoch is incremented and its new value is stored in ->version field
-+ of new znode. Whenever znode is dirtied (which means it was probably
-+ modified), znode_epoch is also incremented and its new value is stored in
-+ znode->version. This is done so, because just incrementing znode->version
-+ on each update is not enough: it may so happen, that znode get deleted, new
-+ znode is allocated for the same disk block and gets the same version
-+ counter, tricking seal code into false positive.
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "super.h"
-+
-+static znode *seal_node(const seal_t * seal);
-+static int seal_matches(const seal_t * seal, znode * node);
-+
-+/* initialise seal (may be repeated on the same seal). With @coord == NULL the
-+ seal is left zeroed, i.e. "not set"; @key is kept only if REISER4_DEBUG. */
-+void reiser4_seal_init(seal_t * seal /* seal to initialise */ ,
-+ const coord_t * coord /* coord @seal will be
-+ * attached to */ ,
-+ const reiser4_key * key UNUSED_ARG /* key @seal will be
-+ * attached to */ )
-+{
-+ assert("nikita-1886", seal != NULL);
-+ memset(seal, 0, sizeof *seal);
-+ if (coord != NULL) {
-+ znode *node;
-+
-+ node = coord->node;
-+ assert("nikita-1987", node != NULL);
-+ spin_lock_znode(node);
-+ seal->version = node->version;
-+ assert("nikita-1988", seal->version != 0);
-+ seal->block = *znode_get_block(node);
-+#if REISER4_DEBUG
-+ seal->coord1 = *coord;
-+ if (key != NULL)
-+ seal->key = *key;
-+#endif
-+ spin_unlock_znode(node);
-+ }
-+}
-+
-+/* finish with seal: zero its version so that reiser4_seal_is_set() is false */
-+void reiser4_seal_done(seal_t * seal /* seal to clear */ )
-+{
-+ assert("nikita-1887", seal != NULL);
-+ seal->version = 0;
-+}
-+
-+/* true if seal was initialised, that is, it carries a non-zero znode version */
-+int reiser4_seal_is_set(const seal_t * seal /* seal to query */ )
-+{
-+ assert("nikita-1890", seal != NULL);
-+ return seal->version != 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* helper function for reiser4_seal_validate(). It checks that item at @coord
-+ * has expected key (trivially true unless @coord is AT_UNIT). This is to
-+ * detect cases where node was modified but wasn't marked dirty. */
-+static inline int check_seal_match(const coord_t * coord /* coord to check */ ,
-+ const reiser4_key * k /* expected key */ )
-+{
-+ reiser4_key ukey;
-+
-+ return (coord->between != AT_UNIT) ||
-+ /* FIXME-VS: we only can compare keys for items whose units
-+ represent exactly one key */
-+ ((coord_is_existing_unit(coord))
-+ && (item_is_extent(coord)
-+ || keyeq(k, unit_key_by_coord(coord, &ukey))))
-+ || ((coord_is_existing_unit(coord)) && (item_is_ctail(coord))
-+ && keyge(k, unit_key_by_coord(coord, &ukey)));
-+}
-+#endif
-+
-+/* this is used by reiser4_seal_validate. It accepts return value of
-+ * longterm_lock_znode and returns 1 if it can be interpreted as seal
-+ * validation failure. For instance, when longterm_lock_znode returns -EINVAL,
-+ * reiser4_seal_validate returns -E_REPEAT and caller will call tree search.
-+ * We cannot do this in longterm_lock_znode(), because sometimes we want to
-+ * distinguish between -EINVAL and -E_REPEAT. */
-+static int should_repeat(int return_code)
-+{
-+ return return_code == -EINVAL;
-+}
-+
-+/* (re-)validate seal.
-+
-+ Checks whether seal is pristine: sealed znode is still in cache, can be
-+ locked and has the version recorded in the seal. If so, 0 is returned and
-+ @lh holds the long term lock. If seal was burned or broken, or locking
-+ failed with -EINVAL, return -E_REPEAT (other lock errors are passed up).
-+
-+ NOTE-NIKITA currently reiser4_seal_validate() returns -E_REPEAT if key we are
-+ looking for is in range of keys covered by the sealed node, but item wasn't
-+ found by node ->lookup() method. Alternative is to return -ENOENT in this
-+ case, but this would complicate callers logic.
-+*/
-+int reiser4_seal_validate(seal_t * seal /* seal to validate */,
-+ coord_t * coord /* coord to validate against */,
-+ const reiser4_key * key /* key to validate against */,
-+ lock_handle * lh /* resulting lock handle */,
-+ znode_lock_mode mode /* lock mode */,
-+ znode_lock_request request /* locking priority */)
-+{
-+ znode *node;
-+ int result;
-+
-+ assert("nikita-1889", seal != NULL);
-+ assert("nikita-1881", reiser4_seal_is_set(seal));
-+ assert("nikita-1882", key != NULL);
-+ assert("nikita-1883", coord != NULL);
-+ assert("nikita-1884", lh != NULL);
-+ assert("nikita-1885", keyeq(&seal->key, key));
-+ assert("nikita-1989", coords_equal(&seal->coord1, coord));
-+
-+ /* obtain znode by block number */
-+ node = seal_node(seal);
-+ if (node != NULL) {
-+ /* znode was in cache, lock it */
-+ result = longterm_lock_znode(lh, node, mode, request);
-+ zput(node);
-+ if (result == 0) {
-+ if (seal_matches(seal, node)) {
-+ /* if seal version and znode version
-+ coincide */
-+ ON_DEBUG(coord_update_v(coord));
-+ assert("nikita-1990",
-+ node == seal->coord1.node);
-+ assert("nikita-1898",
-+ WITH_DATA_RET(coord->node, 1,
-+ check_seal_match(coord,
-+ key)));
-+ } else
-+ result = RETERR(-E_REPEAT);
-+ }
-+ if (result != 0) {
-+ if (should_repeat(result))
-+ result = RETERR(-E_REPEAT);
-+ /* unlock node on failure */
-+ done_lh(lh);
-+ }
-+ } else {
-+ /* znode wasn't in cache */
-+ result = RETERR(-E_REPEAT);
-+ }
-+ return result;
-+}
-+
-+/* helpers functions */
-+
-+/* look up (zlook()) in current_tree the znode at the block recorded in @seal */
-+static znode *seal_node(const seal_t * seal /* seal to query */ )
-+{
-+ assert("nikita-1891", seal != NULL);
-+ return zlook(current_tree, &seal->block);
-+}
-+
-+/* true if @seal version and @node version coincide (read under znode spin lock) */
-+static int seal_matches(const seal_t * seal /* seal to check */ ,
-+ znode * node /* node to check */ )
-+{
-+ int result;
-+
-+ assert("nikita-1991", seal != NULL);
-+ assert("nikita-1993", node != NULL);
-+
-+ spin_lock_znode(node);
-+ result = (seal->version == node->version);
-+ spin_unlock_znode(node);
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/seal.h linux-2.6.20/fs/reiser4/seal.h
---- linux-2.6.20.orig/fs/reiser4/seal.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/seal.h 2007-05-06 14:50:43.871029467 +0400
-@@ -0,0 +1,49 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declaration of seals: "weak" tree pointers. See seal.c for comments. */
-+
-+#ifndef __SEAL_H__
-+#define __SEAL_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+
-+/* for __u?? types */
-+/*#include <linux/types.h>*/
-+
-+/* seal. See comment at the top of seal.c */
-+typedef struct seal_s {
-+ /* version of znode recorded at the time of seal creation; 0 means "not set" */
-+ __u64 version;
-+ /* block number of znode attached to this seal */
-+ reiser4_block_nr block;
-+#if REISER4_DEBUG
-+ /* coord this seal is attached to. For debugging. */
-+ coord_t coord1;
-+ /* key this seal is attached to. For debugging. */
-+ reiser4_key key;
-+#endif
-+} seal_t;
-+
-+extern void reiser4_seal_init(seal_t *, const coord_t *, const reiser4_key *);
-+extern void reiser4_seal_done(seal_t *);
-+extern int reiser4_seal_is_set(const seal_t *);
-+extern int reiser4_seal_validate(seal_t *, coord_t *,
-+ const reiser4_key *, lock_handle *,
-+ znode_lock_mode mode, znode_lock_request request);
-+
-+/* __SEAL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/search.c linux-2.6.20/fs/reiser4/search.c
---- linux-2.6.20.orig/fs/reiser4/search.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/search.c 2007-05-06 14:50:43.871029467 +0400
-@@ -0,0 +1,1611 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "reiser4.h"
-+#include "super.h"
-+#include "inode.h"
-+
-+#include <linux/slab.h>
-+
-+static const char *bias_name(lookup_bias bias);
-+
-+/* tree searching algorithm, intranode searching algorithms are in
-+ plugin/node/ */
-+
-+/* tree lookup cache
-+ *
-+ * The coord by key cache consists of small list of recently accessed nodes
-+ * maintained according to the LRU discipline. Before doing real top-to-down
-+ * tree traversal this cache is scanned for nodes that can contain key
-+ * requested.
-+ *
-+ * The efficiency of coord cache depends heavily on locality of reference for
-+ * tree accesses. Our user level simulations show reasonably good hit ratios
-+ * for coord cache under most loads so far.
-+ */
-+
-+/* Initialise coord cache slot: self-linked LRU entry with no node cached */
-+static void cbk_cache_init_slot(cbk_cache_slot *slot)
-+{
-+ assert("nikita-345", slot != NULL);
-+
-+ INIT_LIST_HEAD(&slot->lru);
-+ slot->node = NULL;
-+}
-+
-+/* Initialize coord cache: allocate cache->nr_slots empty slots, all on the LRU list; 0 or -ENOMEM */
-+int cbk_cache_init(cbk_cache *cache /* cache to init */ )
-+{
-+	int i;
-+
-+	assert("nikita-346", cache != NULL);
-+
-+	/* kcalloc() refuses a count * size product that would overflow */
-+	cache->slot = kcalloc(cache->nr_slots, sizeof(cbk_cache_slot),
-+			      reiser4_ctx_gfp_mask_get());
-+	if (cache->slot == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	INIT_LIST_HEAD(&cache->lru);
-+	for (i = 0; i < cache->nr_slots; ++i) {
-+		cbk_cache_init_slot(cache->slot + i);
-+		list_add_tail(&((cache->slot + i)->lru), &cache->lru);
-+	}
-+	rwlock_init(&cache->guard);
-+	return 0;
-+}
-+
-+/* release the slot array of @cache; harmless when it was never allocated */
-+void cbk_cache_done(cbk_cache * cache /* cache to release */ )
-+{
-+	assert("nikita-2493", cache != NULL);
-+	/* kfree() ignores NULL, so no explicit check is needed; the pointer
-+	 * is cleared so that a repeated call finds nothing to free */
-+	kfree(cache->slot);
-+	cache->slot = NULL;
-+}
-+
-+/* macro to iterate over all cbk cache slots, from the head of the LRU list */
-+#define for_all_slots(cache, slot)						\
-+	for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru);	\
-+	     &(cache)->lru != &(slot)->lru;					\
-+	     (slot) = list_entry((slot)->lru.next, cbk_cache_slot, lru))
-+
-+#if REISER4_DEBUG
-+/* check [cbk-cache-invariant]: used slots precede unused ones in LRU, nodes are unique; 1 if it holds */
-+static int cbk_cache_invariant(const cbk_cache *cache)
-+{
-+	cbk_cache_slot *slot;
-+	int result;
-+	int unused;
-+
-+	assert("nikita-2469", cache != NULL);
-+	if (cache->nr_slots == 0)
-+		return 1;
-+
-+	unused = 0;
-+	result = 1;
-+	read_lock(&((cbk_cache *)cache)->guard);
-+	for_all_slots(cache, slot) {
-+		/* in LRU first go all `used' slots followed by `unused' */
-+		if (unused && (slot->node != NULL))
-+			result = 0;
-+		if (slot->node == NULL)
-+			unused = 1;
-+		else {
-+			cbk_cache_slot *scan;
-+
-+			/* all cached nodes are different */
-+			scan = slot;
-+			while (result) {
-+				scan = list_entry(scan->lru.next, cbk_cache_slot, lru);
-+				if (&cache->lru == &scan->lru)
-+					break;
-+				if (slot->node == scan->node)
-+					result = 0;
-+			}
-+		}
-+		if (!result)
-+			break;
-+	}
-+	read_unlock(&((cbk_cache *)cache)->guard);
-+	return result;
-+}
-+
-+#endif
-+
-+/* Remove reference, if any, to @node from coord cache; the freed slot goes to the LRU tail */
-+void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
-+ reiser4_tree * tree /* tree to remove node from */ )
-+{
-+ cbk_cache_slot *slot;
-+ cbk_cache *cache;
-+ int i;
-+
-+ assert("nikita-350", node != NULL);
-+ assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
-+
-+ cache = &tree->cbk_cache;
-+ assert("nikita-2470", cbk_cache_invariant(cache));
-+
-+ write_lock(&(cache->guard));
-+ for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
-+ if (slot->node == node) {
-+ list_move_tail(&slot->lru, &cache->lru);
-+ slot->node = NULL;
-+ break;
-+ }
-+ }
-+ write_unlock(&(cache->guard));
-+ assert("nikita-2471", cbk_cache_invariant(cache));
-+}
-+
-+/* add to the cbk-cache in the "tree" information about "node". This
-+ can actually be update of existing slot in a cache. */
-+static void cbk_cache_add(const znode *node /* node to add to the cache */ )
-+{
-+ cbk_cache *cache;
-+ cbk_cache_slot *slot;
-+ int i;
-+
-+ assert("nikita-352", node != NULL);
-+
-+ cache = &znode_get_tree(node)->cbk_cache;
-+ assert("nikita-2472", cbk_cache_invariant(cache));
-+
-+ if (cache->nr_slots == 0)
-+ return;
-+
-+ write_lock(&(cache->guard));
-+ /* find slot to update/add */
-+ for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
-+ /* oops, this node is already in a cache */
-+ if (slot->node == node)
-+ break;
-+ }
-+ /* @node is not cached yet: take over the slot at the tail of the LRU */
-+ if (i == cache->nr_slots) {
-+ slot = list_entry(cache->lru.prev, cbk_cache_slot, lru);
-+ slot->node = (znode *) node;
-+ }
-+ list_move(&slot->lru, &cache->lru);
-+ write_unlock(&(cache->guard));
-+ assert("nikita-2473", cbk_cache_invariant(cache));
-+}
-+
-+static int setup_delimiting_keys(cbk_handle * h);
-+static lookup_result coord_by_handle(cbk_handle * handle);
-+static lookup_result traverse_tree(cbk_handle * h);
-+static int cbk_cache_search(cbk_handle * h);
-+
-+static level_lookup_result cbk_level_lookup(cbk_handle * h);
-+static level_lookup_result cbk_node_lookup(cbk_handle * h);
-+
-+/* helper functions */
-+
-+static void update_stale_dk(reiser4_tree * tree, znode * node);
-+
-+/* release parent node during traversal */
-+static void put_parent(cbk_handle * h);
-+/* check consistency of fields */
-+static int sanity_check(cbk_handle * h);
-+/* release resources in handle */
-+static void hput(cbk_handle * h);
-+
-+static level_lookup_result search_to_left(cbk_handle * h);
-+
-+/* pack numerous arguments of coord_by_key() into @handle, which is zeroed
-+ * first; fields not listed here stay 0. Returns @handle. */
-+static cbk_handle *cbk_pack(cbk_handle * handle,
-+ reiser4_tree * tree,
-+ const reiser4_key * key,
-+ coord_t * coord,
-+ lock_handle * active_lh,
-+ lock_handle * parent_lh,
-+ znode_lock_mode lock_mode,
-+ lookup_bias bias,
-+ tree_level lock_level,
-+ tree_level stop_level,
-+ __u32 flags, ra_info_t * info)
-+{
-+ memset(handle, 0, sizeof *handle);
-+
-+ handle->tree = tree;
-+ handle->key = key;
-+ handle->lock_mode = lock_mode;
-+ handle->bias = bias;
-+ handle->lock_level = lock_level;
-+ handle->stop_level = stop_level;
-+ handle->coord = coord;
-+ /* CBK_TRUST_DK and CBK_USE_CRABLOCK are always forced on. See tree.h:cbk_flags */
-+ handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
-+
-+ handle->active_lh = active_lh;
-+ handle->parent_lh = parent_lh;
-+ handle->ra_info = info;
-+ return handle;
-+}
-+
-+/* main tree lookup procedure
-+
-+ Check coord cache. If key we are looking for is not found there, call
-+ traverse_tree() (through coord_by_handle()) to do real tree traversal.
-+
-+ As we have extents on the twig level, @lock_level and @stop_level can
-+ be different from LEAF_LEVEL and each other.
-+
-+ Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
-+ long term locks) while calling this.
-+*/
-+lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search
-+ * in. Usually this tree is
-+ * part of file-system
-+ * super-block */ ,
-+ const reiser4_key * key /* key to look for */ ,
-+ coord_t * coord /* where to store found
-+ * position in a tree. Fields
-+ * in "coord" are only valid if
-+ * coord_by_key() returned
-+ * "CBK_COORD_FOUND" */ ,
-+ lock_handle * lh, /* resulting lock handle */
-+ znode_lock_mode lock_mode /* type of lookup we
-+ * want on node. Pass
-+ * ZNODE_READ_LOCK here
-+ * if you only want to
-+ * read item found and
-+ * ZNODE_WRITE_LOCK if
-+ * you want to modify
-+ * it */ ,
-+ lookup_bias bias /* what to return if coord
-+ * with exactly the @key is
-+ * not in the tree */ ,
-+ tree_level lock_level /* tree level where to start
-+ * taking @lock type of
-+ * locks */ ,
-+ tree_level stop_level /* tree level to stop. Pass
-+ * LEAF_LEVEL or TWIG_LEVEL
-+ * here Item being looked
-+ * for has to be between
-+ * @lock_level and
-+ * @stop_level, inclusive */ ,
-+ __u32 flags /* search flags */ ,
-+ ra_info_t *
-+ info
-+ /* information about desired tree traversal readahead */
-+ )
-+{
-+ cbk_handle handle;
-+ lock_handle parent_lh;
-+ lookup_result result;
-+
-+ init_lh(lh);
-+ init_lh(&parent_lh);
-+
-+ assert("nikita-3023", reiser4_schedulable());
-+
-+ assert("nikita-353", tree != NULL);
-+ assert("nikita-354", key != NULL);
-+ assert("nikita-355", coord != NULL);
-+ assert("nikita-356", (bias == FIND_EXACT)
-+ || (bias == FIND_MAX_NOT_MORE_THAN));
-+ assert("nikita-357", stop_level >= LEAF_LEVEL);
-+ /* no locks can be held during tree traversal */
-+ assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
-+
-+ cbk_pack(&handle,
-+ tree,
-+ key,
-+ coord,
-+ lh,
-+ &parent_lh,
-+ lock_mode, bias, lock_level, stop_level, flags, info);
-+
-+ result = coord_by_handle(&handle);
-+ assert("nikita-3247",
-+ ergo(!IS_CBKERR(result), coord->node == lh->node));
-+ return result;
-+}
-+
-+/* like coord_by_key(), but starts traversal from vroot of @object rather than
-+ * from tree root. With @object == NULL the search is done in current_tree. */
-+lookup_result reiser4_object_lookup(struct inode * object,
-+ const reiser4_key * key,
-+ coord_t * coord,
-+ lock_handle * lh,
-+ znode_lock_mode lock_mode,
-+ lookup_bias bias,
-+ tree_level lock_level,
-+ tree_level stop_level, __u32 flags,
-+ ra_info_t * info)
-+{
-+ cbk_handle handle;
-+ lock_handle parent_lh;
-+ lookup_result result;
-+
-+ init_lh(lh);
-+ init_lh(&parent_lh);
-+
-+ assert("nikita-3023", reiser4_schedulable());
-+
-+ assert("nikita-354", key != NULL);
-+ assert("nikita-355", coord != NULL);
-+ assert("nikita-356", (bias == FIND_EXACT)
-+ || (bias == FIND_MAX_NOT_MORE_THAN));
-+ assert("nikita-357", stop_level >= LEAF_LEVEL);
-+ /* no locks can be held during tree search by key */
-+ assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
-+
-+ cbk_pack(&handle,
-+ object != NULL ? reiser4_tree_by_inode(object) : current_tree,
-+ key,
-+ coord,
-+ lh,
-+ &parent_lh,
-+ lock_mode, bias, lock_level, stop_level, flags, info);
-+ handle.object = object;
-+
-+ result = coord_by_handle(&handle);
-+ assert("nikita-3247",
-+ ergo(!IS_CBKERR(result), coord->node == lh->node));
-+ return result;
-+}
-+
-+/* lookup by cbk_handle. Common part of coord_by_key() and
-+ reiser4_object_lookup(). */
-+static lookup_result coord_by_handle(cbk_handle * handle)
-+{
-+ /*
-+ * first check cbk_cache (which is look-aside cache for our tree) and
-+ * if this fails, start traversal.
-+ */
-+ /* a cache hit (0) leaves the lookup outcome in handle->result. */
-+ if (cbk_cache_search(handle) == 0)
-+ return handle->result;
-+ else
-+ return traverse_tree(handle);
-+}
-+
-+/* Execute actor for each item (or unit, depending on @through_units_p),
-+ starting from @coord, right-ward, until either:
-+
-+ - end of the tree is reached
-+ - unformatted node is met
-+ - error occurred
-+ - @actor returns 0 or less
-+
-+ Error code, or last actor return value is returned. -ENOENT is returned
-+ at once if @coord does not point to an existing unit.
-+ This is used by plugin/dir/hashe_dir.c:reiser4_find_entry() to move through
-+ sequence of entries with identical keys and alikes.
-+*/
-+int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
-+ coord_t * coord /* coord to start from */ ,
-+ lock_handle * lh /* lock handle to start with and to
-+ * update along the way */ ,
-+ tree_iterate_actor_t actor /* function to call on each
-+ * item/unit */ ,
-+ void *arg /* argument to pass to @actor */ ,
-+ znode_lock_mode mode /* lock mode on scanned nodes */ ,
-+ int through_units_p /* call @actor on each item or on
-+ * each unit */ )
-+{
-+ int result;
-+
-+ assert("nikita-1143", tree != NULL);
-+ assert("nikita-1145", coord != NULL);
-+ assert("nikita-1146", lh != NULL);
-+ assert("nikita-1147", actor != NULL);
-+
-+ result = zload(coord->node);
-+ coord_clear_iplug(coord);
-+ if (result != 0)
-+ return result;
-+ if (!coord_is_existing_unit(coord)) {
-+ zrelse(coord->node);
-+ return -ENOENT;
-+ }
-+ while ((result = actor(tree, coord, lh, arg)) > 0) {
-+ /* move further; a non-zero step result sends us to the right neighbor */
-+ if ((through_units_p && coord_next_unit(coord)) ||
-+ (!through_units_p && coord_next_item(coord))) {
-+ do {
-+ lock_handle couple;
-+
-+ /* move to the next node */
-+ init_lh(&couple);
-+ result =
-+ reiser4_get_right_neighbor(&couple,
-+ coord->node,
-+ (int)mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ zrelse(coord->node);
-+ if (result == 0) {
-+
-+ result = zload(couple.node);
-+ if (result != 0) {
-+ done_lh(&couple);
-+ return result;
-+ }
-+
-+ coord_init_first_unit(coord,
-+ couple.node);
-+ done_lh(lh);
-+ move_lh(lh, &couple);
-+ } else
-+ return result;
-+ } while (node_is_empty(coord->node));
-+ }
-+
-+ assert("nikita-1149", coord_is_existing_unit(coord));
-+ }
-+ zrelse(coord->node);
-+ return result;
-+}
-+
-+/* return locked uber znode for @tree */
-+int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
-+		   znode_lock_request pri, lock_handle * lh)
-+{
-+	/* the uber znode hangs off tree->uber: lock it as requested and pass
-+	 * the outcome of the locking attempt straight back to the caller;
-+	 * the lock itself is tracked through @lh */
-+	return longterm_lock_znode(lh, tree->uber, mode, pri);
-+}
-+
-+/* true if @key is strictly within @node: below its right delimiting key and
-+ not below (@isunique) or strictly above (otherwise) its left delimiting key.
-+ When looking for a possibly non-unique key whose item is at the edge of
-+ @node, the item may actually be in the neighbor, hence the strict check.
-+*/
-+static int znode_contains_key_strict(znode * node /* node to check key
-+ * against */ ,
-+ const reiser4_key *
-+ key /* key to check */ ,
-+ int isunique)
-+{
-+ int answer;
-+
-+ assert("nikita-1760", node != NULL);
-+ assert("nikita-1722", key != NULL);
-+
-+ if (keyge(key, &node->rd_key))
-+ return 0;
-+
-+ answer = keycmp(&node->ld_key, key);
-+
-+ if (isunique)
-+ return answer != GREATER_THAN;
-+ else
-+ return answer == LESS_THAN;
-+}
-+
-+/*
-+ * Virtual Root (vroot) code.
-+ *
-+ * For given file system object (e.g., regular file or directory) let's
-+ * define its "virtual root" as lowest in the tree (that is, furthest
-+ * from the tree root) node such that all body items of said object are
-+ * located in a tree rooted at this node.
-+ *
-+ * Once vroot of object is found all tree lookups for items within body of
-+ * this object ("object lookups") can be started from its vroot rather
-+ * than from real root. This has following advantages:
-+ *
-+ * 1. amount of nodes traversed during lookup (and, hence, amount of
-+ * key comparisons made) decreases, and
-+ *
-+ * 2. contention on tree root is decreased. This latter was actually
-+ * motivating reason behind vroot, because spin lock of root node,
-+ * which is taken when acquiring long-term lock on root node is the
-+ * hottest lock in the reiser4.
-+ *
-+ * How to find vroot.
-+ *
-+ * When vroot of object F is not yet determined, all object lookups start
-+ * from the root of the tree. At each tree level during traversal we have
-+ * a node N such that a key we are looking for (which is the key inside
-+ * object's body) is located within N. In function handle_vroot() called
-+ * from cbk_level_lookup() we check whether N is possible vroot for
-+ * F. Check is trivial---if neither leftmost nor rightmost item of N
-+ * belongs to F (and we already have helpful ->owns_item() method of
-+ * object plugin for this), then N is possible vroot of F. This, of
-+ * course, relies on the assumption that each object occupies contiguous
-+ * range of keys in the tree.
-+ *
-+ * Thus, traversing tree downward and checking each node as we go, we can
-+ * find lowest such node, which, by definition, is vroot.
-+ *
-+ * How to track vroot.
-+ *
-+ * Nohow. If actual vroot changes, next object lookup will just restart
-+ * from the actual tree root, refreshing object's vroot along the way.
-+ *
-+ */
-+
-+/*
-+ * Check whether @node is possible vroot of @object.
-+ */
-+static void handle_vroot(struct inode *object, znode * node)
-+{
-+ file_plugin *fplug;
-+ coord_t coord;
-+
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-3353", fplug != NULL);
-+ assert("nikita-3354", fplug->owns_item != NULL);
-+
-+ if (unlikely(node_is_empty(node)))
-+ return;
-+
-+ coord_init_first_unit(&coord, node);
-+ /*
-+ * if leftmost item of @node belongs to @object, we cannot be sure
-+ * that @node is vroot of @object, because, some items of @object are
-+ * probably in the sub-tree rooted at the left neighbor of @node.
-+ */
-+ if (fplug->owns_item(object, &coord))
-+ return;
-+ coord_init_last_unit(&coord, node);
-+ /* mutatis mutandis for the rightmost item */
-+ if (fplug->owns_item(object, &coord))
-+ return;
-+ /* otherwise, @node is possible vroot of @object */
-+ inode_set_vroot(object, node);
-+}
-+
-+/*
-+ * helper function used by traverse tree to start tree traversal not from the
-+ * tree root, but from @h->object's vroot, if possible.
-+ */
-+static int prepare_object_lookup(cbk_handle * h)
-+{
-+ znode *vroot;
-+ int result;
-+
-+ vroot = inode_get_vroot(h->object);
-+ if (vroot == NULL) {
-+ /*
-+ * object doesn't have known vroot, start from real tree root.
-+ */
-+ return LOOKUP_CONT;
-+ }
-+
-+ h->level = znode_get_level(vroot);
-+ /* take a long-term lock on vroot */
-+ h->result = longterm_lock_znode(h->active_lh, vroot,
-+ cbk_lock_mode(h->level, h),
-+ ZNODE_LOCK_LOPRI);
-+ result = LOOKUP_REST;
-+ if (h->result == 0) {
-+ int isunique;
-+ int inside;
-+
-+ isunique = h->flags & CBK_UNIQUE;
-+ /* check that key is inside vroot */
-+ read_lock_dk(h->tree);
-+ inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
-+ !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
-+ read_unlock_dk(h->tree);
-+ if (inside) {
-+ h->result = zload(vroot);
-+ if (h->result == 0) {
-+ /* search for key in vroot. */
-+ result = cbk_node_lookup(h);
-+ zrelse(vroot); /*h->active_lh->node); */
-+ if (h->active_lh->node != vroot) {
-+ result = LOOKUP_REST;
-+ } else if (result == LOOKUP_CONT) {
-+ move_lh(h->parent_lh, h->active_lh);
-+ h->flags &= ~CBK_DKSET;
-+ }
-+ }
-+ }
-+ }
-+
-+ zput(vroot);
-+
-+ if (IS_CBKERR(h->result) || result == LOOKUP_REST)
-+ hput(h);
-+ return result;
-+}
-+
-+/* main function that handles common parts of tree traversal: starting
-+ (fake znode handling), restarts, error handling, completion */
-+static lookup_result traverse_tree(cbk_handle * h /* search handle */ )
-+{
-+ int done;
-+ int iterations;
-+ int vroot_used;
-+
-+ assert("nikita-365", h != NULL);
-+ assert("nikita-366", h->tree != NULL);
-+ assert("nikita-367", h->key != NULL);
-+ assert("nikita-368", h->coord != NULL);
-+ assert("nikita-369", (h->bias == FIND_EXACT)
-+ || (h->bias == FIND_MAX_NOT_MORE_THAN));
-+ assert("nikita-370", h->stop_level >= LEAF_LEVEL);
-+ assert("nikita-2949", !(h->flags & CBK_DKSET));
-+ assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
-+
-+ done = 0;
-+ iterations = 0;
-+ vroot_used = 0;
-+
-+ /* loop for restarts */
-+ restart:
-+
-+ assert("nikita-3024", reiser4_schedulable());
-+
-+ h->result = CBK_COORD_FOUND;
-+ /* connect_znode() needs it */
-+ h->ld_key = *reiser4_min_key();
-+ h->rd_key = *reiser4_max_key();
-+ h->flags |= CBK_DKSET;
-+ h->error = NULL;
-+
-+ if (!vroot_used && h->object != NULL) {
-+ vroot_used = 1;
-+ done = prepare_object_lookup(h);
-+ if (done == LOOKUP_REST) {
-+ goto restart;
-+ } else if (done == LOOKUP_DONE)
-+ return h->result;
-+ }
-+ if (h->parent_lh->node == NULL) {
-+ done =
-+ get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
-+ h->parent_lh);
-+
-+ assert("nikita-1637", done != -E_DEADLOCK);
-+
-+ h->block = h->tree->root_block;
-+ h->level = h->tree->height;
-+ h->coord->node = h->parent_lh->node;
-+
-+ if (done != 0)
-+ return done;
-+ }
-+
-+ /* loop descending a tree */
-+ while (!done) {
-+
-+ if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
-+ IS_POW(iterations))) {
-+ warning("nikita-1481", "Too many iterations: %i",
-+ iterations);
-+ reiser4_print_key("key", h->key);
-+ ++iterations;
-+ } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
-+ h->error =
-+ "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
-+ h->result = RETERR(-EIO);
-+ break;
-+ }
-+ switch (cbk_level_lookup(h)) {
-+ case LOOKUP_CONT:
-+ move_lh(h->parent_lh, h->active_lh);
-+ continue;
-+ default:
-+ wrong_return_value("nikita-372", "cbk_level");
-+ case LOOKUP_DONE:
-+ done = 1;
-+ break;
-+ case LOOKUP_REST:
-+ hput(h);
-+ /* deadlock avoidance is normal case. */
-+ if (h->result != -E_DEADLOCK)
-+ ++iterations;
-+ reiser4_preempt_point();
-+ goto restart;
-+ }
-+ }
-+ /* that's all. The rest is error handling */
-+ if (unlikely(h->error != NULL)) {
-+ warning("nikita-373", "%s: level: %i, "
-+ "lock_level: %i, stop_level: %i "
-+ "lock_mode: %s, bias: %s",
-+ h->error, h->level, h->lock_level, h->stop_level,
-+ lock_mode_name(h->lock_mode), bias_name(h->bias));
-+ reiser4_print_address("block", &h->block);
-+ reiser4_print_key("key", h->key);
-+ print_coord_content("coord", h->coord);
-+ }
-+ /* `unlikely' error case */
-+ if (unlikely(IS_CBKERR(h->result))) {
-+ /* failure. do cleanup */
-+ hput(h);
-+ } else {
-+ assert("nikita-1605", WITH_DATA_RET
-+ (h->coord->node, 1,
-+ ergo((h->result == CBK_COORD_FOUND) &&
-+ (h->bias == FIND_EXACT) &&
-+ (!node_is_empty(h->coord->node)),
-+ coord_is_existing_item(h->coord))));
-+ }
-+ return h->result;
-+}
-+
-+/* find delimiting keys of child
-+
-+ Determine left and right delimiting keys for child pointed to by
-+ @parent_coord.
-+
-+*/
-+static void find_child_delimiting_keys(znode * parent /* parent znode, passed
-+ * locked */ ,
-+ const coord_t * parent_coord /* coord where
-+ * pointer to
-+ * child is
-+ * stored */ ,
-+ reiser4_key * ld /* where to store left
-+ * delimiting key */ ,
-+ reiser4_key * rd /* where to store right
-+ * delimiting key */ )
-+{
-+ coord_t neighbor;
-+
-+ assert("nikita-1484", parent != NULL);
-+ assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
-+
-+ coord_dup(&neighbor, parent_coord);
-+
-+ if (neighbor.between == AT_UNIT)
-+ /* imitate item ->lookup() behavior. */
-+ neighbor.between = AFTER_UNIT;
-+
-+ if (coord_set_to_left(&neighbor) == 0)
-+ unit_key_by_coord(&neighbor, ld);
-+ else {
-+ assert("nikita-14851", 0);
-+ *ld = *znode_get_ld_key(parent);
-+ }
-+
-+ coord_dup(&neighbor, parent_coord);
-+ if (neighbor.between == AT_UNIT)
-+ neighbor.between = AFTER_UNIT;
-+ if (coord_set_to_right(&neighbor) == 0)
-+ unit_key_by_coord(&neighbor, rd);
-+ else
-+ *rd = *znode_get_rd_key(parent);
-+}
-+
-+/*
-+ * setup delimiting keys for a child
-+ *
-+ * @parent parent node
-+ *
-+ * @coord location in @parent where pointer to @child is
-+ *
-+ * @child child node
-+ */
-+int
-+set_child_delimiting_keys(znode * parent, const coord_t * coord, znode * child)
-+{
-+ reiser4_tree *tree;
-+
-+ assert("nikita-2952",
-+ znode_get_level(parent) == znode_get_level(coord->node));
-+
-+ /* fast check without taking dk lock. This is safe, because
-+ * JNODE_DKSET is never cleared once set. */
-+ if (!ZF_ISSET(child, JNODE_DKSET)) {
-+ tree = znode_get_tree(parent);
-+ write_lock_dk(tree);
-+ if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
-+ find_child_delimiting_keys(parent, coord,
-+ &child->ld_key,
-+ &child->rd_key);
-+ ON_DEBUG(child->ld_key_version =
-+ atomic_inc_return(&delim_key_version);
-+ child->rd_key_version =
-+ atomic_inc_return(&delim_key_version););
-+ ZF_SET(child, JNODE_DKSET);
-+ }
-+ write_unlock_dk(tree);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+/* Perform tree lookup at one level. This is called from cbk_traverse()
-+ function that drives lookup through tree and calls cbk_node_lookup() to
-+ perform lookup within one node.
-+
-+ See comments in a code.
-+*/
-+static level_lookup_result cbk_level_lookup(cbk_handle * h /* search handle */ )
-+{
-+ int ret;
-+ int setdk;
-+ int ldkeyset = 0;
-+ reiser4_key ldkey;
-+ reiser4_key key;
-+ znode *active;
-+
-+ assert("nikita-3025", reiser4_schedulable());
-+
-+ /* acquire reference to @active node */
-+ active =
-+ zget(h->tree, &h->block, h->parent_lh->node, h->level,
-+ reiser4_ctx_gfp_mask_get());
-+
-+ if (IS_ERR(active)) {
-+ h->result = PTR_ERR(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* lock @active */
-+ h->result = longterm_lock_znode(h->active_lh,
-+ active,
-+ cbk_lock_mode(h->level, h),
-+ ZNODE_LOCK_LOPRI);
-+ /* longterm_lock_znode() acquires additional reference to znode (which
-+ will be later released by longterm_unlock_znode()). Release
-+ reference acquired by zget().
-+ */
-+ zput(active);
-+ if (unlikely(h->result != 0))
-+ goto fail_or_restart;
-+
-+ setdk = 0;
-+ /* if @active is accessed for the first time, setup delimiting keys on
-+ it. Delimiting keys are taken from the parent node. See
-+ setup_delimiting_keys() for details.
-+ */
-+ if (h->flags & CBK_DKSET) {
-+ setdk = setup_delimiting_keys(h);
-+ h->flags &= ~CBK_DKSET;
-+ } else {
-+ znode *parent;
-+
-+ parent = h->parent_lh->node;
-+ h->result = zload(parent);
-+ if (unlikely(h->result != 0))
-+ goto fail_or_restart;
-+
-+ if (!ZF_ISSET(active, JNODE_DKSET))
-+ setdk = set_child_delimiting_keys(parent,
-+ h->coord, active);
-+ else {
-+ read_lock_dk(h->tree);
-+ find_child_delimiting_keys(parent, h->coord, &ldkey,
-+ &key);
-+ read_unlock_dk(h->tree);
-+ ldkeyset = 1;
-+ }
-+ zrelse(parent);
-+ }
-+
-+ /* this is ugly kludge. Reminder: this is necessary, because
-+ ->lookup() method returns coord with ->between field probably set
-+ to something different from AT_UNIT.
-+ */
-+ h->coord->between = AT_UNIT;
-+
-+ if (znode_just_created(active) && (h->coord->node != NULL)) {
-+ write_lock_tree(h->tree);
-+ /* if we are going to load znode right now, setup
-+ ->in_parent: coord where pointer to this node is stored in
-+ parent.
-+ */
-+ coord_to_parent_coord(h->coord, &active->in_parent);
-+ write_unlock_tree(h->tree);
-+ }
-+
-+ /* check connectedness without holding tree lock---false negatives
-+ * will be re-checked by connect_znode(), and false positives are
-+ * impossible---@active cannot suddenly turn into unconnected
-+ * state. */
-+ if (!znode_is_connected(active)) {
-+ h->result = connect_znode(h->coord, active);
-+ if (unlikely(h->result != 0)) {
-+ put_parent(h);
-+ goto fail_or_restart;
-+ }
-+ }
-+
-+ jload_prefetch(ZJNODE(active));
-+
-+ if (setdk)
-+ update_stale_dk(h->tree, active);
-+
-+ /* put_parent() cannot be called earlier, because connect_znode()
-+ assumes parent node is referenced; */
-+ put_parent(h);
-+
-+ if ((!znode_contains_key_lock(active, h->key) &&
-+ (h->flags & CBK_TRUST_DK))
-+ || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
-+ /* 1. key was moved out of this node while this thread was
-+ waiting for the lock. Restart. More elaborate solution is
-+ to determine where key moved (to the left, or to the right)
-+ and try to follow it through sibling pointers.
-+
-+ 2. or, node itself is going to be removed from the
-+ tree. Release lock and restart.
-+ */
-+ h->result = -E_REPEAT;
-+ }
-+ if (h->result == -E_REPEAT)
-+ return LOOKUP_REST;
-+
-+ h->result = zload_ra(active, h->ra_info);
-+ if (h->result) {
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* sanity checks */
-+ if (sanity_check(h)) {
-+ zrelse(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* check that key of leftmost item in the @active is the same as in
-+ * its parent */
-+ if (ldkeyset && !node_is_empty(active) &&
-+ !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
-+ warning("vs-3533", "Keys are inconsistent. Fsck?");
-+ reiser4_print_key("inparent", &ldkey);
-+ reiser4_print_key("inchild", &key);
-+ h->result = RETERR(-EIO);
-+ zrelse(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ if (h->object != NULL)
-+ handle_vroot(h->object, active);
-+
-+ ret = cbk_node_lookup(h);
-+
-+ /* h->active_lh->node might change, but active is yet to be zrelsed */
-+ zrelse(active);
-+
-+ return ret;
-+
-+ fail_or_restart:
-+ if (h->result == -E_DEADLOCK)
-+ return LOOKUP_REST;
-+ return LOOKUP_DONE;
-+}
-+
-+#if REISER4_DEBUG
-+/* check left and right delimiting keys of a znode */
-+void check_dkeys(znode * node)
-+{
-+ znode *left;
-+ znode *right;
-+
-+ read_lock_tree(current_tree);
-+ read_lock_dk(current_tree);
-+
-+ assert("vs-1710", znode_is_any_locked(node));
-+ assert("vs-1197",
-+ !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
-+
-+ left = node->left;
-+ right = node->right;
-+
-+ if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
-+ && left != NULL && ZF_ISSET(left, JNODE_DKSET))
-+ /* check left neighbor. Note that left neighbor is not locked,
-+ so it might get wrong delimiting keys therefore */
-+ assert("vs-1198",
-+ (keyeq(znode_get_rd_key(left), znode_get_ld_key(node))
-+ || ZF_ISSET(left, JNODE_HEARD_BANSHEE)));
-+
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
-+ && right != NULL && ZF_ISSET(right, JNODE_DKSET))
-+ /* check right neighbor. Note that right neighbor is not
-+ locked, so it might get wrong delimiting keys therefore */
-+ assert("vs-1199",
-+ (keyeq(znode_get_rd_key(node), znode_get_ld_key(right))
-+ || ZF_ISSET(right, JNODE_HEARD_BANSHEE)));
-+
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+}
-+#endif
-+
-+/* true if @key is left delimiting key of @node */
-+static int key_is_ld(znode * node, const reiser4_key * key)
-+{
-+ int ld;
-+
-+ assert("nikita-1716", node != NULL);
-+ assert("nikita-1758", key != NULL);
-+
-+ read_lock_dk(znode_get_tree(node));
-+ assert("nikita-1759", znode_contains_key(node, key));
-+ ld = keyeq(znode_get_ld_key(node), key);
-+ read_unlock_dk(znode_get_tree(node));
-+ return ld;
-+}
-+
-+/* Process one node during tree traversal.
-+
-+ This is called by cbk_level_lookup(). */
-+static level_lookup_result cbk_node_lookup(cbk_handle * h /* search handle */ )
-+{
-+ /* node plugin of @active */
-+ node_plugin *nplug;
-+ /* item plugin of item that was found */
-+ item_plugin *iplug;
-+ /* search bias */
-+ lookup_bias node_bias;
-+ /* node we are operating upon */
-+ znode *active;
-+ /* tree we are searching in */
-+ reiser4_tree *tree;
-+ /* result */
-+ int result;
-+
-+ assert("nikita-379", h != NULL);
-+
-+ active = h->active_lh->node;
-+ tree = h->tree;
-+
-+ nplug = active->nplug;
-+ assert("nikita-380", nplug != NULL);
-+
-+ ON_DEBUG(check_dkeys(active));
-+
-+ /* return item from "active" node with maximal key not greater than
-+ "key" */
-+ node_bias = h->bias;
-+ result = nplug->lookup(active, h->key, node_bias, h->coord);
-+ if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
-+ /* error occurred */
-+ h->result = result;
-+ return LOOKUP_DONE;
-+ }
-+ if (h->level == h->stop_level) {
-+ /* welcome to the stop level */
-+ assert("nikita-381", h->coord->node == active);
-+ if (result == NS_FOUND) {
-+ /* success of tree lookup */
-+ if (!(h->flags & CBK_UNIQUE)
-+ && key_is_ld(active, h->key)) {
-+ return search_to_left(h);
-+ } else
-+ h->result = CBK_COORD_FOUND;
-+ } else {
-+ h->result = CBK_COORD_NOTFOUND;
-+ }
-+ if (!(h->flags & CBK_IN_CACHE))
-+ cbk_cache_add(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
-+ h->error = "not found on internal node";
-+ h->result = result;
-+ return LOOKUP_DONE;
-+ }
-+
-+ assert("vs-361", h->level > h->stop_level);
-+
-+ if (handle_eottl(h, &result)) {
-+ assert("vs-1674", (result == LOOKUP_DONE ||
-+ result == LOOKUP_REST));
-+ return result;
-+ }
-+
-+ /* go down to next level */
-+ check_me("vs-12", zload(h->coord->node) == 0);
-+ assert("nikita-2116", item_is_internal(h->coord));
-+ iplug = item_plugin_by_coord(h->coord);
-+ iplug->s.internal.down_link(h->coord, h->key, &h->block);
-+ zrelse(h->coord->node);
-+ --h->level;
-+ return LOOKUP_CONT; /* continue */
-+}
-+
-+/* scan cbk_cache slots looking for a match for @h */
-+static int cbk_cache_scan_slots(cbk_handle * h /* cbk handle */ )
-+{
-+ level_lookup_result llr;
-+ znode *node;
-+ reiser4_tree *tree;
-+ cbk_cache_slot *slot;
-+ cbk_cache *cache;
-+ tree_level level;
-+ int isunique;
-+ const reiser4_key *key;
-+ int result;
-+
-+ assert("nikita-1317", h != NULL);
-+ assert("nikita-1315", h->tree != NULL);
-+ assert("nikita-1316", h->key != NULL);
-+
-+ tree = h->tree;
-+ cache = &tree->cbk_cache;
-+ if (cache->nr_slots == 0)
-+ /* size of cbk cache was set to 0 by mount time option. */
-+ return RETERR(-ENOENT);
-+
-+ assert("nikita-2474", cbk_cache_invariant(cache));
-+ node = NULL; /* to keep gcc happy */
-+ level = h->level;
-+ key = h->key;
-+ isunique = h->flags & CBK_UNIQUE;
-+ result = RETERR(-ENOENT);
-+
-+ /*
-+ * this is time-critical function and dragons had, hence, been settled
-+ * here.
-+ *
-+ * Loop below scans cbk cache slots trying to find matching node with
-+ * suitable range of delimiting keys and located at the h->level.
-+ *
-+ * Scan is done under cbk cache spin lock that protects slot->node
-+ * pointers. If suitable node is found we want to pin it in
-+ * memory. But slot->node can point to the node with x_count 0
-+ * (unreferenced). Such node can be recycled at any moment, or can
-+ * already be in the process of being recycled (within jput()).
-+ *
-+ * As we found node in the cbk cache, it means that jput() hasn't yet
-+ * called cbk_cache_invalidate().
-+ *
-+ * We acquire reference to the node without holding tree lock, and
-+ * later, check node's RIP bit. This avoids races with jput().
-+ */
-+
-+ rcu_read_lock();
-+ read_lock(&((cbk_cache *)cache)->guard);
-+
-+ slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
-+ slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
-+ BUG_ON(&slot->lru != &cache->lru);/*????*/
-+ while (1) {
-+
-+ slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
-+
-+ if (&cache->lru != &slot->lru)
-+ node = slot->node;
-+ else
-+ node = NULL;
-+
-+ if (unlikely(node == NULL))
-+ break;
-+
-+ /*
-+ * this is (hopefully) the only place in the code where we are
-+ * working with delimiting keys without holding dk lock. This
-+ * is fine here, because this is only "guess" anyway---keys
-+ * are rechecked under dk lock below.
-+ */
-+ if (znode_get_level(node) == level &&
-+ /* reiser4_min_key < key < reiser4_max_key */
-+ znode_contains_key_strict(node, key, isunique)) {
-+ zref(node);
-+ result = 0;
-+ spin_lock_prefetch(&tree->tree_lock);
-+ break;
-+ }
-+ }
-+ read_unlock(&((cbk_cache *)cache)->guard);
-+
-+ assert("nikita-2475", cbk_cache_invariant(cache));
-+
-+ if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
-+ result = -ENOENT;
-+
-+ rcu_read_unlock();
-+
-+ if (result != 0) {
-+ h->result = CBK_COORD_NOTFOUND;
-+ return RETERR(-ENOENT);
-+ }
-+
-+ result =
-+ longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
-+ ZNODE_LOCK_LOPRI);
-+ zput(node);
-+ if (result != 0)
-+ return result;
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+
-+ /* recheck keys */
-+ read_lock_dk(tree);
-+ result = (znode_contains_key_strict(node, key, isunique) &&
-+ !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ read_unlock_dk(tree);
-+ if (result) {
-+ /* do lookup inside node */
-+ llr = cbk_node_lookup(h);
-+ /* if cbk_node_lookup() wandered to another node (due to eottl
-+ or non-unique keys), adjust @node */
-+ /*node = h->active_lh->node; */
-+
-+ if (llr != LOOKUP_DONE) {
-+ /* restart or continue on the next level */
-+ result = RETERR(-ENOENT);
-+ } else if (IS_CBKERR(h->result))
-+ /* io or oom */
-+ result = RETERR(-ENOENT);
-+ else {
-+ /* good. Either item found or definitely not found. */
-+ result = 0;
-+
-+ write_lock(&(cache->guard));
-+ if (slot->node == h->active_lh->node /*node */ ) {
-+ /* if this node is still in cbk cache---move
-+ its slot to the head of the LRU list. */
-+ list_move(&slot->lru, &cache->lru);
-+ }
-+ write_unlock(&(cache->guard));
-+ }
-+ } else {
-+ /* race. While this thread was waiting for the lock, node was
-+ rebalanced and item we are looking for, shifted out of it
-+ (if it ever was here).
-+
-+ Continuing scanning is almost hopeless: node key range was
-+ moved to, is almost certainly at the beginning of the LRU
-+ list at this time, because it's hot, but restarting
-+ scanning from the very beginning is complex. Just return,
-+ so that cbk() will be performed. This is not that
-+ important, because such races should be rare. Are they?
-+ */
-+ result = RETERR(-ENOENT); /* -ERAUGHT */
-+ }
-+ zrelse(node);
-+ assert("nikita-2476", cbk_cache_invariant(cache));
-+ return result;
-+}
-+
-+/* look for item with given key in the coord cache
-+
-+ This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
-+ which is a small LRU list of znodes accessed lately. For each znode in
-+ znode in this list, it checks whether key we are looking for fits into key
-+ range covered by this node. If so, and in addition, node lies at allowed
-+ level (this is to handle extents on a twig level), node is locked, and
-+ lookup inside it is performed.
-+
-+ we need a measurement of the cost of this cache search compared to the cost
-+ of coord_by_key.
-+
-+*/
-+static int cbk_cache_search(cbk_handle * h /* cbk handle */ )
-+{
-+ int result = 0;
-+ tree_level level;
-+
-+ /* add CBK_IN_CACHE to the handle flags. This means that
-+ * cbk_node_lookup() assumes that cbk_cache is scanned and would add
-+ * found node to the cache. */
-+ h->flags |= CBK_IN_CACHE;
-+ for (level = h->stop_level; level <= h->lock_level; ++level) {
-+ h->level = level;
-+ result = cbk_cache_scan_slots(h);
-+ if (result != 0) {
-+ done_lh(h->active_lh);
-+ done_lh(h->parent_lh);
-+ } else {
-+ assert("nikita-1319", !IS_CBKERR(h->result));
-+ break;
-+ }
-+ }
-+ h->flags &= ~CBK_IN_CACHE;
-+ return result;
-+}
-+
-+/* type of lock we want to obtain during tree traversal. On stop level
-+ we want type of lock user asked for, on upper levels: read lock. */
-+znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
-+{
-+ assert("nikita-382", h != NULL);
-+
-+ return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
-+}
-+
-+/* update outdated delimiting keys */
-+static void stale_dk(reiser4_tree * tree, znode * node)
-+{
-+ znode *right;
-+
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ right = node->right;
-+
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ right && ZF_ISSET(right, JNODE_DKSET) &&
-+ !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
-+ znode_set_rd_key(node, znode_get_ld_key(right));
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/* check for possibly outdated delimiting keys, and update them if
-+ * necessary. */
-+static void update_stale_dk(reiser4_tree * tree, znode * node)
-+{
-+ znode *right;
-+ reiser4_key rd;
-+
-+ read_lock_tree(tree);
-+ read_lock_dk(tree);
-+ rd = *znode_get_rd_key(node);
-+ right = node->right;
-+ if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ right && ZF_ISSET(right, JNODE_DKSET) &&
-+ !keyeq(&rd, znode_get_ld_key(right)))) {
-+ assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
-+ read_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+ stale_dk(tree, node);
-+ return;
-+ }
-+ read_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/*
-+ * handle searches a the non-unique key.
-+ *
-+ * Suppose that we are looking for an item with possibly non-unique key 100.
-+ *
-+ * Root node contains two pointers: one to a node with left delimiting key 0,
-+ * and another to a node with left delimiting key 100. Item we interested in
-+ * may well happen in the sub-tree rooted at the first pointer.
-+ *
-+ * To handle this search_to_left() is called when search reaches stop
-+ * level. This function checks it is _possible_ that item we are looking for
-+ * is in the left neighbor (this can be done by comparing delimiting keys) and
-+ * if so, tries to lock left neighbor (this is low priority lock, so it can
-+ * deadlock, tree traversal is just restarted if it did) and then checks
-+ * whether left neighbor actually contains items with our key.
-+ *
-+ * Note that this is done on the stop level only. It is possible to try such
-+ * left-check on each level, but as duplicate keys are supposed to be rare
-+ * (very unlikely that more than one node is completely filled with items with
-+ * duplicate keys), it sis cheaper to scan to the left on the stop level once.
-+ *
-+ */
-+static level_lookup_result search_to_left(cbk_handle * h /* search handle */ )
-+{
-+ level_lookup_result result;
-+ coord_t *coord;
-+ znode *node;
-+ znode *neighbor;
-+
-+ lock_handle lh;
-+
-+ assert("nikita-1761", h != NULL);
-+ assert("nikita-1762", h->level == h->stop_level);
-+
-+ init_lh(&lh);
-+ coord = h->coord;
-+ node = h->active_lh->node;
-+ assert("nikita-1763", coord_is_leftmost_unit(coord));
-+
-+ h->result =
-+ reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ neighbor = NULL;
-+ switch (h->result) {
-+ case -E_DEADLOCK:
-+ result = LOOKUP_REST;
-+ break;
-+ case 0:{
-+ node_plugin *nplug;
-+ coord_t crd;
-+ lookup_bias bias;
-+
-+ neighbor = lh.node;
-+ h->result = zload(neighbor);
-+ if (h->result != 0) {
-+ result = LOOKUP_DONE;
-+ break;
-+ }
-+
-+ nplug = neighbor->nplug;
-+
-+ coord_init_zero(&crd);
-+ bias = h->bias;
-+ h->bias = FIND_EXACT;
-+ h->result =
-+ nplug->lookup(neighbor, h->key, h->bias, &crd);
-+ h->bias = bias;
-+
-+ if (h->result == NS_NOT_FOUND) {
-+ case -E_NO_NEIGHBOR:
-+ h->result = CBK_COORD_FOUND;
-+ if (!(h->flags & CBK_IN_CACHE))
-+ cbk_cache_add(node);
-+ default: /* some other error */
-+ result = LOOKUP_DONE;
-+ } else if (h->result == NS_FOUND) {
-+ read_lock_dk(znode_get_tree(neighbor));
-+ h->rd_key = *znode_get_ld_key(node);
-+ leftmost_key_in_node(neighbor, &h->ld_key);
-+ read_unlock_dk(znode_get_tree(neighbor));
-+ h->flags |= CBK_DKSET;
-+
-+ h->block = *znode_get_block(neighbor);
-+ /* clear coord -> node so that cbk_level_lookup()
-+ wouldn't overwrite parent hint in neighbor.
-+
-+ Parent hint was set up by
-+ reiser4_get_left_neighbor()
-+ */
-+ /* FIXME: why do we have to spinlock here? */
-+ write_lock_tree(znode_get_tree(neighbor));
-+ h->coord->node = NULL;
-+ write_unlock_tree(znode_get_tree(neighbor));
-+ result = LOOKUP_CONT;
-+ } else {
-+ result = LOOKUP_DONE;
-+ }
-+ if (neighbor != NULL)
-+ zrelse(neighbor);
-+ }
-+ }
-+ done_lh(&lh);
-+ return result;
-+}
-+
-+/* debugging aid: return symbolic name of search bias */
-+static const char *bias_name(lookup_bias bias /* bias to get name of */ )
-+{
-+ if (bias == FIND_EXACT)
-+ return "exact";
-+ else if (bias == FIND_MAX_NOT_MORE_THAN)
-+ return "left-slant";
-+/* else if( bias == RIGHT_SLANT_BIAS ) */
-+/* return "right-bias"; */
-+ else {
-+ static char buf[30];
-+
-+ sprintf(buf, "unknown: %i", bias);
-+ return buf;
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging aid: print human readable information about @p */
-+void print_coord_content(const char *prefix /* prefix to print */ ,
-+ coord_t * p /* coord to print */ )
-+{
-+ reiser4_key key;
-+
-+ if (p == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+ if ((p->node != NULL) && znode_is_loaded(p->node)
-+ && coord_is_existing_item(p))
-+ printk("%s: data: %p, length: %i\n", prefix,
-+ item_body_by_coord(p), item_length_by_coord(p));
-+ if (znode_is_loaded(p->node)) {
-+ item_key_by_coord(p, &key);
-+ reiser4_print_key(prefix, &key);
-+ }
-+}
-+
-+/* debugging aid: print human readable information about @block */
-+void reiser4_print_address(const char *prefix /* prefix to print */ ,
-+ const reiser4_block_nr * block /* block number to print */ )
-+{
-+ printk("%s: %s\n", prefix, sprint_address(block));
-+}
-+#endif
-+
-+/* return string containing human readable representation of @block */
-+char *sprint_address(const reiser4_block_nr *
-+ block /* block number to print */ )
-+{
-+ static char address[30];
-+
-+ if (block == NULL)
-+ sprintf(address, "null");
-+ else if (reiser4_blocknr_is_fake(block))
-+ sprintf(address, "%llx", (unsigned long long)(*block));
-+ else
-+ sprintf(address, "%llu", (unsigned long long)(*block));
-+ return address;
-+}
-+
-+/* release parent node during traversal */
-+static void put_parent(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-383", h != NULL);
-+ if (h->parent_lh->node != NULL) {
-+ longterm_unlock_znode(h->parent_lh);
-+ }
-+}
-+
-+/* helper function used by coord_by_key(): release reference to parent znode
-+ stored in handle before processing its child. */
-+static void hput(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-385", h != NULL);
-+ done_lh(h->parent_lh);
-+ done_lh(h->active_lh);
-+}
-+
-+/* Helper function used by cbk(): update delimiting keys of child node (stored
-+ in h->active_lh->node) using key taken from parent on the parent level. */
-+static int setup_delimiting_keys(cbk_handle * h /* search handle */ )
-+{
-+ znode *active;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1088", h != NULL);
-+
-+ active = h->active_lh->node;
-+
-+ /* fast check without taking dk lock. This is safe, because
-+ * JNODE_DKSET is never cleared once set. */
-+ if (!ZF_ISSET(active, JNODE_DKSET)) {
-+ tree = znode_get_tree(active);
-+ write_lock_dk(tree);
-+ if (!ZF_ISSET(active, JNODE_DKSET)) {
-+ znode_set_ld_key(active, &h->ld_key);
-+ znode_set_rd_key(active, &h->rd_key);
-+ ZF_SET(active, JNODE_DKSET);
-+ }
-+ write_unlock_dk(tree);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+/* true if @block makes sense for the @tree. Used to detect corrupted node
-+ * pointers */
-+static int
-+block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
-+ reiser4_tree * tree /* tree to check against */ )
-+{
-+ assert("nikita-757", block != NULL);
-+ assert("nikita-758", tree != NULL);
-+
-+ /* check to see if it exceeds the size of the device. */
-+ return reiser4_blocknr_is_sane_for(tree->super, block);
-+}
-+
-+/* check consistency of fields */
-+static int sanity_check(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-384", h != NULL);
-+
-+ if (h->level < h->stop_level) {
-+ h->error = "Buried under leaves";
-+ h->result = RETERR(-EIO);
-+ return LOOKUP_DONE;
-+ } else if (!block_nr_is_correct(&h->block, h->tree)) {
-+ h->error = "bad block number";
-+ h->result = RETERR(-EIO);
-+ return LOOKUP_DONE;
-+ } else
-+ return 0;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/status_flags.c linux-2.6.20/fs/reiser4/status_flags.c
---- linux-2.6.20.orig/fs/reiser4/status_flags.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/status_flags.c 2007-05-06 14:50:43.875030717 +0400
-@@ -0,0 +1,175 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Functions that deal with reiser4 status block, query status and update it, if needed */
-+
-+#include <linux/bio.h>
-+#include <linux/highmem.h>
-+#include <linux/fs.h>
-+#include <linux/blkdev.h>
-+#include "debug.h"
-+#include "dformat.h"
-+#include "status_flags.h"
-+#include "super.h"
-+
-+/* End I/O handler for the status bio. Once bi_size has dropped to zero it
-+   marks the page of the first io vector uptodate if BIO_UPTODATE is set, or
-+   clears uptodate and sets the error bit otherwise, and then unconditionally
-+   unlocks the page so that waiters can see that io was done.
-+   The bio is not freed here, because it is reused for later writes. */
-+static int reiser4_status_endio(struct bio *bio, unsigned int bytes_done,
-+				int err)
-+{
-+	struct page *page;
-+
-+	/* bi_size still non-zero: nothing to do yet, report 1 */
-+	if (bio->bi_size != 0)
-+		return 1;
-+
-+	page = bio->bi_io_vec->bv_page;
-+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+		ClearPageUptodate(page);
-+		SetPageError(page);
-+	} else {
-+		SetPageUptodate(page);
-+	}
-+	unlock_page(page);
-+	return 0;
-+}
-+
-+/* Initialise status code. This is expected to be called from the disk format
-+   code. @block is the number of the block where the status block lives.
-+
-+   Allocates one page and one bio, reads the status block synchronously and
-+   checks its magic. On success the page and the bio are stored in the super
-+   block private data and 0 is returned. On failure (-ENOMEM on allocation
-+   failure, -EIO on read error, -EINVAL on wrong magic) both are released
-+   and the super block fields stay NULL. */
-+int reiser4_status_init(reiser4_block_nr block)
-+{
-+	struct super_block *sb = reiser4_get_current_sb();
-+	struct reiser4_status *statuspage;
-+	struct bio *bio;
-+	struct page *page;
-+
-+	get_super_private(sb)->status_page = NULL;
-+	get_super_private(sb)->status_bio = NULL;
-+
-+	page = alloc_pages(reiser4_ctx_gfp_mask_get(), 0);
-+	if (!page)
-+		return -ENOMEM;
-+
-+	bio = bio_alloc(reiser4_ctx_gfp_mask_get(), 1);
-+	if (bio != NULL) {
-+		/* s_blocksize >> 9: file system block size in 512-byte units */
-+		bio->bi_sector = block * (sb->s_blocksize >> 9);
-+		bio->bi_bdev = sb->s_bdev;
-+		bio->bi_io_vec[0].bv_page = page;
-+		bio->bi_io_vec[0].bv_len = sb->s_blocksize;
-+		bio->bi_io_vec[0].bv_offset = 0;
-+		bio->bi_vcnt = 1;
-+		bio->bi_size = sb->s_blocksize;
-+		bio->bi_end_io = reiser4_status_endio;
-+	} else {
-+		__free_pages(page, 0);
-+		return -ENOMEM;
-+	}
-+	/* the end io handler unlocks the page, so waiting for the page lock
-+	   waits for the read to finish */
-+	lock_page(page);
-+	submit_bio(READ, bio);
-+	blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping);
-+	wait_on_page_locked(page);
-+	if (!PageUptodate(page)) {
-+		warning("green-2007",
-+			"I/O error while tried to read status page\n");
-+		/* the read has completed; drop the page and the bio here,
-+		   exactly as the wrong-magic path below does, instead of
-+		   leaking them */
-+		__free_pages(page, 0);
-+		bio_put(bio);
-+		return -EIO;
-+	}
-+
-+	statuspage = (struct reiser4_status *)kmap_atomic(page, KM_USER0);
-+	if (memcmp
-+	    (statuspage->magic, REISER4_STATUS_MAGIC,
-+	     sizeof(REISER4_STATUS_MAGIC))) {
-+		/* Magic does not match. */
-+		kunmap_atomic((char *)statuspage, KM_USER0);
-+		warning("green-2008", "Wrong magic in status block\n");
-+		__free_pages(page, 0);
-+		bio_put(bio);
-+		return -EINVAL;
-+	}
-+	kunmap_atomic((char *)statuspage, KM_USER0);
-+
-+	/* keep both around: they are reused by status query and write */
-+	get_super_private(sb)->status_page = page;
-+	get_super_private(sb)->status_bio = bio;
-+	return 0;
-+}
-+
-+/* Query the status of fs. Returns one of the REISER4_STATUS_MOUNT_* codes
-+   describing whether the FS can be safely mounted. If @status and/or
-+   @extended are given (non-NULL), the corresponding values from the status
-+   page are copied there. When no status page is available,
-+   REISER4_STATUS_MOUNT_UNKNOWN is returned and neither is touched. */
-+int reiser4_status_query(u64 * status, u64 * extended)
-+{
-+	struct super_block *sb = reiser4_get_current_sb();
-+	struct page *page = get_super_private(sb)->status_page;
-+	struct reiser4_status *statuspage;
-+	int retval;
-+
-+	/* No status page? */
-+	if (!page)
-+		return REISER4_STATUS_MOUNT_UNKNOWN;
-+
-+	statuspage = (struct reiser4_status *)kmap_atomic(page, KM_USER0);
-+
-+	/* FIXME: this cast is a hack for 32 bit arches to work. */
-+	switch ((long)le64_to_cpu(get_unaligned(&statuspage->status))) {
-+	case REISER4_STATUS_DAMAGED:
-+	case REISER4_STATUS_DESTROYED:
-+	case REISER4_STATUS_IOERROR:
-+		retval = REISER4_STATUS_MOUNT_RO;
-+		break;
-+	case REISER4_STATUS_CORRUPTED:
-+		retval = REISER4_STATUS_MOUNT_WARN;
-+		break;
-+	case REISER4_STATUS_OK:
-+		retval = REISER4_STATUS_MOUNT_OK;
-+		break;
-+	default:
-+		/* anything else, including combinations of flags */
-+		retval = REISER4_STATUS_MOUNT_UNKNOWN;
-+		break;
-+	}
-+
-+	if (status != NULL)
-+		*status = le64_to_cpu(get_unaligned(&statuspage->status));
-+	if (extended != NULL)
-+		*extended =
-+		    le64_to_cpu(get_unaligned(&statuspage->extended_status));
-+
-+	kunmap_atomic((char *)statuspage, KM_USER0);
-+	return retval;
-+}
-+
-+/* This function should be called when something bad happens (e.g. from
-+   reiser4_panic). It stores @status, @extended_status and @message in the
-+   status page and submits the status bio as a write.
-+   Returns -1 when there is no status page, 0 otherwise; completion of the
-+   write is not waited for. */
-+int reiser4_status_write(__u64 status, __u64 extended_status, char *message)
-+{
-+	struct super_block *sb = reiser4_get_current_sb();
-+	struct bio *bio = get_super_private(sb)->status_bio;
-+	struct page *page = get_super_private(sb)->status_page;
-+	struct reiser4_status *statuspage;
-+
-+	/* No status page? */
-+	if (page == NULL)
-+		return -1;
-+
-+	statuspage = (struct reiser4_status *)kmap_atomic(page, KM_USER0);
-+	put_unaligned(cpu_to_le64(status), &statuspage->status);
-+	put_unaligned(cpu_to_le64(extended_status),
-+		      &statuspage->extended_status);
-+	/* NOTE(review): strncpy leaves texterror without a terminating NUL
-+	   when @message has REISER4_TEXTERROR_LEN or more characters --
-+	   confirm that readers of the status block cope with that */
-+	strncpy(statuspage->texterror, message, REISER4_TEXTERROR_LEN);
-+	kunmap_atomic((char *)statuspage, KM_USER0);
-+
-+	/* re-arm the bio that was kept since reiser4_status_init().
-+	   NOTE(review): bi_sector is not set again here, whatever value the
-+	   previous submission left in the bio is reused -- confirm that it
-+	   still addresses the status block */
-+	bio->bi_bdev = sb->s_bdev;
-+	bio->bi_io_vec[0].bv_page = page;
-+	bio->bi_io_vec[0].bv_len = sb->s_blocksize;
-+	bio->bi_io_vec[0].bv_offset = 0;
-+	bio->bi_vcnt = 1;
-+	bio->bi_size = sb->s_blocksize;
-+	bio->bi_end_io = reiser4_status_endio;
-+
-+	/* Safe as nobody should touch our page. */
-+	lock_page(page);
-+	/* We can block now, but we have no other choice anyway */
-+	submit_bio(WRITE, bio);
-+	blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping);
-+	/* We do not wait for io to finish. */
-+	return 0;
-+}
-+
-+/* Frees the page with status and bio structure. Should be called by disk
-+   format at umount time.
-+   reiser4_status_init() leaves both pointers NULL when it fails (e.g. on
-+   wrong magic), so each of them is released only if it is set; calling this
-+   function without a status page is a harmless no-op. Always returns 0. */
-+int reiser4_status_finish(void)
-+{
-+	struct super_block *sb = reiser4_get_current_sb();
-+	reiser4_super_info_data *sbinfo = get_super_private(sb);
-+
-+	if (sbinfo->status_page != NULL) {
-+		__free_pages(sbinfo->status_page, 0);
-+		sbinfo->status_page = NULL;
-+	}
-+	if (sbinfo->status_bio != NULL) {
-+		bio_put(sbinfo->status_bio);
-+		sbinfo->status_bio = NULL;
-+	}
-+	return 0;
-+}
-diff -urN linux-2.6.20.orig/fs/reiser4/status_flags.h linux-2.6.20/fs/reiser4/status_flags.h
---- linux-2.6.20.orig/fs/reiser4/status_flags.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/status_flags.h 2007-05-06 14:50:43.875030717 +0400
-@@ -0,0 +1,43 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Here we declare structures and flags that store reiser4 status on disk.
-+ The status helps us to find out whether the filesystem is valid or whether
-+ it contains some critical, or not so critical, errors */
-+
-+#if !defined( __REISER4_STATUS_FLAGS_H__ )
-+#define __REISER4_STATUS_FLAGS_H__
-+
-+#include "dformat.h"
-+/* These are major status flags */
-+#define REISER4_STATUS_OK 0
-+#define REISER4_STATUS_CORRUPTED 0x1
-+#define REISER4_STATUS_DAMAGED 0x2
-+#define REISER4_STATUS_DESTROYED 0x4
-+#define REISER4_STATUS_IOERROR 0x8
-+
-+/* Return values for reiser4_status_query() */
-+#define REISER4_STATUS_MOUNT_OK 0
-+#define REISER4_STATUS_MOUNT_WARN 1
-+#define REISER4_STATUS_MOUNT_RO 2
-+#define REISER4_STATUS_MOUNT_UNKNOWN -1
-+
-+#define REISER4_TEXTERROR_LEN 256
-+
-+#define REISER4_STATUS_MAGIC "ReiSeR4StATusBl"
-+/* Layout of the reiser4 status block as it is read from and written to disk
-+   by status_flags.c.
-+   We probably need to keep its size under sector size which is 512 bytes */
-+struct reiser4_status {
-+ char magic[16]; /* REISER4_STATUS_MAGIC: 15 characters plus the terminating NUL */
-+ d64 status; /* Current FS state: REISER4_STATUS_* flags, stored little-endian */
-+ d64 extended_status; /* Any additional info that might make sense in addition to "status". E.g.
-+ last sector where io error happened if status is "io error encountered" */
-+ d64 stacktrace[10]; /* Last ten function calls made (addresses) */
-+ char texterror[REISER4_TEXTERROR_LEN]; /* Any error message if appropriate, otherwise filled with zeroes */
-+};
-+
-+int reiser4_status_init(reiser4_block_nr block);
-+int reiser4_status_query(u64 * status, u64 * extended);
-+int reiser4_status_write(u64 status, u64 extended_status, char *message);
-+int reiser4_status_finish(void);
-+
-+#endif
-diff -urN linux-2.6.20.orig/fs/reiser4/super.c linux-2.6.20/fs/reiser4/super.c
---- linux-2.6.20.orig/fs/reiser4/super.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/super.c 2007-05-06 14:50:43.875030717 +0400
-@@ -0,0 +1,316 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Super-block manipulations. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/space/space_allocator.h"
-+#include "plugin/plugin.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+
-+static __u64 reserved_for_gid(const struct super_block *super, gid_t gid);
-+static __u64 reserved_for_uid(const struct super_block *super, uid_t uid);
-+static __u64 reserved_for_root(const struct super_block *super);
-+
-+/* Return reiser4-specific part of super block @super, i.e. its ->s_fs_info.
-+ * No assertion is made about @super. */
-+reiser4_super_info_data *get_super_private_nocheck(const struct super_block
-+						   *super)
-+{
-+	void *fs_info = super->s_fs_info;
-+
-+	return (reiser4_super_info_data *) fs_info;
-+}
-+
-+/* Return reiser4 fstype (REISER4_SUPER_MAGIC): the value that is reported in
-+ * the ->f_type field by statfs() */
-+long reiser4_statfs_type(const struct super_block *super UNUSED_ARG)
-+{
-+	long fstype;
-+
-+	assert("nikita-448", super != NULL);
-+	assert("nikita-449", is_reiser4_super(super));
-+
-+	fstype = (long)REISER4_SUPER_MAGIC;
-+	return fstype;
-+}
-+
-+/* functions to read/modify fields of reiser4_super_info_data */
-+
-+/* total number of blocks recorded for the file system at @super */
-+__u64 reiser4_block_count(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vs-494", super != NULL);
-+	assert("vs-495", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->block_count;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * block count of the "current" file system; only built with REISER4_DEBUG
-+ */
-+__u64 reiser4_current_block_count(void)
-+{
-+	reiser4_super_info_data *sbinfo = get_current_super_private();
-+
-+	return sbinfo->block_count;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* set number of blocks in the file system to @nr and derive the number of
-+ * reserved blocks from it */
-+void reiser4_set_block_count(const struct super_block *super, __u64 nr)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vs-501", super != NULL);
-+	assert("vs-502", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	sbinfo->block_count = nr;
-+	/*
-+	 * A proper calculation of the reserved space counter (5% of the
-+	 * device block counter) needs a 64 bit division, which is missing in
-+	 * Linux on the i386 platform. Because a precise calculation is not
-+	 * needed here, the div64 operation is replaced by a combination of
-+	 * multiplication and shift: 51. / (2^10) == .0498 .
-+	 * FIXME: this is a bug. It comes up only for very small filesystems
-+	 * which probably are never used. Nevertheless, it is a bug. Number of
-+	 * reserved blocks must be not less than maximal number of blocks which
-+	 * get grabbed with BA_RESERVED.
-+	 */
-+	sbinfo->blocks_reserved = ((nr * 51) >> 10);
-+}
-+
-+/* number of used blocks (->blocks_used) of the file system at @super */
-+__u64 reiser4_data_blocks(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-452", super != NULL);
-+	assert("nikita-453", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_used;
-+}
-+
-+/* record @nr as the number of used blocks of the file system at @super */
-+void reiser4_set_data_blocks(const struct super_block *super, __u64 nr)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vs-503", super != NULL);
-+	assert("vs-504", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	sbinfo->blocks_used = nr;
-+}
-+
-+/* number of free blocks (->blocks_free) of the file system at @super */
-+__u64 reiser4_free_blocks(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-454", super != NULL);
-+	assert("nikita-455", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_free;
-+}
-+
-+/* record @nr as the number of free blocks of the file system at @super */
-+void reiser4_set_free_blocks(const struct super_block *super, __u64 nr)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vs-505", super != NULL);
-+	assert("vs-506", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	sbinfo->blocks_free = nr;
-+}
-+
-+/* mkfs identifier (->mkfs_id) stored for the file system at @super */
-+__u32 reiser4_mkfs_id(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vpf-221", super != NULL);
-+	assert("vpf-222", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->mkfs_id;
-+}
-+
-+/* committed free block counter (->blocks_free_committed) of the file system
-+ * at @super */
-+__u64 reiser4_free_committed_blocks(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vs-497", super != NULL);
-+	assert("vs-498", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_free_committed;
-+}
-+
-+/* amount of blocks in the file system reserved for @uid and @gid: the sum of
-+ * the per-group, per-user and (for uid 0) root reservations, each counted
-+ * only when the matching REISER4_SUPPORT_*_SPACE_RESERVATION is enabled */
-+long reiser4_reserved_blocks(const struct super_block *super,
-+			     uid_t uid /* user id */ ,
-+			     gid_t gid /* group id */ )
-+{
-+	long reserved = 0;
-+
-+	assert("nikita-456", super != NULL);
-+	assert("nikita-457", is_reiser4_super(super));
-+
-+	if (REISER4_SUPPORT_GID_SPACE_RESERVATION)
-+		reserved += reserved_for_gid(super, gid);
-+
-+	if (REISER4_SUPPORT_UID_SPACE_RESERVATION)
-+		reserved += reserved_for_uid(super, uid);
-+
-+	if (REISER4_SUPPORT_ROOT_SPACE_RESERVATION && (uid == 0))
-+		reserved += reserved_for_root(super);
-+
-+	return reserved;
-+}
-+
-+/* current value of the grabbed blocks counter (->blocks_grabbed) */
-+__u64 reiser4_grabbed_blocks(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("zam-512", super != NULL);
-+	assert("zam-513", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_grabbed;
-+}
-+
-+/* current value of the flush reserved counter (->blocks_flush_reserved) */
-+__u64 reiser4_flush_reserved(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("vpf-285", super != NULL);
-+	assert("vpf-286", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_flush_reserved;
-+}
-+
-+/* current value of the counter of fake allocated formatted blocks */
-+__u64 reiser4_fake_allocated(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("zam-516", super != NULL);
-+	assert("zam-517", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_fake_allocated;
-+}
-+
-+/* current value of the counter of fake allocated unformatted blocks */
-+__u64 reiser4_fake_allocated_unformatted(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	/* NOTE(review): same assertion labels as reiser4_fake_allocated() */
-+	assert("zam-516", super != NULL);
-+	assert("zam-517", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_fake_allocated_unformatted;
-+}
-+
-+/* current value of the counter of clustered blocks (->blocks_clustered) */
-+__u64 reiser4_clustered_blocks(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("edward-601", super != NULL);
-+	assert("edward-602", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->blocks_clustered;
-+}
-+
-+/* pointer to the space allocator embedded in the private data of @super */
-+reiser4_space_allocator * reiser4_get_space_allocator(const struct super_block
-+						      *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-1965", super != NULL);
-+	assert("nikita-1966", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return &sbinfo->space_allocator;
-+}
-+
-+/* fake inode (->fake) used to bind formatted nodes in the page cache */
-+struct inode *reiser4_get_super_fake(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-1757", super != NULL);
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->fake;
-+}
-+
-+/* fake inode (->cc) used to bind copied on capture nodes in the page cache */
-+struct inode *reiser4_get_cc_fake(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	/* NOTE(review): same assertion label as reiser4_get_super_fake() */
-+	assert("nikita-1757", super != NULL);
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->cc;
-+}
-+
-+/* fake inode (->bitmap) used to bind bitmaps and journal heads */
-+struct inode *reiser4_get_bitmap_fake(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-17571", super != NULL);
-+
-+	sbinfo = get_super_private(super);
-+	return sbinfo->bitmap;
-+}
-+
-+/* pointer to the reiser4 tree embedded in the private data of @super */
-+reiser4_tree *reiser4_get_tree(const struct super_block * super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("nikita-460", super != NULL);
-+	assert("nikita-461", is_reiser4_super(super));
-+
-+	sbinfo = get_super_private(super);
-+	return &sbinfo->tree;
-+}
-+
-+/* Check that @super is (looks like) a reiser4 super block: it is not NULL,
-+   has private data, and its ->s_op points to the super operations stored in
-+   that private data. This is mainly for use in assertions. */
-+int is_reiser4_super(const struct super_block *super)
-+{
-+	if (super == NULL)
-+		return 0;
-+
-+	if (get_super_private(super) == NULL)
-+		return 0;
-+
-+	return super->s_op == &(get_super_private(super)->ops.super);
-+}
-+
-+/* test bit number @f in the ->fs_flags word of the file system at @super */
-+int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+	return test_bit((int)f, &sbinfo->fs_flags);
-+}
-+
-+/* amount of blocks reserved for group @gid in the file system at @super.
-+ * Both arguments are unused: the result is always 0. */
-+static __u64 reserved_for_gid(const struct super_block *super UNUSED_ARG,
-+			      gid_t gid UNUSED_ARG)
-+{
-+	return 0;
-+}
-+
-+/* amount of blocks reserved for user @uid in the file system at @super.
-+ * Both arguments are unused: the result is always 0. */
-+static __u64 reserved_for_uid(const struct super_block *super UNUSED_ARG,
-+			      uid_t uid UNUSED_ARG)
-+{
-+	return 0;
-+}
-+
-+/* amount of blocks reserved for the super user in the file system at @super.
-+ * The argument is unused: the result is always 0. */
-+static __u64 reserved_for_root(const struct super_block *super UNUSED_ARG)
-+{
-+	return 0;
-+}
-+
-+/*
-+ * true if block number @blk makes sense for the file system at @super: it is
-+ * either a fake block number or smaller than the block count of @super.
-+ */
-+int
-+reiser4_blocknr_is_sane_for(const struct super_block *super,
-+			    const reiser4_block_nr * blk)
-+{
-+	assert("nikita-2957", super != NULL);
-+	assert("nikita-2958", blk != NULL);
-+
-+	/* fake block numbers are accepted without a range check */
-+	if (reiser4_blocknr_is_fake(blk))
-+		return 1;
-+
-+	return *blk < get_super_private(super)->block_count;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * true, if block number @blk makes sense for the current file system; only
-+ * built with REISER4_DEBUG
-+ */
-+int reiser4_blocknr_is_sane(const reiser4_block_nr * blk)
-+{
-+	struct super_block *current_sb = reiser4_get_current_sb();
-+
-+	return reiser4_blocknr_is_sane_for(current_sb, blk);
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/super.h linux-2.6.20/fs/reiser4/super.h
---- linux-2.6.20.orig/fs/reiser4/super.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/super.h 2007-05-06 14:50:43.875030717 +0400
-@@ -0,0 +1,464 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Super-block functions. See super.c for details. */
-+
-+#if !defined( __REISER4_SUPER_H__ )
-+#define __REISER4_SUPER_H__
-+
-+#include "tree.h"
-+#include "entd.h"
-+#include "wander.h"
-+#include "fsdata.h"
-+#include "plugin/object.h"
-+#include "plugin/space/space_allocator.h"
-+
-+/*
-+ * Flush algorithms parameters: four unsigned tunables kept per super block
-+ * (see the ->flush field of reiser4_super_info_data).
-+ * NOTE(review): the meaning and units of the individual fields are defined
-+ * by the flush code, which is not visible here -- confirm before documenting.
-+ */
-+typedef struct {
-+ unsigned relocate_threshold;
-+ unsigned relocate_distance;
-+ unsigned written_threshold;
-+ unsigned scan_maxnodes;
-+} flush_params;
-+
-+/*
-+ * File-system wide flags. The values are bit numbers within the ->fs_flags
-+ * word of reiser4_super_info_data (reiser4_is_set() passes them to
-+ * test_bit()). Bit 4 is not assigned here.
-+ */
-+typedef enum {
-+ /*
-+ * True if this file system doesn't support hard-links (multiple names)
-+ * for directories: this is default UNIX behavior.
-+ *
-+ * If hard-links on directories are not allowed, file system is Acyclic
-+ * Directed Graph (modulo dot, and dotdot, of course).
-+ *
-+ * This is used by reiser4_link().
-+ */
-+ REISER4_ADG = 0,
-+ /*
-+ * set if all nodes in internal tree have the same node layout plugin.
-+ * If so, znode_guess_plugin() will return tree->node_plugin instead
-+ * of guessing plugin by plugin id stored in the node.
-+ */
-+ REISER4_ONE_NODE_PLUGIN = 1,
-+ /* if set, bsd gid assignment is supported. */
-+ REISER4_BSD_GID = 2,
-+ /* [mac]_time are 32 bit in inode */
-+ REISER4_32_BIT_TIMES = 3,
-+ /* load all bitmap blocks at mount time
-+ * NOTE(review): the flag name says "don't load" -- confirm which
-+ * behaviour a set bit selects */
-+ REISER4_DONT_LOAD_BITMAP = 5,
-+ /* enforce atomicity during write(2) */
-+ REISER4_ATOMIC_WRITE = 6,
-+ /* don't use write barriers in the log writer code. */
-+ REISER4_NO_WRITE_BARRIER = 7
-+} reiser4_fs_flag;
-+
-+/*
-+ * VFS related operation vectors, kept per super block in the ->ops field of
-+ * reiser4_super_info_data. is_reiser4_super() recognizes a reiser4 super
-+ * block by comparing its ->s_op with the address of ->ops.super.
-+ */
-+typedef struct object_ops {
-+ struct super_operations super;
-+ struct dentry_operations dentry;
-+ struct export_operations export;
-+} object_ops;
-+
-+/* reiser4-specific part of super block
-+
-+ Locking
-+
-+ Fields immutable after mount:
-+
-+ ->oid*
-+ ->space*
-+ ->default_[ug]id
-+ ->mkfs_id
-+ ->trace_flags
-+ ->debug_flags
-+ ->fs_flags
-+ ->df_plug
-+ ->optimal_io_size
-+ ->plug
-+ ->flush
-+ ->u (bad name)
-+ ->txnmgr
-+ ->ra_params
-+ ->fsuid
-+ ->journal_header
-+ ->journal_footer
-+
-+ Fields protected by ->lnode_guard
-+
-+ ->lnode_htable
-+
-+ Fields protected by per-super block spin lock
-+
-+ ->block_count
-+ ->blocks_used
-+ ->blocks_free
-+ ->blocks_free_committed
-+ ->blocks_grabbed
-+ ->blocks_fake_allocated_unformatted
-+ ->blocks_fake_allocated
-+ ->blocks_flush_reserved
-+ ->eflushed
-+ ->blocknr_hint_default
-+
-+ After journal replaying during mount,
-+
-+ ->last_committed_tx
-+
-+ is protected by ->tmgr.commit_mutex
-+
-+ Invariants involving this data-type:
-+
-+ [sb-block-counts]
-+ [sb-grabbed]
-+ [sb-fake-allocated]
-+*/
-+struct reiser4_super_info_data {
-+ /*
-+ * guard spinlock which protects reiser4 super block fields (currently
-+ * blocks_free, blocks_free_committed)
-+ */
-+ spinlock_t guard;
-+
-+ /* next oid that will be returned by oid_allocate() */
-+ oid_t next_to_use;
-+ /* total number of used oids */
-+ oid_t oids_in_use;
-+
-+ /* space manager plugin */
-+ reiser4_space_allocator space_allocator;
-+
-+ /* reiser4 internal tree */
-+ reiser4_tree tree;
-+
-+ /*
-+ * default user id used for light-weight files without their own
-+ * stat-data.
-+ */
-+ uid_t default_uid;
-+
-+ /*
-+ * default group id used for light-weight files without their own
-+ * stat-data.
-+ */
-+ gid_t default_gid;
-+
-+ /* mkfs identifier generated at mkfs time. */
-+ __u32 mkfs_id;
-+ /* amount of blocks in a file system */
-+ __u64 block_count;
-+
-+ /* inviolable reserve */
-+ __u64 blocks_reserved;
-+
-+ /* amount of blocks used by file system data and meta-data. */
-+ __u64 blocks_used;
-+
-+ /*
-+ * amount of free blocks. This is "working" free blocks counter. It is
-+ * like "working" bitmap, please see block_alloc.c for description.
-+ */
-+ __u64 blocks_free;
-+
-+ /*
-+ * free block count for fs committed state. This is "commit" version of
-+ * free block counter.
-+ */
-+ __u64 blocks_free_committed;
-+
-+ /*
-+ * number of blocks reserved for further allocation, for all
-+ * threads.
-+ */
-+ __u64 blocks_grabbed;
-+
-+ /* number of fake allocated unformatted blocks in tree. */
-+ __u64 blocks_fake_allocated_unformatted;
-+
-+ /* number of fake allocated formatted blocks in tree. */
-+ __u64 blocks_fake_allocated;
-+
-+ /* number of blocks reserved for flush operations. */
-+ __u64 blocks_flush_reserved;
-+
-+ /* number of blocks reserved for cluster operations. */
-+ __u64 blocks_clustered;
-+
-+ /* unique file-system identifier */
-+ __u32 fsuid;
-+
-+ /* On-disk format version. If it is not equal to the disk_format
-+ plugin version, some format updates (e.g. enlarging plugin
-+ set, etc) may take place on mount. */
-+ int version;
-+
-+ /* file-system wide flags. See reiser4_fs_flag enum */
-+ unsigned long fs_flags;
-+
-+ /* transaction manager */
-+ txn_mgr tmgr;
-+
-+ /* ent thread */
-+ entd_context entd;
-+
-+ /* fake inode used to bind formatted nodes */
-+ struct inode *fake;
-+ /* inode used to bind bitmaps (and journal heads) */
-+ struct inode *bitmap;
-+ /* inode used to bind copied on capture nodes */
-+ struct inode *cc;
-+
-+ /* disk layout plugin */
-+ disk_format_plugin *df_plug;
-+
-+ /* disk layout specific part of reiser4 super info data */
-+ union {
-+ format40_super_info format40;
-+ } u;
-+
-+ /* value we return in st_blksize on stat(2) */
-+ unsigned long optimal_io_size;
-+
-+ /* parameters for the flush algorithm */
-+ flush_params flush;
-+
-+ /* pointers to jnodes for journal header and footer */
-+ jnode *journal_header;
-+ jnode *journal_footer;
-+
-+ journal_location jloc;
-+
-+ /* head block number of last committed transaction */
-+ __u64 last_committed_tx;
-+
-+ /*
-+ * we remember last written location for using as a hint for new block
-+ * allocation
-+ */
-+ __u64 blocknr_hint_default;
-+
-+ /* committed number of files (oid allocator state variable ) */
-+ __u64 nr_files_committed;
-+
-+ ra_params_t ra_params;
-+
-+ /*
-+ * A mutex for serializing cut tree operations when the file system is
-+ * out of free space: only one cut_tree thread at a time is allowed to
-+ * grab space from the reserved area (it is 5% of disk space)
-+ */
-+ struct mutex delete_mutex;
-+ /* task owning ->delete_mutex */
-+ struct task_struct *delete_mutex_owner;
-+
-+ /* Diskmap's blocknumber */
-+ __u64 diskmap_block;
-+
-+ /* What to do in case of error */
-+ int onerror;
-+
-+ /* operations for objects on this file system */
-+ object_ops ops;
-+
-+ /*
-+ * structure to maintain d_cursors. See plugin/file_ops_readdir.c for
-+ * more details
-+ */
-+ d_cursor_info d_info;
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ /* Alternative master superblock offset (in bytes) */
-+ unsigned long altsuper;
-+#endif
-+ struct repacker *repacker;
-+ struct page *status_page;
-+ struct bio *status_bio;
-+
-+#if REISER4_DEBUG
-+ /*
-+ * minimum used blocks value (includes super blocks, bitmap blocks and
-+ * other fs reserved areas), depends on fs format and fs size.
-+ */
-+ __u64 min_blocks_used;
-+
-+ /*
-+ * when debugging is on, all jnodes (including znodes, bitmaps, etc.)
-+ * are kept on a list anchored at sbinfo->all_jnodes. This list is
-+ * protected by sbinfo->all_guard spin lock. This lock should be taken
-+ * with _irq modifier, because it is also modified from interrupt
-+ * contexts (by RCU).
-+ */
-+ spinlock_t all_guard;
-+ /* list of all jnodes */
-+ struct list_head all_jnodes;
-+#endif
-+ struct dentry *debugfs_root;
-+};
-+
-+extern reiser4_super_info_data *get_super_private_nocheck(const struct
-+ super_block *super);
-+
-+/* Return reiser4-specific part of super block @super (its ->s_fs_info),
-+ * asserting that @super is not NULL. */
-+static inline reiser4_super_info_data *get_super_private(const struct
-+							  super_block *super)
-+{
-+	void *fs_info;
-+
-+	assert("nikita-447", super != NULL);
-+
-+	fs_info = super->s_fs_info;
-+	return (reiser4_super_info_data *) fs_info;
-+}
-+
-+/* pointer to the ent context embedded in the private data of @super */
-+static inline entd_context *get_entd_context(struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+	return &sbinfo->entd;
-+}
-+
-+/* "Current" super-block: main super block used during current system
-+ call. Reference to this super block is stored in reiser4_context.
-+ Returns the ->super field of the context that get_current_context()
-+ yields; no check of that context is made here. */
-+static inline struct super_block *reiser4_get_current_sb(void)
-+{
-+ return get_current_context()->super;
-+}
-+
-+/* Reiser4-specific part of the "current" super-block, i.e. of the main super
-+   block used during the current system call (the one that
-+   reiser4_get_current_sb() returns). */
-+static inline reiser4_super_info_data *get_current_super_private(void)
-+{
-+	struct super_block *current_sb = reiser4_get_current_sb();
-+
-+	return get_super_private(current_sb);
-+}
-+
-+/* pointer to the ->ra_params of the "current" super-block */
-+static inline ra_params_t *get_current_super_ra_params(void)
-+{
-+	reiser4_super_info_data *sbinfo = get_current_super_private();
-+
-+	return &sbinfo->ra_params;
-+}
-+
-+/*
-+ * non-zero, if file system on @super is read-only (MS_RDONLY is set in its
-+ * ->s_flags)
-+ */
-+static inline int rofs_super(struct super_block *super)
-+{
-+	unsigned long mount_flags = super->s_flags;
-+
-+	return mount_flags & MS_RDONLY;
-+}
-+
-+/*
-+ * non-zero, if the super block of @tree belongs to a read-only file system
-+ */
-+static inline int rofs_tree(reiser4_tree * tree)
-+{
-+	struct super_block *sb = tree->super;
-+
-+	return rofs_super(sb);
-+}
-+
-+/*
-+ * non-zero, if the file system @inode lives on is read-only
-+ */
-+static inline int rofs_inode(struct inode *inode)
-+{
-+	struct super_block *sb = inode->i_sb;
-+
-+	return rofs_super(sb);
-+}
-+
-+/*
-+ * non-zero, if the file system @node lives on is read-only
-+ */
-+static inline int rofs_jnode(jnode * node)
-+{
-+	reiser4_tree *tree = jnode_get_tree(node);
-+
-+	return rofs_tree(tree);
-+}
-+
-+extern __u64 reiser4_current_block_count(void);
-+
-+extern void build_object_ops(struct super_block *super, object_ops * ops);
-+
-+#define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */
-+
-+/* take the ->guard spin lock of @sbinfo */
-+static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo)
-+{
-+	spinlock_t *guard = &(sbinfo->guard);
-+
-+	spin_lock(guard);
-+}
-+
-+/* release the ->guard spin lock of @sbinfo, after checking that it is held */
-+static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo)
-+{
-+	spinlock_t *guard = &(sbinfo->guard);
-+
-+	assert_spin_locked(guard);
-+	spin_unlock(guard);
-+}
-+
-+extern __u64 reiser4_flush_reserved(const struct super_block *);
-+extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f);
-+extern long reiser4_statfs_type(const struct super_block *super);
-+extern __u64 reiser4_block_count(const struct super_block *super);
-+extern void reiser4_set_block_count(const struct super_block *super, __u64 nr);
-+extern __u64 reiser4_data_blocks(const struct super_block *super);
-+extern void reiser4_set_data_blocks(const struct super_block *super, __u64 nr);
-+extern __u64 reiser4_free_blocks(const struct super_block *super);
-+extern void reiser4_set_free_blocks(const struct super_block *super, __u64 nr);
-+extern __u32 reiser4_mkfs_id(const struct super_block *super);
-+
-+extern __u64 reiser4_free_committed_blocks(const struct super_block *super);
-+
-+extern __u64 reiser4_grabbed_blocks(const struct super_block *);
-+extern __u64 reiser4_fake_allocated(const struct super_block *);
-+extern __u64 reiser4_fake_allocated_unformatted(const struct super_block *);
-+extern __u64 reiser4_clustered_blocks(const struct super_block *);
-+
-+extern long reiser4_reserved_blocks(const struct super_block *super, uid_t uid,
-+ gid_t gid);
-+
-+extern reiser4_space_allocator *
-+reiser4_get_space_allocator(const struct super_block *super);
-+extern reiser4_oid_allocator *
-+reiser4_get_oid_allocator(const struct super_block *super);
-+extern struct inode *reiser4_get_super_fake(const struct super_block *super);
-+extern struct inode *reiser4_get_cc_fake(const struct super_block *super);
-+extern struct inode *reiser4_get_bitmap_fake(const struct super_block *super);
-+extern reiser4_tree *reiser4_get_tree(const struct super_block *super);
-+extern int is_reiser4_super(const struct super_block *super);
-+
-+extern int reiser4_blocknr_is_sane(const reiser4_block_nr * blk);
-+extern int reiser4_blocknr_is_sane_for(const struct super_block *super,
-+ const reiser4_block_nr * blk);
-+extern int reiser4_fill_super(struct super_block *s, void *data, int silent);
-+extern int reiser4_done_super(struct super_block *s);
-+
-+/* step of fill super */
-+extern int reiser4_init_fs_info(struct super_block *);
-+extern void reiser4_done_fs_info(struct super_block *);
-+extern int reiser4_init_super_data(struct super_block *, char *opt_string);
-+extern int reiser4_init_read_super(struct super_block *, int silent);
-+extern int reiser4_init_root_inode(struct super_block *);
-+extern reiser4_plugin *get_default_plugin(pset_member memb);
-+
-+/* Maximal possible object id. */
-+#define ABSOLUTE_MAX_OID ((oid_t)~0)
-+
-+#define OIDS_RESERVED ( 1 << 16 )
-+int oid_init_allocator(struct super_block *, oid_t nr_files, oid_t next);
-+oid_t oid_allocate(struct super_block *);
-+int oid_release(struct super_block *, oid_t);
-+oid_t oid_next(const struct super_block *);
-+void oid_count_allocated(void);
-+void oid_count_released(void);
-+long oids_used(const struct super_block *);
-+
-+#if REISER4_DEBUG
-+void print_fs_info(const char *prefix, const struct super_block *);
-+#endif
-+
-+extern void destroy_reiser4_cache(struct kmem_cache **);
-+
-+extern struct super_operations reiser4_super_operations;
-+extern struct export_operations reiser4_export_operations;
-+extern struct dentry_operations reiser4_dentry_operations;
-+
-+/* __REISER4_SUPER_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/super_ops.c linux-2.6.20/fs/reiser4/super_ops.c
---- linux-2.6.20.orig/fs/reiser4/super_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/super_ops.c 2007-05-06 14:50:43.879031967 +0400
-@@ -0,0 +1,728 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "flush.h"
-+#include "safe_link.h"
-+
-+#include <linux/vfs.h>
-+#include <linux/writeback.h>
-+#include <linux/mount.h>
-+#include <linux/seq_file.h>
-+#include <linux/debugfs.h>
-+
-+/* slab cache for inodes */
-+static struct kmem_cache *inode_cache;
-+
-+static struct dentry *reiser4_debugfs_root = NULL;
-+
-+/**
-+ * init_once - constructor for reiser4 inodes
-+ * @obj: inode to be initialized
-+ * @cache: cache @obj belongs to
-+ * @flags: SLAB flags
-+ *
-+ * Slab constructor of the reiser4 inode cache; it is set on inode cache
-+ * creation. The object is initialized only when @flags carry
-+ * SLAB_CTOR_CONSTRUCTOR without SLAB_CTOR_VERIFY; otherwise nothing is done.
-+ */
-+static void init_once(void *obj, struct kmem_cache *cache, unsigned long flags)
-+{
-+	reiser4_inode_object *info = obj;
-+
-+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) !=
-+	    SLAB_CTOR_CONSTRUCTOR)
-+		return;
-+
-+	/* initialize vfs inode */
-+	inode_init_once(&info->vfs_inode);
-+
-+	/*
-+	 * initialize reiser4 specific part of inode.
-+	 * NOTE-NIKITA add here initializations for locks, list heads,
-+	 * etc. that will be added to our private inode part.
-+	 */
-+	INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
-+	init_rwsem(&info->p.conv_sem);
-+	/* init semaphore which is used during inode loading */
-+	loading_init_once(&info->p);
-+	INIT_RADIX_TREE(jnode_tree_by_reiser4_inode(&info->p), GFP_ATOMIC);
-+#if REISER4_DEBUG
-+	info->p.nr_jnodes = 0;
-+#endif
-+}
-+
-+/**
-+ * init_inodes - create znode cache
-+ *
-+ * Initializes slab cache of inodes. It is part of reiser4 module initialization.
-+ */
-+static int init_inodes(void)
-+{
-+ inode_cache = kmem_cache_create("reiser4_inode",
-+ sizeof(reiser4_inode_object),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, init_once, NULL);
-+ if (inode_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * done_inodes - delete inode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+static void done_inodes(void)
-+{
-+ destroy_reiser4_cache(&inode_cache);
-+}
-+
-+/**
-+ * reiser4_alloc_inode - alloc_inode of super operations
-+ * @super: super block new inode is allocated for
-+ *
-+ * Allocates new inode, initializes reiser4 specific part of it.
-+ */
-+static struct inode *reiser4_alloc_inode(struct super_block *super)
-+{
-+ reiser4_inode_object *obj;
-+
-+ assert("nikita-1696", super != NULL);
-+ obj = kmem_cache_alloc(inode_cache, reiser4_ctx_gfp_mask_get());
-+ if (obj != NULL) {
-+ reiser4_inode *info;
-+
-+ info = &obj->p;
-+
-+ info->pset = plugin_set_get_empty();
-+ info->hset = plugin_set_get_empty();
-+ info->extmask = 0;
-+ info->locality_id = 0ull;
-+ info->plugin_mask = 0;
-+ info->heir_mask = 0;
-+#if !REISER4_INO_IS_OID
-+ info->oid_hi = 0;
-+#endif
-+ reiser4_seal_init(&info->sd_seal, NULL, NULL);
-+ coord_init_invalid(&info->sd_coord, NULL);
-+ info->flags = 0;
-+ spin_lock_init(&info->guard);
-+ /* this deals with info's loading semaphore */
-+ loading_alloc(info);
-+ info->vroot = UBER_TREE_ADDR;
-+ return &obj->vfs_inode;
-+ } else
-+ return NULL;
-+}
-+
-+/**
-+ * reiser4_destroy_inode - destroy_inode of super operations
-+ * @inode: inode being destroyed
-+ *
-+ * Puts reiser4 specific portion of inode, frees memory occupied by inode.
-+ */
-+static void reiser4_destroy_inode(struct inode *inode)
-+{
-+ reiser4_inode *info;
-+
-+ info = reiser4_inode_data(inode);
-+
-+ assert("vs-1220", inode_has_no_jnodes(info));
-+
-+ if (!is_bad_inode(inode) && is_inode_loaded(inode)) {
-+ file_plugin *fplug = inode_file_plugin(inode);
-+ if (fplug->destroy_inode != NULL)
-+ fplug->destroy_inode(inode);
-+ }
-+ reiser4_dispose_cursors(inode);
-+ if (info->pset)
-+ plugin_set_put(info->pset);
-+ if (info->hset)
-+ plugin_set_put(info->hset);
-+
-+ /*
-+ * cannot add similar assertion about ->i_list as prune_icache return
-+ * inode into slab with dangling ->list.{next,prev}. This is safe,
-+ * because they are re-initialized in the new_inode().
-+ */
-+ assert("nikita-2895", list_empty(&inode->i_dentry));
-+ assert("nikita-2896", hlist_unhashed(&inode->i_hash));
-+ assert("nikita-2898", list_empty_careful(get_readdir_list(inode)));
-+
-+ /* this deals with info's loading semaphore */
-+ loading_destroy(info);
-+
-+ kmem_cache_free(inode_cache,
-+ container_of(info, reiser4_inode_object, p));
-+}
-+
-+/**
-+ * reiser4_dirty_inode - dirty_inode of super operations
-+ * @inode: inode being dirtied
-+ *
-+ * Updates stat data.
-+ */
-+static void reiser4_dirty_inode(struct inode *inode)
-+{
-+ int result;
-+
-+ if (!is_in_reiser4_context())
-+ return;
-+ assert("", !IS_RDONLY(inode));
-+ assert("", (inode_file_plugin(inode)->estimate.update(inode) <=
-+ get_current_context()->grabbed_blocks));
-+
-+ result = reiser4_update_sd(inode);
-+ if (result)
-+ warning("", "failed to dirty inode for %llu: %d",
-+ get_inode_oid(inode), result);
-+}
-+
-+/**
-+ * reiser4_delete_inode - delete_inode of super operations
-+ * @inode: inode to delete
-+ *
-+ * Calls file plugin's delete_object method to delete object items from
-+ * filesystem tree and calls clear_inode.
-+ */
-+static void reiser4_delete_inode(struct inode *inode)
-+{
-+ reiser4_context *ctx;
-+ file_plugin *fplug;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-15", "failed to init context");
-+ return;
-+ }
-+
-+ if (is_inode_loaded(inode)) {
-+ fplug = inode_file_plugin(inode);
-+ if (fplug != NULL && fplug->delete_object != NULL)
-+ fplug->delete_object(inode);
-+ }
-+
-+ truncate_inode_pages(&inode->i_data, 0);
-+ inode->i_blocks = 0;
-+ clear_inode(inode);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_put_super - put_super of super operations
-+ * @super: super block to free
-+ *
-+ * Stops daemons, release resources, umounts in short.
-+ */
-+static void reiser4_put_super(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ reiser4_context *ctx;
-+
-+ sbinfo = get_super_private(super);
-+ assert("vs-1699", sbinfo);
-+
-+ debugfs_remove(sbinfo->tmgr.debugfs_atom_count);
-+ debugfs_remove(sbinfo->tmgr.debugfs_id_count);
-+ debugfs_remove(sbinfo->debugfs_root);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-17", "failed to init context");
-+ return;
-+ }
-+
-+ /* have disk format plugin to free its resources */
-+ if (get_super_private(super)->df_plug->release)
-+ get_super_private(super)->df_plug->release(super);
-+
-+ reiser4_done_formatted_fake(super);
-+
-+ /* stop daemons: ktxnmgr and entd */
-+ reiser4_done_entd(super);
-+ reiser4_done_ktxnmgrd(super);
-+ reiser4_done_txnmgr(&sbinfo->tmgr);
-+
-+ reiser4_done_fs_info(super);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_write_super - write_super of super operations
-+ * @super: super block to write
-+ *
-+ * Captures znode associated with super block, comit all transactions.
-+ */
-+static void reiser4_write_super(struct super_block *super)
-+{
-+ int ret;
-+ reiser4_context *ctx;
-+
-+ assert("vs-1700", !rofs_super(super));
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-16", "failed to init context");
-+ return;
-+ }
-+
-+ ret = reiser4_capture_super_block(super);
-+ if (ret != 0)
-+ warning("vs-1701",
-+ "reiser4_capture_super_block failed in write_super: %d",
-+ ret);
-+ ret = txnmgr_force_commit_all(super, 0);
-+ if (ret != 0)
-+ warning("jmacd-77113",
-+ "txn_force failed in write_super: %d", ret);
-+
-+ super->s_dirt = 0;
-+
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_statfs - statfs of super operations
-+ * @super: super block of file system in queried
-+ * @stafs: buffer to fill with statistics
-+ *
-+ * Returns information about filesystem.
-+ */
-+static int reiser4_statfs(struct dentry *dentry, struct kstatfs *statfs)
-+{
-+ sector_t total;
-+ sector_t reserved;
-+ sector_t free;
-+ sector_t forroot;
-+ sector_t deleted;
-+ reiser4_context *ctx;
-+ struct super_block *super = dentry->d_sb;
-+
-+ assert("nikita-408", super != NULL);
-+ assert("nikita-409", statfs != NULL);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ statfs->f_type = reiser4_statfs_type(super);
-+ statfs->f_bsize = super->s_blocksize;
-+
-+ /*
-+ * 5% of total block space is reserved. This is needed for flush and
-+ * for truncates (so that we are able to perform truncate/unlink even
-+ * on the otherwise completely full file system). If this reservation
-+ * is hidden from statfs(2), users will mistakenly guess that they
-+ * have enough free space to complete some operation, which is
-+ * frustrating.
-+ *
-+ * Another possible solution is to subtract ->blocks_reserved from
-+ * ->f_bfree, but changing available space seems less intrusive than
-+ * letting user to see 5% of disk space to be used directly after
-+ * mkfs.
-+ */
-+ total = reiser4_block_count(super);
-+ reserved = get_super_private(super)->blocks_reserved;
-+ deleted = txnmgr_count_deleted_blocks();
-+ free = reiser4_free_blocks(super) + deleted;
-+ forroot = reiser4_reserved_blocks(super, 0, 0);
-+
-+ /*
-+ * These counters may be in inconsistent state because we take the
-+ * values without keeping any global spinlock. Here we do a sanity
-+ * check that free block counter does not exceed the number of all
-+ * blocks.
-+ */
-+ if (free > total)
-+ free = total;
-+ statfs->f_blocks = total - reserved;
-+ /* make sure statfs->f_bfree is never larger than statfs->f_blocks */
-+ if (free > reserved)
-+ free -= reserved;
-+ else
-+ free = 0;
-+ statfs->f_bfree = free;
-+
-+ if (free > forroot)
-+ free -= forroot;
-+ else
-+ free = 0;
-+ statfs->f_bavail = free;
-+
-+ statfs->f_files = 0;
-+ statfs->f_ffree = 0;
-+
-+ /* maximal acceptable name length depends on directory plugin. */
-+ assert("nikita-3351", super->s_root->d_inode != NULL);
-+ statfs->f_namelen = reiser4_max_filename_len(super->s_root->d_inode);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_clear_inode - clear_inode of super operation
-+ * @inode: inode about to destroy
-+ *
-+ * Does sanity checks: being destroyed should have all jnodes detached.
-+ */
-+static void reiser4_clear_inode(struct inode *inode)
-+{
-+#if REISER4_DEBUG
-+ reiser4_inode *r4_inode;
-+
-+ r4_inode = reiser4_inode_data(inode);
-+ if (!inode_has_no_jnodes(r4_inode))
-+ warning("vs-1732", "reiser4 inode has %ld jnodes\n",
-+ r4_inode->nr_jnodes);
-+#endif
-+}
-+
-+/**
-+ * reiser4_sync_inodes - sync_inodes of super operations
-+ * @super:
-+ * @wbc:
-+ *
-+ * This method is called by background and non-backgound writeback. Reiser4's
-+ * implementation uses generic_sync_sb_inodes to call reiser4_writepages for
-+ * each of dirty inodes. Reiser4_writepages handles pages dirtied via shared
-+ * mapping - dirty pages get into atoms. Writeout is called to flush some
-+ * atoms.
-+ */
-+static void reiser4_sync_inodes(struct super_block *super,
-+ struct writeback_control *wbc)
-+{
-+ reiser4_context *ctx;
-+ long to_write;
-+
-+ if (wbc->for_kupdate)
-+ /* reiser4 has its own means of periodical write-out */
-+ return;
-+
-+ to_write = wbc->nr_to_write;
-+ assert("vs-49", wbc->older_than_this == NULL);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-13", "failed to init context");
-+ return;
-+ }
-+
-+ /*
-+ * call reiser4_writepages for each of dirty inodes to turn dirty pages
-+ * into transactions if they were not yet.
-+ */
-+ generic_sync_sb_inodes(super, wbc);
-+
-+ /* flush goes here */
-+ wbc->nr_to_write = to_write;
-+ reiser4_writeout(super, wbc);
-+
-+ /* avoid recursive calls to ->sync_inodes */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_show_options - show_options of super operations
-+ * @m: file where to write information
-+ * @mnt: mount structure
-+ *
-+ * Makes reiser4 mount options visible in /proc/mounts.
-+ */
-+static int reiser4_show_options(struct seq_file *m, struct vfsmount *mnt)
-+{
-+ struct super_block *super;
-+ reiser4_super_info_data *sbinfo;
-+
-+ super = mnt->mnt_sb;
-+ sbinfo = get_super_private(super);
-+
-+ seq_printf(m, ",atom_max_size=0x%x", sbinfo->tmgr.atom_max_size);
-+ seq_printf(m, ",atom_max_age=0x%x", sbinfo->tmgr.atom_max_age);
-+ seq_printf(m, ",atom_min_size=0x%x", sbinfo->tmgr.atom_min_size);
-+ seq_printf(m, ",atom_max_flushers=0x%x",
-+ sbinfo->tmgr.atom_max_flushers);
-+ seq_printf(m, ",cbk_cache_slots=0x%x",
-+ sbinfo->tree.cbk_cache.nr_slots);
-+
-+ return 0;
-+}
-+
-+struct super_operations reiser4_super_operations = {
-+ .alloc_inode = reiser4_alloc_inode,
-+ .destroy_inode = reiser4_destroy_inode,
-+ .dirty_inode = reiser4_dirty_inode,
-+ .delete_inode = reiser4_delete_inode,
-+ .put_super = reiser4_put_super,
-+ .write_super = reiser4_write_super,
-+ .statfs = reiser4_statfs,
-+ .clear_inode = reiser4_clear_inode,
-+ .sync_inodes = reiser4_sync_inodes,
-+ .show_options = reiser4_show_options
-+};
-+
-+/**
-+ * fill_super - initialize super block on mount
-+ * @super: super block to fill
-+ * @data: reiser4 specific mount option
-+ * @silent:
-+ *
-+ * This is to be called by reiser4_get_sb. Mounts filesystem.
-+ */
-+static int fill_super(struct super_block *super, void *data, int silent)
-+{
-+ reiser4_context ctx;
-+ int result;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("zam-989", super != NULL);
-+
-+ super->s_op = NULL;
-+ init_stack_context(&ctx, super);
-+
-+ /* allocate reiser4 specific super block */
-+ if ((result = reiser4_init_fs_info(super)) != 0)
-+ goto failed_init_sinfo;
-+
-+ sbinfo = get_super_private(super);
-+ /* initialize various reiser4 parameters, parse mount options */
-+ if ((result = reiser4_init_super_data(super, data)) != 0)
-+ goto failed_init_super_data;
-+
-+ /* read reiser4 master super block, initialize disk format plugin */
-+ if ((result = reiser4_init_read_super(super, silent)) != 0)
-+ goto failed_init_read_super;
-+
-+ /* initialize transaction manager */
-+ reiser4_init_txnmgr(&sbinfo->tmgr);
-+
-+ /* initialize ktxnmgrd context and start kernel thread ktxnmrgd */
-+ if ((result = reiser4_init_ktxnmgrd(super)) != 0)
-+ goto failed_init_ktxnmgrd;
-+
-+ /* initialize entd context and start kernel thread entd */
-+ if ((result = reiser4_init_entd(super)) != 0)
-+ goto failed_init_entd;
-+
-+ /* initialize address spaces for formatted nodes and bitmaps */
-+ if ((result = reiser4_init_formatted_fake(super)) != 0)
-+ goto failed_init_formatted_fake;
-+
-+ /* initialize disk format plugin */
-+ if ((result = get_super_private(super)->df_plug->init_format(super, data)) != 0 )
-+ goto failed_init_disk_format;
-+
-+ /*
-+ * There are some 'committed' versions of reiser4 super block counters,
-+ * which correspond to reiser4 on-disk state. These counters are
-+ * initialized here
-+ */
-+ sbinfo->blocks_free_committed = sbinfo->blocks_free;
-+ sbinfo->nr_files_committed = oids_used(super);
-+
-+ /* get inode of root directory */
-+ if ((result = reiser4_init_root_inode(super)) != 0)
-+ goto failed_init_root_inode;
-+
-+ if ((result = get_super_private(super)->df_plug->version_update(super)) != 0 )
-+ goto failed_update_format_version;
-+
-+ process_safelinks(super);
-+ reiser4_exit_context(&ctx);
-+
-+ sbinfo->debugfs_root = debugfs_create_dir(super->s_id,
-+ reiser4_debugfs_root);
-+ if (sbinfo->debugfs_root) {
-+ sbinfo->tmgr.debugfs_atom_count =
-+ debugfs_create_u32("atom_count", S_IFREG|S_IRUSR,
-+ sbinfo->debugfs_root,
-+ &sbinfo->tmgr.atom_count);
-+ sbinfo->tmgr.debugfs_id_count =
-+ debugfs_create_u32("id_count", S_IFREG|S_IRUSR,
-+ sbinfo->debugfs_root,
-+ &sbinfo->tmgr.id_count);
-+ }
-+ return 0;
-+
-+ failed_update_format_version:
-+ failed_init_root_inode:
-+ if (sbinfo->df_plug->release)
-+ sbinfo->df_plug->release(super);
-+ failed_init_disk_format:
-+ reiser4_done_formatted_fake(super);
-+ failed_init_formatted_fake:
-+ reiser4_done_entd(super);
-+ failed_init_entd:
-+ reiser4_done_ktxnmgrd(super);
-+ failed_init_ktxnmgrd:
-+ reiser4_done_txnmgr(&sbinfo->tmgr);
-+ failed_init_read_super:
-+ failed_init_super_data:
-+ reiser4_done_fs_info(super);
-+ failed_init_sinfo:
-+ reiser4_exit_context(&ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_get_sb - get_sb of file_system_type operations
-+ * @fs_type:
-+ * @flags: mount flags MS_RDONLY, MS_VERBOSE, etc
-+ * @dev_name: block device file name
-+ * @data: specific mount options
-+ *
-+ * Reiser4 mount entry.
-+ */
-+static int reiser4_get_sb(struct file_system_type *fs_type, int flags,
-+ const char *dev_name, void *data, struct vfsmount *mnt)
-+{
-+ return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
-+}
-+
-+/* structure describing the reiser4 filesystem implementation */
-+static struct file_system_type reiser4_fs_type = {
-+ .owner = THIS_MODULE,
-+ .name = "reiser4",
-+ .fs_flags = FS_REQUIRES_DEV,
-+ .get_sb = reiser4_get_sb,
-+ .kill_sb = kill_block_super,
-+ .next = NULL
-+};
-+
-+void destroy_reiser4_cache(struct kmem_cache **cachep)
-+{
-+ BUG_ON(*cachep == NULL);
-+ kmem_cache_destroy(*cachep);
-+ *cachep = NULL;
-+}
-+
-+/**
-+ * init_reiser4 - reiser4 initialization entry point
-+ *
-+ * Initializes reiser4 slabs, registers reiser4 filesystem type. It is called
-+ * on kernel initialization or during reiser4 module load.
-+ */
-+static int __init init_reiser4(void)
-+{
-+ int result;
-+
-+ printk(KERN_INFO
-+ "Loading Reiser4. "
-+ "See www.namesys.com for a description of Reiser4.\n");
-+
-+ /* initialize slab cache of inodes */
-+ if ((result = init_inodes()) != 0)
-+ goto failed_inode_cache;
-+
-+ /* initialize cache of znodes */
-+ if ((result = init_znodes()) != 0)
-+ goto failed_init_znodes;
-+
-+ /* initialize all plugins */
-+ if ((result = init_plugins()) != 0)
-+ goto failed_init_plugins;
-+
-+ /* initialize cache of plugin_set-s and plugin_set's hash table */
-+ if ((result = init_plugin_set()) != 0)
-+ goto failed_init_plugin_set;
-+
-+ /* initialize caches of txn_atom-s and txn_handle-s */
-+ if ((result = init_txnmgr_static()) != 0)
-+ goto failed_init_txnmgr_static;
-+
-+ /* initialize cache of jnodes */
-+ if ((result = init_jnodes()) != 0)
-+ goto failed_init_jnodes;
-+
-+ /* initialize cache of flush queues */
-+ if ((result = reiser4_init_fqs()) != 0)
-+ goto failed_init_fqs;
-+
-+ /* initialize cache of structures attached to dentry->d_fsdata */
-+ if ((result = reiser4_init_dentry_fsdata()) != 0)
-+ goto failed_init_dentry_fsdata;
-+
-+ /* initialize cache of structures attached to file->private_data */
-+ if ((result = reiser4_init_file_fsdata()) != 0)
-+ goto failed_init_file_fsdata;
-+
-+ /*
-+ * initialize cache of d_cursors. See plugin/file_ops_readdir.c for
-+ * more details
-+ */
-+ if ((result = reiser4_init_d_cursor()) != 0)
-+ goto failed_init_d_cursor;
-+
-+ if ((result = register_filesystem(&reiser4_fs_type)) == 0) {
-+ reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL);
-+ return 0;
-+ }
-+
-+ reiser4_done_d_cursor();
-+ failed_init_d_cursor:
-+ reiser4_done_file_fsdata();
-+ failed_init_file_fsdata:
-+ reiser4_done_dentry_fsdata();
-+ failed_init_dentry_fsdata:
-+ reiser4_done_fqs();
-+ failed_init_fqs:
-+ done_jnodes();
-+ failed_init_jnodes:
-+ done_txnmgr_static();
-+ failed_init_txnmgr_static:
-+ done_plugin_set();
-+ failed_init_plugin_set:
-+ failed_init_plugins:
-+ done_znodes();
-+ failed_init_znodes:
-+ done_inodes();
-+ failed_inode_cache:
-+ return result;
-+}
-+
-+/**
-+ * done_reiser4 - reiser4 exit entry point
-+ *
-+ * Unregister reiser4 filesystem type, deletes caches. It is called on shutdown
-+ * or at module unload.
-+ */
-+static void __exit done_reiser4(void)
-+{
-+ int result;
-+
-+ debugfs_remove(reiser4_debugfs_root);
-+ result = unregister_filesystem(&reiser4_fs_type);
-+ BUG_ON(result != 0);
-+ reiser4_done_d_cursor();
-+ reiser4_done_file_fsdata();
-+ reiser4_done_dentry_fsdata();
-+ reiser4_done_fqs();
-+ done_jnodes();
-+ done_txnmgr_static();
-+ done_plugin_set();
-+ done_znodes();
-+ destroy_reiser4_cache(&inode_cache);
-+}
-+
-+module_init(init_reiser4);
-+module_exit(done_reiser4);
-+
-+MODULE_DESCRIPTION("Reiser4 filesystem");
-+MODULE_AUTHOR("Hans Reiser <Reiser@Namesys.COM>");
-+
-+MODULE_LICENSE("GPL");
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/tap.c linux-2.6.20/fs/reiser4/tap.c
---- linux-2.6.20.orig/fs/reiser4/tap.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tap.c 2007-05-06 14:50:43.879031967 +0400
-@@ -0,0 +1,377 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ Tree Access Pointer (tap).
-+
-+ tap is data structure combining coord and lock handle (mostly). It is
-+ useful when one has to scan tree nodes (for example, in readdir, or flush),
-+ for tap functions allow to move tap in either direction transparently
-+ crossing unit/item/node borders.
-+
-+ Tap doesn't provide automatic synchronization of its fields as it is
-+ supposed to be per-thread object.
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "coord.h"
-+#include "tree.h"
-+#include "context.h"
-+#include "tap.h"
-+#include "znode.h"
-+#include "tree_walk.h"
-+
-+#if REISER4_DEBUG
-+static int tap_invariant(const tap_t * tap);
-+static void tap_check(const tap_t * tap);
-+#else
-+#define tap_check(tap) noop
-+#endif
-+
-+/** load node tap is pointing to, if not loaded already */
-+int reiser4_tap_load(tap_t * tap)
-+{
-+ tap_check(tap);
-+ if (tap->loaded == 0) {
-+ int result;
-+
-+ result = zload_ra(tap->coord->node, &tap->ra_info);
-+ if (result != 0)
-+ return result;
-+ coord_clear_iplug(tap->coord);
-+ }
-+ ++tap->loaded;
-+ tap_check(tap);
-+ return 0;
-+}
-+
-+/** release node tap is pointing to. Dual to tap_load() */
-+void reiser4_tap_relse(tap_t * tap)
-+{
-+ tap_check(tap);
-+ if (tap->loaded > 0) {
-+ --tap->loaded;
-+ if (tap->loaded == 0) {
-+ zrelse(tap->coord->node);
-+ }
-+ }
-+ tap_check(tap);
-+}
-+
-+/**
-+ * init tap to consist of @coord and @lh. Locks on nodes will be acquired with
-+ * @mode
-+ */
-+void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh,
-+ znode_lock_mode mode)
-+{
-+ tap->coord = coord;
-+ tap->lh = lh;
-+ tap->mode = mode;
-+ tap->loaded = 0;
-+ INIT_LIST_HEAD(&tap->linkage);
-+ reiser4_init_ra_info(&tap->ra_info);
-+}
-+
-+/** add @tap to the per-thread list of all taps */
-+void reiser4_tap_monitor(tap_t * tap)
-+{
-+ assert("nikita-2623", tap != NULL);
-+ tap_check(tap);
-+ list_add(&tap->linkage, reiser4_taps_list());
-+ tap_check(tap);
-+}
-+
-+/* duplicate @src into @dst. Copy lock handle. @dst is not initially
-+ * loaded. */
-+void reiser4_tap_copy(tap_t * dst, tap_t * src)
-+{
-+ assert("nikita-3193", src != NULL);
-+ assert("nikita-3194", dst != NULL);
-+
-+ *dst->coord = *src->coord;
-+ if (src->lh->node)
-+ copy_lh(dst->lh, src->lh);
-+ dst->mode = src->mode;
-+ dst->loaded = 0;
-+ INIT_LIST_HEAD(&dst->linkage);
-+ dst->ra_info = src->ra_info;
-+}
-+
-+/** finish with @tap */
-+void reiser4_tap_done(tap_t * tap)
-+{
-+ assert("nikita-2565", tap != NULL);
-+ tap_check(tap);
-+ if (tap->loaded > 0)
-+ zrelse(tap->coord->node);
-+ done_lh(tap->lh);
-+ tap->loaded = 0;
-+ list_del_init(&tap->linkage);
-+ tap->coord->node = NULL;
-+}
-+
-+/**
-+ * move @tap to the new node, locked with @target. Load @target, if @tap was
-+ * already loaded.
-+ */
-+int reiser4_tap_move(tap_t * tap, lock_handle * target)
-+{
-+ int result = 0;
-+
-+ assert("nikita-2567", tap != NULL);
-+ assert("nikita-2568", target != NULL);
-+ assert("nikita-2570", target->node != NULL);
-+ assert("nikita-2569", tap->coord->node == tap->lh->node);
-+
-+ tap_check(tap);
-+ if (tap->loaded > 0)
-+ result = zload_ra(target->node, &tap->ra_info);
-+
-+ if (result == 0) {
-+ if (tap->loaded > 0)
-+ zrelse(tap->coord->node);
-+ done_lh(tap->lh);
-+ copy_lh(tap->lh, target);
-+ tap->coord->node = target->node;
-+ coord_clear_iplug(tap->coord);
-+ }
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to @target. Acquire lock on @target, if @tap was already
-+ * loaded.
-+ */
-+static int tap_to(tap_t * tap, znode * target)
-+{
-+ int result;
-+
-+ assert("nikita-2624", tap != NULL);
-+ assert("nikita-2625", target != NULL);
-+
-+ tap_check(tap);
-+ result = 0;
-+ if (tap->coord->node != target) {
-+ lock_handle here;
-+
-+ init_lh(&here);
-+ result = longterm_lock_znode(&here, target,
-+ tap->mode, ZNODE_LOCK_HIPRI);
-+ if (result == 0) {
-+ result = reiser4_tap_move(tap, &here);
-+ done_lh(&here);
-+ }
-+ }
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to given @target, loading and locking @target->node if
-+ * necessary
-+ */
-+int tap_to_coord(tap_t * tap, coord_t * target)
-+{
-+ int result;
-+
-+ tap_check(tap);
-+ result = tap_to(tap, target->node);
-+ if (result == 0)
-+ coord_dup(tap->coord, target);
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/** return list of all taps */
-+struct list_head *reiser4_taps_list(void)
-+{
-+ return &get_current_context()->taps;
-+}
-+
-+/** helper function for go_{next,prev}_{item,unit,node}() */
-+int go_dir_el(tap_t * tap, sideof dir, int units_p)
-+{
-+ coord_t dup;
-+ coord_t *coord;
-+ int result;
-+
-+ int (*coord_dir) (coord_t *);
-+ int (*get_dir_neighbor) (lock_handle *, znode *, int, int);
-+ void (*coord_init) (coord_t *, const znode *);
-+ ON_DEBUG(int (*coord_check) (const coord_t *));
-+
-+ assert("nikita-2556", tap != NULL);
-+ assert("nikita-2557", tap->coord != NULL);
-+ assert("nikita-2558", tap->lh != NULL);
-+ assert("nikita-2559", tap->coord->node != NULL);
-+
-+ tap_check(tap);
-+ if (dir == LEFT_SIDE) {
-+ coord_dir = units_p ? coord_prev_unit : coord_prev_item;
-+ get_dir_neighbor = reiser4_get_left_neighbor;
-+ coord_init = coord_init_last_unit;
-+ } else {
-+ coord_dir = units_p ? coord_next_unit : coord_next_item;
-+ get_dir_neighbor = reiser4_get_right_neighbor;
-+ coord_init = coord_init_first_unit;
-+ }
-+ ON_DEBUG(coord_check =
-+ units_p ? coord_is_existing_unit : coord_is_existing_item);
-+ assert("nikita-2560", coord_check(tap->coord));
-+
-+ coord = tap->coord;
-+ coord_dup(&dup, coord);
-+ if (coord_dir(&dup) != 0) {
-+ do {
-+ /* move to the left neighboring node */
-+ lock_handle dup;
-+
-+ init_lh(&dup);
-+ result =
-+ get_dir_neighbor(&dup, coord->node, (int)tap->mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == 0) {
-+ result = reiser4_tap_move(tap, &dup);
-+ if (result == 0)
-+ coord_init(tap->coord, dup.node);
-+ done_lh(&dup);
-+ }
-+ /* skip empty nodes */
-+ } while ((result == 0) && node_is_empty(coord->node));
-+ } else {
-+ result = 0;
-+ coord_dup(coord, &dup);
-+ }
-+ assert("nikita-2564", ergo(!result, coord_check(tap->coord)));
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to the next unit, transparently crossing item and node
-+ * boundaries
-+ */
-+int go_next_unit(tap_t * tap)
-+{
-+ return go_dir_el(tap, RIGHT_SIDE, 1);
-+}
-+
-+/**
-+ * move @tap to the previous unit, transparently crossing item and node
-+ * boundaries
-+ */
-+int go_prev_unit(tap_t * tap)
-+{
-+ return go_dir_el(tap, LEFT_SIDE, 1);
-+}
-+
-+/**
-+ * @shift times apply @actor to the @tap. This is used to move @tap by
-+ * @shift units (or items, or nodes) in either direction.
-+ */
-+static int rewind_to(tap_t * tap, go_actor_t actor, int shift)
-+{
-+ int result;
-+
-+ assert("nikita-2555", shift >= 0);
-+ assert("nikita-2562", tap->coord->node == tap->lh->node);
-+
-+ tap_check(tap);
-+ result = reiser4_tap_load(tap);
-+ if (result != 0)
-+ return result;
-+
-+ for (; shift > 0; --shift) {
-+ result = actor(tap);
-+ assert("nikita-2563", tap->coord->node == tap->lh->node);
-+ if (result != 0)
-+ break;
-+ }
-+ reiser4_tap_relse(tap);
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/** move @tap @shift units rightward */
-+int rewind_right(tap_t * tap, int shift)
-+{
-+ return rewind_to(tap, go_next_unit, shift);
-+}
-+
-+/** move @tap @shift units leftward */
-+int rewind_left(tap_t * tap, int shift)
-+{
-+ return rewind_to(tap, go_prev_unit, shift);
-+}
-+
-+#if REISER4_DEBUG
-+/** debugging function: print @tap content in human readable form */
-+static void print_tap(const char *prefix, const tap_t * tap)
-+{
-+ if (tap == NULL) {
-+ printk("%s: null tap\n", prefix);
-+ return;
-+ }
-+ printk("%s: loaded: %i, in-list: %i, node: %p, mode: %s\n", prefix,
-+ tap->loaded, (&tap->linkage == tap->linkage.next &&
-+ &tap->linkage == tap->linkage.prev),
-+ tap->lh->node,
-+ lock_mode_name(tap->mode));
-+ print_coord("\tcoord", tap->coord, 0);
-+}
-+
-+/** check [tap-sane] invariant */
-+static int tap_invariant(const tap_t * tap)
-+{
-+ /* [tap-sane] invariant */
-+
-+ if (tap == NULL)
-+ return 1;
-+ /* tap->mode is one of
-+ *
-+ * {ZNODE_NO_LOCK, ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}, and
-+ */
-+ if (tap->mode != ZNODE_NO_LOCK &&
-+ tap->mode != ZNODE_READ_LOCK && tap->mode != ZNODE_WRITE_LOCK)
-+ return 2;
-+ /* tap->coord != NULL, and */
-+ if (tap->coord == NULL)
-+ return 3;
-+ /* tap->lh != NULL, and */
-+ if (tap->lh == NULL)
-+ return 4;
-+ /* tap->loaded > 0 => znode_is_loaded(tap->coord->node), and */
-+ if (!ergo(tap->loaded, znode_is_loaded(tap->coord->node)))
-+ return 5;
-+ /* tap->coord->node == tap->lh->node if tap->lh->node is not 0 */
-+ if (tap->lh->node != NULL && tap->coord->node != tap->lh->node)
-+ return 6;
-+ return 0;
-+}
-+
-+/** debugging function: check internal @tap consistency */
-+static void tap_check(const tap_t * tap)
-+{
-+ int result;
-+
-+ result = tap_invariant(tap);
-+ if (result != 0) {
-+ print_tap("broken", tap);
-+ reiser4_panic("nikita-2831", "tap broken: %i\n", result);
-+ }
-+}
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tap.h linux-2.6.20/fs/reiser4/tap.h
---- linux-2.6.20.orig/fs/reiser4/tap.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tap.h 2007-05-06 14:50:43.879031967 +0400
-@@ -0,0 +1,70 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Tree Access Pointers. See tap.c for more details. */
-+
-+#if !defined( __REISER4_TAP_H__ )
-+#define __REISER4_TAP_H__
-+
-+#include "forward.h"
-+#include "readahead.h"
-+
-+/**
-+ tree_access_pointer aka tap. Data structure combining coord_t and lock
-+ handle.
-+ Invariants involving this data-type, see doc/lock-ordering for details:
-+
-+ [tap-sane]
-+ */
-+struct tree_access_pointer {
-+ /* coord tap is at */
-+ coord_t *coord;
-+ /* lock handle on ->coord->node */
-+ lock_handle *lh;
-+ /* mode of lock acquired by this tap */
-+ znode_lock_mode mode;
-+ /* incremented by reiser4_tap_load().
-+ Decremented by reiser4_tap_relse(). */
-+ int loaded;
-+ /* list of taps */
-+ struct list_head linkage;
-+ /* read-ahead hint */
-+ ra_info_t ra_info;
-+};
-+
-+typedef int (*go_actor_t) (tap_t * tap);
-+
-+extern int reiser4_tap_load(tap_t * tap);
-+extern void reiser4_tap_relse(tap_t * tap);
-+extern void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh,
-+ znode_lock_mode mode);
-+extern void reiser4_tap_monitor(tap_t * tap);
-+extern void reiser4_tap_copy(tap_t * dst, tap_t * src);
-+extern void reiser4_tap_done(tap_t * tap);
-+extern int reiser4_tap_move(tap_t * tap, lock_handle * target);
-+extern int tap_to_coord(tap_t * tap, coord_t * target);
-+
-+extern int go_dir_el(tap_t * tap, sideof dir, int units_p);
-+extern int go_next_unit(tap_t * tap);
-+extern int go_prev_unit(tap_t * tap);
-+extern int rewind_right(tap_t * tap, int shift);
-+extern int rewind_left(tap_t * tap, int shift);
-+
-+extern struct list_head *reiser4_taps_list(void);
-+
-+#define for_all_taps(tap) \
-+ for (tap = list_entry(reiser4_taps_list()->next, tap_t, linkage); \
-+ reiser4_taps_list() != &tap->linkage; \
-+ tap = list_entry(tap->linkage.next, tap_t, linkage))
-+
-+/* __REISER4_TAP_H__ */
-+#endif
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree.c linux-2.6.20/fs/reiser4/tree.c
---- linux-2.6.20.orig/fs/reiser4/tree.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree.c 2007-05-06 14:50:43.883033217 +0400
-@@ -0,0 +1,1876 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * KEYS IN A TREE.
-+ *
-+ * The tree consists of nodes located on the disk. Node in the tree is either
-+ * formatted or unformatted. Formatted node is one that has structure
-+ * understood by the tree balancing and traversal code. Formatted nodes are
-+ * further classified into leaf and internal nodes. The latter distinction is
-+ * (almost) of only historical importance: general structure of leaves and
-+ * internal nodes is the same in Reiser4. Unformatted nodes contain raw data
-+ * that are part of bodies of ordinary files and attributes.
-+ *
-+ * Each node in the tree spans some interval in the key space. Key ranges for
-+ * all nodes in the tree are disjoint. Actually, this only holds in some weak
-+ * sense, because of the non-unique keys: intersection of key ranges for
-+ * different nodes is either empty, or consists of exactly one key.
-+ *
-+ * Formatted node consists of a sequence of items. Each item spans some
-+ * interval in key space. Key ranges for all items in a tree are disjoint,
-+ * modulo non-unique keys again. Items within nodes are ordered in the key
-+ * order of the smallest key in an item.
-+ *
-+ * Particular type of item can be further split into units. Unit is piece of
-+ * item that can be cut from item and moved into another item of the same
-+ * type. Units are used by balancing code to repack data during balancing.
-+ *
-+ * Unit can be further split into smaller entities (for example, extent unit
-+ * represents several pages, and it is natural for extent code to operate on
-+ * particular pages and even bytes within one unit), but this is of no
-+ * relevance to the generic balancing and lookup code.
-+ *
-+ * Although item is said to "span" range or interval of keys, it is not
-+ * necessary that item contains piece of data addressable by each and every
-+ * key in this range. For example, compound directory item, consisting of
-+ * units corresponding to directory entries and keyed by hashes of file names,
-+ * looks more as having "discrete spectrum": only some disjoint keys inside
-+ * range occupied by this item really address data.
-+ *
-+ * Nonetheless, each item always has well-defined least (minimal) key, that
-+ * is recorded in item header, stored in the node this item is in. Also, item
-+ * plugin can optionally define method ->max_key_inside() returning maximal
-+ * key that can _possibly_ be located within this item. This method is used
-+ * (mainly) to determine when given piece of data should be merged into
-+ * existing item, instead of creating new one. Because of this, even though
-+ * ->max_key_inside() can be larger than any key actually located in the item,
-+ * intervals
-+ *
-+ * [ reiser4_min_key( item ), ->max_key_inside( item ) ]
-+ *
-+ * are still disjoint for all items within the _same_ node.
-+ *
-+ * In memory node is represented by znode. It plays several roles:
-+ *
-+ * . something locks are taken on
-+ *
-+ * . something tracked by transaction manager (this is going to change)
-+ *
-+ * . something used to access node data
-+ *
-+ * . something used to maintain tree structure in memory: sibling and
-+ * parental linkage.
-+ *
-+ * . something used to organize nodes into "slums"
-+ *
-+ * More on znodes see in znode.[ch]
-+ *
-+ * DELIMITING KEYS
-+ *
-+ * To simplify balancing, allow some flexibility in locking and speed up
-+ * important coord cache optimization, we keep delimiting keys of nodes in
-+ * memory. Depending on disk format (implemented by appropriate node plugin)
-+ * node on disk can record both left and right delimiting key, only one of
-+ * them, or none. Still, our balancing and tree traversal code keep both
-+ * delimiting keys for a node that is in memory stored in the znode. When
-+ * node is first brought into memory during tree traversal, its left
-+ * delimiting key is taken from its parent, and its right delimiting key is
-+ * either next key in its parent, or is right delimiting key of parent if
-+ * node is the rightmost child of parent.
-+ *
-+ * Physical consistency of delimiting key is protected by special dk
-+ * read-write lock. That is, delimiting keys can only be inspected or
-+ * modified under this lock. But dk lock is only sufficient for fast
-+ * "pessimistic" check, because to simplify code and to decrease lock
-+ * contention, balancing (carry) only updates delimiting keys right before
-+ * unlocking all locked nodes on the given tree level. For example,
-+ * coord-by-key cache scans LRU list of recently accessed znodes. For each
-+ * node it first does fast check under dk spin lock. If key looked for is
-+ * not between delimiting keys for this node, next node is inspected and so
-+ * on. If key is inside of the key range, long term lock is taken on node
-+ * and key range is rechecked.
-+ *
-+ * COORDINATES
-+ *
-+ * To find something in the tree, you supply a key, and the key is resolved
-+ * by coord_by_key() into a coord (coordinate) that is valid as long as the
-+ * node the coord points to remains locked. As mentioned above trees
-+ * consist of nodes that consist of items that consist of units. A unit is
-+ * the smallest and indivisible piece of tree as far as balancing and tree
-+ * search are concerned. Each node, item, and unit can be addressed by
-+ * giving its level in the tree and the key occupied by this entity. A node
-+ * knows what the key ranges are of the items within it, and how to find its
-+ * items and invoke their item handlers, but it does not know how to access
-+ * individual units within its items except through the item handlers.
-+ * coord is a structure containing a pointer to the node, the ordinal number
-+ * of the item within this node (a sort of item offset), and the ordinal
-+ * number of the unit within this item.
-+ *
-+ * TREE LOOKUP
-+ *
-+ * There are two types of access to the tree: lookup and modification.
-+ *
-+ * Lookup is a search for the key in the tree. Search can look for either
-+ * exactly the key given to it, or for the largest key that is not greater
-+ * than the key given to it. This distinction is determined by "bias"
-+ * parameter of search routine (coord_by_key()). coord_by_key() either
-+ * returns error (key is not in the tree, or some kind of external error
-+ * occurred), or successfully resolves key into coord.
-+ *
-+ * This resolution is done by traversing tree top-to-bottom from root level
-+ * to the desired level. On levels above twig level (level one above the
-+ * leaf level) nodes consist exclusively of internal items. Internal item is
-+ * nothing more than pointer to the tree node on the child level. On twig
-+ * level nodes consist of internal items intermixed with extent
-+ * items. Internal items form normal search tree structure used by traversal
-+ * to descend through the tree.
-+ *
-+ * TREE LOOKUP OPTIMIZATIONS
-+ *
-+ * Tree lookup described above is expensive even if all nodes traversed are
-+ * already in the memory: for each node binary search within it has to be
-+ * performed and binary searches are CPU consuming and tend to destroy CPU
-+ * caches.
-+ *
-+ * Several optimizations are used to work around this:
-+ *
-+ * . cbk_cache (look-aside cache for tree traversals, see search.c for
-+ * details)
-+ *
-+ * . seals (see seal.[ch])
-+ *
-+ * . vroot (see search.c)
-+ *
-+ * General search-by-key is layered thusly:
-+ *
-+ * [check seal, if any] --ok--> done
-+ * |
-+ * failed
-+ * |
-+ * V
-+ * [vroot defined] --no--> node = tree_root
-+ * | |
-+ * yes |
-+ * | |
-+ * V |
-+ * node = vroot |
-+ * | |
-+ * | |
-+ * | |
-+ * V V
-+ * [check cbk_cache for key] --ok--> done
-+ * |
-+ * failed
-+ * |
-+ * V
-+ * [start tree traversal from node]
-+ *
-+ */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/static_stat.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "tap.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "page_cache.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "inode.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+
-+/* Disk address (block number) never ever used for any real tree node. This is
-+ used as block number of "uber" znode.
-+
-+ Invalid block addresses are 0 by tradition.
-+
-+*/
-+const reiser4_block_nr UBER_TREE_ADDR = 0ull;
-+
-+#define CUT_TREE_MIN_ITERATIONS 64
-+
-+static int find_child_by_addr(znode * parent, znode * child, coord_t * result);
-+
-+/* Return the node plugin (->nplug) of coord->node; @coord and coord->node must be non-NULL. */
-+node_plugin *node_plugin_by_coord(const coord_t * coord)
-+{
-+ assert("vs-1", coord != NULL);
-+ assert("vs-2", coord->node != NULL);
-+
-+ return coord->node->nplug;
-+}
-+
-+/* Insert item into tree: look @key up with coord_by_key() under a write lock and, if it is not found, insert at the resulting coord via insert_by_coord().
-+ * Returns IBK_ALREADY_EXISTS if @key is found. Fields of @coord are updated so that they can be used by a subsequent insert operation. */
-+insert_result insert_by_key(reiser4_tree * tree /* tree to insert new item
-+ * into */ ,
-+ const reiser4_key * key /* key of new item */ ,
-+ reiser4_item_data * data /* parameters for item
-+ * creation */ ,
-+ coord_t * coord /* resulting insertion coord */ ,
-+ lock_handle * lh /* resulting lock
-+ * handle */ ,
-+ tree_level stop_level /** level where to insert */ ,
-+ __u32 flags /* insertion flags */ )
-+{
-+ int result;
-+
-+ assert("nikita-358", tree != NULL);
-+ assert("nikita-360", coord != NULL);
-+
-+ result = coord_by_key(tree, key, coord, lh, ZNODE_WRITE_LOCK,
-+ FIND_EXACT, stop_level, stop_level,
-+ flags | CBK_FOR_INSERT, NULL /*ra_info */ );
-+ switch (result) {
-+ default:
-+ break;
-+ case CBK_COORD_FOUND:
-+ result = IBK_ALREADY_EXISTS;
-+ break;
-+ case CBK_COORD_NOTFOUND:
-+ assert("nikita-2017", coord->node != NULL);
-+ result = insert_by_coord(coord, data, key, lh, 0 /* flags: @flags is used for the lookup only */ );
-+ break;
-+ }
-+ return result;
-+}
-+
-+/* Insert item by calling carry. Helper function called if short-cut
-+ insertion failed. Posts a single @cop operation at coord->node and runs reiser4_carry(). */
-+static insert_result insert_with_carry_by_coord(coord_t * coord, /* coord where to insert */
-+ lock_handle * lh, /* lock handle of insertion
-+ * node */
-+ reiser4_item_data * data, /* parameters of new
-+ * item */
-+ const reiser4_key * key, /* key of new item */
-+ carry_opcode cop, /* carry operation to perform */
-+ cop_insert_flag flags
-+ /* carry flags */ )
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_insert_data *cdata;
-+ carry_op *op;
-+
-+ assert("umka-314", coord != NULL);
-+
-+ /* allocate carry_pool, 3 carry_level-s and one carry_insert_data in a single chunk */
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*cdata));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, cop, coord->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+ cdata = (carry_insert_data *) (lowest_level + 3); /* placed right after the 3 carry_level-s */
-+ cdata->coord = coord;
-+ cdata->data = data;
-+ cdata->key = key;
-+ op->u.insert.d = cdata;
-+ if (flags == 0) /* no flags given: fall back to the tree's default insert flags */
-+ flags = znode_get_tree(coord->node)->carry.insert_flags;
-+ op->u.insert.flags = flags;
-+ op->u.insert.type = COPT_ITEM_DATA;
-+ op->u.insert.child = NULL;
-+ if (lh != NULL) {
-+ assert("nikita-3245", lh->node == coord->node);
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+ }
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* form carry queue to perform paste of @data with @key at @coord, and launch
-+ its execution by calling carry().
-+
-+ Instruct carry to update @lh if, after balancing, insertion coord moves into
-+ a different block.
-+
-+*/
-+static int paste_with_carry(coord_t * coord, /* coord of paste */
-+ lock_handle * lh, /* lock handle of node
-+ * where item is
-+ * pasted */
-+ reiser4_item_data * data, /* parameters of new
-+ * item */
-+ const reiser4_key * key, /* key of new item */
-+ unsigned flags /* paste flags */ )
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_insert_data *cdata;
-+ carry_op *op;
-+
-+ assert("umka-315", coord != NULL);
-+ assert("umka-316", key != NULL);
-+
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*cdata));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, COP_PASTE, coord->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+ cdata = (carry_insert_data *) (lowest_level + 3); /* placed right after the 3 carry_level-s */
-+ cdata->coord = coord;
-+ cdata->data = data;
-+ cdata->key = key;
-+ op->u.paste.d = cdata;
-+ if (flags == 0) /* no flags given: fall back to the tree's default paste flags */
-+ flags = znode_get_tree(coord->node)->carry.paste_flags;
-+ op->u.paste.flags = flags;
-+ op->u.paste.type = COPT_ITEM_DATA;
-+ if (lh != NULL) {
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+ }
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* insert item at the given coord.
-+
-+ First try to skip carry by directly calling ->create_item() method of node
-+ plugin. If this is impossible (there is not enough free space in the node,
-+ or leftmost item in the node is created), call insert_with_carry_by_coord()
-+ that will do full carry().
-+ Fails with -E_NODE_FULL if the item does not fit and all of COPI_DONT_SHIFT_LEFT, COPI_DONT_SHIFT_RIGHT and COPI_DONT_ALLOCATE are set in @flags.
-+*/
-+insert_result insert_by_coord(coord_t * coord /* coord where to
-+ * insert. coord->node has
-+ * to be write locked by
-+ * caller */ ,
-+ reiser4_item_data * data /* data to be
-+ * inserted */ ,
-+ const reiser4_key * key /* key of new item */ ,
-+ lock_handle * lh /* lock handle of write
-+ * lock on node */ ,
-+ __u32 flags /* insertion flags */ )
-+{
-+ unsigned item_size;
-+ int result;
-+ znode *node;
-+
-+ assert("vs-247", coord != NULL);
-+ assert("vs-248", data != NULL);
-+ assert("vs-249", data->length >= 0);
-+ assert("nikita-1191", znode_is_write_locked(coord->node));
-+
-+ node = coord->node;
-+ coord_clear_iplug(coord);
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+
-+ item_size = space_needed(node, NULL, data, 1);
-+ if (item_size > znode_free_space(node) &&
-+ (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
-+ && (flags & COPI_DONT_ALLOCATE)) {
-+ /* we are forced to use free space of coord->node and new item
-+ does not fit into it.
-+
-+ Currently we get here only when we allocate and copy units
-+ of extent item from a node to its left neighbor during
-+ "squalloc"-ing. If @node (this is left neighbor) does not
-+ have enough free space - we do not want to attempt any
-+ shifting and allocations because we are in squeezing and
-+ everything to the left of @node is tightly packed.
-+ */
-+ result = -E_NODE_FULL;
-+ } else if ((item_size <= znode_free_space(node)) &&
-+ !coord_is_before_leftmost(coord) &&
-+ (node_plugin_by_node(node)->fast_insert != NULL)
-+ && node_plugin_by_node(node)->fast_insert(coord)) {
-+ /* shortcut insertion without carry() overhead.
-+
-+ Only possible if:
-+
-+ - there is enough free space
-+
-+ - insertion is not into the leftmost position in a node
-+ (otherwise it would require updating of delimiting key in a
-+ parent)
-+
-+ - node plugin agrees with this
-+
-+ */
-+ result =
-+ node_plugin_by_node(node)->create_item(coord, key, data,
-+ NULL);
-+ znode_make_dirty(node);
-+ } else {
-+ /* otherwise do full-fledged carry(). */
-+ result =
-+ insert_with_carry_by_coord(coord, lh, data, key, COP_INSERT,
-+ flags);
-+ }
-+ zrelse(node); /* pairs with the zload() above */
-+ return result;
-+}
-+
-+/* @coord is set to leaf level and @data is to be inserted to twig level. Always goes through carry (COP_EXTENT, default flags); there is no short-cut path here. */
-+insert_result
-+insert_extent_by_coord(coord_t *
-+ coord
-+ /* coord where to insert. coord->node * has to be write * locked by caller */
-+ ,
-+ reiser4_item_data * data /* data to be inserted */ ,
-+ const reiser4_key * key /* key of new item */ ,
-+ lock_handle *
-+ lh /* lock handle of write lock on * node */ )
-+{
-+ assert("vs-405", coord != NULL);
-+ assert("vs-406", data != NULL);
-+ assert("vs-407", data->length > 0);
-+ assert("vs-408", znode_is_write_locked(coord->node));
-+ assert("vs-409", znode_get_level(coord->node) == LEAF_LEVEL);
-+
-+ return insert_with_carry_by_coord(coord, lh, data, key, COP_EXTENT,
-+ 0 /*flags */ );
-+}
-+
-+/* Insert into the item at the given coord.
-+
-+ First try to skip carry by directly calling ->paste() method of item
-+ plugin. If this is impossible (there is not enough free space in the node,
-+ or we are pasting into leftmost position in the node), call
-+ paste_with_carry() that will do full carry().
-+
-+*/
-+/* paste_into_item. Returns -E_NODE_FULL when the data does not fit and COPI_DONT_SHIFT_LEFT, COPI_DONT_SHIFT_RIGHT and COPI_DONT_ALLOCATE are all set in @flags. */
-+int insert_into_item(coord_t * coord /* coord of pasting */ ,
-+ lock_handle * lh /* lock handle on node involved */ ,
-+ const reiser4_key * key /* key of unit being pasted */ ,
-+ reiser4_item_data * data /* parameters for new unit */ ,
-+ unsigned flags /* insert/paste flags */ )
-+{
-+ int result;
-+ int size_change;
-+ node_plugin *nplug;
-+ item_plugin *iplug;
-+
-+ assert("umka-317", coord != NULL);
-+ assert("umka-318", key != NULL);
-+
-+ iplug = item_plugin_by_coord(coord);
-+ nplug = node_plugin_by_coord(coord);
-+
-+ assert("nikita-1480", iplug == data->iplug);
-+
-+ size_change = space_needed(coord->node, coord, data, 0);
-+ if (size_change > (int)znode_free_space(coord->node) &&
-+ (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
-+ && (flags & COPI_DONT_ALLOCATE)) {
-+ /* we are forced to use free space of coord->node and new data
-+ does not fit into it. */
-+ return -E_NODE_FULL;
-+ }
-+
-+ /* shortcut paste without carry() overhead.
-+
-+ Only possible if:
-+
-+ - there is enough free space
-+
-+ - paste is not into the leftmost unit in a node (otherwise
-+ it would require updating of delimiting key in a parent)
-+
-+ - node plugin agrees with this
-+
-+ - item plugin agrees with us
-+ */
-+ if (size_change <= (int)znode_free_space(coord->node) &&
-+ (coord->item_pos != 0 ||
-+ coord->unit_pos != 0 || coord->between == AFTER_UNIT) &&
-+ coord->unit_pos != 0 && nplug->fast_paste != NULL && /* NOTE(review): unit_pos != 0 alone already implies the parenthesised test above */
-+ nplug->fast_paste(coord) &&
-+ iplug->b.fast_paste != NULL && iplug->b.fast_paste(coord)) {
-+ if (size_change > 0) /* grow the item before pasting into it */
-+ nplug->change_item_size(coord, size_change);
-+ /* NOTE-NIKITA: huh? where @key is used? */
-+ result = iplug->b.paste(coord, data, NULL);
-+ if (size_change < 0) /* shrink the item only after the paste is done */
-+ nplug->change_item_size(coord, size_change);
-+ znode_make_dirty(coord->node);
-+ } else
-+ /* otherwise do full-fledged carry(). */
-+ result = paste_with_carry(coord, lh, data, key, flags);
-+ return result;
-+}
-+
-+/* This either appends to or truncates item @coord: a negative data->length shrinks the item via the node plugin's ->shrink_item(), a positive one pastes via insert_into_item(). */
-+int reiser4_resize_item(coord_t * coord /* coord of item being resized */ ,
-+ reiser4_item_data * data /* parameters of resize */ ,
-+ reiser4_key * key /* key of new unit */ ,
-+ lock_handle * lh /* lock handle of node
-+ * being modified */ ,
-+ cop_insert_flag flags /* carry flags */ )
-+{
-+ int result;
-+ znode *node;
-+
-+ assert("nikita-362", coord != NULL);
-+ assert("nikita-363", data != NULL);
-+ assert("vs-245", data->length != 0);
-+
-+ node = coord->node;
-+ coord_clear_iplug(coord);
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+
-+ if (data->length < 0)
-+ result = node_plugin_by_coord(coord)->shrink_item(coord,
-+ -data->length);
-+ else
-+ result = insert_into_item(coord, lh, key, data, flags);
-+
-+ zrelse(node); /* pairs with the zload() above */
-+ return result;
-+}
-+
-+/* Insert flow @f at @coord by posting a COP_INSERT_FLOW carry operation on coord->node; @lh is registered as the tracked lock handle (CARRY_TRACK_CHANGE). */
-+int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f)
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ reiser4_item_data *data;
-+ carry_op *op;
-+
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*data));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
-+ 0 /* operate directly on coord -> node */ );
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+
-+ /* these are permanent during insert_flow; @data lives in the pool chunk, right after the 3 carry_level-s */
-+ data = (reiser4_item_data *) (lowest_level + 3);
-+ data->user = 1;
-+ data->iplug = item_plugin_by_id(FORMATTING_ID);
-+ data->arg = NULL;
-+ /* data.length and data.data will be set before calling paste or
-+ insert */
-+ data->length = 0;
-+ data->data = NULL;
-+
-+ op->u.insert_flow.flags = 0;
-+ op->u.insert_flow.insert_point = coord;
-+ op->u.insert_flow.flow = f;
-+ op->u.insert_flow.data = data;
-+ op->u.insert_flow.new_nodes = 0;
-+
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* Given a coord in parent node, obtain a znode for the corresponding child. Returns ERR_PTR(-EIO) if @parent is at or below leaf level or the item at @parent_coord is not internal. */
-+znode *child_znode(const coord_t * parent_coord /* coord of pointer to
-+ * child */ ,
-+ znode * parent /* parent of child */ ,
-+ int incore_p /* if !0 only return child if already in
-+ * memory */ ,
-+ int setup_dkeys_p /* if !0 update delimiting keys of
-+ * child */ )
-+{
-+ znode *child;
-+
-+ assert("nikita-1374", parent_coord != NULL);
-+ assert("nikita-1482", parent != NULL);
-+#if REISER4_DEBUG
-+ if (setup_dkeys_p)
-+ assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock));
-+#endif
-+ assert("nikita-2947", znode_is_any_locked(parent));
-+
-+ if (znode_get_level(parent) <= LEAF_LEVEL) {
-+ /* trying to get child of leaf node */
-+ warning("nikita-1217", "Child of maize?");
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+ if (item_is_internal(parent_coord)) {
-+ reiser4_block_nr addr;
-+ item_plugin *iplug;
-+ reiser4_tree *tree;
-+
-+ iplug = item_plugin_by_coord(parent_coord);
-+ assert("vs-512", iplug->s.internal.down_link);
-+ iplug->s.internal.down_link(parent_coord, NULL, &addr); /* fetch child's block number into addr */
-+
-+ tree = znode_get_tree(parent);
-+ if (incore_p)
-+ child = zlook(tree, &addr);
-+ else
-+ child =
-+ zget(tree, &addr, parent,
-+ znode_get_level(parent) - 1,
-+ reiser4_ctx_gfp_mask_get());
-+ if ((child != NULL) && !IS_ERR(child) && setup_dkeys_p)
-+ set_child_delimiting_keys(parent, parent_coord, child);
-+ } else {
-+ warning("nikita-1483", "Internal item expected");
-+ child = ERR_PTR(RETERR(-EIO));
-+ }
-+ return child;
-+}
-+
-+/* Remove znode from transaction: release its block (deferred dealloc, or return of a fake-allocated block) and then uncapture its page or, if it has no page, the jnode itself. */
-+static void uncapture_znode(znode * node)
-+{
-+ struct page *page;
-+
-+ assert("zam-1001", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ if (!reiser4_blocknr_is_fake(znode_get_block(node))) {
-+ int ret;
-+
-+ /* An already allocated block goes right to the atom's delete set. */
-+ ret =
-+ reiser4_dealloc_block(znode_get_block(node), 0,
-+ BA_DEFER | BA_FORMATTED);
-+ if (ret)
-+ warning("zam-942",
-+ "can\'t add a block (%llu) number to atom's delete set\n",
-+ (unsigned long long)(*znode_get_block(node)));
-+
-+ spin_lock_znode(node);
-+ /* Here we return flush reserved block which was reserved at the
-+ * moment when this allocated node was marked dirty and still
-+ * not used by flush in node relocation procedure. */
-+ if (ZF_ISSET(node, JNODE_FLUSH_RESERVED)) {
-+ txn_atom *atom;
-+
-+ atom = jnode_get_atom(ZJNODE(node));
-+ assert("zam-939", atom != NULL);
-+ spin_unlock_znode(node);
-+ flush_reserved2grabbed(atom, (__u64) 1);
-+ spin_unlock_atom(atom);
-+ } else
-+ spin_unlock_znode(node);
-+ } else {
-+ /* znode has assigned block which is counted as "fake
-+ allocated". Return it back to "free blocks". */
-+ fake_allocated2free((__u64) 1, BA_FORMATTED);
-+ }
-+
-+ /*
-+ * uncapture page from transaction. There is a possibility of a race
-+ * with ->releasepage(): reiser4_releasepage() detaches page from this
-+ * jnode and we have nothing to uncapture. To avoid this, get
-+ * reference of node->pg under jnode spin lock. reiser4_uncapture_page()
-+ * will deal with released page itself.
-+ */
-+ spin_lock_znode(node);
-+ page = znode_page(node);
-+ if (likely(page != NULL)) {
-+ /*
-+ * reiser4_uncapture_page() can only be called when we are sure
-+ * that znode is pinned in memory, which we are, because
-+ * forget_znode() is only called from longterm_unlock_znode().
-+ */
-+ page_cache_get(page);
-+ spin_unlock_znode(node);
-+ lock_page(page);
-+ reiser4_uncapture_page(page);
-+ unlock_page(page);
-+ page_cache_release(page);
-+ } else {
-+ txn_atom *atom;
-+
-+ /* handle "flush queued" znodes: wait on the atom while the node is flush-queued and the atom has running queues */
-+ while (1) {
-+ atom = jnode_get_atom(ZJNODE(node));
-+ assert("zam-943", atom != NULL);
-+
-+ if (!ZF_ISSET(node, JNODE_FLUSH_QUEUED)
-+ || !atom->nr_running_queues)
-+ break;
-+
-+ spin_unlock_znode(node);
-+ reiser4_atom_wait_event(atom);
-+ spin_lock_znode(node);
-+ }
-+
-+ reiser4_uncapture_block(ZJNODE(node));
-+ spin_unlock_atom(atom);
-+ zput(node);
-+ }
-+}
-+
-+/* This is called from longterm_unlock_znode() when last lock is released from
-+ the node that has been removed from the tree. At this point node is removed
-+ from sibling list and its lock is invalidated. */
-+void forget_znode(lock_handle * handle)
-+{
-+ znode *node;
-+ reiser4_tree *tree;
-+
-+ assert("umka-319", handle != NULL);
-+
-+ node = handle->node;
-+ tree = znode_get_tree(node);
-+
-+ assert("vs-164", znode_is_write_locked(node));
-+ assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert_rw_locked(&(node->lock.guard));
-+
-+ /* We assume that this node was detached from its parent before
-+ * unlocking, it gives no way to reach this node from parent through a
-+ * down link. The node should have no children and, thereby, can't be
-+ * reached from them by their parent pointers. The only way to obtain a
-+ * reference to the node is to use sibling pointers from its left and
-+ * right neighbors. In the next several lines we remove the node from
-+ * the sibling list. */
-+
-+ write_lock_tree(tree);
-+ sibling_list_remove(node);
-+ znode_remove(node, tree);
-+ write_unlock_tree(tree);
-+
-+ /* Here we set JNODE_DYING and cancel all pending lock requests. It
-+ * forces all lock requestor threads to repeat iterations of getting
-+ * lock on a child, neighbor or parent node. But, those threads can't
-+ * come to this node again, because this node is no longer a child,
-+ * neighbor or parent of any other node. This order of znode
-+ * invalidation does not allow other threads to waste cpu time in a busy
-+ * loop, trying to lock dying object. The exception is in the flush
-+ * code when we take node directly from atom's capture list. */
-+ reiser4_invalidate_lock(handle);
-+ uncapture_znode(node);
-+}
-+
-+/* Check that internal item at @pointer really contains pointer to @child. Returns NS_FOUND if it does, NS_NOT_FOUND otherwise. */
-+int check_tree_pointer(const coord_t * pointer /* would-be pointer to
-+ * @child */ ,
-+ const znode * child /* child znode */ )
-+{
-+ assert("nikita-1016", pointer != NULL);
-+ assert("nikita-1017", child != NULL);
-+ assert("nikita-1018", pointer->node != NULL);
-+
-+ assert("nikita-1325", znode_is_any_locked(pointer->node));
-+
-+ assert("nikita-2985",
-+ znode_get_level(pointer->node) == znode_get_level(child) + 1);
-+
-+ coord_clear_iplug((coord_t *) pointer); /* NOTE: casts away const; modifies @pointer */
-+
-+ if (coord_is_existing_unit(pointer)) {
-+ item_plugin *iplug;
-+ reiser4_block_nr addr;
-+
-+ if (item_is_internal(pointer)) {
-+ iplug = item_plugin_by_coord(pointer);
-+ assert("vs-513", iplug->s.internal.down_link);
-+ iplug->s.internal.down_link(pointer, NULL, &addr);
-+ /* check that the block number stored in the item matches that of @child */
-+ if (disk_addr_eq(&addr, znode_get_block(child))) {
-+ return NS_FOUND;
-+ }
-+ }
-+ }
-+ /* warning ("jmacd-1002", "tree pointer incorrect"); */
-+ return NS_NOT_FOUND;
-+}
-+
-+/* find coord of pointer to new @child in @parent.
-+
-+ Find the &coord_t in the @parent where pointer to a given @child will
-+ be in.
-+ Locates @left in @parent; on success sets result->between to AFTER_UNIT and returns RETERR(NS_NOT_FOUND), otherwise returns RETERR(-EIO).
-+*/
-+int find_new_child_ptr(znode * parent /* parent znode, passed locked */ ,
-+ znode *
-+ child UNUSED_ARG /* child znode, passed locked */ ,
-+ znode * left /* left brother of new node */ ,
-+ coord_t * result /* where result is stored in */ )
-+{
-+ int ret;
-+
-+ assert("nikita-1486", parent != NULL);
-+ assert("nikita-1487", child != NULL);
-+ assert("nikita-1488", result != NULL);
-+
-+ ret = find_child_ptr(parent, left, result);
-+ if (ret != NS_FOUND) {
-+ warning("nikita-1489", "Cannot find brother position: %i", ret);
-+ return RETERR(-EIO);
-+ } else {
-+ result->between = AFTER_UNIT;
-+ return RETERR(NS_NOT_FOUND);
-+ }
-+}
-+
-+/* find coord of pointer to @child in @parent.
-+
-+ Find the &coord_t in the @parent where pointer to a given @child is in.
-+
-+*/
-+int find_child_ptr(znode * parent /* parent znode, passed locked */ ,
-+ znode * child /* child znode, passed locked */ ,
-+ coord_t * result /* where result is stored in */ )
-+{
-+ int lookup_res;
-+ node_plugin *nplug;
-+ /* left delimiting key of a child */
-+ reiser4_key ld;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-934", parent != NULL);
-+ assert("nikita-935", child != NULL);
-+ assert("nikita-936", result != NULL);
-+ assert("zam-356", znode_is_loaded(parent));
-+
-+ coord_init_zero(result);
-+ result->node = parent;
-+
-+ nplug = parent->nplug;
-+ assert("nikita-939", nplug != NULL);
-+
-+ tree = znode_get_tree(parent);
-+ /* NOTE-NIKITA taking read-lock on tree here assumes that @result is
-+ * not aliased to ->in_parent of some znode. Otherwise,
-+ * parent_coord_to_coord() below would modify data protected by tree
-+ * lock. */
-+ read_lock_tree(tree);
-+ /* fast path. Try to use cached value. Lock tree to keep
-+ node->pos_in_parent and pos->*_blocknr consistent. */
-+ if (child->in_parent.item_pos + 1 != 0) {
-+ parent_coord_to_coord(&child->in_parent, result);
-+ if (check_tree_pointer(result, child) == NS_FOUND) {
-+ read_unlock_tree(tree);
-+ return NS_FOUND;
-+ }
-+
-+ child->in_parent.item_pos = (unsigned short)~0; /* cached position was stale: reset it to the all-ones value */
-+ }
-+ read_unlock_tree(tree);
-+
-+ /* if the above failed, find some key from @child. We are looking for the
-+ least key in a child. */
-+ read_lock_dk(tree);
-+ ld = *znode_get_ld_key(child);
-+ read_unlock_dk(tree);
-+ /*
-+ * now, lookup parent with key just found. Note, that left delimiting
-+ * key doesn't identify node uniquely, because (in extremely rare
-+ * case) two nodes can have equal left delimiting keys, if one of them
-+ * is completely filled with directory entries that all happened to be
-+ * hash collisions. But, we check block number in check_tree_pointer()
-+ * and, so, are safe.
-+ */
-+ lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result);
-+ /* update cached position in parent (child->in_parent) */
-+ if (lookup_res == NS_FOUND) {
-+ write_lock_tree(tree);
-+ coord_to_parent_coord(result, &child->in_parent);
-+ write_unlock_tree(tree);
-+ lookup_res = check_tree_pointer(result, child);
-+ }
-+ if (lookup_res == NS_NOT_FOUND) /* last resort: scan all units of @parent */
-+ lookup_res = find_child_by_addr(parent, child, result);
-+ return lookup_res;
-+}
-+
-+/* find coord of pointer to @child in @parent by scanning
-+
-+ Find the &coord_t in the @parent where pointer to a given @child
-+ is in by scanning all internal items in @parent and comparing block
-+ numbers in them with that of @child.
-+ Returns NS_FOUND (and caches the position in child->in_parent under the tree write lock) or NS_NOT_FOUND.
-+*/
-+static int find_child_by_addr(znode * parent /* parent znode, passed locked */ ,
-+ znode * child /* child znode, passed locked */ ,
-+ coord_t * result /* where result is stored in */ )
-+{
-+ int ret;
-+
-+ assert("nikita-1320", parent != NULL);
-+ assert("nikita-1321", child != NULL);
-+ assert("nikita-1322", result != NULL);
-+
-+ ret = NS_NOT_FOUND;
-+
-+ for_all_units(result, parent) {
-+ if (check_tree_pointer(result, child) == NS_FOUND) {
-+ write_lock_tree(znode_get_tree(parent));
-+ coord_to_parent_coord(result, &child->in_parent);
-+ write_unlock_tree(znode_get_tree(parent));
-+ ret = NS_FOUND;
-+ break;
-+ }
-+ }
-+ return ret;
-+}
-+
-+/* true, if @addr is "unallocated block number", which is just address, with
-+ highest bit set: tested as (*addr & REISER4_BLOCKNR_STATUS_BIT_MASK) == REISER4_UNALLOCATED_STATUS_VALUE. */
-+int is_disk_addr_unallocated(const reiser4_block_nr * addr /* address to
-+ * check */ )
-+{
-+ assert("nikita-1766", addr != NULL);
-+ cassert(sizeof(reiser4_block_nr) == 8);
-+ return (*addr & REISER4_BLOCKNR_STATUS_BIT_MASK) ==
-+ REISER4_UNALLOCATED_STATUS_VALUE;
-+}
-+
-+/* returns true if removing bytes of given range of key [from_key, to_key]
-+ causes removing of whole item @from */
-+static int
-+item_removed_completely(coord_t * from, const reiser4_key * from_key,
-+ const reiser4_key * to_key)
-+{
-+ item_plugin *iplug;
-+ reiser4_key key_in_item;
-+
-+ assert("umka-325", from != NULL);
-+ assert("", item_is_extent(from));
-+
-+ /* check first key just for case */
-+ item_key_by_coord(from, &key_in_item);
-+ if (keygt(from_key, &key_in_item))
-+ return 0;
-+
-+ /* check last key */
-+ iplug = item_plugin_by_coord(from);
-+ assert("vs-611", iplug && iplug->s.file.append_key);
-+
-+ iplug->s.file.append_key(from, &key_in_item);
-+ set_key_offset(&key_in_item, get_key_offset(&key_in_item) - 1);
-+
-+ if (keylt(to_key, &key_in_item))
-+ /* last byte is not removed */
-+ return 0;
-+ return 1;
-+}
-+
-+/* helper function for prepare_twig_kill(): @left and @right are formatted
-+ * neighbors of extent item being completely removed. Load and lock neighbors
-+ * and store lock handles into @cdata for later use by kill_hook_extent() */
-+static int
-+prepare_children(znode * left, znode * right, carry_kill_data * kdata)
-+{
-+ int result;
-+ int left_loaded;
-+ int right_loaded;
-+
-+ result = 0;
-+ left_loaded = right_loaded = 0;
-+
-+ if (left != NULL) {
-+ result = zload(left);
-+ if (result == 0) {
-+ left_loaded = 1;
-+ result = longterm_lock_znode(kdata->left, left,
-+ ZNODE_READ_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ }
-+ }
-+ if (result == 0 && right != NULL) {
-+ result = zload(right);
-+ if (result == 0) {
-+ right_loaded = 1;
-+ result = longterm_lock_znode(kdata->right, right,
-+ ZNODE_READ_LOCK,
-+ ZNODE_LOCK_HIPRI |
-+ ZNODE_LOCK_NONBLOCK);
-+ }
-+ }
-+ if (result != 0) {
-+ done_lh(kdata->left);
-+ done_lh(kdata->right);
-+ if (left_loaded != 0)
-+ zrelse(left);
-+ if (right_loaded != 0)
-+ zrelse(right);
-+ }
-+ return result;
-+}
-+
-+static void done_children(carry_kill_data * kdata)
-+{
-+ if (kdata->left != NULL && kdata->left->node != NULL) {
-+ zrelse(kdata->left->node);
-+ done_lh(kdata->left);
-+ }
-+ if (kdata->right != NULL && kdata->right->node != NULL) {
-+ zrelse(kdata->right->node);
-+ done_lh(kdata->right);
-+ }
-+}
-+
-+/* part of cut_node. It is called when cut_node is called to remove or cut part
-+ of extent item. When head of that item is removed - we have to update right
-+ delimiting of left neighbor of extent. When item is removed completely - we
-+ have to set sibling link between left and right neighbor of removed
-+ extent. This may return -E_DEADLOCK because of trying to get left neighbor
-+ locked. So, caller should repeat an attempt
-+*/
-+/* Audited by: umka (2002.06.16) */
-+static int
-+prepare_twig_kill(carry_kill_data * kdata, znode * locked_left_neighbor)
-+{
-+ int result;
-+ reiser4_key key;
-+ lock_handle left_lh;
-+ lock_handle right_lh;
-+ coord_t left_coord;
-+ coord_t *from;
-+ znode *left_child;
-+ znode *right_child;
-+ reiser4_tree *tree;
-+ int left_zloaded_here, right_zloaded_here;
-+
-+ from = kdata->params.from;
-+ assert("umka-326", from != NULL);
-+ assert("umka-327", kdata->params.to != NULL);
-+
-+ /* for one extent item only yet */
-+ assert("vs-591", item_is_extent(from));
-+ assert("vs-592", from->item_pos == kdata->params.to->item_pos);
-+
-+ if ((kdata->params.from_key
-+ && keygt(kdata->params.from_key, item_key_by_coord(from, &key)))
-+ || from->unit_pos != 0) {
-+ /* head of item @from is not removed, there is nothing to
-+ worry about */
-+ return 0;
-+ }
-+
-+ result = 0;
-+ left_zloaded_here = 0;
-+ right_zloaded_here = 0;
-+
-+ left_child = right_child = NULL;
-+
-+ coord_dup(&left_coord, from);
-+ init_lh(&left_lh);
-+ init_lh(&right_lh);
-+ if (coord_prev_unit(&left_coord)) {
-+ /* @from is leftmost item in its node */
-+ if (!locked_left_neighbor) {
-+ result =
-+ reiser4_get_left_neighbor(&left_lh, from->node,
-+ ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ switch (result) {
-+ case 0:
-+ break;
-+ case -E_NO_NEIGHBOR:
-+ /* there is no formatted node to the left of
-+ from->node */
-+ warning("vs-605",
-+ "extent item has smallest key in "
-+ "the tree and it is about to be removed");
-+ return 0;
-+ case -E_DEADLOCK:
-+ /* need to restart */
-+ default:
-+ return result;
-+ }
-+
-+ /* we have acquired left neighbor of from->node */
-+ result = zload(left_lh.node);
-+ if (result)
-+ goto done;
-+
-+ locked_left_neighbor = left_lh.node;
-+ } else {
-+ /* squalloc_right_twig_cut should have supplied locked
-+ * left neighbor */
-+ assert("vs-834",
-+ znode_is_write_locked(locked_left_neighbor));
-+ result = zload(locked_left_neighbor);
-+ if (result)
-+ return result;
-+ }
-+
-+ left_zloaded_here = 1;
-+ coord_init_last_unit(&left_coord, locked_left_neighbor);
-+ }
-+
-+ if (!item_is_internal(&left_coord)) {
-+ /* what else but extent can be on twig level */
-+ assert("vs-606", item_is_extent(&left_coord));
-+
-+ /* there is no left formatted child */
-+ if (left_zloaded_here)
-+ zrelse(locked_left_neighbor);
-+ done_lh(&left_lh);
-+ return 0;
-+ }
-+
-+ tree = znode_get_tree(left_coord.node);
-+ left_child = child_znode(&left_coord, left_coord.node, 1, 0);
-+
-+ if (IS_ERR(left_child)) {
-+ result = PTR_ERR(left_child);
-+ goto done;
-+ }
-+
-+ /* left child is acquired, calculate new right delimiting key for it
-+ and get right child if it is necessary */
-+ if (item_removed_completely
-+ (from, kdata->params.from_key, kdata->params.to_key)) {
-+ /* try to get right child of removed item */
-+ coord_t right_coord;
-+
-+ assert("vs-607",
-+ kdata->params.to->unit_pos ==
-+ coord_last_unit_pos(kdata->params.to));
-+ coord_dup(&right_coord, kdata->params.to);
-+ if (coord_next_unit(&right_coord)) {
-+ /* @to is rightmost unit in the node */
-+ result =
-+ reiser4_get_right_neighbor(&right_lh, from->node,
-+ ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ switch (result) {
-+ case 0:
-+ result = zload(right_lh.node);
-+ if (result)
-+ goto done;
-+
-+ right_zloaded_here = 1;
-+ coord_init_first_unit(&right_coord,
-+ right_lh.node);
-+ item_key_by_coord(&right_coord, &key);
-+ break;
-+
-+ case -E_NO_NEIGHBOR:
-+ /* there is no formatted node to the right of
-+ from->node */
-+ read_lock_dk(tree);
-+ key = *znode_get_rd_key(from->node);
-+ read_unlock_dk(tree);
-+ right_coord.node = NULL;
-+ result = 0;
-+ break;
-+ default:
-+ /* real error */
-+ goto done;
-+ }
-+ } else {
-+ /* there is an item to the right of @from - take its key */
-+ item_key_by_coord(&right_coord, &key);
-+ }
-+
-+ /* try to get right child of @from */
-+ if (right_coord.node && /* there is right neighbor of @from */
-+ item_is_internal(&right_coord)) { /* it is internal item */
-+ right_child = child_znode(&right_coord,
-+ right_coord.node, 1, 0);
-+
-+ if (IS_ERR(right_child)) {
-+ result = PTR_ERR(right_child);
-+ goto done;
-+ }
-+
-+ }
-+ /* whole extent is removed between znodes left_child and right_child. Prepare them for linking and
-+ update of right delimiting key of left_child */
-+ result = prepare_children(left_child, right_child, kdata);
-+ } else {
-+ /* head of item @to is removed. left_child has to get right delimting key update. Prepare it for that */
-+ result = prepare_children(left_child, NULL, kdata);
-+ }
-+
-+ done:
-+ if (right_child)
-+ zput(right_child);
-+ if (right_zloaded_here)
-+ zrelse(right_lh.node);
-+ done_lh(&right_lh);
-+
-+ if (left_child)
-+ zput(left_child);
-+ if (left_zloaded_here)
-+ zrelse(locked_left_neighbor);
-+ done_lh(&left_lh);
-+ return result;
-+}
-+
-+/* this is used to remove part of node content between coordinates @from and @to. Units to which @from and @to are set
-+ are to be cut completely */
-+/* for try_to_merge_with_left, delete_copied, reiser4_delete_node */
-+int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key, /* first key to be removed */
-+ const reiser4_key * to_key, /* last key to be removed */
-+ reiser4_key *
-+ smallest_removed /* smallest key actually removed */ )
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_cut_data *cut_data;
-+ carry_op *op;
-+
-+ assert("vs-1715", coord_compare(from, to) != COORD_CMP_ON_RIGHT);
-+
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*cut_data));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
-+ assert("vs-1509", op != 0);
-+ if (IS_ERR(op)) {
-+ done_carry_pool(pool);
-+ return PTR_ERR(op);
-+ }
-+
-+ cut_data = (carry_cut_data *) (lowest_level + 3);
-+ cut_data->params.from = from;
-+ cut_data->params.to = to;
-+ cut_data->params.from_key = from_key;
-+ cut_data->params.to_key = to_key;
-+ cut_data->params.smallest_removed = smallest_removed;
-+
-+ op->u.cut_or_kill.is_cut = 1;
-+ op->u.cut_or_kill.u.cut = cut_data;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* cut part of the node
-+
-+ Cut part or whole content of node.
-+
-+ cut data between @from and @to of @from->node and call carry() to make
-+ corresponding changes in the tree. @from->node may become empty. If so -
-+ pointer to it will be removed. Neighboring nodes are not changed. Smallest
-+ removed key is stored in @smallest_removed
-+
-+*/
-+int kill_node_content(coord_t * from, /* coord of the first unit/item that will be eliminated */
-+ coord_t * to, /* coord of the last unit/item that will be eliminated */
-+ const reiser4_key * from_key, /* first key to be removed */
-+ const reiser4_key * to_key, /* last key to be removed */
-+ reiser4_key * smallest_removed, /* smallest key actually removed */
-+ znode * locked_left_neighbor, /* this is set when kill_node_content is called with left neighbor
-+ * locked (in squalloc_right_twig_cut, namely) */
-+ struct inode *inode, /* inode of file whose item (or its part) is to be killed. This is necessary to
-+ invalidate pages together with item pointing to them */
-+ int truncate)
-+{ /* this call is made for file truncate) */
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_kill_data *kdata;
-+ lock_handle *left_child;
-+ lock_handle *right_child;
-+ carry_op *op;
-+
-+ assert("umka-328", from != NULL);
-+ assert("vs-316", !node_is_empty(from->node));
-+ assert("nikita-1812", coord_is_existing_unit(from)
-+ && coord_is_existing_unit(to));
-+
-+ /* allocate carry_pool, 3 carry_level-s, carry_kill_data and structures for kill_hook_extent */
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(carry_kill_data) +
-+ 2 * sizeof(lock_handle) +
-+ 5 * sizeof(reiser4_key) + 2 * sizeof(coord_t));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ kdata = (carry_kill_data *) (lowest_level + 3);
-+ left_child = (lock_handle *) (kdata + 1);
-+ right_child = left_child + 1;
-+
-+ init_lh(left_child);
-+ init_lh(right_child);
-+
-+ kdata->params.from = from;
-+ kdata->params.to = to;
-+ kdata->params.from_key = from_key;
-+ kdata->params.to_key = to_key;
-+ kdata->params.smallest_removed = smallest_removed;
-+ kdata->params.truncate = truncate;
-+ kdata->flags = 0;
-+ kdata->inode = inode;
-+ kdata->left = left_child;
-+ kdata->right = right_child;
-+ /* memory for 5 reiser4_key and 2 coord_t will be used in kill_hook_extent */
-+ kdata->buf = (char *)(right_child + 1);
-+
-+ if (znode_get_level(from->node) == TWIG_LEVEL && item_is_extent(from)) {
-+ /* left child of extent item may have to get updated right
-+ delimiting key and to get linked with right child of extent
-+ @from if it will be removed completely */
-+ result = prepare_twig_kill(kdata, locked_left_neighbor);
-+ if (result) {
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return result;
-+ }
-+ }
-+
-+ op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+
-+ op->u.cut_or_kill.is_cut = 0;
-+ op->u.cut_or_kill.u.kill = kdata;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return result;
-+}
-+
-+void
-+fake_kill_hook_tail(struct inode *inode, loff_t start, loff_t end, int truncate)
-+{
-+ if (reiser4_inode_get_flag(inode, REISER4_HAS_MMAP)) {
-+ pgoff_t start_pg, end_pg;
-+
-+ start_pg = start >> PAGE_CACHE_SHIFT;
-+ end_pg = (end - 1) >> PAGE_CACHE_SHIFT;
-+
-+ if ((start & (PAGE_CACHE_SIZE - 1)) == 0) {
-+ /*
-+ * kill up to the page boundary.
-+ */
-+ assert("vs-123456", start_pg == end_pg);
-+ reiser4_invalidate_pages(inode->i_mapping, start_pg, 1,
-+ truncate);
-+ } else if (start_pg != end_pg) {
-+ /*
-+ * page boundary is within killed portion of node.
-+ */
-+ assert("vs-654321", end_pg - start_pg == 1);
-+ reiser4_invalidate_pages(inode->i_mapping, end_pg,
-+ end_pg - start_pg, 1);
-+ }
-+ }
-+ inode_sub_bytes(inode, end - start);
-+}
-+
-+/**
-+ * Delete whole @node from the reiser4 tree without loading it.
-+ *
-+ * @left: locked left neighbor,
-+ * @node: node to be deleted,
-+ * @smallest_removed: leftmost key of deleted node,
-+ * @object: inode pointer, if we truncate a file body.
-+ * @truncate: true if called for file truncate.
-+ *
-+ * @return: 0 if success, error code otherwise.
-+ *
-+ * NOTE: if @object!=NULL we assume that @smallest_removed != NULL and it
-+ * contains the right value of the smallest removed key from the previous
-+ * cut_worker() iteration. This is needed for proper accounting of
-+ * "i_blocks" and "i_bytes" fields of the @object.
-+ */
-+int reiser4_delete_node(znode * node, reiser4_key * smallest_removed,
-+ struct inode *object, int truncate)
-+{
-+ lock_handle parent_lock;
-+ coord_t cut_from;
-+ coord_t cut_to;
-+ reiser4_tree *tree;
-+ int ret;
-+
-+ assert("zam-937", node != NULL);
-+ assert("zam-933", znode_is_write_locked(node));
-+ assert("zam-999", smallest_removed != NULL);
-+
-+ init_lh(&parent_lock);
-+
-+ ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ return ret;
-+
-+ assert("zam-934", !znode_above_root(parent_lock.node));
-+
-+ ret = zload(parent_lock.node);
-+ if (ret)
-+ goto failed_nozrelse;
-+
-+ ret = find_child_ptr(parent_lock.node, node, &cut_from);
-+ if (ret)
-+ goto failed;
-+
-+ /* decrement child counter and set parent pointer to NULL before
-+ deleting the list from parent node because of checks in
-+ internal_kill_item_hook (we can delete the last item from the parent
-+ node, the parent node is going to be deleted and its c_count should
-+ be zero). */
-+
-+ tree = znode_get_tree(node);
-+ write_lock_tree(tree);
-+ init_parent_coord(&node->in_parent, NULL);
-+ --parent_lock.node->c_count;
-+ write_unlock_tree(tree);
-+
-+ assert("zam-989", item_is_internal(&cut_from));
-+
-+ /* @node should be deleted after unlocking. */
-+ ZF_SET(node, JNODE_HEARD_BANSHEE);
-+
-+ /* remove a pointer from the parent node to the node being deleted. */
-+ coord_dup(&cut_to, &cut_from);
-+ /* FIXME: shouldn't this be kill_node_content */
-+ ret = cut_node_content(&cut_from, &cut_to, NULL, NULL, NULL);
-+ if (ret)
-+ /* FIXME(Zam): Should we re-connect the node to its parent if
-+ * cut_node fails? */
-+ goto failed;
-+
-+ {
-+ reiser4_tree *tree = current_tree;
-+ __u64 start_offset = 0, end_offset = 0;
-+
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ if (object) {
-+ /* We use @smallest_removed and the left delimiting of
-+ * the current node for @object->i_blocks, i_bytes
-+ * calculation. We assume that the items after the
-+ * *@smallest_removed key have been deleted from the
-+ * file body. */
-+ start_offset = get_key_offset(znode_get_ld_key(node));
-+ end_offset = get_key_offset(smallest_removed);
-+ }
-+
-+ assert("zam-1021", znode_is_connected(node));
-+ if (node->left)
-+ znode_set_rd_key(node->left, znode_get_rd_key(node));
-+
-+ *smallest_removed = *znode_get_ld_key(node);
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+
-+ if (object) {
-+ /* we used to perform actions which are to be performed on items on their removal from tree in
-+ special item method - kill_hook. Here for optimization reasons we avoid reading node
-+ containing item we remove and can not call item's kill hook. Instead we call function which
-+ does exactly the same things as tail kill hook in assumption that node we avoid reading
-+ contains only one item and that item is a tail one. */
-+ fake_kill_hook_tail(object, start_offset, end_offset,
-+ truncate);
-+ }
-+ }
-+ failed:
-+ zrelse(parent_lock.node);
-+ failed_nozrelse:
-+ done_lh(&parent_lock);
-+
-+ return ret;
-+}
-+
-+static int can_delete(const reiser4_key *key, znode *node)
-+{
-+ int result;
-+
-+ read_lock_dk(current_tree);
-+ result = keyle(key, znode_get_ld_key(node));
-+ read_unlock_dk(current_tree);
-+ return result;
-+}
-+
-+/**
-+ * This subroutine is not optimal but implementation seems to
-+ * be easier).
-+ *
-+ * @tap: the point deletion process begins from,
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ * @truncate: true if called for file truncate.
-+ * @progress: return true if a progress in file items deletions was made,
-+ * @smallest_removed value is actual in that case.
-+ *
-+ * @return: 0 if success, error code otherwise, -E_REPEAT means that long
-+ * reiser4_cut_tree operation was interrupted for allowing atom commit.
-+ */
-+int
-+cut_tree_worker_common(tap_t * tap, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed, struct inode *object,
-+ int truncate, int *progress)
-+{
-+ lock_handle next_node_lock;
-+ coord_t left_coord;
-+ int result;
-+
-+ assert("zam-931", tap->coord->node != NULL);
-+ assert("zam-932", znode_is_write_locked(tap->coord->node));
-+
-+ *progress = 0;
-+ init_lh(&next_node_lock);
-+
-+ while (1) {
-+ znode *node; /* node from which items are cut */
-+ node_plugin *nplug; /* node plugin for @node */
-+
-+ node = tap->coord->node;
-+
-+ /* Move next_node_lock to the next node on the left. */
-+ result =
-+ reiser4_get_left_neighbor(&next_node_lock, node,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result != 0 && result != -E_NO_NEIGHBOR)
-+ break;
-+ /* Check can we delete the node as a whole. */
-+ if (*progress && znode_get_level(node) == LEAF_LEVEL &&
-+ can_delete(from_key, node)) {
-+ result = reiser4_delete_node(node, smallest_removed,
-+ object, truncate);
-+ } else {
-+ result = reiser4_tap_load(tap);
-+ if (result)
-+ return result;
-+
-+ /* Prepare the second (right) point for cut_node() */
-+ if (*progress)
-+ coord_init_last_unit(tap->coord, node);
-+
-+ else if (item_plugin_by_coord(tap->coord)->b.lookup ==
-+ NULL)
-+ /* set rightmost unit for the items without lookup method */
-+ tap->coord->unit_pos =
-+ coord_last_unit_pos(tap->coord);
-+
-+ nplug = node->nplug;
-+
-+ assert("vs-686", nplug);
-+ assert("vs-687", nplug->lookup);
-+
-+ /* left_coord is leftmost unit cut from @node */
-+ result = nplug->lookup(node, from_key,
-+ FIND_MAX_NOT_MORE_THAN,
-+ &left_coord);
-+
-+ if (IS_CBKERR(result))
-+ break;
-+
-+ /* adjust coordinates so that they are set to existing units */
-+ if (coord_set_to_right(&left_coord)
-+ || coord_set_to_left(tap->coord)) {
-+ result = 0;
-+ break;
-+ }
-+
-+ if (coord_compare(&left_coord, tap->coord) ==
-+ COORD_CMP_ON_RIGHT) {
-+ /* keys from @from_key to @to_key are not in the tree */
-+ result = 0;
-+ break;
-+ }
-+
-+ if (left_coord.item_pos != tap->coord->item_pos) {
-+ /* do not allow to cut more than one item. It is added to solve problem of truncating
-+ partially converted files. If file is partially converted there may exist a twig node
-+ containing both internal item or items pointing to leaf nodes with formatting items
-+ and extent item. We do not want to kill internal items being at twig node here
-+ because cut_tree_worker assumes killing them from level level */
-+ coord_dup(&left_coord, tap->coord);
-+ assert("vs-1652",
-+ coord_is_existing_unit(&left_coord));
-+ left_coord.unit_pos = 0;
-+ }
-+
-+ /* cut data from one node */
-+ // *smallest_removed = *reiser4_min_key();
-+ result =
-+ kill_node_content(&left_coord, tap->coord, from_key,
-+ to_key, smallest_removed,
-+ next_node_lock.node, object,
-+ truncate);
-+ reiser4_tap_relse(tap);
-+ }
-+ if (result)
-+ break;
-+
-+ ++(*progress);
-+
-+ /* Check whether all items with keys >= from_key were removed
-+ * from the tree. */
-+ if (keyle(smallest_removed, from_key))
-+ /* result = 0; */
-+ break;
-+
-+ if (next_node_lock.node == NULL)
-+ break;
-+
-+ result = reiser4_tap_move(tap, &next_node_lock);
-+ done_lh(&next_node_lock);
-+ if (result)
-+ break;
-+
-+ /* Break long reiser4_cut_tree operation (deletion of a large
-+ file) if atom requires commit. */
-+ if (*progress > CUT_TREE_MIN_ITERATIONS
-+ && current_atom_should_commit()) {
-+ result = -E_REPEAT;
-+ break;
-+ }
-+ }
-+ done_lh(&next_node_lock);
-+ // assert("vs-301", !keyeq(&smallest_removed, reiser4_min_key()));
-+ return result;
-+}
-+
-+/* there is a fundamental problem with optimizing deletes: VFS does it
-+ one file at a time. Another problem is that if an item can be
-+ anything, then deleting items must be done one at a time. It just
-+ seems clean to writes this to specify a from and a to key, and cut
-+ everything between them though. */
-+
-+/* use this function with care if deleting more than what is part of a single file. */
-+/* do not use this when cutting a single item, it is suboptimal for that */
-+
-+/* You are encouraged to write plugin specific versions of this. It
-+ cannot be optimal for all plugins because it works item at a time,
-+ and some plugins could sometimes work node at a time. Regular files
-+ however are not optimizable to work node at a time because of
-+ extents needing to free the blocks they point to.
-+
-+ Optimizations compared to v3 code:
-+
-+ It does not balance (that task is left to memory pressure code).
-+
-+ Nodes are deleted only if empty.
-+
-+ Uses extents.
-+
-+ Performs read-ahead of formatted nodes whose contents are part of
-+ the deletion.
-+*/
-+
-+/**
-+ * Delete everything from the reiser4 tree between two keys: @from_key and
-+ * @to_key.
-+ *
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ * @object: owner of cutting items.
-+ * @truncate: true if called for file truncate.
-+ * @progress: return true if a progress in file items deletions was made,
-+ * @smallest_removed value is actual in that case.
-+ *
-+ * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree
-+ * operation was interrupted for allowing atom commit .
-+ */
-+
-+int reiser4_cut_tree_object(reiser4_tree * tree, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed_p,
-+ struct inode *object, int truncate, int *progress)
-+{
-+ lock_handle lock;
-+ int result;
-+ tap_t tap;
-+ coord_t right_coord;
-+ reiser4_key smallest_removed;
-+ int (*cut_tree_worker) (tap_t *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+ STORE_COUNTERS;
-+
-+ assert("umka-329", tree != NULL);
-+ assert("umka-330", from_key != NULL);
-+ assert("umka-331", to_key != NULL);
-+ assert("zam-936", keyle(from_key, to_key));
-+
-+ if (smallest_removed_p == NULL)
-+ smallest_removed_p = &smallest_removed;
-+
-+ init_lh(&lock);
-+
-+ do {
-+ /* Find rightmost item to cut away from the tree. */
-+ result = reiser4_object_lookup(object, to_key, &right_coord,
-+ &lock, ZNODE_WRITE_LOCK,
-+ FIND_MAX_NOT_MORE_THAN,
-+ TWIG_LEVEL, LEAF_LEVEL,
-+ CBK_UNIQUE, NULL /*ra_info */);
-+ if (result != CBK_COORD_FOUND)
-+ break;
-+ if (object == NULL
-+ || inode_file_plugin(object)->cut_tree_worker == NULL)
-+ cut_tree_worker = cut_tree_worker_common;
-+ else
-+ cut_tree_worker =
-+ inode_file_plugin(object)->cut_tree_worker;
-+ reiser4_tap_init(&tap, &right_coord, &lock, ZNODE_WRITE_LOCK);
-+ result =
-+ cut_tree_worker(&tap, from_key, to_key, smallest_removed_p,
-+ object, truncate, progress);
-+ reiser4_tap_done(&tap);
-+
-+ reiser4_preempt_point();
-+
-+ } while (0);
-+
-+ done_lh(&lock);
-+
-+ if (result) {
-+ switch (result) {
-+ case -E_NO_NEIGHBOR:
-+ result = 0;
-+ break;
-+ case -E_DEADLOCK:
-+ result = -E_REPEAT;
-+ case -E_REPEAT:
-+ case -ENOMEM:
-+ case -ENOENT:
-+ break;
-+ default:
-+ warning("nikita-2861", "failure: %i", result);
-+ }
-+ }
-+
-+ CHECK_COUNTERS;
-+ return result;
-+}
-+
-+/* repeat reiser4_cut_tree_object until everything is deleted.
-+ * unlike cut_file_items, it does not end current transaction if -E_REPEAT
-+ * is returned by cut_tree_object. */
-+int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
-+ const reiser4_key * to, struct inode *inode, int truncate)
-+{
-+ int result;
-+ int progress;
-+
-+ do {
-+ result = reiser4_cut_tree_object(tree, from, to, NULL,
-+ inode, truncate, &progress);
-+ } while (result == -E_REPEAT);
-+
-+ return result;
-+}
-+
-+/* finishing reiser4 initialization */
-+int reiser4_init_tree(reiser4_tree * tree /* pointer to structure being
-+ * initialized */ ,
-+ const reiser4_block_nr * root_block /* address of a root block
-+ * on a disk */ ,
-+ tree_level height /* height of a tree */ ,
-+ node_plugin * nplug /* default node plugin */ )
-+{
-+ int result;
-+
-+ assert("nikita-306", tree != NULL);
-+ assert("nikita-307", root_block != NULL);
-+ assert("nikita-308", height > 0);
-+ assert("nikita-309", nplug != NULL);
-+ assert("zam-587", tree->super != NULL);
-+
-+ tree->root_block = *root_block;
-+ tree->height = height;
-+ tree->estimate_one_insert = calc_estimate_one_insert(height);
-+ tree->nplug = nplug;
-+
-+ tree->znode_epoch = 1ull;
-+
-+ cbk_cache_init(&tree->cbk_cache);
-+
-+ result = znodes_tree_init(tree);
-+ if (result == 0)
-+ result = jnodes_tree_init(tree);
-+ if (result == 0) {
-+ tree->uber = zget(tree, &UBER_TREE_ADDR, NULL, 0,
-+ reiser4_ctx_gfp_mask_get());
-+ if (IS_ERR(tree->uber)) {
-+ result = PTR_ERR(tree->uber);
-+ tree->uber = NULL;
-+ }
-+ }
-+ return result;
-+}
-+
-+/* release resources associated with @tree */
-+void reiser4_done_tree(reiser4_tree * tree /* tree to release */ )
-+{
-+ if (tree == NULL)
-+ return;
-+
-+ if (tree->uber != NULL) {
-+ zput(tree->uber);
-+ tree->uber = NULL;
-+ }
-+ znodes_tree_done(tree);
-+ jnodes_tree_done(tree);
-+ cbk_cache_done(&tree->cbk_cache);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree.h linux-2.6.20/fs/reiser4/tree.h
---- linux-2.6.20.orig/fs/reiser4/tree.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree.h 2007-05-06 14:50:43.883033217 +0400
-@@ -0,0 +1,577 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Tree operations. See fs/reiser4/tree.c for comments */
-+
-+#if !defined( __REISER4_TREE_H__ )
-+#define __REISER4_TREE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "znode.h"
-+#include "tap.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* fictive block number never actually used */
-+extern const reiser4_block_nr UBER_TREE_ADDR;
-+
-+/* &cbk_cache_slot - entry in a coord cache.
-+
-+ This is entry in a coord_by_key (cbk) cache, represented by
-+ &cbk_cache.
-+
-+*/
-+typedef struct cbk_cache_slot {
-+ /* cached node */
-+ znode *node;
-+ /* linkage to the next cbk cache slot in a LRU order */
-+ struct list_head lru;
-+} cbk_cache_slot;
-+
-+/* &cbk_cache - coord cache. This is part of reiser4_tree.
-+
-+ cbk_cache is supposed to speed up tree lookups by caching results of recent
-+ successful lookups (we don't cache negative results as dentry cache
-+ does). Cache consists of relatively small number of entries kept in a LRU
-+ order. Each entry (&cbk_cache_slot) contains a pointer to znode, from
-+ which we can obtain a range of keys that covered by this znode. Before
-+ embarking into real tree traversal we scan cbk_cache slot by slot and for
-+ each slot check whether key we are looking for is between minimal and
-+ maximal keys for node pointed to by this slot. If no match is found, real
-+ tree traversal is performed and if result is successful, appropriate entry
-+ is inserted into cache, possibly pulling least recently used entry out of
-+ it.
-+
-+ Tree spin lock is used to protect coord cache. If contention for this
-+ lock proves to be too high, more finer grained locking can be added.
-+
-+ Invariants involving parts of this data-type:
-+
-+ [cbk-cache-invariant]
-+*/
-+typedef struct cbk_cache {
-+ /* serializator */
-+ rwlock_t guard;
-+ int nr_slots;
-+ /* head of LRU list of cache slots */
-+ struct list_head lru;
-+ /* actual array of slots */
-+ cbk_cache_slot *slot;
-+} cbk_cache;
-+
-+/* level_lookup_result - possible outcome of looking up key at some level.
-+ This is used by coord_by_key when traversing tree downward. */
-+typedef enum {
-+ /* continue to the next level */
-+ LOOKUP_CONT,
-+ /* done. Either required item was found, or we can prove it
-+ doesn't exist, or some error occurred. */
-+ LOOKUP_DONE,
-+ /* restart traversal from the root. Infamous "repetition". */
-+ LOOKUP_REST
-+} level_lookup_result;
-+
-+/* This is representation of internal reiser4 tree where all file-system
-+ data and meta-data are stored. This structure is passed to all tree
-+ manipulation functions. It's different from the super block because:
-+ we don't want to limit ourselves to strictly one to one mapping
-+ between super blocks and trees, and, because they are logically
-+ different: there are things in a super block that have no relation to
-+ the tree (bitmaps, journalling area, mount options, etc.) and there
-+ are things in a tree that bear no relation to the super block, like
-+ tree of znodes.
-+
-+ At this time, there is only one tree
-+ per filesystem, and this struct is part of the super block. We only
-+ call the super block the super block for historical reasons (most
-+ other filesystems call the per filesystem metadata the super block).
-+*/
-+
-+struct reiser4_tree {
-+ /* block_nr == 0 is fake znode. Write lock it, while changing
-+ tree height. */
-+ /* disk address of root node of a tree */
-+ reiser4_block_nr root_block;
-+
-+ /* level of the root node. If this is 1, tree consists of root
-+ node only */
-+ tree_level height;
-+
-+ /*
-+ * this is cached here avoid calling plugins through function
-+ * dereference all the time.
-+ */
-+ __u64 estimate_one_insert;
-+
-+ /* cache of recent tree lookup results */
-+ cbk_cache cbk_cache;
-+
-+ /* hash table to look up znodes by block number. */
-+ z_hash_table zhash_table;
-+ z_hash_table zfake_table;
-+ /* hash table to look up jnodes by inode and offset. */
-+ j_hash_table jhash_table;
-+
-+ /* lock protecting:
-+ - parent pointers,
-+ - sibling pointers,
-+ - znode hash table
-+ - coord cache
-+ */
-+ /* NOTE: The "giant" tree lock can be replaced by more spin locks,
-+ hoping they will be less contented. We can use one spin lock per one
-+ znode hash bucket. With adding of some code complexity, sibling
-+ pointers can be protected by both znode spin locks. However it looks
-+ more SMP scalable we should test this locking change on n-ways (n >
-+ 4) SMP machines. Current 4-ways machine test does not show that tree
-+ lock is contented and it is a bottleneck (2003.07.25). */
-+
-+ rwlock_t tree_lock;
-+
-+ /* lock protecting delimiting keys */
-+ rwlock_t dk_lock;
-+
-+ /* spin lock protecting znode_epoch */
-+ spinlock_t epoch_lock;
-+ /* version stamp used to mark znode updates. See seal.[ch] for more
-+ * information. */
-+ __u64 znode_epoch;
-+
-+ znode *uber;
-+ node_plugin *nplug;
-+ struct super_block *super;
-+ struct {
-+ /* carry flags used for insertion of new nodes */
-+ __u32 new_node_flags;
-+ /* carry flags used for insertion of new extents */
-+ __u32 new_extent_flags;
-+ /* carry flags used for paste operations */
-+ __u32 paste_flags;
-+ /* carry flags used for insert operations */
-+ __u32 insert_flags;
-+ } carry;
-+};
-+
-+extern int reiser4_init_tree(reiser4_tree * tree,
-+ const reiser4_block_nr * root_block,
-+ tree_level height, node_plugin * default_plugin);
-+extern void reiser4_done_tree(reiser4_tree * tree);
-+
-+/* cbk flags: options for coord_by_key() */
-+typedef enum {
-+ /* coord_by_key() is called for insertion. This is necessary because
-+ of extents being located at the twig level. For explanation, see
-+ comment just above is_next_item_internal().
-+ */
-+ CBK_FOR_INSERT = (1 << 0),
-+ /* coord_by_key() is called with key that is known to be unique */
-+ CBK_UNIQUE = (1 << 1),
-+ /* coord_by_key() can trust delimiting keys. This options is not user
-+ accessible. coord_by_key() will set it automatically. It will be
-+ only cleared by special-case in extents-on-the-twig-level handling
-+ where it is necessary to insert item with a key smaller than
-+ leftmost key in a node. This is necessary because of extents being
-+ located at the twig level. For explanation, see comment just above
-+ is_next_item_internal().
-+ */
-+ CBK_TRUST_DK = (1 << 2),
-+ CBK_READA = (1 << 3), /* original: readahead leaves which contain items of certain file */
-+ CBK_READDIR_RA = (1 << 4), /* readdir: readahead whole directory and all its stat datas */
-+ CBK_DKSET = (1 << 5),
-+ CBK_EXTENDED_COORD = (1 << 6), /* coord_t is actually */
-+ CBK_IN_CACHE = (1 << 7), /* node is already in cache */
-+ CBK_USE_CRABLOCK = (1 << 8) /* use crab_lock in stead of long term
-+ * lock */
-+} cbk_flags;
-+
-+/* insertion outcome. IBK = insert by key */
-+typedef enum {
-+ IBK_INSERT_OK = 0,
-+ IBK_ALREADY_EXISTS = -EEXIST,
-+ IBK_IO_ERROR = -EIO,
-+ IBK_NO_SPACE = -E_NODE_FULL,
-+ IBK_OOM = -ENOMEM
-+} insert_result;
-+
-+#define IS_CBKERR(err) ((err) != CBK_COORD_FOUND && (err) != CBK_COORD_NOTFOUND)
-+
-+typedef int (*tree_iterate_actor_t) (reiser4_tree * tree, coord_t * coord,
-+ lock_handle * lh, void *arg);
-+extern int reiser4_iterate_tree(reiser4_tree * tree, coord_t * coord,
-+ lock_handle * lh,
-+ tree_iterate_actor_t actor, void *arg,
-+ znode_lock_mode mode, int through_units_p);
-+extern int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
-+ znode_lock_request pri, lock_handle * lh);
-+
-+/* return node plugin of @node */
-+static inline node_plugin *node_plugin_by_node(const znode *
-+ node /* node to query */ )
-+{
-+ assert("vs-213", node != NULL);
-+ assert("vs-214", znode_is_loaded(node));
-+
-+ return node->nplug;
-+}
-+
-+/* number of items in @node */
-+static inline pos_in_node_t node_num_items(const znode * node)
-+{
-+ assert("nikita-2754", znode_is_loaded(node));
-+ assert("nikita-2468",
-+ node_plugin_by_node(node)->num_of_items(node) == node->nr_items);
-+
-+ return node->nr_items;
-+}
-+
-+/* Return the number of items at the present node. Asserts coord->node !=
-+ NULL. */
-+static inline unsigned coord_num_items(const coord_t * coord)
-+{
-+ assert("jmacd-9805", coord->node != NULL);
-+
-+ return node_num_items(coord->node);
-+}
-+
-+/* true if @node is empty */
-+static inline int node_is_empty(const znode * node)
-+{
-+ return node_num_items(node) == 0;
-+}
-+
-+typedef enum {
-+ SHIFTED_SOMETHING = 0,
-+ SHIFT_NO_SPACE = -E_NODE_FULL,
-+ SHIFT_IO_ERROR = -EIO,
-+ SHIFT_OOM = -ENOMEM,
-+} shift_result;
-+
-+extern node_plugin *node_plugin_by_coord(const coord_t * coord);
-+extern int is_coord_in_node(const coord_t * coord);
-+extern int key_in_node(const reiser4_key *, const coord_t *);
-+extern void coord_item_move_to(coord_t * coord, int items);
-+extern void coord_unit_move_to(coord_t * coord, int units);
-+
-+/* there are two types of repetitive accesses (ra): intra-syscall
-+ (local) and inter-syscall (global). Local ra is used when
-+ during single syscall we add/delete several items and units in the
-+ same place in a tree. Note that plan-A fragments local ra by
-+ separating stat-data and file body in key-space. Global ra is
-+ used when user does repetitive modifications in the same place in a
-+ tree.
-+
-+ Our ra implementation serves following purposes:
-+ 1 it affects balancing decisions so that next operation in a row
-+ can be performed faster;
-+ 2 it affects lower-level read-ahead in page-cache;
-+ 3 it allows to avoid unnecessary lookups by maintaining some state
-+ across several operations (this is only for local ra);
-+ 4 it leaves room for lazy-micro-balancing: when we start a sequence of
-+ operations they are performed without actually doing any intra-node
-+ shifts, until we finish sequence or scope of sequence leaves
-+ current node, only then we really pack node (local ra only).
-+*/
-+
-+/* another thing that can be useful is to keep per-tree and/or
-+ per-process cache of recent lookups. This cache can be organised as a
-+ list of block numbers of formatted nodes sorted by starting key in
-+ this node. Balancings should invalidate appropriate parts of this
-+ cache.
-+*/
-+
-+lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key,
-+ coord_t * coord, lock_handle * handle,
-+ znode_lock_mode lock, lookup_bias bias,
-+ tree_level lock_level, tree_level stop_level,
-+ __u32 flags, ra_info_t *);
-+
-+lookup_result reiser4_object_lookup(struct inode *object,
-+ const reiser4_key * key,
-+ coord_t * coord,
-+ lock_handle * lh,
-+ znode_lock_mode lock_mode,
-+ lookup_bias bias,
-+ tree_level lock_level,
-+ tree_level stop_level,
-+ __u32 flags, ra_info_t * info);
-+
-+insert_result insert_by_key(reiser4_tree * tree, const reiser4_key * key,
-+ reiser4_item_data * data, coord_t * coord,
-+ lock_handle * lh,
-+ tree_level stop_level, __u32 flags);
-+insert_result insert_by_coord(coord_t * coord,
-+ reiser4_item_data * data, const reiser4_key * key,
-+ lock_handle * lh, __u32);
-+insert_result insert_extent_by_coord(coord_t * coord,
-+ reiser4_item_data * data,
-+ const reiser4_key * key, lock_handle * lh);
-+int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed);
-+int kill_node_content(coord_t * from, coord_t * to,
-+ const reiser4_key * from_key, const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ znode * locked_left_neighbor, struct inode *inode,
-+ int truncate);
-+
-+int reiser4_resize_item(coord_t * coord, reiser4_item_data * data,
-+ reiser4_key * key, lock_handle * lh, cop_insert_flag);
-+int insert_into_item(coord_t * coord, lock_handle * lh, const reiser4_key * key,
-+ reiser4_item_data * data, unsigned);
-+int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f);
-+int find_new_child_ptr(znode * parent, znode * child, znode * left,
-+ coord_t * result);
-+
-+int shift_right_of_but_excluding_insert_coord(coord_t * insert_coord);
-+int shift_left_of_and_including_insert_coord(coord_t * insert_coord);
-+
-+void fake_kill_hook_tail(struct inode *, loff_t start, loff_t end, int);
-+
-+extern int cut_tree_worker_common(tap_t *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+extern int reiser4_cut_tree_object(reiser4_tree *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+extern int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
-+ const reiser4_key * to, struct inode *, int);
-+
-+extern int reiser4_delete_node(znode *, reiser4_key *, struct inode *, int);
-+extern int check_tree_pointer(const coord_t * pointer, const znode * child);
-+extern int find_new_child_ptr(znode * parent, znode * child UNUSED_ARG,
-+ znode * left, coord_t * result);
-+extern int find_child_ptr(znode * parent, znode * child, coord_t * result);
-+extern int set_child_delimiting_keys(znode * parent, const coord_t * in_parent,
-+ znode * child);
-+extern znode *child_znode(const coord_t * in_parent, znode * parent,
-+ int incore_p, int setup_dkeys_p);
-+
-+extern int cbk_cache_init(cbk_cache * cache);
-+extern void cbk_cache_done(cbk_cache * cache);
-+extern void cbk_cache_invalidate(const znode * node, reiser4_tree * tree);
-+
-+extern char *sprint_address(const reiser4_block_nr * block);
-+
-+#if REISER4_DEBUG
-+extern void print_coord_content(const char *prefix, coord_t * p);
-+extern void reiser4_print_address(const char *prefix,
-+ const reiser4_block_nr * block);
-+extern void print_tree_rec(const char *prefix, reiser4_tree * tree,
-+ __u32 flags);
-+extern void check_dkeys(znode *node);
-+#else
-+#define print_coord_content(p, c) noop
-+#define reiser4_print_address(p, b) noop
-+#endif
-+
-+extern void forget_znode(lock_handle * handle);
-+extern int deallocate_znode(znode * node);
-+
-+extern int is_disk_addr_unallocated(const reiser4_block_nr * addr);
-+
-+/* struct used internally to pack all numerous arguments of tree lookup.
-+ Used to avoid passing a lot of arguments to helper functions. */
-+typedef struct cbk_handle {
-+ /* tree we are in */
-+ reiser4_tree *tree;
-+ /* key we are going after */
-+ const reiser4_key *key;
-+ /* coord we will store result in */
-+ coord_t *coord;
-+ /* type of lock to take on target node */
-+ znode_lock_mode lock_mode;
-+ /* lookup bias. See comments at the declaration of lookup_bias */
-+ lookup_bias bias;
-+ /* lock level: level starting from which tree traversal starts taking
-+ * write locks. */
-+ tree_level lock_level;
-+ /* level where search will stop. Either item will be found between
-+ lock_level and stop_level, or CBK_COORD_NOTFOUND will be
-+ returned.
-+ */
-+ tree_level stop_level;
-+ /* level we are currently at */
-+ tree_level level;
-+ /* block number of @active node. Tree traversal operates on two
-+ nodes: active and parent. */
-+ reiser4_block_nr block;
-+ /* put here error message to be printed by caller */
-+ const char *error;
-+ /* result passed back to caller */
-+ lookup_result result;
-+ /* lock handles for active and parent */
-+ lock_handle *parent_lh;
-+ lock_handle *active_lh;
-+ reiser4_key ld_key;
-+ reiser4_key rd_key;
-+ /* flags, passed to the cbk routine. Bits of this bitmask are defined
-+ in tree.h:cbk_flags enum. */
-+ __u32 flags;
-+ ra_info_t *ra_info;
-+ struct inode *object;
-+} cbk_handle;
-+
-+extern znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h);
-+
-+/* eottl.c */
-+extern int handle_eottl(cbk_handle *h, int *outcome);
-+
-+int lookup_multikey(cbk_handle * handle, int nr_keys);
-+int lookup_couple(reiser4_tree * tree,
-+ const reiser4_key * key1, const reiser4_key * key2,
-+ coord_t * coord1, coord_t * coord2,
-+ lock_handle * lh1, lock_handle * lh2,
-+ znode_lock_mode lock_mode, lookup_bias bias,
-+ tree_level lock_level, tree_level stop_level, __u32 flags,
-+ int *result1, int *result2);
-+
-+static inline void read_lock_tree(reiser4_tree *tree)
-+{
-+ /* check that tree is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(read_locked_tree) &&
-+ LOCK_CNT_NIL(write_locked_tree)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_stack)));
-+
-+ read_lock(&(tree->tree_lock));
-+
-+ LOCK_CNT_INC(read_locked_tree);
-+ LOCK_CNT_INC(rw_locked_tree);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void read_unlock_tree(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(read_locked_tree);
-+ LOCK_CNT_DEC(rw_locked_tree);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ read_unlock(&(tree->tree_lock));
-+}
-+
-+static inline void write_lock_tree(reiser4_tree *tree)
-+{
-+ /* check that tree is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(read_locked_tree) &&
-+ LOCK_CNT_NIL(write_locked_tree)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_stack)));
-+
-+ write_lock(&(tree->tree_lock));
-+
-+ LOCK_CNT_INC(write_locked_tree);
-+ LOCK_CNT_INC(rw_locked_tree);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void write_unlock_tree(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(write_locked_tree);
-+ LOCK_CNT_DEC(rw_locked_tree);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ write_unlock(&(tree->tree_lock));
-+}
-+
-+static inline void read_lock_dk(reiser4_tree *tree)
-+{
-+ /* check that dk is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(read_locked_dk) &&
-+ LOCK_CNT_NIL(write_locked_dk)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ read_lock(&((tree)->dk_lock));
-+
-+ LOCK_CNT_INC(read_locked_dk);
-+ LOCK_CNT_INC(rw_locked_dk);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void read_unlock_dk(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(read_locked_dk);
-+ LOCK_CNT_DEC(rw_locked_dk);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ read_unlock(&(tree->dk_lock));
-+}
-+
-+static inline void write_lock_dk(reiser4_tree *tree)
-+{
-+ /* check that dk is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(read_locked_dk) &&
-+ LOCK_CNT_NIL(write_locked_dk)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ write_lock(&((tree)->dk_lock));
-+
-+ LOCK_CNT_INC(write_locked_dk);
-+ LOCK_CNT_INC(rw_locked_dk);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void write_unlock_dk(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(write_locked_dk);
-+ LOCK_CNT_DEC(rw_locked_dk);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ write_unlock(&(tree->dk_lock));
-+}
-+
-+/* estimate api. Implementation is in estimate.c */
-+reiser4_block_nr estimate_one_insert_item(reiser4_tree *);
-+reiser4_block_nr estimate_one_insert_into_item(reiser4_tree *);
-+reiser4_block_nr estimate_insert_flow(tree_level);
-+reiser4_block_nr estimate_one_item_removal(reiser4_tree *);
-+reiser4_block_nr calc_estimate_one_insert(tree_level);
-+reiser4_block_nr estimate_dirty_cluster(struct inode *);
-+reiser4_block_nr estimate_insert_cluster(struct inode *);
-+reiser4_block_nr estimate_update_cluster(struct inode *);
-+
-+/* __REISER4_TREE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree_mod.c linux-2.6.20/fs/reiser4/tree_mod.c
---- linux-2.6.20.orig/fs/reiser4/tree_mod.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree_mod.c 2007-05-06 14:50:43.887034467 +0400
-@@ -0,0 +1,386 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * Functions to add/delete new nodes to/from the tree.
-+ *
-+ * Functions from this file are used by carry (see carry*) to handle:
-+ *
-+ * . insertion of new formatted node into tree
-+ *
-+ * . addition of new tree root, increasing tree height
-+ *
-+ * . removing tree root, decreasing tree height
-+ *
-+ */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/plugin.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_mod.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/err.h>
-+
-+static int add_child_ptr(znode * parent, znode * child);
-+/* warning only issued if error is not -E_REPEAT */
-+#define ewarning( error, ... ) \
-+ if( ( error ) != -E_REPEAT ) \
-+ warning( __VA_ARGS__ )
-+
-+/* allocate new node on the @level and immediately on the right of @brother. */
-+znode * reiser4_new_node(znode * brother /* existing left neighbor
-+ * of new node */,
-+ tree_level level /* tree level at which new node is to
-+ * be allocated */)
-+{
-+ znode *result;
-+ int retcode;
-+ reiser4_block_nr blocknr;
-+
-+ assert("nikita-930", brother != NULL);
-+ assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT);
-+
-+ retcode = assign_fake_blocknr_formatted(&blocknr);
-+ if (retcode == 0) {
-+ result =
-+ zget(znode_get_tree(brother), &blocknr, NULL, level,
-+ reiser4_ctx_gfp_mask_get());
-+ if (IS_ERR(result)) {
-+ ewarning(PTR_ERR(result), "nikita-929",
-+ "Cannot allocate znode for carry: %li",
-+ PTR_ERR(result));
-+ return result;
-+ }
-+ /* cheap test, can be executed even when debugging is off */
-+ if (!znode_just_created(result)) {
-+ warning("nikita-2213",
-+ "Allocated already existing block: %llu",
-+ (unsigned long long)blocknr);
-+ zput(result);
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ assert("nikita-931", result != NULL);
-+ result->nplug = znode_get_tree(brother)->nplug;
-+ assert("nikita-933", result->nplug != NULL);
-+
-+ retcode = zinit_new(result, reiser4_ctx_gfp_mask_get());
-+ if (retcode == 0) {
-+ ZF_SET(result, JNODE_CREATED);
-+ zrelse(result);
-+ } else {
-+ zput(result);
-+ result = ERR_PTR(retcode);
-+ }
-+ } else {
-+ /* failure to allocate new node during balancing.
-+ This should never happen. Ever. Returning -E_REPEAT
-+ is not viable solution, because "out of disk space"
-+ is not transient error that will go away by itself.
-+ */
-+ ewarning(retcode, "nikita-928",
-+ "Cannot allocate block for carry: %i", retcode);
-+ result = ERR_PTR(retcode);
-+ }
-+ assert("nikita-1071", result != NULL);
-+ return result;
-+}
-+
-+/* allocate new root and add it to the tree
-+
-+ This helper function is called by add_new_root().
-+
-+*/
-+znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ ,
-+ znode * fake /* "fake" znode */ )
-+{
-+ reiser4_tree *tree = znode_get_tree(old_root);
-+ znode *new_root = NULL; /* to shut gcc up */
-+ int result;
-+
-+ assert("nikita-1069", old_root != NULL);
-+ assert("umka-262", fake != NULL);
-+ assert("umka-263", tree != NULL);
-+
-+ /* "fake" znode---one always hanging just above current root. This
-+ node is locked when new root is created or existing root is
-+ deleted. Downward tree traversal takes lock on it before taking
-+ lock on a root node. This avoids race conditions with root
-+ manipulations.
-+
-+ */
-+ assert("nikita-1348", znode_above_root(fake));
-+ assert("nikita-1211", znode_is_root(old_root));
-+
-+ result = 0;
-+ if (tree->height >= REAL_MAX_ZTREE_HEIGHT) {
-+ warning("nikita-1344", "Tree is too tall: %i", tree->height);
-+ /* ext2 returns -ENOSPC when it runs out of free inodes with a
-+ following comment (fs/ext2/ialloc.c:441): Is it really
-+ ENOSPC?
-+
-+ -EXFULL? -EINVAL?
-+ */
-+ result = RETERR(-ENOSPC);
-+ } else {
-+ /* Allocate block for new root. It's not that
-+ important where it will be allocated, as root is
-+ almost always in memory. Moreover, allocate on
-+ flush can be going here.
-+ */
-+ assert("nikita-1448", znode_is_root(old_root));
-+ new_root = reiser4_new_node(fake, tree->height + 1);
-+ if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) {
-+ lock_handle rlh;
-+
-+ init_lh(&rlh);
-+ result =
-+ longterm_lock_znode(&rlh, new_root,
-+ ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (result == 0) {
-+ parent_coord_t *in_parent;
-+
-+ znode_make_dirty(fake);
-+
-+ /* new root is a child of "fake" node */
-+ write_lock_tree(tree);
-+
-+ ++tree->height;
-+
-+ /* recalculate max balance overhead */
-+ tree->estimate_one_insert =
-+ estimate_one_insert_item(tree);
-+
-+ tree->root_block = *znode_get_block(new_root);
-+ in_parent = &new_root->in_parent;
-+ init_parent_coord(in_parent, fake);
-+ /* manually insert new root into sibling
-+ * list. With this all nodes involved into
-+ * balancing are connected after balancing is
-+ * done---useful invariant to check. */
-+ sibling_list_insert_nolock(new_root, NULL);
-+ write_unlock_tree(tree);
-+
-+ /* insert into new root pointer to the
-+ @old_root. */
-+ assert("nikita-1110",
-+ WITH_DATA(new_root,
-+ node_is_empty(new_root)));
-+ write_lock_dk(tree);
-+ znode_set_ld_key(new_root, reiser4_min_key());
-+ znode_set_rd_key(new_root, reiser4_max_key());
-+ write_unlock_dk(tree);
-+ if (REISER4_DEBUG) {
-+ ZF_CLR(old_root, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(old_root, JNODE_RIGHT_CONNECTED);
-+ ZF_SET(old_root, JNODE_ORPHAN);
-+ }
-+ result = add_child_ptr(new_root, old_root);
-+ done_lh(&rlh);
-+ }
-+ zrelse(new_root);
-+ }
-+ }
-+ if (result != 0)
-+ new_root = ERR_PTR(result);
-+ return new_root;
-+}
-+
-+/* build &reiser4_item_data for inserting child pointer
-+
-+ Build &reiser4_item_data that can be later used to insert pointer to @child
-+ in its parent.
-+
-+*/
-+void build_child_ptr_data(znode * child /* node pointer to which will be
-+ * inserted */ ,
-+ reiser4_item_data * data /* where to store result */ )
-+{
-+ assert("nikita-1116", child != NULL);
-+ assert("nikita-1117", data != NULL);
-+
-+ /*
-+ * NOTE: use address of child's blocknr as address of data to be
-+ * inserted. As result of this data gets into on-disk structure in cpu
-+ * byte order. internal's create_hook converts it to little endian byte
-+ * order.
-+ */
-+ data->data = (char *)znode_get_block(child);
-+ /* data -> data is kernel space */
-+ data->user = 0;
-+ data->length = sizeof(reiser4_block_nr);
-+ /* FIXME-VS: hardcoded internal item? */
-+
-+ /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */
-+ data->iplug = item_plugin_by_id(NODE_POINTER_ID);
-+}
-+
-+/* add pointer to @child into empty @parent.
-+
-+ This is used when pointer to old root is inserted into new root which is
-+ empty.
-+*/
-+static int add_child_ptr(znode * parent, znode * child)
-+{
-+ coord_t coord;
-+ reiser4_item_data data;
-+ int result;
-+ reiser4_key key;
-+
-+ assert("nikita-1111", parent != NULL);
-+ assert("nikita-1112", child != NULL);
-+ assert("nikita-1115",
-+ znode_get_level(parent) == znode_get_level(child) + 1);
-+
-+ result = zload(parent);
-+ if (result != 0)
-+ return result;
-+ assert("nikita-1113", node_is_empty(parent));
-+ coord_init_first_unit(&coord, parent);
-+
-+ build_child_ptr_data(child, &data);
-+ data.arg = NULL;
-+
-+ read_lock_dk(znode_get_tree(parent));
-+ key = *znode_get_ld_key(child);
-+ read_unlock_dk(znode_get_tree(parent));
-+
-+ result = node_plugin_by_node(parent)->create_item(&coord, &key, &data,
-+ NULL);
-+ znode_make_dirty(parent);
-+ zrelse(parent);
-+ return result;
-+}
-+
-+/* actually remove tree root */
-+static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is
-+ * being removed */,
-+ znode * old_root /* root node that is being
-+ * removed */ ,
-+ znode * new_root /* new root---sole child of
-+ * @old_root */,
-+ const reiser4_block_nr * new_root_blk /* disk address of
-+ * @new_root */)
-+{
-+ znode *uber;
-+ int result;
-+ lock_handle handle_for_uber;
-+
-+ assert("umka-265", tree != NULL);
-+ assert("nikita-1198", new_root != NULL);
-+ assert("nikita-1199",
-+ znode_get_level(new_root) + 1 == znode_get_level(old_root));
-+
-+ assert("nikita-1201", znode_is_write_locked(old_root));
-+
-+ assert("nikita-1203",
-+ disk_addr_eq(new_root_blk, znode_get_block(new_root)));
-+
-+ init_lh(&handle_for_uber);
-+ /* obtain and lock "fake" znode protecting changes in tree height. */
-+ result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
-+ &handle_for_uber);
-+ if (result == 0) {
-+ uber = handle_for_uber.node;
-+
-+ znode_make_dirty(uber);
-+
-+ /* don't take long term lock a @new_root. Take spinlock. */
-+
-+ write_lock_tree(tree);
-+
-+ tree->root_block = *new_root_blk;
-+ --tree->height;
-+
-+ /* recalculate max balance overhead */
-+ tree->estimate_one_insert = estimate_one_insert_item(tree);
-+
-+ assert("nikita-1202",
-+ tree->height == znode_get_level(new_root));
-+
-+ /* new root is child on "fake" node */
-+ init_parent_coord(&new_root->in_parent, uber);
-+ ++uber->c_count;
-+
-+ /* sibling_list_insert_nolock(new_root, NULL); */
-+ write_unlock_tree(tree);
-+
-+ /* reinitialise old root. */
-+ result = node_plugin_by_node(old_root)->init(old_root);
-+ znode_make_dirty(old_root);
-+ if (result == 0) {
-+ assert("nikita-1279", node_is_empty(old_root));
-+ ZF_SET(old_root, JNODE_HEARD_BANSHEE);
-+ old_root->c_count = 0;
-+ }
-+ }
-+ done_lh(&handle_for_uber);
-+
-+ return result;
-+}
-+
-+/* remove tree root
-+
-+ This function removes tree root, decreasing tree height by one. Tree root
-+ and its only child (that is going to become new tree root) are write locked
-+ at the entry.
-+
-+ To remove tree root we need to take lock on special "fake" znode that
-+ protects changes of tree height. See comments in reiser4_add_tree_root() for
-+ more on this.
-+
-+ Also parent pointers have to be updated in
-+ old and new root. To simplify code, function is split into two parts: outer
-+ reiser4_kill_tree_root() collects all necessary arguments and calls
-+ reiser4_kill_root() to do the actual job.
-+
-+*/
-+int reiser4_kill_tree_root(znode * old_root /* tree root that we are
-+ removing*/)
-+{
-+ int result;
-+ coord_t down_link;
-+ znode *new_root;
-+ reiser4_tree *tree;
-+
-+ assert("umka-266", current_tree != NULL);
-+ assert("nikita-1194", old_root != NULL);
-+ assert("nikita-1196", znode_is_root(old_root));
-+ assert("nikita-1200", node_num_items(old_root) == 1);
-+ assert("nikita-1401", znode_is_write_locked(old_root));
-+
-+ coord_init_first_unit(&down_link, old_root);
-+
-+ tree = znode_get_tree(old_root);
-+ new_root = child_znode(&down_link, old_root, 0, 1);
-+ if (!IS_ERR(new_root)) {
-+ result =
-+ reiser4_kill_root(tree, old_root, new_root,
-+ znode_get_block(new_root));
-+ zput(new_root);
-+ } else
-+ result = PTR_ERR(new_root);
-+
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree_mod.h linux-2.6.20/fs/reiser4/tree_mod.h
---- linux-2.6.20.orig/fs/reiser4/tree_mod.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree_mod.h 2007-05-06 14:50:43.887034467 +0400
-@@ -0,0 +1,29 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Functions to add/delete new nodes to/from the tree. See tree_mod.c for
-+ * comments. */
-+
-+#if !defined( __REISER4_TREE_MOD_H__ )
-+#define __REISER4_TREE_MOD_H__
-+
-+#include "forward.h"
-+
-+znode *reiser4_new_node(znode * brother, tree_level level);
-+znode *reiser4_add_tree_root(znode * old_root, znode * fake);
-+int reiser4_kill_tree_root(znode * old_root);
-+void build_child_ptr_data(znode * child, reiser4_item_data * data);
-+
-+/* __REISER4_TREE_MOD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree_walk.c linux-2.6.20/fs/reiser4/tree_walk.c
---- linux-2.6.20.orig/fs/reiser4/tree_walk.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree_walk.c 2007-05-06 14:50:43.887034467 +0400
-@@ -0,0 +1,927 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Routines and macros to:
-+
-+ get_left_neighbor()
-+
-+ get_right_neighbor()
-+
-+ get_parent()
-+
-+ get_first_child()
-+
-+ get_last_child()
-+
-+ various routines to walk the whole tree and do things to it like
-+ repack it, or move it to tertiary storage. Please make them as
-+ generic as is reasonable.
-+
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+/* These macros are used internally in tree_walk.c in attempt to make
-+ lock_neighbor() code usable to build lock_parent(), lock_right_neighbor,
-+ lock_left_neighbor */
-+#define GET_NODE_BY_PTR_OFFSET(node, off) (*(znode**)(((unsigned long)(node)) + (off)))
-+#define FIELD_OFFSET(name) offsetof(znode, name)
-+#define PARENT_PTR_OFFSET FIELD_OFFSET(in_parent.node)
-+#define LEFT_PTR_OFFSET FIELD_OFFSET(left)
-+#define RIGHT_PTR_OFFSET FIELD_OFFSET(right)
-+
-+/* This is the generic procedure to get and lock `generic' neighbor (left or
-+ right neighbor or parent). It implements common algorithm for all cases of
-+ getting lock on neighbor node, only znode structure field is different in
-+ each case. This is parameterized by ptr_offset argument, which is byte
-+ offset for the pointer to the desired neighbor within the current node's
-+ znode structure. This function should be called with the tree lock held */
-+static int lock_neighbor(
-+ /* resulting lock handle */
-+ lock_handle * result,
-+ /* znode whose neighbor (or parent) is to be locked */
-+ znode * node,
-+ /* pointer to neighbor (or parent) znode field offset, in bytes from
-+ the base address of znode structure */
-+ int ptr_offset,
-+ /* lock mode for longterm_lock_znode call */
-+ znode_lock_mode mode,
-+ /* lock request for longterm_lock_znode call */
-+ znode_lock_request req,
-+ /* GN_* flags; @rlocked != 0: tree lock is held for read, else for write */
-+ int flags, int rlocked)
-+{
-+ reiser4_tree *tree = znode_get_tree(node); /* NOTE(review): evaluated before the node != NULL assert below */
-+ znode *neighbor;
-+ int ret;
-+
-+ assert("umka-236", node != NULL);
-+ assert("umka-237", tree != NULL);
-+ assert_rw_locked(&(tree->tree_lock));
-+
-+ if (flags & GN_TRY_LOCK)
-+ req |= ZNODE_LOCK_NONBLOCK;
-+ if (flags & GN_SAME_ATOM)
-+ req |= ZNODE_LOCK_DONT_FUSE;
-+
-+ /* get neighbor's address via the sibling link; leave the while loop
-+ (and return) if the link is not available. */
-+ while (1) {
-+ neighbor = GET_NODE_BY_PTR_OFFSET(node, ptr_offset);
-+
-+ /* return -E_NO_NEIGHBOR if parent or side pointer is NULL or if
-+ * node pointed by it is not connected.
-+ *
-+ * However, GN_ALLOW_NOT_CONNECTED option masks "connected"
-+ * check and allows passing reference to not connected znode to
-+ * subsequent longterm_lock_znode() call. This kills possible
-+ * busy loop if we are trying to get longterm lock on locked but
-+ * not yet connected parent node. */
-+ if (neighbor == NULL || !((flags & GN_ALLOW_NOT_CONNECTED)
-+ || znode_is_connected(neighbor))) {
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+
-+ /* protect it from deletion. */
-+ zref(neighbor);
-+
-+ rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
-+
-+ ret = longterm_lock_znode(result, neighbor, mode, req);
-+
-+ /* The lock handle obtains its own reference, release the one from above. */
-+ zput(neighbor);
-+
-+ rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
-+
-+ /* restart if the node we got a reference to is being
-+ invalidated. we should not get a reference to this node
-+ again. */
-+ if (ret == -EINVAL)
-+ continue;
-+ if (ret)
-+ return ret;
-+
-+ /* check if neighbor link still points to just locked znode;
-+ the link could have been changed while the process slept. */
-+ if (neighbor == GET_NODE_BY_PTR_OFFSET(node, ptr_offset))
-+ return 0;
-+
-+ /* znode was locked by mistake; unlock it (with the tree lock
-+ dropped) and restart the locking process from the beginning. */
-+ rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
-+ longterm_unlock_znode(result);
-+ rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
-+ }
-+}
-+
-+/* get parent node with longterm lock under tree read lock, accepts GN_* flags */
-+int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ ,
-+ znode * node /* child node */ ,
-+ znode_lock_mode mode
-+ /* type of lock: read or write */ ,
-+ int flags /* GN_* flags */ )
-+{
-+ int result;
-+
-+ read_lock_tree(znode_get_tree(node));
-+ result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode,
-+ ZNODE_LOCK_HIPRI, flags, 1); /* 1: tree lock is held for read */
-+ read_unlock_tree(znode_get_tree(node));
-+ return result;
-+}
-+
-+/* wrapper that locks the left or right neighbor depending on the GN_GO_LEFT
-+ bit in @flags; -E_NO_NEIGHBOR is reported to the caller as -ENOENT */
-+/* Audited by: umka (2002.06.14) */
-+static inline int
-+lock_side_neighbor(lock_handle * result,
-+ znode * node, znode_lock_mode mode, int flags, int rlocked)
-+{
-+ int ret;
-+ int ptr_offset;
-+ znode_lock_request req;
-+
-+ if (flags & GN_GO_LEFT) {
-+ ptr_offset = LEFT_PTR_OFFSET;
-+ req = ZNODE_LOCK_LOPRI;
-+ } else {
-+ ptr_offset = RIGHT_PTR_OFFSET;
-+ req = ZNODE_LOCK_HIPRI;
-+ }
-+
-+ ret =
-+ lock_neighbor(result, node, ptr_offset, mode, req, flags, rlocked);
-+
-+ if (ret == -E_NO_NEIGHBOR) /* if we walk left or right, -E_NO_NEIGHBOR does not
-+ * guarantee that the neighbor is absent in the
-+ * tree; in this case we return -ENOENT --
-+ * meaning the neighbor was at least not found
-+ * in cache */
-+ return RETERR(-ENOENT);
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+/* debug-only walk of the sibling list in both directions; always returns 1 */
-+int check_sibling_list(znode * node)
-+{
-+ znode *scan;
-+ znode *next;
-+
-+ assert("nikita-3283", LOCK_CNT_GTZ(write_locked_tree));
-+
-+ if (node == NULL)
-+ return 1;
-+
-+ if (ZF_ISSET(node, JNODE_RIP))
-+ return 1;
-+
-+ assert("nikita-3270", node != NULL); /* always true: NULL returned above */
-+ assert_rw_write_locked(&(znode_get_tree(node)->tree_lock));
-+
-+ for (scan = node; znode_is_left_connected(scan); scan = next) { /* walk left */
-+ next = scan->left;
-+ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
-+ assert("nikita-3271", znode_is_right_connected(next));
-+ assert("nikita-3272", next->right == scan);
-+ } else
-+ break;
-+ }
-+ for (scan = node; znode_is_right_connected(scan); scan = next) { /* walk right */
-+ next = scan->right;
-+ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
-+ assert("nikita-3273", znode_is_left_connected(next));
-+ assert("nikita-3274", next->left == scan);
-+ } else
-+ break;
-+ }
-+ return 1;
-+}
-+
-+#endif
-+
-+/* Znode sibling pointers maintenance. */
-+
-+/* Znode sibling pointers are established between any neighboring nodes which are
-+ in cache. There are two znode state bits (JNODE_LEFT_CONNECTED,
-+ JNODE_RIGHT_CONNECTED), if left or right sibling pointer contains actual
-+ value (even NULL), the corresponding JNODE_*_CONNECTED bit is set.
-+
-+ Reiser4 tree operations which may allocate new znodes (CBK, tree balancing)
-+ take care about searching (hash table lookup may be required) of znode
-+ neighbors, establishing sibling pointers between them and setting
-+ JNODE_*_CONNECTED state bits. */
-+
-+/* adjust sibling pointers and `connected' state bits for two
-+ neighbors; works if either neighbor is NULL (was not found). */
-+
-+/* FIXME-VS: this is unstatic-ed to use in tree.c in prepare_twig_cut */
-+void link_left_and_right(znode * left, znode * right)
-+{
-+ assert("nikita-3275", check_sibling_list(left));
-+ assert("nikita-3275", check_sibling_list(right));
-+
-+ if (left != NULL) {
-+ if (left->right == NULL) {
-+ left->right = right;
-+ ZF_SET(left, JNODE_RIGHT_CONNECTED);
-+
-+ ON_DEBUG(left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+
-+ } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE)
-+ && left->right != right) {
-+
-+ ON_DEBUG(left->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ left->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ left->right->left = NULL; /* detach the old right neighbor first */
-+ left->right = right;
-+ ZF_SET(left, JNODE_RIGHT_CONNECTED);
-+ } else
-+ /*
-+ * there is a race condition in renew_sibling_link()
-+ * and the assertion below checks that it is the only
-+ * one there. Thread T1 calls renew_sibling_link() without
-+ * GN_NO_ALLOC flag. zlook() doesn't find neighbor
-+ * node, but before T1 gets to the
-+ * link_left_and_right(), another thread T2 creates
-+ * neighbor node and connects it. check for
-+ * left->right == NULL above protects T1 from
-+ * overwriting correct left->right pointer installed
-+ * by T2.
-+ */
-+ assert("nikita-3302",
-+ right == NULL || left->right == right);
-+ }
-+ if (right != NULL) {
-+ if (right->left == NULL) {
-+ right->left = left;
-+ ZF_SET(right, JNODE_LEFT_CONNECTED);
-+
-+ ON_DEBUG(right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+
-+ } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE)
-+ && right->left != left) {
-+
-+ ON_DEBUG(right->left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ right->left_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ right->left->right = NULL; /* detach the old left neighbor first */
-+ right->left = left;
-+ ZF_SET(right, JNODE_LEFT_CONNECTED);
-+
-+ } else
-+ assert("nikita-3303",
-+ left == NULL || right->left == left);
-+ }
-+ assert("nikita-3275", check_sibling_list(left));
-+ assert("nikita-3275", check_sibling_list(right));
-+}
-+
-+/* link @first and @second as siblings; @first is the right one if @to_left */
-+static void link_znodes(znode * first, znode * second, int to_left)
-+{
-+ znode *left_node = to_left ? second : first;
-+ znode *right_node = to_left ? first : second;
-+
-+ link_left_and_right(left_node, right_node);
-+}
-+
-+/* move @coord to the next unit (to the left or to the right, depending on the
-+ GN_GO_LEFT bit in @flags) in horizontal direction, even across a node
-+ boundary. Should be called under tree write lock, it protects nonexistence
-+ of sibling link on parent level, if lock_side_neighbor() fails with
-+ -ENOENT. */
-+static int far_next_coord(coord_t * coord, lock_handle * handle, int flags)
-+{
-+ int ret;
-+ znode *node;
-+ reiser4_tree *tree;
-+
-+ assert("umka-243", coord != NULL);
-+ assert("umka-244", handle != NULL);
-+ assert("zam-1069", handle->node == NULL);
-+
-+ ret =
-+ (flags & GN_GO_LEFT) ? coord_prev_unit(coord) :
-+ coord_next_unit(coord);
-+ if (!ret)
-+ return 0;
-+
-+ ret =
-+ lock_side_neighbor(handle, coord->node, ZNODE_READ_LOCK, flags, 0);
-+ if (ret)
-+ return ret;
-+
-+ node = handle->node;
-+ tree = znode_get_tree(node);
-+ write_unlock_tree(tree);
-+
-+ coord_init_zero(coord);
-+
-+ /* We avoid synchronous read here if it is specified by flag. */
-+ if ((flags & GN_ASYNC) && znode_page(handle->node) == NULL) {
-+ ret = jstartio(ZJNODE(handle->node));
-+ if (!ret)
-+ ret = -E_REPEAT;
-+ goto error_locked;
-+ }
-+
-+ /* the corresponding zrelse() should be called by the clients of
-+ far_next_coord(), at the place where this node gets unlocked. */
-+ ret = zload(handle->node);
-+ if (ret)
-+ goto error_locked;
-+
-+ if (flags & GN_GO_LEFT)
-+ coord_init_last_unit(coord, node);
-+ else
-+ coord_init_first_unit(coord, node);
-+
-+ if (0) { /* skipped on success; entered only via goto error_locked */
-+ error_locked:
-+ longterm_unlock_znode(handle);
-+ }
-+ write_lock_tree(tree); /* re-take the tree lock dropped above */
-+ return ret;
-+}
-+
-+/* Very significant function which performs a step in horizontal direction
-+ when sibling pointer is not available. Actually, it is the only function
-+ which does it.
-+ Note: this function does not restore locking status at exit,
-+ the caller must take care of proper unlocking and zrelse-ing */
-+static int
-+renew_sibling_link(coord_t * coord, lock_handle * handle, znode * child,
-+ tree_level level, int flags, int *nr_locked)
-+{
-+ int ret;
-+ int to_left = flags & GN_GO_LEFT;
-+ reiser4_block_nr da;
-+ /* parent of the neighbor node; we set it to parent until not sharing
-+ of one parent between child and neighbor node is detected */
-+ znode *side_parent = coord->node; /* NOTE(review): reads coord before the coord != NULL assert */
-+ reiser4_tree *tree = znode_get_tree(child);
-+ znode *neighbor = NULL;
-+
-+ assert("umka-245", coord != NULL);
-+ assert("umka-246", handle != NULL);
-+ assert("umka-247", child != NULL);
-+ assert("umka-303", tree != NULL);
-+
-+ init_lh(handle);
-+ write_lock_tree(tree);
-+ ret = far_next_coord(coord, handle, flags);
-+
-+ if (ret) {
-+ if (ret != -ENOENT) {
-+ write_unlock_tree(tree);
-+ return ret;
-+ }
-+ } else {
-+ item_plugin *iplug;
-+
-+ if (handle->node != NULL) {
-+ (*nr_locked)++; /* caller must have initialized *nr_locked */
-+ side_parent = handle->node;
-+ }
-+
-+ /* does the coord object point to an internal item? We do not
-+ support sibling pointers between znode for formatted and
-+ unformatted nodes and return -E_NO_NEIGHBOR in that case. */
-+ iplug = item_plugin_by_coord(coord);
-+ if (!item_is_internal(coord)) {
-+ link_znodes(child, NULL, to_left);
-+ write_unlock_tree(tree);
-+ /* we know there can't be formatted neighbor */
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+ write_unlock_tree(tree);
-+
-+ iplug->s.internal.down_link(coord, NULL, &da);
-+
-+ if (flags & GN_NO_ALLOC) {
-+ neighbor = zlook(tree, &da);
-+ } else {
-+ neighbor =
-+ zget(tree, &da, side_parent, level,
-+ reiser4_ctx_gfp_mask_get());
-+ }
-+
-+ if (IS_ERR(neighbor)) {
-+ ret = PTR_ERR(neighbor);
-+ return ret; /* tree lock was already dropped above */
-+ }
-+
-+ if (neighbor)
-+ /* update delimiting keys */
-+ set_child_delimiting_keys(coord->node, coord, neighbor);
-+
-+ write_lock_tree(tree);
-+ }
-+
-+ if (likely(neighbor == NULL ||
-+ (znode_get_level(child) == znode_get_level(neighbor)
-+ && child != neighbor)))
-+ link_znodes(child, neighbor, to_left);
-+ else {
-+ warning("nikita-3532",
-+ "Sibling nodes on the different levels: %i != %i\n",
-+ znode_get_level(child), znode_get_level(neighbor));
-+ ret = RETERR(-EIO);
-+ }
-+
-+ write_unlock_tree(tree);
-+
-+ /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */
-+ if (neighbor != NULL && (flags & GN_NO_ALLOC))
-+ /* atomic_dec(&ZJNODE(neighbor)->x_count); */
-+ zput(neighbor);
-+
-+ return ret;
-+}
-+
-+/* Establish the sibling link of @node on one side (left if GN_GO_LEFT is set). */
-+/* Audited by: umka (2002.06.14) */
-+static int connect_one_side(coord_t * coord, znode * node, int flags)
-+{
-+ coord_t local;
-+ lock_handle handle;
-+ int nr_locked = 0; /* incremented by renew_sibling_link(); must start defined */
-+ int ret;
-+
-+ assert("umka-248", coord != NULL);
-+ assert("umka-249", node != NULL);
-+
-+ coord_dup_nocheck(&local, coord);
-+
-+ init_lh(&handle);
-+
-+ ret =
-+ renew_sibling_link(&local, &handle, node, znode_get_level(node),
-+ flags | GN_NO_ALLOC, &nr_locked);
-+
-+ if (handle.node != NULL) {
-+ /* complementary operations for zload() and lock() in far_next_coord() */
-+ zrelse(handle.node);
-+ longterm_unlock_znode(&handle);
-+ }
-+
-+ /* we swallow error codes which are not interesting for us because we
-+ run renew_sibling_link() only for znode connection. */
-+ if (ret == -ENOENT || ret == -E_NO_NEIGHBOR)
-+ return 0;
-+
-+ return ret;
-+}
-+
-+/* if @child is not in `connected' state, performs hash searches for left and
-+ right neighbor nodes and establishes horizontal sibling links */
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+int connect_znode(coord_t * parent_coord, znode * child)
-+{
-+ reiser4_tree *tree = znode_get_tree(child);
-+ int ret = 0;
-+
-+ assert("zam-330", parent_coord != NULL);
-+ assert("zam-331", child != NULL);
-+ assert("zam-332", parent_coord->node != NULL);
-+ assert("umka-305", tree != NULL);
-+
-+ /* it is trivial to `connect' root znode because it can't have
-+ neighbors */
-+ if (znode_above_root(parent_coord->node)) {
-+ child->left = NULL;
-+ child->right = NULL;
-+ ZF_SET(child, JNODE_LEFT_CONNECTED);
-+ ZF_SET(child, JNODE_RIGHT_CONNECTED);
-+
-+ ON_DEBUG(child->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ child->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ return 0;
-+ }
-+
-+ /* load parent node */
-+ coord_clear_iplug(parent_coord);
-+ ret = zload(parent_coord->node);
-+
-+ if (ret != 0)
-+ return ret;
-+
-+ /* protect `connected' state check by tree_lock */
-+ read_lock_tree(tree);
-+
-+ if (!znode_is_right_connected(child)) {
-+ read_unlock_tree(tree);
-+ /* connect right (default is right) */
-+ ret = connect_one_side(parent_coord, child, GN_NO_ALLOC);
-+ if (ret)
-+ goto zrelse_and_ret;
-+
-+ read_lock_tree(tree);
-+ }
-+
-+ ret = znode_is_left_connected(child); /* ret briefly holds a boolean, not an error code */
-+
-+ read_unlock_tree(tree);
-+
-+ if (!ret) {
-+ ret =
-+ connect_one_side(parent_coord, child,
-+ GN_NO_ALLOC | GN_GO_LEFT);
-+ } else
-+ ret = 0;
-+
-+ zrelse_and_ret:
-+ zrelse(parent_coord->node); /* pairs with the zload() of the parent above */
-+
-+ return ret;
-+}
-+
-+/* this function is like renew_sibling_link() but allocates neighbor node if
-+ it doesn't exist and `connects' it. It may require making two steps in
-+ horizontal direction, first one for neighbor node finding/allocation,
-+ second one is for finding neighbor of neighbor to connect freshly allocated
-+ znode. */
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+static int
-+renew_neighbor(coord_t * coord, znode * node, tree_level level, int flags)
-+{
-+ coord_t local;
-+ lock_handle empty[2]; /* each is init_lh()-ed inside renew_sibling_link() */
-+ reiser4_tree *tree = znode_get_tree(node);
-+ znode *neighbor = NULL;
-+ int nr_locked = 0;
-+ int ret;
-+
-+ assert("umka-250", coord != NULL);
-+ assert("umka-251", node != NULL);
-+ assert("umka-307", tree != NULL);
-+ assert("umka-308", level <= tree->height);
-+
-+ /* umka (2002.06.14)
-+ Here probably should be a check for given "level" validness.
-+ Something like assert("xxx-yyy", level < REAL_MAX_ZTREE_HEIGHT);
-+ */
-+
-+ coord_dup(&local, coord);
-+
-+ ret =
-+ renew_sibling_link(&local, &empty[0], node, level,
-+ flags & ~GN_NO_ALLOC, &nr_locked);
-+ if (ret)
-+ goto out;
-+
-+ /* tree lock is not needed here because we keep parent node(s) locked
-+ and reference to neighbor znode incremented */
-+ neighbor = (flags & GN_GO_LEFT) ? node->left : node->right;
-+
-+ read_lock_tree(tree);
-+ ret = znode_is_connected(neighbor);
-+ read_unlock_tree(tree);
-+ if (ret) {
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ ret =
-+ renew_sibling_link(&local, &empty[nr_locked], neighbor, level,
-+ flags | GN_NO_ALLOC, &nr_locked);
-+ /* second renew_sibling_link() call is used for znode connection only,
-+ so we can live with these errors */
-+ if (-ENOENT == ret || -E_NO_NEIGHBOR == ret)
-+ ret = 0;
-+
-+ out:
-+
-+ for (--nr_locked; nr_locked >= 0; --nr_locked) { /* release in reverse order */
-+ zrelse(empty[nr_locked].node);
-+ longterm_unlock_znode(&empty[nr_locked]);
-+ }
-+
-+ if (neighbor != NULL)
-+ /* decrement znode reference counter without actually
-+ releasing it. */
-+ atomic_dec(&ZJNODE(neighbor)->x_count);
-+
-+ return ret;
-+}
-+
-+/*
-+ reiser4_get_neighbor() -- lock node's neighbor.
-+
-+ reiser4_get_neighbor() locks node's neighbor (left or right one, depends on
-+ given parameter) using sibling link to it. If sibling link is not available
-+ (i.e. neighbor znode is not in cache) and flags allow read blocks, we go one
-+ level up for information about neighbor's disk address. We lock node's
-+ parent, if it is common parent for both 'node' and its neighbor, neighbor's
-+ disk address is in next (to left or to right) down link from link that points
-+ to original node. If not, we need to lock parent's neighbor, read its content
-+ and take first(last) downlink with neighbor's disk address. That locking
-+ could be done by using sibling link and lock_neighbor() function, if sibling
-+ link exists. In another case we have to go level up again until we find
-+ common parent or valid sibling link. Then go down
-+ allocating/connecting/locking/reading nodes until neighbor of first one is
-+ locked.
-+
-+ @neighbor: result lock handle,
-+ @node: a node which we lock neighbor of,
-+ @lock_mode: lock mode {LM_READ, LM_WRITE},
-+ @flags: logical OR of {GN_*} (see description above) subset.
-+
-+ @return: 0 if success, negative value if lock was impossible due to an error
-+ or lack of neighbor node.
-+*/
-+
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+int
-+reiser4_get_neighbor(lock_handle * neighbor, znode * node,
-+ znode_lock_mode lock_mode, int flags)
-+{
-+ reiser4_tree *tree = znode_get_tree(node);
-+ lock_handle path[REAL_MAX_ZTREE_HEIGHT];
-+
-+ coord_t coord;
-+
-+ tree_level base_level;
-+ tree_level h = 0;
-+ int ret;
-+
-+ assert("umka-252", tree != NULL);
-+ assert("umka-253", neighbor != NULL);
-+ assert("umka-254", node != NULL);
-+
-+ base_level = znode_get_level(node);
-+
-+ assert("umka-310", base_level <= tree->height);
-+
-+ coord_init_zero(&coord);
-+
-+ again:
-+ /* first, we try to use simple lock_neighbor() which requires sibling
-+ link existence */
-+ read_lock_tree(tree);
-+ ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1);
-+ read_unlock_tree(tree);
-+ if (!ret) {
-+ /* load znode content if requested. NOTE(review): this loads @node, not the neighbor just locked -- confirm */
-+ if (flags & GN_LOAD_NEIGHBOR) {
-+ ret = zload(node);
-+ if (ret)
-+ longterm_unlock_znode(neighbor);
-+ }
-+ return ret;
-+ }
-+
-+ /* only -ENOENT means we may look upward and try to connect
-+ @node with its neighbor (if @flags allow us to do it) */
-+ if (ret != -ENOENT || !(flags & GN_CAN_USE_UPPER_LEVELS))
-+ return ret;
-+
-+ /* before establishing the sibling link we lock the parent node; it is
-+ required by renew_neighbor() to work. */
-+ init_lh(&path[0]);
-+ ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK);
-+ if (ret)
-+ return ret;
-+ if (znode_above_root(path[0].node)) {
-+ longterm_unlock_znode(&path[0]);
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+
-+ while (1) {
-+ znode *child = (h == 0) ? node : path[h - 1].node;
-+ znode *parent = path[h].node;
-+
-+ ret = zload(parent);
-+ if (ret)
-+ break;
-+
-+ ret = find_child_ptr(parent, child, &coord);
-+
-+ if (ret) {
-+ zrelse(parent);
-+ break;
-+ }
-+
-+ /* try to establish missing sibling link */
-+ ret = renew_neighbor(&coord, child, h + base_level, flags);
-+
-+ zrelse(parent);
-+
-+ switch (ret) {
-+ case 0:
-+ /* unlocking of parent znode prevents simple
-+ deadlock situation */
-+ done_lh(&path[h]);
-+
-+ /* depending on the tree level we are on, we repeat the
-+ first locking attempt ... */
-+ if (h == 0)
-+ goto again;
-+
-+ /* ... or repeat establishing of sibling link at
-+ one level below. */
-+ --h;
-+ break;
-+
-+ case -ENOENT:
-+ /* sibling link is not available -- we go
-+ upward. */
-+ init_lh(&path[h + 1]);
-+ ret =
-+ reiser4_get_parent(&path[h + 1], parent,
-+ ZNODE_READ_LOCK);
-+ if (ret)
-+ goto fail;
-+ ++h;
-+ if (znode_above_root(path[h].node)) {
-+ ret = RETERR(-E_NO_NEIGHBOR);
-+ goto fail;
-+ }
-+ break;
-+
-+ case -E_DEADLOCK:
-+ /* there was lock request from hi-pri locker. if
-+ it is possible we unlock last parent node and
-+ re-lock it again. */
-+ for (; reiser4_check_deadlock(); h--) {
-+ done_lh(&path[h]);
-+ if (h == 0)
-+ goto fail;
-+ }
-+
-+ break;
-+
-+ default: /* other errors. */
-+ goto fail;
-+ }
-+ }
-+ fail:
-+ ON_DEBUG(check_lock_node_data(node));
-+ ON_DEBUG(check_lock_data());
-+
-+ /* unlock path[h] down to path[0] */
-+ do {
-+ /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto
-+ fail; path[0] is already done_lh-ed, therefore
-+ longterm_unlock_znode(&path[h]); is not applicable */
-+ done_lh(&path[h]);
-+ --h;
-+ } while (h + 1 != 0);
-+
-+ return ret;
-+}
-+
-+/* remove node from sibling list, relinking its neighbors; needs tree write lock */
-+/* Audited by: umka (2002.06.14) */
-+void sibling_list_remove(znode * node)
-+{
-+ reiser4_tree *tree;
-+
-+ assert("umka-255", node != NULL); /* check before @node is first used */
-+ tree = znode_get_tree(node);
-+ assert_rw_write_locked(&(tree->tree_lock));
-+ assert("nikita-3275", check_sibling_list(node));
-+
-+ write_lock_dk(tree);
-+ if (znode_is_right_connected(node) && node->right != NULL &&
-+ znode_is_left_connected(node) && node->left != NULL) {
-+ assert("zam-32245",
-+ keyeq(znode_get_rd_key(node),
-+ znode_get_ld_key(node->right)));
-+ znode_set_rd_key(node->left, znode_get_ld_key(node->right));
-+ }
-+ write_unlock_dk(tree);
-+
-+ if (znode_is_right_connected(node) && node->right != NULL) {
-+ assert("zam-322", znode_is_left_connected(node->right));
-+ node->right->left = node->left;
-+ ON_DEBUG(node->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ if (znode_is_left_connected(node) && node->left != NULL) {
-+ assert("zam-323", znode_is_right_connected(node->left));
-+ node->left->right = node->right;
-+ ON_DEBUG(node->left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+
-+ ZF_CLR(node, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+ ON_DEBUG(node->left = node->right = NULL;
-+ node->left_version = atomic_inc_return(&delim_key_version);
-+ node->right_version = atomic_inc_return(&delim_key_version););
-+ assert("nikita-3276", check_sibling_list(node));
-+}
-+
-+/* disconnect node from sibling list; neighbors' links to it become NULL */
-+void sibling_list_drop(znode * node)
-+{
-+ znode *right;
-+ znode *left;
-+
-+ assert("nikita-2464", node != NULL);
-+ assert("nikita-3277", check_sibling_list(node));
-+
-+ right = node->right;
-+ if (right != NULL) {
-+ assert("nikita-2465", znode_is_left_connected(right));
-+ right->left = NULL;
-+ ON_DEBUG(right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ left = node->left;
-+ if (left != NULL) {
-+ assert("zam-323", znode_is_right_connected(left)); /* label is shared with sibling_list_remove() */
-+ left->right = NULL;
-+ ON_DEBUG(left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ ZF_CLR(node, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+ ON_DEBUG(node->left = node->right = NULL;
-+ node->left_version = atomic_inc_return(&delim_key_version);
-+ node->right_version = atomic_inc_return(&delim_key_version););
-+}
-+
-+/* Insert new node into sibling list. Regular balancing inserts the new node
-+ after (at the right side of) an existing and locked node (@before), except
-+ when adding a new tree root node; @before should be NULL in that case. */
-+void sibling_list_insert_nolock(znode * new, znode * before)
-+{
-+ assert("zam-334", new != NULL);
-+ assert("nikita-3298", !znode_is_left_connected(new));
-+ assert("nikita-3299", !znode_is_right_connected(new));
-+ assert("nikita-3300", new->left == NULL);
-+ assert("nikita-3301", new->right == NULL);
-+ assert("nikita-3278", check_sibling_list(new));
-+ assert("nikita-3279", check_sibling_list(before));
-+
-+ if (before != NULL) {
-+ assert("zam-333", znode_is_connected(before));
-+ new->right = before->right;
-+ new->left = before;
-+ ON_DEBUG(new->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ new->left_version =
-+ atomic_inc_return(&delim_key_version););
-+ if (before->right != NULL) {
-+ before->right->left = new;
-+ ON_DEBUG(before->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ before->right = new;
-+ ON_DEBUG(before->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ } else {
-+ new->right = NULL; /* no neighbors: @new is linked to nothing */
-+ new->left = NULL;
-+ ON_DEBUG(new->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ new->left_version =
-+ atomic_inc_return(&delim_key_version););
-+ }
-+ ZF_SET(new, JNODE_LEFT_CONNECTED);
-+ ZF_SET(new, JNODE_RIGHT_CONNECTED);
-+ assert("nikita-3280", check_sibling_list(new));
-+ assert("nikita-3281", check_sibling_list(before));
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/tree_walk.h linux-2.6.20/fs/reiser4/tree_walk.h
---- linux-2.6.20.orig/fs/reiser4/tree_walk.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/tree_walk.h 2007-05-06 14:50:43.887034467 +0400
-@@ -0,0 +1,125 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* definitions of reiser4 tree walk functions */
-+
-+#ifndef __FS_REISER4_TREE_WALK_H__
-+#define __FS_REISER4_TREE_WALK_H__
-+
-+#include "debug.h"
-+#include "forward.h"
-+
-+/* establishes horizontal links between cached znodes */
-+int connect_znode(coord_t * coord, znode * node);
-+
-+/* tree traversal functions (reiser4_get_parent(), reiser4_get_neighbor())
-+ have the following common arguments:
-+
-+ return codes:
-+
-+ @return : 0 - OK,
-+
-+ZAM-FIXME-HANS: wrong return code name. Change them all.
-+ -ENOENT - neighbor is not in cache, what is detected by sibling
-+ link absence.
-+
-+ -E_NO_NEIGHBOR - we are sure that neighbor (or parent) node cannot be
-+ found (because we are left-/right- most node of the
-+ tree, for example). Also, this return code is for
-+ reiser4_get_parent() when we see no parent link -- it
-+ means that our node is root node.
-+
-+ -E_DEADLOCK - deadlock detected (request from high-priority process
-+ received), other error codes are conformed to
-+ /usr/include/asm/errno.h .
-+*/
-+
-+int
-+reiser4_get_parent_flags(lock_handle * result, znode * node,
-+ znode_lock_mode mode, int flags);
-+
-+/* bit definitions for the reiser4_get_neighbor() function `flags' arg. */
-+typedef enum {
-+ /* If sibling pointer is NULL, this flag allows get_neighbor() to try to
-+ * find not allocated not connected neighbor by going through upper
-+ * levels */
-+ GN_CAN_USE_UPPER_LEVELS = 0x1,
-+ /* locking left neighbor instead of right one */
-+ GN_GO_LEFT = 0x2,
-+ /* automatically load neighbor node content */
-+ GN_LOAD_NEIGHBOR = 0x4,
-+ /* return -E_REPEAT if can't lock */
-+ GN_TRY_LOCK = 0x8,
-+ /* used internally in tree_walk.c, causes renew_sibling_link() to not
-+ allocate neighbor znode, but only search for it in znode cache */
-+ GN_NO_ALLOC = 0x10,
-+ /* do not go across atom boundaries */
-+ GN_SAME_ATOM = 0x20,
-+ /* allow to lock not connected nodes */
-+ GN_ALLOW_NOT_CONNECTED = 0x40,
-+ /* Avoid synchronous jload, instead, call jstartio() and return -E_REPEAT. */
-+ GN_ASYNC = 0x80
-+} znode_get_neigbor_flags;
-+
-+/* Common wrapper: reiser4_get_parent_flags() with GN_ALLOW_NOT_CONNECTED. */
-+static inline int reiser4_get_parent(lock_handle * result, znode * node,
-+ znode_lock_mode mode)
-+{
-+ return reiser4_get_parent_flags(result, node, mode,
-+ GN_ALLOW_NOT_CONNECTED);
-+}
-+
-+int reiser4_get_neighbor(lock_handle * neighbor, znode * node,
-+ znode_lock_mode lock_mode, int flags);
-+
-+/* wrappers for the most common usages of reiser4_get_neighbor(): they set or clear GN_GO_LEFT in @flags */
-+static inline int
-+reiser4_get_left_neighbor(lock_handle * result, znode * node, int lock_mode,
-+ int flags)
-+{
-+ return reiser4_get_neighbor(result, node, lock_mode,
-+ flags | GN_GO_LEFT);
-+}
-+
-+static inline int
-+reiser4_get_right_neighbor(lock_handle * result, znode * node, int lock_mode,
-+ int flags)
-+{
-+ ON_DEBUG(check_lock_node_data(node));
-+ ON_DEBUG(check_lock_data());
-+ return reiser4_get_neighbor(result, node, lock_mode,
-+ flags & (~GN_GO_LEFT));
-+}
-+
-+extern void sibling_list_remove(znode * node);
-+extern void sibling_list_drop(znode * node);
-+extern void sibling_list_insert_nolock(znode * new, znode * before);
-+extern void link_left_and_right(znode * left, znode * right);
-+
-+/* Callbacks invoked by tree_walk() when tree_walk() ... */
-+struct tree_walk_actor {
-+ /* ... meets a formatted node, */
-+ int (*process_znode) (tap_t *, void *);
-+ /* ... meets an extent, */
-+ int (*process_extent) (tap_t *, void *);
-+ /* ... begins tree traversal or repeats it after -E_REPEAT was returned by
-+ * the node or extent processing functions. */
-+ int (*before) (void *);
-+};
-+
-+#if REISER4_DEBUG
-+int check_sibling_list(znode * node);
-+#else
-+#define check_sibling_list(n) (1)
-+#endif
-+
-+#endif /* __FS_REISER4_TREE_WALK_H__ */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/txnmgr.c linux-2.6.20/fs/reiser4/txnmgr.c
---- linux-2.6.20.orig/fs/reiser4/txnmgr.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/txnmgr.c 2007-05-06 14:50:43.895036966 +0400
-@@ -0,0 +1,3164 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Joshua MacDonald wrote the first draft of this code. */
-+
-+/* ZAM-LONGTERM-FIXME-HANS: The locking in this file is badly designed, and a
-+filesystem scales only as well as its worst locking design. You need to
-+substantially restructure this code. Josh was not as experienced a programmer
-+as you. Particularly review how the locking style differs from what you did
-+for znodes using hi-lo priority locking, and present to me an opinion on
-+whether the differences are well founded. */
-+
-+/* I cannot help but to disagree with the sentiment above. Locking of
-+ * transaction manager is _not_ badly designed, and, at the very least, is not
-+ * the scaling bottleneck. Scaling bottleneck is _exactly_ hi-lo priority
-+ * locking on znodes, especially on the root node of the tree. --nikita,
-+ * 2003.10.13 */
-+
-+/* The txnmgr is a set of interfaces that keep track of atoms and transcrash handles. The
-+ txnmgr processes capture_block requests and manages the relationship between jnodes and
-+ atoms through the various stages of a transcrash, and it also oversees the fusion and
-+ capture-on-copy processes. The main difficulty with this task is maintaining a
-+ deadlock-free lock ordering between atoms and jnodes/handles. The reason for the
-+ difficulty is that jnodes, handles, and atoms contain pointer circles, and the cycle
-+ must be broken. The main requirement is that atom-fusion be deadlock free, so once you
-+ hold the atom_lock you may then wait to acquire any jnode or handle lock. This implies
-+ that any time you check the atom-pointer of a jnode or handle and then try to lock that
-+ atom, you must use trylock() and possibly reverse the order.
-+
-+ This code implements the design documented at:
-+
-+ http://namesys.com/txn-doc.html
-+
-+ZAM-FIXME-HANS: update v4.html to contain all of the information present in the above (but updated), and then remove the
-+above document and reference the new. Be sure to provide some credit to Josh. I already have some writings on this
-+topic in v4.html, but they are lacking in details present in the above. Cure that. Remember to write for the bright 12
-+year old --- define all technical terms used.
-+
-+*/
-+
-+/* Thoughts on the external transaction interface:
-+
-+ In the current code, a TRANSCRASH handle is created implicitly by reiser4_init_context() (which
-+ creates state that lasts for the duration of a system call and is called at the start
-+ of ReiserFS methods implementing VFS operations), and closed by reiser4_exit_context(),
-+ occupying the scope of a single system call. We wish to give certain applications an
-+ interface to begin and close (commit) transactions. Since our implementation of
-+ transactions does not yet support isolation, allowing an application to open a
-+ transaction implies trusting it to later close the transaction. Part of the
-+ transaction interface will be aimed at enabling that trust, but the interface for
-+ actually using transactions is fairly narrow.
-+
-+ BEGIN_TRANSCRASH: Returns a transcrash identifier. It should be possible to translate
-+ this identifier into a string that a shell-script could use, allowing you to start a
-+ transaction by issuing a command. Once open, the transcrash should be set in the task
-+ structure, and there should be options (I suppose) to allow it to be carried across
-+ fork/exec. A transcrash has several options:
-+
-+ - READ_FUSING or WRITE_FUSING: The default policy is for txn-capture to capture only
-+ on writes (WRITE_FUSING) and allow "dirty reads". If the application wishes to
-+ capture on reads as well, it should set READ_FUSING.
-+
-+ - TIMEOUT: Since a non-isolated transcrash cannot be undone, every transcrash must
-+ eventually close (or else the machine must crash). If the application dies an
-+ unexpected death with an open transcrash, for example, or if it hangs for a long
-+ duration, one solution (to avoid crashing the machine) is to simply close it anyway.
-+ This is a dangerous option, but it is one way to solve the problem until isolated
-+ transcrashes are available for untrusted applications.
-+
-+ It seems to be what databases do, though it is unclear how one avoids a DoS attack
-+ creating a vulnerability based on resource starvation. Guaranteeing that some
-+ minimum amount of computational resources are made available would seem more correct
-+ than guaranteeing some amount of time. When we again have someone to code the work,
-+ this issue should be considered carefully. -Hans
-+
-+ RESERVE_BLOCKS: A running transcrash should indicate to the transaction manager how
-+ many dirty blocks it expects. The reserve_blocks interface should be called at a point
-+ where it is safe for the application to fail, because the system may not be able to
-+ grant the allocation and the application must be able to back-out. For this reason,
-+ the number of reserve-blocks can also be passed as an argument to BEGIN_TRANSCRASH, but
-+ the application may also wish to extend the allocation after beginning its transcrash.
-+
-+ CLOSE_TRANSCRASH: The application closes the transcrash when it is finished making
-+ modifications that require transaction protection. When isolated transactions are
-+ supported the CLOSE operation is replaced by either COMMIT or ABORT. For example, if a
-+ RESERVE_BLOCKS call fails for the application, it should "abort" by calling
-+ CLOSE_TRANSCRASH, even though it really commits any changes that were made (which is
-+ why, for safety, the application should call RESERVE_BLOCKS before making any changes).
-+
-+ For actually implementing these out-of-system-call-scoped transcrashes, the
-+ reiser4_context has a "txn_handle *trans" pointer that may be set to an open
-+ transcrash. Currently there are no dynamically-allocated transcrashes, but there is a
-+ "struct kmem_cache *_txnh_slab" created for that purpose in this file.
-+*/
-+
-+/* Extending the other system call interfaces for future transaction features:
-+
-+ Specialized applications may benefit from passing flags to the ordinary system call
-+ interface such as read(), write(), or stat(). For example, the application specifies
-+ WRITE_FUSING by default but wishes to add that a certain read() command should be
-+ treated as READ_FUSING. But which read? Is it the directory-entry read, the stat-data
-+ read, or the file-data read? These issues are straight-forward, but there are a lot of
-+ them and adding the necessary flags-passing code will be tedious.
-+
-+ When supporting isolated transactions, there is a corresponding READ_MODIFY_WRITE (RMW)
-+ flag, which specifies that although it is a read operation being requested, a
-+ write-lock should be taken. The reason is that read-locks are shared while write-locks
-+ are exclusive, so taking a read-lock when a later-write is known in advance will often
-+ lead to deadlock. If a reader knows it will write later, it should issue read
-+ requests with the RMW flag set.
-+*/
-+
-+/*
-+ The znode/atom deadlock avoidance.
-+
-+ FIXME(Zam): writing of this comment is in progress.
-+
-+ The atom's special stage ASTAGE_CAPTURE_WAIT introduces a kind of long-term
-+ locking of atoms, which makes the reiser4 locking scheme more complex. It had
-+ deadlocks until we implemented deadlock avoidance algorithms. Those deadlocks
-+ looked like the following: one stopped thread waits for a long-term lock on a
-+ znode, while the thread that owns that lock waits until fusion with another
-+ atom is allowed.
-+
-+ The source of the deadlocks is an optimization of not capturing index nodes
-+ for read. Let's prove it. Suppose we have dumb node capturing scheme which
-+ unconditionally captures each block before locking it.
-+
-+ That scheme has no deadlocks. Let's begin with the thread whose stage is
-+ ASTAGE_CAPTURE_WAIT and which waits for a znode lock. The thread can't wait for
-+ a capture because its stage allows fusion with any atom except those which are
-+ currently being committed. The process of atom commit can't deadlock because
-+ the atom commit procedure does not acquire locks and does not fuse with other
-+ atoms. Reiser4 does capturing right before going to sleep inside the
-+ longterm_lock_znode() function, which means the znode which we want to lock is
-+ already captured and its atom is in the ASTAGE_CAPTURE_WAIT stage. If we
-+ continue the analysis we see that no process in the sequence can be
-+ waiting for atom fusion. Therefore there are no deadlocks of the described kind.
-+
-+ The capturing optimization makes the deadlocks possible. A thread can wait for
-+ a lock whose owner did not capture that node. The lock owner's current atom
-+ is not fused with the first atom and it does not get an ASTAGE_CAPTURE_WAIT
-+ state. A deadlock is possible when that atom meets another one which is in
-+ ASTAGE_CAPTURE_WAIT already.
-+
-+ The deadlock avoidance scheme includes two algorithms:
-+
-+ First algorithm is used when a thread captures a node which is locked but not
-+ captured by another thread. Those nodes are marked MISSED_IN_CAPTURE at the
-+ moment we skip their capturing. If such a node (marked MISSED_IN_CAPTURE) is
-+ being captured by a thread with current atom is in ASTAGE_CAPTURE_WAIT, the
-+ routine which forces all lock owners to join with current atom is executed.
-+
-+ Second algorithm does not allow to skip capturing of already captured nodes.
-+
-+ Both algorithms together prevent waiting for a longterm lock without fusing
-+ with the atoms of all lock owners; such waiting is the key ingredient of
-+ atom/znode locking deadlocks.
-+*/
-+
-+/*
-+ * Transactions and mmap(2).
-+ *
-+ * 1. Transactions are not supported for accesses through mmap(2), because
-+ * this would effectively amount to user-level transactions whose duration
-+ * is beyond control of the kernel.
-+ *
-+ * 2. That said, we still want to preserve some decency with regard to
-+ * mmap(2). During normal write(2) call, following sequence of events
-+ * happens:
-+ *
-+ * 1. page is created;
-+ *
-+ * 2. jnode is created, dirtied and captured into current atom.
-+ *
-+ * 3. extent is inserted and modified.
-+ *
-+ * Steps (2) and (3) take place under long term lock on the twig node.
-+ *
-+ * When file is accessed through mmap(2) page is always created during
-+ * page fault.
-+ * After this (in reiser4_readpage()->reiser4_readpage_extent()):
-+ *
-+ * 1. if access is made to non-hole page new jnode is created, (if
-+ * necessary)
-+ *
-+ * 2. if access is made to the hole page, jnode is not created (XXX
-+ * not clear why).
-+ *
-+ * Also, even if page is created by write page fault it is not marked
-+ * dirty immediately by handle_mm_fault(). Probably this is to avoid races
-+ * with page write-out.
-+ *
-+ * Dirty bit installed by hardware is only transferred to the struct page
-+ * later, when page is unmapped (in zap_pte_range(), or
-+ * try_to_unmap_one()).
-+ *
-+ * So, with mmap(2) we have to handle following irksome situations:
-+ *
-+ * 1. there exists modified page (clean or dirty) without jnode
-+ *
-+ * 2. there exists modified page (clean or dirty) with clean jnode
-+ *
-+ * 3. clean page which is a part of atom can be transparently modified
-+ * at any moment through mapping without becoming dirty.
-+ *
-+ * (1) and (2) can lead to the out-of-memory situation: ->writepage()
-+ * doesn't know what to do with such pages and ->sync_sb()/->writepages()
-+ * don't see them, because these methods operate on atoms.
-+ *
-+ * (3) can lead to the loss of data: suppose we have dirty page with dirty
-+ * captured jnode captured by some atom. As part of early flush (for
-+ * example) page was written out. Dirty bit was cleared on both page and
-+ * jnode. After this page is modified through mapping, but kernel doesn't
-+ * notice and just discards page and jnode as part of commit. (XXX
-+ * actually it doesn't, because to reclaim page ->releasepage() has to be
-+ * called and before this dirty bit will be transferred to the struct
-+ * page).
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "wander.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "page_cache.h"
-+#include "reiser4.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "flush.h"
-+
-+#include <asm/atomic.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/slab.h>
-+#include <linux/pagemap.h>
-+#include <linux/writeback.h>
-+#include <linux/swap.h> /* for totalram_pages */
-+
-+static void atom_free(txn_atom * atom);
-+
-+static int commit_txnh(txn_handle * txnh);
-+
-+static void wakeup_atom_waitfor_list(txn_atom * atom);
-+static void wakeup_atom_waiting_list(txn_atom * atom);
-+
-+static void capture_assign_txnh_nolock(txn_atom * atom, txn_handle * txnh);
-+
-+static void capture_assign_block_nolock(txn_atom * atom, jnode * node);
-+
-+static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node);
-+
-+static int capture_init_fusion(jnode * node, txn_handle * txnh,
-+ txn_capture mode);
-+
-+static int capture_fuse_wait(txn_handle *, txn_atom *, txn_atom *, txn_capture);
-+
-+static void capture_fuse_into(txn_atom * small, txn_atom * large);
-+
-+void reiser4_invalidate_list(struct list_head *);
-+
-+/* GENERIC STRUCTURES */
-+
-+/* Wait-link record: bundles a lock stack with two list links and two
-+ callbacks.
-+ NOTE(review): presumably the two links are threaded onto an atom's
-+ fwaitfor_list and fwaiting_list (both are initialised in atom_init()) --
-+ confirm against the users of this structure. */
-+typedef struct _txn_wait_links txn_wait_links;
-+
-+struct _txn_wait_links {
-+ /* lock stack associated with this record */
-+ lock_stack *_lock_stack;
-+ /* list linkage; presumably for atom->fwaitfor_list -- TODO confirm */
-+ struct list_head _fwaitfor_link;
-+ /* list linkage; presumably for atom->fwaiting_list -- TODO confirm */
-+ struct list_head _fwaiting_link;
-+ /* callbacks receiving the atom and this record; their call sites are
-+ not visible in this part of the file */
-+ int (*waitfor_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
-+ int (*waiting_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
-+};
-+
-+/* FIXME: In theory, we should be using the slab cache init & destructor
-+ methods instead of, e.g., jnode_init, etc. */
-+static struct kmem_cache *_atom_slab = NULL;
-+/* this is for user-visible, cross system-call transactions. */
-+static struct kmem_cache *_txnh_slab = NULL;
-+
-+/**
-+ * init_txnmgr_static - set up the slab caches of the transaction manager
-+ *
-+ * Creates the "txn_atom" and "txn_handle" caches as part of reiser4 module
-+ * initialization. Both cache pointers must still be NULL on entry. When the
-+ * second cache cannot be created the first one is destroyed again, so either
-+ * both caches exist afterwards or neither does.
-+ *
-+ * Returns 0 on success and -ENOMEM if a cache could not be created.
-+ */
-+int init_txnmgr_static(void)
-+{
-+	assert("jmacd-600", _atom_slab == NULL);
-+	assert("jmacd-601", _txnh_slab == NULL);
-+
-+	ON_DEBUG(atomic_set(&flush_cnt, 0));
-+
-+	_atom_slab = kmem_cache_create("txn_atom", sizeof(txn_atom), 0,
-+				       SLAB_HWCACHE_ALIGN |
-+				       SLAB_RECLAIM_ACCOUNT, NULL, NULL);
-+	if (!_atom_slab)
-+		return RETERR(-ENOMEM);
-+
-+	_txnh_slab = kmem_cache_create("txn_handle", sizeof(txn_handle), 0,
-+				       SLAB_HWCACHE_ALIGN, NULL, NULL);
-+	if (_txnh_slab)
-+		return 0;
-+
-+	/* second allocation failed: undo the first one */
-+	kmem_cache_destroy(_atom_slab);
-+	_atom_slab = NULL;
-+	return RETERR(-ENOMEM);
-+}
-+
-+/**
-+ * done_txnmgr_static - tear down the txn_atom and txn_handle slab caches
-+ *
-+ * Counterpart of init_txnmgr_static(); called on reiser4 module unloading or
-+ * system shutdown. Each cache is handed to destroy_reiser4_cache().
-+ */
-+void done_txnmgr_static(void)
-+{
-+	/* atoms first, then transaction handles */
-+	destroy_reiser4_cache(&_atom_slab);
-+	destroy_reiser4_cache(&_txnh_slab);
-+}
-+
-+/**
-+ * reiser4_init_txnmgr - initialize a new transaction manager
-+ * @mgr: pointer to transaction manager embedded in reiser4 super block
-+ *
-+ * Called on mount. Sets up the manager's spinlock and commit mutex, empties
-+ * the list of atoms, zeroes the atom counter and starts atom ids at 1.
-+ */
-+void reiser4_init_txnmgr(txn_mgr *mgr)
-+{
-+	assert("umka-169", mgr != NULL);
-+
-+	/* synchronisation primitives */
-+	spin_lock_init(&mgr->tmgr_lock);
-+	mutex_init(&mgr->commit_mutex);
-+
-+	/* no atoms exist yet; the first atom created receives id 1 */
-+	INIT_LIST_HEAD(&mgr->atoms_list);
-+	mgr->id_count = 1;
-+	mgr->atom_count = 0;
-+}
-+
-+/**
-+ * reiser4_done_txnmgr - stop transaction manager
-+ * @mgr: pointer to transaction manager embedded in reiser4 super block
-+ *
-+ * Called on umount. Nothing is released here; the function only asserts that
-+ * the manager has no atoms left (empty atom list, zero atom count).
-+ */
-+void reiser4_done_txnmgr(txn_mgr *mgr)
-+{
-+	assert("umka-170", mgr != NULL);
-+
-+	/* every atom must be gone by the time the file system is unmounted */
-+	assert("umka-1701", list_empty_careful(&mgr->atoms_list));
-+	assert("umka-1702", mgr->atom_count == 0);
-+}
-+
-+/* Put a transaction handle into its initial state: no atom attached, no flags
-+   set, spinlock and list link initialised, operating in the given @mode.
-+   Audited by: umka (2002.06.13) */
-+static void txnh_init(txn_handle * txnh, txn_mode mode)
-+{
-+	assert("umka-171", txnh != NULL);
-+
-+	txnh->atom = NULL;
-+	txnh->mode = mode;
-+
-+	/* kept after the atom pointer is cleared, as in the original ordering */
-+	reiser4_ctx_gfp_mask_set();
-+
-+	spin_lock_init(&txnh->hlock);
-+	INIT_LIST_HEAD(&txnh->txnh_link);
-+	txnh->flags = 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* Debugging predicate: a handle is clean when it has no atom attached and the
-+   spin_locked_txnh lock counter is nil. Returns 1 if clean, 0 otherwise. */
-+static int txnh_isclean(txn_handle * txnh)
-+{
-+	assert("umka-172", txnh != NULL);
-+
-+	if (txnh->atom != NULL)
-+		return 0;
-+
-+	return !!LOCK_CNT_NIL(spin_locked_txnh);
-+}
-+#endif
-+
-+/* Bring a freshly allocated atom into the ASTAGE_FREE state: the structure is
-+   zeroed, its start time is stamped with the current jiffies, and every list
-+   head, the spinlock, both block number sets and the flush queue parts are
-+   initialised. */
-+static void atom_init(txn_atom * atom)
-+{
-+	int lvl;
-+
-+	assert("umka-173", atom != NULL);
-+
-+	/* everything not set explicitly below starts out as zero */
-+	memset(atom, 0, sizeof(txn_atom));
-+
-+	atom->start_time = jiffies;
-+	atom->stage = ASTAGE_FREE;
-+
-+	spin_lock_init(&(atom->alock));
-+
-+	/* per-level dirty lists, levels 0 .. REAL_MAX_ZTREE_HEIGHT inclusive */
-+	for (lvl = 0; lvl <= REAL_MAX_ZTREE_HEIGHT; lvl++)
-+		INIT_LIST_HEAD(ATOM_DIRTY_LIST(atom, lvl));
-+
-+	/* remaining jnode lists */
-+	INIT_LIST_HEAD(ATOM_CLEAN_LIST(atom));
-+	INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom));
-+	INIT_LIST_HEAD(ATOM_WB_LIST(atom));
-+	INIT_LIST_HEAD(&atom->inodes);
-+
-+	/* list of transaction handles */
-+	INIT_LIST_HEAD(&atom->txnh_list);
-+	/* link to transaction manager's list of atoms */
-+	INIT_LIST_HEAD(&atom->atom_link);
-+
-+	/* wait lists */
-+	INIT_LIST_HEAD(&atom->fwaitfor_list);
-+	INIT_LIST_HEAD(&atom->fwaiting_list);
-+
-+	blocknr_set_init(&atom->delete_set);
-+	blocknr_set_init(&atom->wandered_map);
-+
-+	init_atom_fq_parts(atom);
-+}
-+
-+#if REISER4_DEBUG
-+/* Debugging predicate: returns 1 when the atom looks untouched -- all dirty
-+   lists empty, stage ASTAGE_FREE, zero counters and reference count, atom_link
-+   pointing at itself, all other lists empty and clean flush queue parts --
-+   and 0 otherwise. */
-+static int atom_isclean(txn_atom * atom)
-+{
-+	int lvl;
-+
-+	assert("umka-174", atom != NULL);
-+
-+	for (lvl = 0; lvl <= REAL_MAX_ZTREE_HEIGHT; lvl++)
-+		if (!list_empty_careful(ATOM_DIRTY_LIST(atom, lvl)))
-+			return 0;
-+
-+	/* scalar state */
-+	if (atom->stage != ASTAGE_FREE ||
-+	    atom->txnh_count != 0 ||
-+	    atom->capture_count != 0 ||
-+	    atomic_read(&atom->refcount) != 0)
-+		return 0;
-+
-+	/* the atom must not be linked into the manager's list */
-+	if (atom->atom_link.next != &atom->atom_link ||
-+	    atom->atom_link.prev != &atom->atom_link)
-+		return 0;
-+
-+	/* remaining lists and the flush queue parts */
-+	return list_empty_careful(&atom->txnh_list) &&
-+	       list_empty_careful(ATOM_CLEAN_LIST(atom)) &&
-+	       list_empty_careful(ATOM_OVRWR_LIST(atom)) &&
-+	       list_empty_careful(ATOM_WB_LIST(atom)) &&
-+	       list_empty_careful(&atom->fwaitfor_list) &&
-+	       list_empty_careful(&atom->fwaiting_list) &&
-+	       atom_fq_parts_are_clean(atom);
-+}
-+#endif
-+
-+/* Open a transaction handle for @context. The handle is the trans_in_ctx
-+   member embedded in the reiser4_context, so no allocation takes place here.
-+   The context must not have a handle yet. Eventually this is meant to be
-+   extended so that handles can span several contexts. */
-+/* Audited by: umka (2002.06.13) */
-+void reiser4_txn_begin(reiser4_context * context)
-+{
-+	txn_handle *handle;
-+
-+	assert("jmacd-544", context->trans == NULL);
-+
-+	/* publish the embedded handle in the context before initialising it */
-+	handle = &context->trans_in_ctx;
-+	context->trans = handle;
-+
-+	/* FIXME_LATER_JMACD There is currently no way to begin a
-+	   TXN_READ_FUSING transcrash; TXN_WRITE_FUSING is the default. The
-+	   handle lives inside the context for now, but dynamically allocated
-+	   transcrashes spanning multiple system calls are wanted. */
-+	txnh_init(handle, TXN_WRITE_FUSING);
-+}
-+
-+/* Finish the transaction handle of @context.
-+
-+   If the context has a handle and that handle is attached to an atom, the
-+   handle is committed through commit_txnh(). The handle is then detached from
-+   the context. Must be called for the current context, in a schedulable state
-+   and with a clean lock stack.
-+
-+   Returns 0 when there was nothing to commit, otherwise the value returned by
-+   commit_txnh(). */
-+int reiser4_txn_end(reiser4_context * context)
-+{
-+	/* int, matching both commit_txnh() and this function's return type */
-+	int ret = 0;
-+	txn_handle *txnh;
-+
-+	assert("umka-283", context != NULL);
-+	assert("nikita-3012", reiser4_schedulable());
-+	assert("vs-24", context == get_current_context());
-+	assert("nikita-2967", lock_stack_isclean(get_current_lock_stack()));
-+
-+	txnh = context->trans;
-+	if (txnh != NULL) {
-+		if (txnh->atom != NULL)
-+			ret = commit_txnh(txnh);
-+		/* after commit the handle must no longer reference an atom */
-+		assert("jmacd-633", txnh_isclean(txnh));
-+		context->trans = NULL;
-+	}
-+	return ret;
-+}
-+
-+/* Close the transaction handle of @context and open a fresh one, calling
-+ reiser4_preempt_point() in between. The value returned by
-+ reiser4_txn_end() is discarded here. */
-+void reiser4_txn_restart(reiser4_context * context)
-+{
-+ reiser4_txn_end(context);
-+ reiser4_preempt_point();
-+ reiser4_txn_begin(context);
-+}
-+
-+/* Convenience wrapper: restart the transaction of the current context. */
-+void reiser4_txn_restart_current(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	reiser4_txn_restart(ctx);
-+}
-+
-+/* TXN_ATOM */
-+
-+/* Get the atom belonging to a txnh, which is not locked. Return txnh locked. Locks atom, if atom
-+ is not NULL. This performs the necessary spin_trylock to break the lock-ordering cycle. May
-+ return NULL.
-+
-+ On return the txnh spinlock is always held; the atom spinlock is held only
-+ when a non-NULL atom is returned. */
-+static txn_atom *txnh_get_atom(txn_handle * txnh)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-180", txnh != NULL);
-+ assert_spin_not_locked(&(txnh->hlock));
-+
-+ while (1) {
-+ spin_lock_txnh(txnh);
-+ atom = txnh->atom;
-+
-+ /* no atom attached: leave with only the txnh locked */
-+ if (atom == NULL)
-+ break;
-+
-+ /* fast path: the atom lock was free */
-+ if (spin_trylock_atom(atom))
-+ break;
-+
-+ /* slow path: take a reference on the atom, drop the txnh lock
-+ and re-acquire both locks atom first, txnh second */
-+ atomic_inc(&atom->refcount);
-+
-+ spin_unlock_txnh(txnh);
-+ spin_lock_atom(atom);
-+ spin_lock_txnh(txnh);
-+
-+ /* the handle still points to the same atom: drop the temporary
-+ reference and return with both locks held */
-+ if (txnh->atom == atom) {
-+ atomic_dec(&atom->refcount);
-+ break;
-+ }
-+
-+ /* txnh->atom changed while the txnh was unlocked: release this
-+ atom (atom_dec_and_unlock() frees it if that was the last
-+ reference) and start over */
-+ spin_unlock_txnh(txnh);
-+ atom_dec_and_unlock(atom);
-+ }
-+
-+ return atom;
-+}
-+
-+/* Return the atom of the current context's transaction handle with the atom
-+   spinlock held, or NULL if the handle has no atom. The handle itself is
-+   unlocked again before returning. */
-+txn_atom *get_current_atom_locked_nocheck(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+	txn_handle *handle;
-+	txn_atom *result;
-+
-+	assert("zam-437", ctx != NULL);
-+
-+	handle = ctx->trans;
-+	assert("zam-435", handle != NULL);
-+
-+	/* txnh_get_atom() returns with the handle locked */
-+	result = txnh_get_atom(handle);
-+	spin_unlock_txnh(handle);
-+
-+	return result;
-+}
-+
-+/* Get the atom belonging to a jnode, which is initially locked. Return with
-+ both jnode and atom locked. This performs the necessary spin_trylock to
-+ break the lock-ordering cycle. Assumes the jnode is already locked, and
-+ returns NULL if atom is not set (in which case only the jnode stays
-+ locked). */
-+txn_atom *jnode_get_atom(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-181", node != NULL);
-+
-+ while (1) {
-+ /* the jnode spinlock is held at the top of every iteration */
-+ assert_spin_locked(&(node->guard));
-+
-+ atom = node->atom;
-+ /* node is not in any atom */
-+ if (atom == NULL)
-+ break;
-+
-+ /* If atom is not locked, grab the lock and return */
-+ if (spin_trylock_atom(atom))
-+ break;
-+
-+ /* At least one jnode belongs to this atom it guarantees that
-+ * atom->refcount > 0, we can safely increment refcount. */
-+ atomic_inc(&atom->refcount);
-+ spin_unlock_jnode(node);
-+
-+ /* re-acquire spin locks in the right order */
-+ spin_lock_atom(atom);
-+ spin_lock_jnode(node);
-+
-+ /* check if node still points to the same atom. */
-+ if (node->atom == atom) {
-+ atomic_dec(&atom->refcount);
-+ break;
-+ }
-+
-+ /* releasing of atom lock and reference requires not holding
-+ * locks on jnodes. */
-+ spin_unlock_jnode(node);
-+
-+ /* We are not sure that this atom has extra references besides
-+ * ours, so we should call the proper function which may free
-+ * the atom if the last reference is released. */
-+ atom_dec_and_unlock(atom);
-+
-+ /* lock jnode again for getting valid node->atom pointer
-+ * value. */
-+ spin_lock_jnode(node);
-+ }
-+
-+ return atom;
-+}
-+
-+/* Returns true if @check is dirty and part of the same atom as @node. Used
-+ by flush code to indicate whether the next node (in some direction) is suitable for
-+ flushing.
-+
-+ If @check is a znode it must additionally be connected. If @alloc_check is
-+ non-zero, jnode_is_flushprepped(@check) must additionally equal
-+ @alloc_value. Takes and releases the spinlocks of @check and of its atom. */
-+int
-+same_slum_check(jnode * node, jnode * check, int alloc_check, int alloc_value)
-+{
-+ int compat;
-+ txn_atom *atom;
-+
-+ assert("umka-182", node != NULL);
-+ assert("umka-183", check != NULL);
-+
-+ /* Not sure what this function is supposed to do if supplied with @check that is
-+ neither formatted nor unformatted (bitmap or so). */
-+ assert("nikita-2373", jnode_is_znode(check)
-+ || jnode_is_unformatted(check));
-+
-+ /* Need a lock on CHECK to get its atom and to check various state bits.
-+ Don't need a lock on NODE once we get the atom lock. */
-+ /* It is not enough to lock two nodes and check (node->atom ==
-+ check->atom) because atom could be locked and being fused at that
-+ moment, jnodes of the atom of that state (being fused) can point to
-+ different objects, but the atom is the same. */
-+ spin_lock_jnode(check);
-+
-+ atom = jnode_get_atom(check);
-+
-+ if (atom == NULL) {
-+ /* @check is not captured by any atom */
-+ compat = 0;
-+ } else {
-+ compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY));
-+
-+ /* NOTE(review): the two updates below use bitwise "&=", so they
-+ only behave as a logical AND if the right-hand side is
-+ exactly 0 or 1 -- confirm for znode_is_connected(). */
-+ if (compat && jnode_is_znode(check)) {
-+ compat &= znode_is_connected(JZNODE(check));
-+ }
-+
-+ if (compat && alloc_check) {
-+ compat &= (alloc_value == jnode_is_flushprepped(check));
-+ }
-+
-+ spin_unlock_atom(atom);
-+ }
-+
-+ spin_unlock_jnode(check);
-+
-+ return compat;
-+}
-+
-+/* Decrement the atom's reference count and if it falls to zero, free it.
-+ Must be called with the atom spinlock held and a positive reference count;
-+ the atom spinlock is released on every path (by atom_free() when the atom
-+ is freed). */
-+void atom_dec_and_unlock(txn_atom * atom)
-+{
-+ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+
-+ assert("umka-186", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("zam-1039", atomic_read(&atom->refcount) > 0);
-+
-+ if (atomic_dec_and_test(&atom->refcount)) {
-+ /* take txnmgr lock and atom lock in proper order. */
-+ if (!spin_trylock_txnmgr(mgr)) {
-+ /* This atom should exist after we re-acquire its
-+ * spinlock, so we increment its reference counter. */
-+ atomic_inc(&atom->refcount);
-+ spin_unlock_atom(atom);
-+ spin_lock_txnmgr(mgr);
-+ spin_lock_atom(atom);
-+
-+ /* a new reference appeared while the atom was unlocked:
-+ the atom stays alive, just drop both locks */
-+ if (!atomic_dec_and_test(&atom->refcount)) {
-+ spin_unlock_atom(atom);
-+ spin_unlock_txnmgr(mgr);
-+ return;
-+ }
-+ }
-+ /* last reference gone; both txnmgr and atom locks are held */
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ atom_free(atom);
-+ spin_unlock_txnmgr(mgr);
-+ } else
-+ spin_unlock_atom(atom);
-+}
-+
-+/* Create new atom and connect it to given transaction handle. This adds the
-+ atom to the transaction manager's list and sets its reference count to 1, an
-+ artificial reference which is kept until it commits. We play strange games
-+ to avoid allocation under jnode & txnh spinlocks.
-+
-+ @atom_alloc points to a caller-owned slot holding a pre-allocated atom (or
-+ NULL, in which case one is allocated here). The slot is set to NULL once the
-+ atom has been consumed; if the handle turned out to have an atom already,
-+ the allocation is left in the slot for the caller.
-+
-+ Returns -ENOMEM if allocation fails and -E_REPEAT in every other case,
-+ including after the new atom was successfully assigned. */
-+
-+static int atom_begin_and_assign_to_txnh(txn_atom ** atom_alloc, txn_handle * txnh)
-+{
-+ txn_atom *atom;
-+ txn_mgr *mgr;
-+
-+ if (REISER4_DEBUG && rofs_tree(current_tree)) {
-+ warning("nikita-3366", "Creating atom on rofs");
-+ dump_stack();
-+ }
-+
-+ /* allocate before any spinlock is taken */
-+ if (*atom_alloc == NULL) {
-+ (*atom_alloc) = kmem_cache_alloc(_atom_slab,
-+ reiser4_ctx_gfp_mask_get());
-+
-+ if (*atom_alloc == NULL)
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ /* and, also, txnmgr spin lock should be taken before jnode and txnh
-+ locks. */
-+ mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+ spin_lock_txnmgr(mgr);
-+ spin_lock_txnh(txnh);
-+
-+ /* Check whether new atom still needed */
-+ if (txnh->atom != NULL) {
-+ /* NOTE-NIKITA probably it is rather better to free
-+ * atom_alloc here than thread it up to reiser4_try_capture() */
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_txnmgr(mgr);
-+
-+ return -E_REPEAT;
-+ }
-+
-+ /* take ownership of the pre-allocated atom */
-+ atom = *atom_alloc;
-+ *atom_alloc = NULL;
-+
-+ atom_init(atom);
-+
-+ assert("jmacd-17", atom_isclean(atom));
-+
-+ /*
-+ * lock ordering is broken here. It is ok, as long as @atom is new
-+ * and inaccessible for others. We can't use spin_lock_atom or
-+ * spin_lock(&atom->alock) because they care about locking
-+ * dependencies. spin_trylock_atom doesn't.
-+ */
-+ check_me("", spin_trylock_atom(atom));
-+
-+ /* add atom to the end of transaction manager's list of atoms */
-+ list_add_tail(&atom->atom_link, &mgr->atoms_list);
-+ atom->atom_id = mgr->id_count++;
-+ mgr->atom_count += 1;
-+
-+ /* Release txnmgr lock */
-+ spin_unlock_txnmgr(mgr);
-+
-+ /* One reference until it commits. */
-+ atomic_inc(&atom->refcount);
-+ atom->stage = ASTAGE_CAPTURE_FUSE;
-+ atom->super = reiser4_get_current_sb();
-+ capture_assign_txnh_nolock(atom, txnh);
-+
-+ spin_unlock_atom(atom);
-+ spin_unlock_txnh(txnh);
-+
-+ /* -E_REPEAT even on success: the caller is expected to retry */
-+ return -E_REPEAT;
-+}
-+
-+/* An atom counts as "open" while its stage is positive and still below
-+   ASTAGE_PRE_COMMIT. Returns 1 if open, 0 otherwise. */
-+static int atom_isopen(const txn_atom * atom)
-+{
-+	assert("umka-185", atom != NULL);
-+
-+	if (atom->stage <= 0)
-+		return 0;
-+
-+	return atom->stage < ASTAGE_PRE_COMMIT;
-+}
-+
-+/* Number of pointers to this atom that fusion would have to update: one per
-+   captured node plus one per transaction handle. Fusion uses this as an
-+   estimate of the work involved and merges the atom with fewer pointers into
-+   the one with more. */
-+static int atom_pointer_count(const txn_atom * atom)
-+{
-+	assert("umka-187", atom != NULL);
-+
-+	return atom->capture_count + atom->txnh_count;
-+}
-+
-+/* Called holding the atom lock, this removes the atom from the transaction manager list
-+ and frees it. The transaction manager lock must be held as well; it is still
-+ held on return, while the atom lock is released before the atom memory is
-+ returned to the slab cache. */
-+static void atom_free(txn_atom * atom)
-+{
-+ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+
-+ assert("umka-188", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* Remove from the txn_mgr's atom list */
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ mgr->atom_count -= 1;
-+ list_del_init(&atom->atom_link);
-+
-+ /* Clean the atom */
-+ /* only atoms in stage ASTAGE_INVALID or ASTAGE_DONE may be freed */
-+ assert("jmacd-16",
-+ (atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE));
-+ atom->stage = ASTAGE_FREE;
-+
-+ blocknr_set_destroy(&atom->delete_set);
-+ blocknr_set_destroy(&atom->wandered_map);
-+
-+ /* NOTE(review): this assertion reuses the label "jmacd-16" of the
-+ stage check above, so a failure report does not tell them apart. */
-+ assert("jmacd-16", atom_isclean(atom));
-+
-+ /* unlock before the memory goes back to the slab cache */
-+ spin_unlock_atom(atom);
-+
-+ kmem_cache_free(_atom_slab, atom);
-+}
-+
-+/* Return true if the atom is older than the atom_max_age configured in the
-+   current super block's transaction manager, measured in jiffies from the
-+   atom's start_time. */
-+static int atom_is_dotard(const txn_atom * atom)
-+{
-+	unsigned long expiry;
-+
-+	expiry = atom->start_time +
-+		 get_current_super_private()->tmgr.atom_max_age;
-+
-+	return time_after(jiffies, expiry);
-+}
-+
-+/* Return true if exactly one transaction handle of the (locked) atom is not
-+   counted in nr_waiters, i.e. txnh_count equals nr_waiters + 1. */
-+static int atom_can_be_committed(txn_atom * atom)
-+{
-+	assert_spin_locked(&(atom->alock));
-+	assert("zam-885", atom->txnh_count > atom->nr_waiters);
-+
-+	return atom->nr_waiters + 1 == atom->txnh_count;
-+}
-+
-+/* Decide whether an atom should commit now. It should when any of these
-+   holds: the ATOM_FORCE_COMMIT flag is set, the atom's pointer count exceeds
-+   the configured atom_max_size, or the atom is too old (atom_is_dotard()). */
-+static int atom_should_commit(const txn_atom * atom)
-+{
-+	assert("umka-189", atom != NULL);
-+
-+	/* explicit request */
-+	if (atom->flags & ATOM_FORCE_COMMIT)
-+		return 1;
-+
-+	/* size limit */
-+	if ((unsigned)atom_pointer_count(atom) >
-+	    get_current_super_private()->tmgr.atom_max_size)
-+		return 1;
-+
-+	/* age limit */
-+	return atom_is_dotard(atom) != 0;
-+}
-+
-+/* Return 1 if the current atom exists and atom_should_commit() says it
-+   requires commit, 0 otherwise. The atom is locked only for the duration of
-+   the check. */
-+int current_atom_should_commit(void)
-+{
-+	txn_atom *atom = get_current_atom_locked_nocheck();
-+	int verdict;
-+
-+	if (!atom)
-+		return 0;
-+
-+	verdict = atom_should_commit(atom);
-+	spin_unlock_atom(atom);
-+
-+	return verdict;
-+}
-+
-+/* Return true if the atom should be committed as soon as possible: either
-+   the estimate computed from its capture count exceeds one eighth of
-+   totalram_pages, or its flushed counter is above 100. */
-+static int atom_should_commit_asap(const txn_atom * atom)
-+{
-+	unsigned int nodes;
-+	unsigned int pinned;
-+
-+	assert("nikita-3309", atom != NULL);
-+
-+	nodes = (unsigned)atom->capture_count;
-+	/* the shift is applied before the multiplication, as in the original */
-+	pinned = (nodes >> PAGE_CACHE_SHIFT) * sizeof(znode);
-+
-+	if (pinned > (totalram_pages >> 3))
-+		return 1;
-+
-+	return atom->flushed > 100;
-+}
-+
-+/* Return the first jnode on @head (linked through capture_link) that is
-+   eligible for flushing, or NULL if there is none. When JNODE_FLUSH_COMMIT is
-+   set in @flags every jnode is eligible; otherwise jnodes which "heard
-+   banshee" or have active I/O (JNODE_WRITEBACK) are skipped. */
-+static jnode *find_first_dirty_in_list(struct list_head *head, int flags)
-+{
-+	const int committing = flags & JNODE_FLUSH_COMMIT;
-+	jnode *pos;
-+
-+	list_for_each_entry(pos, head, capture_link) {
-+		if (committing ||
-+		    (!JF_ISSET(pos, JNODE_HEARD_BANSHEE) &&
-+		     !JF_ISSET(pos, JNODE_WRITEBACK)))
-+			return pos;
-+	}
-+
-+	return NULL;
-+}
-+
-+/* Return the first eligible dirty jnode of the (locked) atom, or NULL if the
-+   atom's dirty lists hold none. Levels are scanned upwards starting from
-+   LEAF_LEVEL (=1); list #0, which holds the znode-above-root, is examined
-+   last. @flags is passed through to find_first_dirty_in_list(). */
-+jnode *find_first_dirty_jnode(txn_atom * atom, int flags)
-+{
-+	tree_level lvl;
-+
-+	assert_spin_locked(&(atom->alock));
-+
-+	for (lvl = 1; lvl <= REAL_MAX_ZTREE_HEIGHT; lvl++) {
-+		struct list_head *dirty = ATOM_DIRTY_LIST(atom, lvl);
-+		jnode *found;
-+
-+		if (!list_empty_careful(dirty)) {
-+			found = find_first_dirty_in_list(dirty, flags);
-+			if (found)
-+				return found;
-+		}
-+	}
-+
-+	/* znode-above-root is on the list #0. */
-+	return find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, 0), flags);
-+}
-+
-+/* Walk the atom's writeback list. Every jnode that is no longer under
-+   writeback is either queued to @fq (if it is dirty) or moved to the tail of
-+   the atom's clean list (if it is not). Jnodes still under writeback are left
-+   where they are. Each jnode is examined under its own spinlock. */
-+static void dispatch_wb_list(txn_atom * atom, flush_queue_t * fq)
-+{
-+	jnode *pos;
-+	jnode *tmp;
-+
-+	assert("zam-905", atom_is_protected(atom));
-+
-+	/* the "safe" iterator fetches the successor before the body runs,
-+	   so the current jnode may be unlinked from this list */
-+	list_for_each_entry_safe(pos, tmp, ATOM_WB_LIST(atom), capture_link) {
-+		spin_lock_jnode(pos);
-+		if (!JF_ISSET(pos, JNODE_WRITEBACK)) {
-+			if (JF_ISSET(pos, JNODE_DIRTY))
-+				queue_jnode(fq, pos);
-+			else
-+				/* move jnode to atom's clean list */
-+				list_move_tail(&pos->capture_link,
-+					       ATOM_CLEAN_LIST(atom));
-+		}
-+		spin_unlock_jnode(pos);
-+	}
-+}
-+
-+/* Scan current atom->writeback_nodes list, re-submit dirty and !writeback
-+ * jnodes to disk.
-+ *
-+ * Returns the error encoded in the pointer if no flush queue could be
-+ * obtained, otherwise the value returned by reiser4_write_fq(). */
-+static int submit_wb_list(void)
-+{
-+ int ret;
-+ flush_queue_t *fq;
-+
-+ fq = get_fq_for_current_atom();
-+ if (IS_ERR(fq))
-+ return PTR_ERR(fq);
-+
-+ /* NOTE(review): fq->atom is unlocked right after dispatching, so
-+ * get_fq_for_current_atom() presumably returns with it locked --
-+ * confirm. */
-+ dispatch_wb_list(fq->atom, fq);
-+ spin_unlock_atom(fq->atom);
-+
-+ /* write out what was queued, then release the flush queue */
-+ ret = reiser4_write_fq(fq, NULL, 1);
-+ reiser4_fq_put(fq);
-+
-+ return ret;
-+}
-+
-+/* Wait for completion of all writes of the current atom, re-submitting its
-+   writeback list as needed.
-+
-+   Two identical passes are made. The first re-submits jnodes that were
-+   modified and dirtied while an i/o request for them was already running, and
-+   waits for all i/o. The second scans the writeback list again, now that all
-+   i/o has completed, re-submits what is still dirty and waits for it.
-+
-+   Returns 0 on success, or the first error reported by submit_wb_list()
-+   (negative values only) or current_atom_finish_all_fq() (any non-zero
-+   value). */
-+static int current_atom_complete_writes(void)
-+{
-+	int pass;
-+	int ret;
-+
-+	for (pass = 0; pass < 2; pass++) {
-+		/* re-submit dirty, not-under-writeback nodes */
-+		ret = submit_wb_list();
-+		if (ret < 0)
-+			return ret;
-+
-+		/* wait for all i/o to complete */
-+		ret = current_atom_finish_all_fq();
-+		if (ret)
-+			return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* Debugging aid: print a one-line summary of @atom (reference count, id,
-+ flags, handle and capture counts, stage, start time, flushed counter) via
-+ printk, each line prefixed with @prefix. A NULL @atom is reported as
-+ "no atom". */
-+static void reiser4_info_atom(const char *prefix, const txn_atom * atom)
-+{
-+ if (atom == NULL) {
-+ printk("%s: no atom\n", prefix);
-+ return;
-+ }
-+
-+ printk("%s: refcount: %i id: %i flags: %x txnh_count: %i"
-+ " capture_count: %i stage: %x start: %lu, flushed: %i\n", prefix,
-+ atomic_read(&atom->refcount), atom->atom_id, atom->flags,
-+ atom->txnh_count, atom->capture_count, atom->stage,
-+ atom->start_time, atom->flushed);
-+}
-+
-+#else /* REISER4_DEBUG */
-+
-+/* Non-debug builds: empty stub so that callers need no conditionals. */
-+static inline void reiser4_info_atom(const char *prefix, const txn_atom * atom) {}
-+
-+#endif /* REISER4_DEBUG */
-+
-+#define TOOMANYFLUSHES (1 << 13)
-+
-+/* Called with the atom locked and no open "active" transaction handlers except
-+ ours, this function calls flush_current_atom() until all dirty nodes are
-+ processed. Then it initiates commit processing.
-+
-+ Called by the single remaining open "active" txnh, which is closing. Other
-+ open txnhs belong to processes which wait atom commit in commit_txnh()
-+ routine. They are counted as "waiters" in atom->nr_waiters. Therefore as
-+ long as we hold the atom lock none of the jnodes can be captured and/or
-+ locked.
-+
-+ @nr_submitted is passed on to flush_current_atom() and
-+ reiser4_write_logs(). @atom is an in/out pointer: *atom is re-read with
-+ get_current_atom_locked() after every flush iteration that asks for a
-+ repeat.
-+
-+ Return value is an error code if commit fails. -E_REPEAT is returned (with
-+ the atom unlocked) when the atom cannot be committed yet. On success the
-+ atom is in stage ASTAGE_DONE and still locked.
-+*/
-+static int commit_current_atom(long *nr_submitted, txn_atom ** atom)
-+{
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+ long ret = 0;
-+ /* how many times jnode_flush() was called as a part of attempt to
-+ * commit this atom. */
-+ int flushiters;
-+
-+ assert("zam-888", atom != NULL && *atom != NULL);
-+ assert_spin_locked(&((*atom)->alock));
-+ assert("zam-887", get_current_context()->trans->atom == *atom);
-+ assert("jmacd-151", atom_isopen(*atom));
-+
-+ assert("nikita-3184",
-+ get_current_super_private()->delete_mutex_owner != current);
-+
-+ /* flush until flush_current_atom() stops asking for a repeat */
-+ for (flushiters = 0;; ++flushiters) {
-+ ret =
-+ flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS |
-+ JNODE_FLUSH_COMMIT,
-+ LONG_MAX /* nr_to_write */ ,
-+ nr_submitted, atom, NULL);
-+ if (ret != -E_REPEAT)
-+ break;
-+
-+ /* if atom's dirty list contains one znode which is
-+ HEARD_BANSHEE and is locked we have to allow lock owner to
-+ continue and uncapture that znode */
-+ reiser4_preempt_point();
-+
-+ *atom = get_current_atom_locked();
-+ /* past TOOMANYFLUSHES iterations, warn whenever IS_POW() holds
-+ for the iteration count */
-+ if (flushiters > TOOMANYFLUSHES && IS_POW(flushiters)) {
-+ warning("nikita-3176",
-+ "Flushing like mad: %i", flushiters);
-+ reiser4_info_atom("atom", *atom);
-+ DEBUGON(flushiters > (1 << 20));
-+ }
-+ }
-+
-+ if (ret)
-+ return ret;
-+
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ if (!atom_can_be_committed(*atom)) {
-+ spin_unlock_atom(*atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ /* nothing captured: skip writing and go straight to ASTAGE_DONE */
-+ if ((*atom)->capture_count == 0)
-+ goto done;
-+
-+ /* Up to this point we have been flushing and after flush is called we
-+ return -E_REPEAT. Now we can commit. We cannot return -E_REPEAT
-+ at this point, commit should be successful. */
-+ reiser4_atom_set_stage(*atom, ASTAGE_PRE_COMMIT);
-+ ON_DEBUG(((*atom)->committer = current));
-+ spin_unlock_atom(*atom);
-+
-+ /* NOTE(review): on failure here the function returns with the atom
-+ unlocked and left in ASTAGE_PRE_COMMIT. */
-+ ret = current_atom_complete_writes();
-+ if (ret)
-+ return ret;
-+
-+ assert("zam-906", list_empty(ATOM_WB_LIST(*atom)));
-+
-+ /* isolate critical code path which should be executed by only one
-+ * thread using tmgr mutex */
-+ mutex_lock(&sbinfo->tmgr.commit_mutex);
-+
-+ ret = reiser4_write_logs(nr_submitted);
-+ if (ret < 0)
-+ reiser4_panic("zam-597", "write log failed (%ld)\n", ret);
-+
-+ /* The atom->ovrwr_nodes list is processed under commit mutex held
-+ because of bitmap nodes which are captured by special way in
-+ reiser4_pre_commit_hook_bitmap(), that way does not include
-+ capture_fuse_wait() as a capturing of other nodes does -- the commit
-+ mutex is used for transaction isolation instead. */
-+ reiser4_invalidate_list(ATOM_OVRWR_LIST(*atom));
-+ mutex_unlock(&sbinfo->tmgr.commit_mutex);
-+
-+ reiser4_invalidate_list(ATOM_CLEAN_LIST(*atom));
-+ reiser4_invalidate_list(ATOM_WB_LIST(*atom));
-+ assert("zam-927", list_empty(&(*atom)->inodes));
-+
-+ spin_lock_atom(*atom);
-+ done:
-+ reiser4_atom_set_stage(*atom, ASTAGE_DONE);
-+ ON_DEBUG((*atom)->committer = NULL);
-+
-+ /* Atom's state changes, so wake up everybody waiting for this
-+ event. */
-+ wakeup_atom_waiting_list(*atom);
-+
-+ /* Decrement the "until commit" reference, at least one txnh (the caller) is
-+ still open. */
-+ atomic_dec(&(*atom)->refcount);
-+
-+ assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0);
-+ assert("jmacd-1062", (*atom)->capture_count == 0);
-+ /* same condition as the assertion above, checked with BUG_ON as well */
-+ BUG_ON((*atom)->capture_count != 0);
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ return ret;
-+}
-+
-+/* TXN_TXNH */
-+
-+/**
-+ * force_commit_atom - commit current atom and wait commit completion
-+ * @txnh: transaction handle whose atom (txnh->atom) is to be committed; must
-+ * be non-NULL and have an atom assigned
-+ *
-+ * Commits current atom and waits for commit completion; current atom and @txnh
-+ * have to be spinlocked before call, this function unlocks them on exit.
-+ *
-+ * Sets TXNH_WAIT_COMMIT on @txnh and ATOM_FORCE_COMMIT on the atom, drops both
-+ * spin locks and then calls reiser4_txn_restart_current(). Always returns 0.
-+ */
-+int force_commit_atom(txn_handle *txnh)
-+{
-+ txn_atom *atom;
-+
-+ assert("zam-837", txnh != NULL);
-+ assert_spin_locked(&(txnh->hlock));
-+ assert("nikita-2966", lock_stack_isclean(get_current_lock_stack()));
-+
-+ atom = txnh->atom;
-+
-+ assert("zam-834", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /*
-+ * Set flags for atom and txnh: forcing atom commit and waiting for
-+ * commit completion
-+ */
-+ txnh->flags |= TXNH_WAIT_COMMIT;
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atom);
-+
-+ /* commit is here: restarting the current transaction is what is
-+ * expected to act on the flags set above (the restart code itself is
-+ * not visible in this function) */
-+ reiser4_txn_restart_current();
-+ return 0;
-+}
-+
-+/* Called to force commit of any outstanding atoms of @super.
-+ * @commit_all_atoms controls whether we commit all atoms, including new ones
-+ * which are created after this function is called; when it is zero only atoms
-+ * whose start_time is not later than the jiffies value sampled on entry are
-+ * committed.
-+ *
-+ * The atom list is rescanned from the beginning after every forced commit or
-+ * wait, because all locks are dropped in between. Returns 0, or the non-zero
-+ * result of force_commit_atom(). */
-+int txnmgr_force_commit_all(struct super_block *super, int commit_all_atoms)
-+{
-+ int ret;
-+ txn_atom *atom;
-+ txn_mgr *mgr;
-+ txn_handle *txnh;
-+ unsigned long start_time = jiffies;
-+ reiser4_context *ctx = get_current_context();
-+
-+ assert("nikita-2965", lock_stack_isclean(get_current_lock_stack()));
-+ assert("nikita-3058", reiser4_commit_check_locks());
-+
-+ reiser4_txn_restart_current();
-+
-+ mgr = &get_super_private(super)->tmgr;
-+
-+ txnh = ctx->trans;
-+
-+ again:
-+
-+ spin_lock_txnmgr(mgr);
-+
-+ list_for_each_entry(atom, &mgr->atoms_list, atom_link) {
-+ spin_lock_atom(atom);
-+
-+ /* Commit any atom which can be committed. If @commit_all_atoms
-+ * is not set we commit only atoms which were created before
-+ * this call is started. */
-+ if (commit_all_atoms
-+ || time_before_eq(atom->start_time, start_time)) {
-+ if (atom->stage <= ASTAGE_POST_COMMIT) {
-+ spin_unlock_txnmgr(mgr);
-+
-+ if (atom->stage < ASTAGE_PRE_COMMIT) {
-+ spin_lock_txnh(txnh);
-+ /* Add force-context txnh */
-+ capture_assign_txnh_nolock(atom, txnh);
-+ ret = force_commit_atom(txnh); /* unlocks both txnh and atom */
-+ if (ret)
-+ return ret;
-+ } else
-+ /* wait atom commit; the atom lock is
-+ * released inside the wait */
-+ reiser4_atom_wait_event(atom);
-+
-+ goto again; /* no list lock was held meanwhile: restart the scan */
-+ }
-+ }
-+
-+ spin_unlock_atom(atom);
-+ }
-+
-+#if REISER4_DEBUG
-+ if (commit_all_atoms) {
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ spin_lock_reiser4_super(sbinfo);
-+ assert("zam-813",
-+ sbinfo->blocks_fake_allocated_unformatted == 0);
-+ assert("zam-812", sbinfo->blocks_fake_allocated == 0);
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+#endif
-+
-+ spin_unlock_txnmgr(mgr);
-+
-+ return 0;
-+}
-+
-+/* Predicate used by commit_some_atoms(): yields 1 when @atom has not reached
-+ * ASTAGE_PRE_COMMIT yet, every transaction handle opened on it is counted as
-+ * a waiter, and atom_should_commit() agrees; 0 otherwise. No locks are taken
-+ * here, locking is the caller's business. */
-+static int atom_is_committable(txn_atom * atom)
-+{
-+	if (atom->stage >= ASTAGE_PRE_COMMIT)
-+		return 0;
-+	if (atom->txnh_count != atom->nr_waiters)
-+		return 0;
-+	return atom_should_commit(atom) != 0;
-+}
-+
-+/* called periodically from ktxnmgrd to commit old atoms. Releases ktxnmgrd spin
-+ * lock (mgr->daemon->guard) at exit.
-+ *
-+ * Picks the first atom on @mgr->atoms_list for which atom_is_committable()
-+ * holds, marks it ATOM_FORCE_COMMIT, attaches the current context's
-+ * transaction handle to it and restarts the current transaction. Always
-+ * returns 0; the local "ret" only records whether the scan found nothing. */
-+int commit_some_atoms(txn_mgr * mgr)
-+{
-+ int ret = 0;
-+ txn_atom *atom;
-+ txn_handle *txnh;
-+ reiser4_context *ctx;
-+ struct list_head *pos, *tmp;
-+
-+ ctx = get_current_context();
-+ assert("nikita-2444", ctx != NULL);
-+
-+ txnh = ctx->trans;
-+ spin_lock_txnmgr(mgr);
-+
-+ /*
-+ * this is to avoid gcc complain that atom might be used
-+ * uninitialized
-+ */
-+ atom = NULL;
-+
-+ /* look for atom to commit */
-+ list_for_each_safe(pos, tmp, &mgr->atoms_list) {
-+ atom = list_entry(pos, txn_atom, atom_link);
-+ /*
-+ * first test without taking atom spin lock, whether it is
-+ * eligible for committing at all
-+ */
-+ if (atom_is_committable(atom)) {
-+ /* now, take spin lock and re-check */
-+ spin_lock_atom(atom);
-+ if (atom_is_committable(atom))
-+ break; /* leave the loop with the atom spin-locked */
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+
-+ ret = (&mgr->atoms_list == pos); /* true when the loop ran off the end of the list */
-+ spin_unlock_txnmgr(mgr);
-+
-+ if (ret) {
-+ /* nothing found */
-+ spin_unlock(&mgr->daemon->guard);
-+ return 0;
-+ }
-+
-+ spin_lock_txnh(txnh);
-+
-+ BUG_ON(atom == NULL);
-+ /* Set the atom to force committing */
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ /* Add force-context txnh */
-+ capture_assign_txnh_nolock(atom, txnh);
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atom);
-+
-+ /* we are about to release daemon spin lock, notify daemon it
-+ has to rescan atoms */
-+ mgr->daemon->rescan = 1;
-+ spin_unlock(&mgr->daemon->guard);
-+ reiser4_txn_restart_current();
-+ return 0;
-+}
-+
-+/* Try to fuse the small, spin-locked @atom with some other atom of @tmgr that
-+ * has not reached ASTAGE_PRE_COMMIT.
-+ *
-+ * Returns 0 when no partner was found and the atom lock was held all the
-+ * time; @atom is then marked ATOM_CANCEL_FUSION and is still locked. Returns
-+ * -E_REPEAT whenever the atom lock had to be given up (stage changed while
-+ * relocking, fusion was started, or the lock was dropped to take the txnmgr
-+ * lock in the right order); the caller has to start over. */
-+static int txn_try_to_fuse_small_atom(txn_mgr * tmgr, txn_atom * atom)
-+{
-+	txn_atom *peer;
-+	int stage_on_entry;
-+	int lock_dropped = 0;
-+
-+	assert("zam-1051", atom->stage < ASTAGE_PRE_COMMIT);
-+
-+	stage_on_entry = atom->stage;
-+
-+	if (!spin_trylock_txnmgr(tmgr)) {
-+		/* txnmgr lock is busy: pin the atom, release its lock and
-+		 * take both locks again, txnmgr first */
-+		atomic_inc(&atom->refcount);
-+		spin_unlock_atom(atom);
-+		spin_lock_txnmgr(tmgr);
-+		spin_lock_atom(atom);
-+		lock_dropped = 1;
-+		if (atom->stage != stage_on_entry) {
-+			/* the atom moved on while it was unlocked */
-+			spin_unlock_txnmgr(tmgr);
-+			atom_dec_and_unlock(atom);
-+			return -E_REPEAT;
-+		}
-+		atomic_dec(&atom->refcount);
-+	}
-+
-+	list_for_each_entry(peer, &tmgr->atoms_list, atom_link) {
-+		/* skip ourselves, and any atom whose lock cannot be taken
-+		 * without waiting */
-+		if (atom == peer || !spin_trylock_atom(peer))
-+			continue;
-+		if (peer->stage < ASTAGE_PRE_COMMIT) {
-+			spin_unlock_txnmgr(tmgr);
-+			capture_fuse_into(peer, atom);
-+			/* all locks are lost we can only repeat here */
-+			return -E_REPEAT;
-+		}
-+		spin_unlock_atom(peer);
-+	}
-+
-+	/* nobody to fuse with: do not try again for this atom */
-+	atom->flags |= ATOM_CANCEL_FUSION;
-+	spin_unlock_txnmgr(tmgr);
-+
-+	if (!lock_dropped)
-+		return 0;
-+
-+	spin_unlock_atom(atom);
-+	return -E_REPEAT;
-+}
-+
-+/* Calls jnode_flush for current atom if it exists; if not, just take another
-+ atom and call jnode_flush() for it. If current transaction handle has
-+ already assigned atom (current atom) we have to close current transaction
-+ prior to switch to another atom or do something with current atom. This
-+ code tries to flush current atom.
-+
-+ flush_some_atom() is called as part of memory clearing process. It is
-+ invoked from balance_dirty_pages(), pdflushd, and entd.
-+
-+ If we can flush no nodes, atom is committed, because this frees memory.
-+
-+ If atom is too large or too old it is committed also.
-+
-+ @start: jnode to start flushing from; dropped (set to NULL) when it does not
-+ belong to the atom that was chosen,
-+ @nr_submitted: must point to 0 on entry; handed to flush_current_atom(),
-+ @wbc: writeback control; ->nr_to_write must be non-zero, ->nonblocking is
-+ consulted before throttling or waiting for commit,
-+ @flags: passed through to flush_current_atom().
-+
-+ Returns 0 (also when no atom could be picked, and when -E_REPEAT was seen
-+ after something had been submitted), otherwise the error returned by
-+ flush_current_atom().
-+*/
-+int
-+flush_some_atom(jnode * start, long *nr_submitted, const struct writeback_control *wbc,
-+ int flags)
-+{
-+ reiser4_context *ctx = get_current_context();
-+ txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr;
-+ txn_handle *txnh = ctx->trans;
-+ txn_atom *atom;
-+ int ret;
-+
-+ BUG_ON(wbc->nr_to_write == 0);
-+ BUG_ON(*nr_submitted != 0);
-+ assert("zam-1042", txnh != NULL);
-+ repeat:
-+ if (txnh->atom == NULL) {
-+ /* current atom is not available, take first from txnmgr */
-+ spin_lock_txnmgr(tmgr);
-+
-+ /* traverse the list of all atoms */
-+ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-+ /* lock atom before checking its state */
-+ spin_lock_atom(atom);
-+
-+ /*
-+ * we need an atom which is not being committed and
-+ * which has no flushers (jnode_flush() add one flusher
-+ * at the beginning and subtract one at the end).
-+ */
-+ if (atom->stage < ASTAGE_PRE_COMMIT &&
-+ atom->nr_flushers == 0) {
-+ spin_lock_txnh(txnh);
-+ capture_assign_txnh_nolock(atom, txnh);
-+ spin_unlock_txnh(txnh);
-+
-+ goto found; /* the atom stays locked, the txnmgr lock is dropped at "found" */
-+ }
-+
-+ spin_unlock_atom(atom);
-+ }
-+
-+ /*
-+ * Write throttling in the case that no atom can be
-+ * flushed/committed.
-+ */
-+ if (!current_is_pdflush() && !wbc->nonblocking) {
-+ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-+ spin_lock_atom(atom);
-+ /* Repeat the check from the above. */
-+ if (atom->stage < ASTAGE_PRE_COMMIT
-+ && atom->nr_flushers == 0) {
-+ spin_lock_txnh(txnh);
-+ capture_assign_txnh_nolock(atom, txnh);
-+ spin_unlock_txnh(txnh);
-+
-+ goto found;
-+ }
-+ if (atom->stage <= ASTAGE_POST_COMMIT) {
-+ spin_unlock_txnmgr(tmgr);
-+ /*
-+ * we just wait until atom's flusher
-+ * makes a progress in flushing or
-+ * committing the atom; the atom lock
-+ * is released inside the wait
-+ */
-+ reiser4_atom_wait_event(atom);
-+ goto repeat;
-+ }
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+ spin_unlock_txnmgr(tmgr);
-+ return 0;
-+ found:
-+ spin_unlock_txnmgr(tmgr);
-+ } else
-+ atom = get_current_atom_locked();
-+
-+ BUG_ON(atom->super != ctx->super);
-+ assert("vs-35", atom->super == ctx->super);
-+ if (start) {
-+ spin_lock_jnode(start);
-+ ret = (atom == start->atom) ? 1 : 0; /* "ret" is a scratch flag here: does @start belong to this atom? */
-+ spin_unlock_jnode(start);
-+ if (ret == 0)
-+ start = NULL;
-+ }
-+ ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start);
-+ if (ret == 0) {
-+ /* flush_current_atom returns 0 only if it submitted for write
-+ nothing */
-+ BUG_ON(*nr_submitted != 0);
-+ if (*nr_submitted == 0 || atom_should_commit_asap(atom)) { /* first operand is always true once the BUG_ON above has passed */
-+ if (atom->capture_count < tmgr->atom_min_size &&
-+ !(atom->flags & ATOM_CANCEL_FUSION)) {
-+ ret = txn_try_to_fuse_small_atom(tmgr, atom);
-+ if (ret == -E_REPEAT) {
-+ reiser4_preempt_point();
-+ goto repeat;
-+ }
-+ }
-+ /* if early flushing could not make more nodes clean,
-+ * or atom is too old/large,
-+ * we force current atom to commit */
-+ /* wait for commit completion but only if this
-+ * wouldn't stall pdflushd and ent thread. */
-+ if (!wbc->nonblocking && !ctx->entd)
-+ txnh->flags |= TXNH_WAIT_COMMIT;
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+ }
-+ spin_unlock_atom(atom);
-+ } else if (ret == -E_REPEAT) {
-+ if (*nr_submitted == 0) {
-+ /* let others who hamper flushing (hold longterm locks,
-+ for instance) free the way for flush */
-+ reiser4_preempt_point();
-+ goto repeat;
-+ }
-+ ret = 0; /* something was submitted, report success instead of -E_REPEAT */
-+ }
-+/*
-+ if (*nr_submitted > wbc->nr_to_write)
-+ warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted);
-+*/
-+ reiser4_txn_restart(ctx);
-+
-+ return ret;
-+}
-+
-+/* Drain @head, a list of jnodes linked through ->capture_link: the first node
-+ * is spin-locked, uncaptured and its reference put, over and over, until the
-+ * list is empty. reiser4_uncapture_block() is relied upon to take the node off
-+ * the list (and presumably to release the jnode lock - not visible here). */
-+void reiser4_invalidate_list(struct list_head *head)
-+{
-+	jnode *first;
-+
-+	if (list_empty(head))
-+		return;
-+
-+	do {
-+		first = list_entry(head->next, jnode, capture_link);
-+		spin_lock_jnode(first);
-+		reiser4_uncapture_block(first);
-+		jput(first);
-+	} while (!list_empty(head));
-+}
-+
-+/* Prepare @wlinks for use by the current thread: both list links become empty
-+ * lists, both callbacks are cleared and the current lock stack is recorded. */
-+static void init_wlinks(txn_wait_links * wlinks)
-+{
-+	INIT_LIST_HEAD(&wlinks->_fwaitfor_link);
-+	INIT_LIST_HEAD(&wlinks->_fwaiting_link);
-+
-+	wlinks->waiting_cb = NULL;
-+	wlinks->waitfor_cb = NULL;
-+
-+	wlinks->_lock_stack = get_current_lock_stack();
-+}
-+
-+/* Add the current thread (through on-stack wait links) to the atom's waitfor
-+ * list and wait for somebody to wake us up.
-+ *
-+ * Must be called with @atom spin-locked. A reference is taken on the atom for
-+ * the duration of the sleep and the atom lock is dropped; after wake-up the
-+ * links are removed under the atom lock and the reference is given back with
-+ * atom_dec_and_unlock(), so the caller must not assume the atom is still
-+ * locked on return. */
-+void reiser4_atom_wait_event(txn_atom * atom)
-+{
-+ txn_wait_links _wlinks;
-+
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-3156",
-+ lock_stack_isclean(get_current_lock_stack()) ||
-+ atom->nr_running_queues > 0);
-+
-+ init_wlinks(&_wlinks);
-+ list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list);
-+ atomic_inc(&atom->refcount); /* pin the atom while we sleep without its lock */
-+ spin_unlock_atom(atom);
-+
-+ reiser4_prepare_to_sleep(_wlinks._lock_stack);
-+ reiser4_go_to_sleep(_wlinks._lock_stack);
-+
-+ spin_lock_atom(atom);
-+ list_del(&_wlinks._fwaitfor_link); /* _wlinks lives on this stack frame, unlink before returning */
-+ atom_dec_and_unlock(atom);
-+}
-+
-+/* Move the spin-locked @atom to @stage. Stages only grow: going backwards
-+ * trips an assertion. When the stage really changes, everybody waiting for an
-+ * atom event is notified; setting the current stage again is a no-op. */
-+void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage)
-+{
-+	assert("nikita-3535", atom != NULL);
-+	assert_spin_locked(&(atom->alock));
-+	assert("nikita-3536", stage <= ASTAGE_INVALID);
-+	/* Excelsior! */
-+	assert("nikita-3537", stage >= atom->stage);
-+
-+	if (atom->stage == stage)
-+		return;
-+
-+	atom->stage = stage;
-+	reiser4_atom_send_event(atom);
-+}
-+
-+/* wake all threads which wait for an event on @atom, by waking its waitfor
-+ * list. @atom must be spin-locked by the caller. */
-+void reiser4_atom_send_event(txn_atom * atom)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ wakeup_atom_waitfor_list(atom);
-+}
-+
-+/* Informs txn manager code that owner of this txn_handle should wait atom commit completion (for
-+ example, because it does fsync(2)). Returns the raw TXNH_WAIT_COMMIT bit of
-+ h->flags: non-zero when set, not necessarily 1. */
-+static int should_wait_commit(txn_handle * h)
-+{
-+ return h->flags & TXNH_WAIT_COMMIT;
-+}
-+
-+typedef struct commit_data { /* state kept by commit_txnh() across try_commit_txnh() retries */
-+ txn_atom *atom; /* atom of ->txnh, looked up again by every try_commit_txnh() call */
-+ txn_handle *txnh; /* the transaction handle being closed */
-+ long nr_written; /* handed as nr_submitted to flush_current_atom()/commit_current_atom() */
-+ /* as an optimization we start committing atom by first trying to
-+ * flush it few times without switching into ASTAGE_CAPTURE_WAIT. This
-+ * allows to reduce stalls due to other threads waiting for atom in
-+ * ASTAGE_CAPTURE_WAIT stage. ->preflush is counter of these
-+ * preliminary flushes. */
-+ int preflush;
-+ /* have we waited on atom. Set together with incrementing
-+ * atom->nr_waiters before sleeping; the next try_commit_txnh() call
-+ * decrements nr_waiters again and clears it. */
-+ int wait;
-+ int failed; /* set when commit_current_atom() returned an error other than -E_REPEAT */
-+ int wake_ktxnmgrd_up; /* non-zero: commit_txnh() kicks ktxnmgrd after detaching the handle */
-+} commit_data;
-+
-+/*
-+ * Called from commit_txnh() repeatedly, until either error happens, or atom
-+ * commits successfully.
-+ *
-+ * Returns 0 when the caller may stop retrying (in that case the atom is left
-+ * spin-locked), non-zero when it has to call again. A hard error from
-+ * commit_current_atom() is returned once and remembered in cd->failed, which
-+ * makes the following call return 0.
-+ */
-+static int try_commit_txnh(commit_data * cd)
-+{
-+ int result;
-+
-+ assert("nikita-2968", lock_stack_isclean(get_current_lock_stack()));
-+
-+ /* Get the atom and txnh locked. */
-+ cd->atom = txnh_get_atom(cd->txnh);
-+ assert("jmacd-309", cd->atom != NULL);
-+ spin_unlock_txnh(cd->txnh);
-+
-+ if (cd->wait) {
-+ cd->atom->nr_waiters--; /* undo the increment made before the previous call went to sleep */
-+ cd->wait = 0;
-+ }
-+
-+ if (cd->atom->stage == ASTAGE_DONE)
-+ return 0;
-+
-+ if (cd->failed)
-+ return 0;
-+
-+ if (atom_should_commit(cd->atom)) {
-+ /* if atom is _very_ large schedule it for commit as soon as
-+ * possible. */
-+ if (atom_should_commit_asap(cd->atom)) {
-+ /*
-+ * When atom is in PRE_COMMIT or later stage following
-+ * invariant (encoded in atom_can_be_committed())
-+ * holds: there is exactly one non-waiter transaction
-+ * handle opened on this atom. When thread wants to
-+ * wait until atom commits (for example sync()) it
-+ * waits on atom event after increasing
-+ * atom->nr_waiters (see below in this function). It
-+ * cannot be guaranteed that atom is already committed
-+ * after receiving event, so loop has to be
-+ * re-started. But if atom switched into PRE_COMMIT
-+ * stage and became too large, we cannot change its
-+ * state back to CAPTURE_WAIT (atom stage can only
-+ * increase monotonically), hence this check.
-+ */
-+ if (cd->atom->stage < ASTAGE_CAPTURE_WAIT)
-+ reiser4_atom_set_stage(cd->atom,
-+ ASTAGE_CAPTURE_WAIT);
-+ cd->atom->flags |= ATOM_FORCE_COMMIT;
-+ }
-+ if (cd->txnh->flags & TXNH_DONT_COMMIT) {
-+ /*
-+ * this thread (transaction handle that is) doesn't
-+ * want to commit atom. Notify waiters that handle is
-+ * closed. This can happen, for example, when we are
-+ * under VFS directory lock and don't want to commit
-+ * atom right now to avoid stalling other threads
-+ * working in the same directory.
-+ */
-+
-+ /* Wake the ktxnmgrd up if the ktxnmgrd is needed to
-+ * commit this atom: no atom waiters and only one
-+ * (our) open transaction handle. */
-+ cd->wake_ktxnmgrd_up =
-+ cd->atom->txnh_count == 1 &&
-+ cd->atom->nr_waiters == 0;
-+ reiser4_atom_send_event(cd->atom);
-+ result = 0;
-+ } else if (!atom_can_be_committed(cd->atom)) {
-+ if (should_wait_commit(cd->txnh)) {
-+ /* sync(): wait for commit */
-+ cd->atom->nr_waiters++;
-+ cd->wait = 1;
-+ reiser4_atom_wait_event(cd->atom); /* sleeps; the atom lock is released inside */
-+ result = RETERR(-E_REPEAT);
-+ } else {
-+ result = 0;
-+ }
-+ } else if (cd->preflush > 0 && !is_current_ktxnmgrd()) {
-+ /*
-+ * optimization: flush atom without switching it into
-+ * ASTAGE_CAPTURE_WAIT.
-+ *
-+ * But don't do this for ktxnmgrd, because ktxnmgrd
-+ * should never block on atom fusion.
-+ */
-+ result = flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS,
-+ LONG_MAX, &cd->nr_written,
-+ &cd->atom, NULL);
-+ if (result == 0) {
-+ spin_unlock_atom(cd->atom);
-+ cd->preflush = 0;
-+ result = RETERR(-E_REPEAT);
-+ } else /* Atoms wasn't flushed
-+ * completely. Rinse. Repeat. */
-+ --cd->preflush;
-+ } else {
-+ /* We change atom state to ASTAGE_CAPTURE_WAIT to
-+ prevent atom fusion and count ourself as an active
-+ flusher */
-+ reiser4_atom_set_stage(cd->atom, ASTAGE_CAPTURE_WAIT);
-+ cd->atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ result =
-+ commit_current_atom(&cd->nr_written, &cd->atom);
-+ if (result != 0 && result != -E_REPEAT)
-+ cd->failed = 1; /* remember the hard error: the next call returns 0 and ends the retry loop */
-+ }
-+ } else
-+ result = 0;
-+
-+#if REISER4_DEBUG
-+ if (result == 0)
-+ assert_spin_locked(&(cd->atom->alock));
-+#endif
-+
-+ /* perfectly valid assertion, except that when atom/txnh is not locked
-+ * fusion can take place, and cd->atom points nowhere. */
-+ /*
-+ assert("jmacd-1028", ergo(result != 0, spin_atom_is_not_locked(cd->atom)));
-+ */
-+ return result;
-+}
-+
-+/* Called to commit a transaction handle. This decrements the atom's number of open
-+ handles and if it is the last handle to commit and the atom should commit, initiates
-+ atom commit. Always returns 0: the number of written blocks is accumulated in
-+ the local commit_data (cd.nr_written) but is not returned, and an error seen by
-+ try_commit_txnh() only ends the retry loop. */
-+static int commit_txnh(txn_handle * txnh)
-+{
-+ commit_data cd;
-+ assert("umka-192", txnh != NULL);
-+
-+ memset(&cd, 0, sizeof cd);
-+ cd.txnh = txnh;
-+ cd.preflush = 10; /* number of preliminary flush attempts, see commit_data.preflush */
-+
-+ /* calls try_commit_txnh() until either atom commits, or error
-+ * happens */
-+ while (try_commit_txnh(&cd) != 0)
-+ reiser4_preempt_point();
-+
-+ spin_lock_txnh(txnh);
-+
-+ cd.atom->txnh_count -= 1;
-+ txnh->atom = NULL;
-+ /* remove transaction handle from atom's list of transaction handles */
-+ list_del_init(&txnh->txnh_link);
-+
-+ spin_unlock_txnh(txnh);
-+ atom_dec_and_unlock(cd.atom);
-+ /* if we don't want to do a commit (TXNH_DONT_COMMIT is set, probably
-+ * because it takes time) by current thread, we do that work
-+ * asynchronously by ktxnmgrd daemon. */
-+ if (cd.wake_ktxnmgrd_up)
-+ ktxnmgrd_kick(&get_current_super_private()->tmgr);
-+
-+ return 0;
-+}
-+
-+/* TRY_CAPTURE */
-+
-+/* This routine attempts a single block-capture request. It may return -E_REPEAT if some
-+ condition indicates that the request should be retried, and it may block if the
-+ txn_capture mode does not include the TXN_CAPTURE_NONBLOCKING request flag.
-+
-+ This routine encodes the basic logic of block capturing described by:
-+
-+ http://namesys.com/v4/v4.html
-+
-+ Our goal here is to ensure that any two blocks that contain dependent modifications
-+ should commit at the same time. This function enforces this discipline by initiating
-+ fusion whenever a transaction handle belonging to one atom requests to read or write a
-+ block belonging to another atom (TXN_CAPTURE_WRITE or TXN_CAPTURE_READ_ATOMIC).
-+
-+ In addition, this routine handles the initial assignment of atoms to blocks and
-+ transaction handles. These are possible outcomes of this function:
-+
-+ 1. The block and handle are already part of the same atom: return immediate success
-+
-+ 2. The block is assigned but the handle is not: call capture_assign_txnh to assign
-+ the handle to the block's atom.
-+
-+ 3. The handle is assigned but the block is not: call capture_assign_block to assign
-+ the block to the handle's atom.
-+
-+ 4. Both handle and block are assigned, but to different atoms: call capture_init_fusion
-+ to fuse atoms.
-+
-+ 5. Neither block nor handle are assigned: create a new atom and assign them both.
-+
-+ 6. A read request for a non-captured block: return immediate success.
-+
-+ This function acquires and releases the handle's spinlock. This function is called
-+ under the jnode lock and if the return value is 0, it returns with the jnode lock still
-+ held. If the return is -E_REPEAT or some other error condition, the jnode lock is
-+ released. The external interface (reiser4_try_capture) manages re-acquiring the jnode
-+ lock in the failure case.
-+
-+ @txnh: transaction handle of the requestor,
-+ @node: spin-locked jnode to capture,
-+ @mode: capture mode and flags; must not be of type TXN_CAPTURE_READ_NONCOM,
-+ @atom_alloc: in/out spare atom, handed to atom_begin_and_assign_to_txnh() when
-+ neither the handle nor the block has an atom.
-+*/
-+static int try_capture_block(
-+ txn_handle * txnh, jnode * node, txn_capture mode,
-+ txn_atom ** atom_alloc)
-+{
-+ txn_atom *block_atom;
-+ txn_atom *txnh_atom;
-+
-+ /* Should not call capture for READ_NONCOM requests, handled in reiser4_try_capture. */
-+ assert("jmacd-567", CAPTURE_TYPE(mode) != TXN_CAPTURE_READ_NONCOM);
-+
-+ /* FIXME-ZAM-HANS: FIXME_LATER_JMACD Should assert that atom->tree ==
-+ * node->tree somewhere. */
-+ assert("umka-194", txnh != NULL);
-+ assert("umka-195", node != NULL);
-+
-+ /* The jnode is already locked! Being called from reiser4_try_capture(). */
-+ assert_spin_locked(&(node->guard));
-+ block_atom = node->atom;
-+
-+ /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't
-+ let us touch the atoms themselves. */
-+ spin_lock_txnh(txnh);
-+ txnh_atom = txnh->atom;
-+ /* The capturing process continues into one of four branches, depending
-+ on which of the two atoms (block atom (node->atom), current atom
-+ (txnh->atom)) exist. */
-+ if (txnh_atom == NULL) {
-+ if (block_atom == NULL) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ /* assign empty atom to the txnh and repeat */
-+ return atom_begin_and_assign_to_txnh(atom_alloc, txnh);
-+ } else {
-+ atomic_inc(&block_atom->refcount); /* pin the block's atom while we work on getting its lock */
-+ /* node spin-lock isn't needed anymore */
-+ spin_unlock_jnode(node);
-+ if (!spin_trylock_atom(block_atom)) {
-+ spin_unlock_txnh(txnh);
-+ spin_lock_atom(block_atom);
-+ spin_lock_txnh(txnh);
-+ }
-+ /* re-check state after getting txnh and the node
-+ * atom spin-locked */
-+ if (node->atom != block_atom || txnh->atom != NULL) {
-+ spin_unlock_txnh(txnh);
-+ atom_dec_and_unlock(block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ atomic_dec(&block_atom->refcount);
-+ if (block_atom->stage > ASTAGE_CAPTURE_WAIT ||
-+ (block_atom->stage == ASTAGE_CAPTURE_WAIT &&
-+ block_atom->txnh_count != 0))
-+ return capture_fuse_wait(txnh, block_atom, NULL, mode);
-+ capture_assign_txnh_nolock(block_atom, txnh);
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(block_atom);
-+ return RETERR(-E_REPEAT); /* handle now has an atom; the caller retries and takes another branch */
-+ }
-+ } else {
-+ /* It is time to perform deadlock prevention check over the
-+ node we want to capture. It is possible this node was locked
-+ for read without capturing it. The optimization which allows
-+ to do it helps us in keeping atoms independent as long as
-+ possible but it may cause lock/fuse deadlock problems.
-+
-+ A number of similar deadlock situations with locked but not
-+ captured nodes were found. In each situation there are two
-+ or more threads: one of them does flushing while another one
-+ does routine balancing or tree lookup. The flushing thread
-+ (F) sleeps in long term locking request for node (N), another
-+ thread (A) sleeps in trying to capture some node already
-+ belonging the atom F, F has a state which prevents
-+ immediate fusion.
-+
-+ Deadlocks of this kind cannot happen if node N was properly
-+ captured by thread A. The F thread fuse atoms before locking
-+ therefore current atom of thread F and current atom of thread
-+ A became the same atom and thread A may proceed. This does
-+ not work if node N was not captured because the fusion of
-+ atoms does not happen.
-+
-+ The following scheme solves the deadlock: If
-+ longterm_lock_znode locks and does not capture a znode, that
-+ znode is marked as MISSED_IN_CAPTURE. A node marked this way
-+ is processed by the code below which restores the missed
-+ capture and fuses current atoms of all the node lock owners
-+ by calling the fuse_not_fused_lock_owners() function. */
-+ if (JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) {
-+ JF_CLR(node, JNODE_MISSED_IN_CAPTURE);
-+ if (jnode_is_znode(node) && znode_is_locked(JZNODE(node))) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ fuse_not_fused_lock_owners(txnh, JZNODE(node));
-+ return RETERR(-E_REPEAT);
-+ }
-+ }
-+ if (block_atom == NULL) {
-+ atomic_inc(&txnh_atom->refcount); /* pin the handle's atom across the lock juggling below */
-+ spin_unlock_txnh(txnh);
-+ if (!spin_trylock_atom(txnh_atom)) {
-+ spin_unlock_jnode(node);
-+ spin_lock_atom(txnh_atom);
-+ spin_lock_jnode(node);
-+ }
-+ if (txnh->atom != txnh_atom || node->atom != NULL
-+ || JF_ISSET(node, JNODE_IS_DYING)) {
-+ spin_unlock_jnode(node);
-+ atom_dec_and_unlock(txnh_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ atomic_dec(&txnh_atom->refcount);
-+ capture_assign_block_nolock(txnh_atom, node);
-+ spin_unlock_atom(txnh_atom);
-+ } else {
-+ if (txnh_atom != block_atom) {
-+ if (mode & TXN_CAPTURE_DONT_FUSE) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ /* we are in a "no-fusion" mode and @node is
-+ * already part of transaction. */
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+ return capture_init_fusion(node, txnh, mode);
-+ }
-+ spin_unlock_txnh(txnh);
-+ }
-+ }
-+ return 0; /* success: the jnode lock is still held, the txnh lock is not */
-+}
-+
-+/* Work out the capture mode for a lock request on the spin-locked @node.
-+ *
-+ * A write lock, or any lock on a node that already belongs to an atom, is
-+ * captured as TXN_CAPTURE_WRITE, with the TXN_CAPTURE_NONBLOCKING and
-+ * TXN_CAPTURE_DONT_FUSE bits of @flags carried over. A read lock on a node
-+ * without an atom yields 0, i.e. "do not capture". */
-+static txn_capture
-+build_capture_mode(jnode * node, znode_lock_mode lock_mode, txn_capture flags)
-+{
-+	txn_capture cap_mode;
-+
-+	assert_spin_locked(&(node->guard));
-+
-+	/* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */
-+
-+	if (lock_mode != ZNODE_WRITE_LOCK && node->atom == NULL) {
-+		/* Read lock of a node outside of any atom: there's no reason
-+		 * to capture (this would be TXN_CAPTURE_READ_NONCOM).
-+		 *
-+		 * NOTE-NIKITA TXN_READ_FUSING is not currently used. If it
-+		 * were, leaf-level nodes would get TXN_CAPTURE_READ_ATOMIC
-+		 * here: only leaves need a read-fusing capture, because the
-+		 * internal tree levels (twigs included) are read and written
-+		 * by the file system itself, while the user who asked for a
-+		 * read-fusing transcrash only reads and writes leaves. */
-+		return 0;
-+	}
-+
-+	cap_mode = TXN_CAPTURE_WRITE;
-+	cap_mode |= (flags & (TXN_CAPTURE_NONBLOCKING | TXN_CAPTURE_DONT_FUSE));
-+	assert("nikita-3186", cap_mode != 0);
-+	return cap_mode;
-+}
-+
-+/* This is an external interface to try_capture_block(), it calls
-+   try_capture_block() repeatedly as long as -E_REPEAT is returned.
-+
-+   @node:         node to capture; its spin lock must be held by the caller,
-+   @lock_mode:    read or write lock is used in capture mode calculation,
-+   @flags:        see txn_capture flags enumeration.
-+
-+   @return: 0 - node was successfully captured (or needs no capture),
-+	    -E_REPEAT - capture request cannot be processed immediately as it
-+	    was requested in flags (TXN_CAPTURE_NONBLOCKING),
-+	    -EINVAL - node is dying,
-+	    < 0 - other errors.
-+
-+   The jnode spin lock is held again on every return path. A spare atom that
-+   try_capture_block() allocated but did not use is freed on every return
-+   path as well, including the early exits taken after an -E_REPEAT round.
-+*/
-+int reiser4_try_capture(jnode *node, znode_lock_mode lock_mode,
-+			txn_capture flags)
-+{
-+	txn_atom *atom_alloc = NULL;
-+	txn_capture cap_mode;
-+	txn_handle *txnh = get_current_context()->trans;
-+	int ret;
-+
-+	assert_spin_locked(&(node->guard));
-+
-+ repeat:
-+	/* The three early exits below leave with the jnode lock held. They
-+	 * go through "out" rather than returning directly, because a
-+	 * previous -E_REPEAT round may have left an unused atom behind in
-+	 * atom_alloc. */
-+	if (JF_ISSET(node, JNODE_IS_DYING)) {
-+		ret = RETERR(-EINVAL);
-+		goto out;
-+	}
-+	if (node->atom != NULL && txnh->atom == node->atom) {
-+		ret = 0;
-+		goto out;
-+	}
-+	cap_mode = build_capture_mode(node, lock_mode, flags);
-+	if (cap_mode == 0 ||
-+	    (!(cap_mode & TXN_CAPTURE_WTYPES) && node->atom == NULL)) {
-+		/* Mark this node as "MISSED". It helps in further deadlock
-+		 * analysis */
-+		if (jnode_is_znode(node))
-+			JF_SET(node, JNODE_MISSED_IN_CAPTURE);
-+		ret = 0;
-+		goto out;
-+	}
-+	/* Repeat try_capture as long as -E_REPEAT is returned. */
-+	ret = try_capture_block(txnh, node, cap_mode, &atom_alloc);
-+	/* Regardless of non_blocking:
-+
-+	   If ret == 0 then jnode is still locked.
-+	   If ret != 0 then jnode is unlocked.
-+	 */
-+#if REISER4_DEBUG
-+	if (ret == 0)
-+		assert_spin_locked(&(node->guard));
-+	else
-+		assert_spin_not_locked(&(node->guard));
-+#endif
-+	/* the handle's spin lock is ->hlock (->guard is the jnode's lock) */
-+	assert_spin_not_locked(&(txnh->hlock));
-+
-+	if (ret == -E_REPEAT) {
-+		/* E_REPEAT implies all locks were released, therefore we need
-+		   to take the jnode's lock again. */
-+		spin_lock_jnode(node);
-+
-+		/* Although this may appear to be a busy loop, it is not.
-+		   There are several conditions that cause E_REPEAT to be
-+		   returned by the call to try_capture_block, all cases
-+		   indicating some kind of state change that means you should
-+		   retry the request and will get a different result. In some
-+		   cases this could be avoided with some extra code, but
-+		   generally it is done because the necessary locks were
-+		   released as a result of the operation and repeating is the
-+		   simplest thing to do (less bug potential). The cases are:
-+		   atom fusion returns E_REPEAT after it completes (jnode and
-+		   txnh were unlocked); race conditions in assign_block,
-+		   assign_txnh, and init_fusion return E_REPEAT (trylock
-+		   failure); after going to sleep in capture_fuse_wait
-+		   (request was blocked but may now succeed). I'm not quite
-+		   sure how capture_copy works yet, but it may also return
-+		   E_REPEAT. When the request is legitimately blocked, the
-+		   requestor goes to sleep in fuse_wait, so this is not a busy
-+		   loop. */
-+		/* NOTE-NIKITA: still don't understand:
-+
-+		   try_capture_block->capture_assign_txnh->spin_trylock_atom->E_REPEAT
-+
-+		   looks like busy loop?
-+		 */
-+		goto repeat;
-+	}
-+
-+	/* free extra atom object that was possibly allocated by
-+	   try_capture_block().
-+
-+	   On failure this is done before re-acquiring the jnode spin lock
-+	   to minimize time spent under lock. --nikita */
-+	if (atom_alloc != NULL) {
-+		kmem_cache_free(_atom_slab, atom_alloc);
-+		atom_alloc = NULL;
-+	}
-+
-+	if (ret != 0) {
-+		if (ret == -E_BLOCK) {
-+			assert("nikita-3360",
-+			       cap_mode & TXN_CAPTURE_NONBLOCKING);
-+			ret = -E_REPEAT;
-+		}
-+
-+		/* Failure means jnode is not locked.  FIXME_LATER_JMACD May
-+		   want to fix the above code to avoid releasing the lock and
-+		   re-acquiring it, but there are cases were failure occurs
-+		   when the lock is not held, and those cases would need to be
-+		   modified to re-take the lock. */
-+		spin_lock_jnode(node);
-+	}
-+
-+ out:
-+	/* only non-NULL here when an early exit was taken after a retry */
-+	if (atom_alloc != NULL)
-+		kmem_cache_free(_atom_slab, atom_alloc);
-+
-+	/* Jnode is still locked. */
-+	assert_spin_locked(&(node->guard));
-+	return ret;
-+}
-+
-+/* Give back one reference on each of two spin-locked atoms and leave both of
-+ * them unlocked. The statement order is deliberate: @one is unlocked first so
-+ * that only a single atom lock is held whenever atom_dec_and_unlock() runs. */
-+static void release_two_atoms(txn_atom *one, txn_atom *two)
-+{
-+	/* let go of @one's lock while @two's reference is dropped ... */
-+	spin_unlock_atom(one);
-+	atom_dec_and_unlock(two);
-+
-+	/* ... then come back for @one */
-+	spin_lock_atom(one);
-+	atom_dec_and_unlock(one);
-+}
-+
-+/* NOTE: this paragraph appears to describe reiser4_try_capture() rather than
-+ the function that follows it. That function sets up a call to
-+ try_capture_block() and repeats as long as -E_REPEAT is returned by that
-+ routine. The txn_capture request mode is computed there depending on the
-+ transaction handle's type and the lock request. It is called from the depths
-+ of the lock manager with the jnode lock held and it always returns with the
-+ jnode lock held.
-+*/
-+
-+/* fuse all 'active' atoms of lock owners of given node.
-+ *
-+ * Called with neither the jnode lock of @node nor the lock of @txnh held.
-+ * Each pass takes the atom of @txnh (locked), walks the long-term lock owners
-+ * of @node and stops at the first owner, other than the current context, whose
-+ * transaction is on a different atom. That owner is woken up and its handle
-+ * is either assigned to our atom (when it had none) or the two atoms are
-+ * fused. The scan is then restarted, until a pass finds no such owner. */
-+static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node)
-+{
-+ lock_handle *lh;
-+ int repeat;
-+ txn_atom *atomh, *atomf;
-+ reiser4_context *me = get_current_context();
-+ reiser4_context *ctx = NULL;
-+
-+ assert_spin_not_locked(&(ZJNODE(node)->guard));
-+ assert_spin_not_locked(&(txnh->hlock));
-+
-+ repeat:
-+ repeat = 0;
-+ atomh = txnh_get_atom(txnh);
-+ spin_unlock_txnh(txnh);
-+ assert("zam-692", atomh != NULL);
-+
-+ spin_lock_zlock(&node->lock);
-+ /* inspect list of lock owners */
-+ list_for_each_entry(lh, &node->lock.owners, owners_link) {
-+ ctx = get_context_by_lock_stack(lh->owner);
-+ if (ctx == me)
-+ continue;
-+ /* below we use two assumptions to avoid addition spin-locks
-+ for checking the condition :
-+
-+ 1) if the lock stack has lock, the transaction should be
-+ opened, i.e. ctx->trans != NULL;
-+
-+ 2) reading of well-aligned ctx->trans->atom is atomic, if it
-+ equals to the address of spin-locked atomh, we take that
-+ the atoms are the same, nothing has to be captured. */
-+ if (atomh != ctx->trans->atom) {
-+ reiser4_wake_up(lh->owner);
-+ repeat = 1;
-+ break; /* "ctx" keeps pointing at the owner that has to be dealt with */
-+ }
-+ }
-+ if (repeat) {
-+ if (!spin_trylock_txnh(ctx->trans)) {
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+ goto repeat;
-+ }
-+ atomf = ctx->trans->atom;
-+ if (atomf == NULL) {
-+ capture_assign_txnh_nolock(atomh, ctx->trans);
-+ /* release zlock lock _after_ assigning the atom to the
-+ * transaction handle, otherwise the lock owner thread
-+ * may unlock all znodes, exit kernel context and here
-+ * we would access an invalid transaction handle. */
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+ spin_unlock_txnh(ctx->trans);
-+ goto repeat;
-+ }
-+ assert("zam-1059", atomf != atomh);
-+ spin_unlock_zlock(&node->lock);
-+ atomic_inc(&atomh->refcount); /* pin both atoms: atomh may be unlocked just below */
-+ atomic_inc(&atomf->refcount);
-+ spin_unlock_txnh(ctx->trans);
-+ if (atomf > atomh) { /* both branches take the lower-addressed atom's lock first */
-+ spin_lock_atom_nested(atomf);
-+ } else {
-+ spin_unlock_atom(atomh);
-+ spin_lock_atom(atomf);
-+ spin_lock_atom_nested(atomh);
-+ }
-+ if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) {
-+ release_two_atoms(atomf, atomh);
-+ goto repeat;
-+ }
-+ atomic_dec(&atomh->refcount);
-+ atomic_dec(&atomf->refcount);
-+ capture_fuse_into(atomf, atomh);
-+ goto repeat;
-+ }
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+}
-+
-+/* This is the interface to capture unformatted nodes via their struct page
-+ reference. Currently it is only used in reiser4_invalidatepage */
-+int try_capture_page_to_invalidate(struct page *pg)
-+{
-+ int ret;
-+ jnode *node;
-+
-+ assert("umka-292", pg != NULL);
-+ assert("nikita-2597", PageLocked(pg));
-+
-+ if (IS_ERR(node = jnode_of_page(pg))) {
-+ return PTR_ERR(node);
-+ }
-+
-+ spin_lock_jnode(node);
-+ unlock_page(pg);
-+
-+ ret = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ spin_unlock_jnode(node);
-+ jput(node);
-+ lock_page(pg);
-+ return ret;
-+}
-+
-+/* This informs the transaction manager when a node is deleted. Add the block to the
-+ atom's delete set and uncapture the block.
-+
-+VS-FIXME-HANS: this E_REPEAT paradigm clutters the code and creates a need for
-+explanations. find all the functions that use it, and unless there is some very
-+good reason to use it (I have not noticed one so far and I doubt it exists, but maybe somewhere somehow....),
-+move the loop to inside the function.
-+
-+VS-FIXME-HANS: can this code be at all streamlined? In particular, can you lock and unlock the jnode fewer times?
-+ */
-+void reiser4_uncapture_page(struct page *pg)
-+{
-+ jnode *node;
-+ txn_atom *atom;
-+
-+ assert("umka-199", pg != NULL);
-+ assert("nikita-3155", PageLocked(pg));
-+
-+ clear_page_dirty_for_io(pg);
-+
-+ reiser4_wait_page_writeback(pg);
-+
-+ node = jprivate(pg);
-+ BUG_ON(node == NULL);
-+
-+ spin_lock_jnode(node);
-+
-+ atom = jnode_get_atom(node);
-+ if (atom == NULL) {
-+ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+
-+ /* We can remove jnode from transaction even if it is on flush queue
-+ * prepped list, we only need to be sure that flush queue is not being
-+ * written by reiser4_write_fq(). reiser4_write_fq() does not use atom
-+ * spin lock for protection of the prepped nodes list, instead
-+ * write_fq() increments atom's nr_running_queues counters for the time
-+ * when prepped list is not protected by spin lock. Here we check this
-+ * counter if we want to remove jnode from flush queue and, if the
-+ * counter is not zero, wait all reiser4_write_fq() for this atom to
-+ * complete. This is not significant overhead. */
-+ while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) {
-+ spin_unlock_jnode(node);
-+ /*
-+ * at this moment we want to wait for "atom event", viz. wait
-+ * until @node can be removed from flush queue. But
-+ * reiser4_atom_wait_event() cannot be called with page locked,
-+ * because it deadlocks with jnode_extent_write(). Unlock page,
-+ * after making sure (through page_cache_get()) that it cannot
-+ * be released from memory.
-+ */
-+ page_cache_get(pg);
-+ unlock_page(pg);
-+ reiser4_atom_wait_event(atom);
-+ lock_page(pg);
-+ /*
-+ * page may has been detached by ->writepage()->releasepage().
-+ */
-+ reiser4_wait_page_writeback(pg);
-+ spin_lock_jnode(node);
-+ page_cache_release(pg);
-+ atom = jnode_get_atom(node);
-+/* VS-FIXME-HANS: improve the commenting in this function */
-+ if (atom == NULL) {
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+ }
-+ reiser4_uncapture_block(node);
-+ spin_unlock_atom(atom);
-+ jput(node);
-+}
-+
-+/* this is used in extent's kill hook to uncapture and unhash jnodes attached to
-+ * inode's tree of jnodes */
-+void reiser4_uncapture_jnode(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert_spin_locked(&(node->guard));
-+ assert("", node->pg == 0);
-+
-+ atom = jnode_get_atom(node);
-+ if (atom == NULL) {
-+ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+
-+ reiser4_uncapture_block(node);
-+ spin_unlock_atom(atom);
-+ jput(node);
-+}
-+
-+/* No-locking version of assign_txnh. Sets the transaction handle's atom pointer,
-+ increases atom refcount and txnh_count, adds to txnh_list. */
-+static void capture_assign_txnh_nolock(txn_atom *atom, txn_handle *txnh)
-+{
-+ assert("umka-200", atom != NULL);
-+ assert("umka-201", txnh != NULL);
-+
-+ assert_spin_locked(&(txnh->hlock));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-824", txnh->atom == NULL);
-+ assert("nikita-3540", atom_isopen(atom));
-+ BUG_ON(txnh->atom != NULL);
-+
-+ atomic_inc(&atom->refcount);
-+ txnh->atom = atom;
-+ reiser4_ctx_gfp_mask_set();
-+ list_add_tail(&txnh->txnh_link, &atom->txnh_list);
-+ atom->txnh_count += 1;
-+}
-+
-+/* No-locking version of assign_block. Sets the block's atom pointer, references the
-+ block, adds it to the clean or dirty capture_jnode list, increments capture_count. */
-+static void capture_assign_block_nolock(txn_atom *atom, jnode *node)
-+{
-+ assert("umka-202", atom != NULL);
-+ assert("umka-203", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-323", node->atom == NULL);
-+ BUG_ON(!list_empty_careful(&node->capture_link));
-+ assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY));
-+
-+ /* Pointer from jnode to atom is not counted in atom->refcount. */
-+ node->atom = atom;
-+
-+ list_add_tail(&node->capture_link, ATOM_CLEAN_LIST(atom));
-+ atom->capture_count += 1;
-+ /* reference to jnode is acquired by atom. */
-+ jref(node);
-+
-+ ON_DEBUG(count_jnode(atom, node, NOT_CAPTURED, CLEAN_LIST, 1));
-+
-+ LOCK_CNT_INC(t_refs);
-+}
-+
-+/* common code for dirtying both unformatted jnodes and formatted znodes. */
-+static void do_jnode_make_dirty(jnode * node, txn_atom * atom)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY));
-+
-+ JF_SET(node, JNODE_DIRTY);
-+
-+ get_current_context()->nr_marked_dirty++;
-+
-+ /* We grab2flush_reserve one additional block only if node was
-+ not CREATED and jnode_flush did not sort it into neither
-+ relocate set nor overwrite one. If node is in overwrite or
-+ relocate set we assume that atom's flush reserved counter was
-+ already adjusted. */
-+ if (!JF_ISSET(node, JNODE_CREATED) && !JF_ISSET(node, JNODE_RELOC)
-+ && !JF_ISSET(node, JNODE_OVRWR) && jnode_is_leaf(node)
-+ && !jnode_is_cluster_page(node)) {
-+ assert("vs-1093", !reiser4_blocknr_is_fake(&node->blocknr));
-+ assert("vs-1506", *jnode_get_block(node) != 0);
-+ grabbed2flush_reserved_nolock(atom, (__u64) 1);
-+ JF_SET(node, JNODE_FLUSH_RESERVED);
-+ }
-+
-+ if (!JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
-+ /* If the atom is not set yet, it will be added to the appropriate list in
-+ capture_assign_block_nolock. */
-+ /* Sometimes a node is set dirty before being captured -- the case for new
-+ jnodes. In that case the jnode will be added to the appropriate list
-+ in capture_assign_block_nolock. Another reason not to re-link jnode is
-+ that jnode is on a flush queue (see flush.c for details) */
-+
-+ int level = jnode_get_level(node);
-+
-+ assert("nikita-3152", !JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-654", atom->stage < ASTAGE_PRE_COMMIT);
-+ assert("nikita-2607", 0 <= level);
-+ assert("nikita-2606", level <= REAL_MAX_ZTREE_HEIGHT);
-+
-+ /* move node to atom's dirty list */
-+ list_move_tail(&node->capture_link, ATOM_DIRTY_LIST(atom, level));
-+ ON_DEBUG(count_jnode
-+ (atom, node, NODE_LIST(node), DIRTY_LIST, 1));
-+ }
-+}
-+
-+/* Set the dirty status for this (spin locked) jnode. */
-+void jnode_make_dirty_locked(jnode * node)
-+{
-+ assert("umka-204", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+
-+ if (REISER4_DEBUG && rofs_jnode(node)) {
-+ warning("nikita-3365", "Dirtying jnode on rofs");
-+ dump_stack();
-+ }
-+
-+ /* Fast check for already dirty node */
-+ if (!JF_ISSET(node, JNODE_DIRTY)) {
-+ txn_atom *atom;
-+
-+ atom = jnode_get_atom(node);
-+ assert("vs-1094", atom);
-+ /* Check jnode dirty status again because node spin lock might
-+ * be released inside jnode_get_atom(). */
-+ if (likely(!JF_ISSET(node, JNODE_DIRTY)))
-+ do_jnode_make_dirty(node, atom);
-+ spin_unlock_atom(atom);
-+ }
-+}
-+
-+/* Set the dirty status for this znode. */
-+void znode_make_dirty(znode * z)
-+{
-+ jnode *node;
-+ struct page *page;
-+
-+ assert("umka-204", z != NULL);
-+ assert("nikita-3290", znode_above_root(z) || znode_is_loaded(z));
-+ assert("nikita-3560", znode_is_write_locked(z));
-+
-+ node = ZJNODE(z);
-+ /* znode is longterm locked, we can check dirty bit without spinlock */
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /* znode is dirty already. All we have to do is to change znode version */
-+ z->version = znode_build_version(jnode_get_tree(node));
-+ return;
-+ }
-+
-+ spin_lock_jnode(node);
-+ jnode_make_dirty_locked(node);
-+ page = jnode_page(node);
-+ if (page != NULL) {
-+ /* this is useful assertion (allows one to check that no
-+ * modifications are lost due to update of in-flight page),
-+ * but it requires locking on page to check PG_writeback
-+ * bit. */
-+ /* assert("nikita-3292",
-+ !PageWriteback(page) || ZF_ISSET(z, JNODE_WRITEBACK)); */
-+ page_cache_get(page);
-+
-+ /* jnode lock is not needed for the rest of
-+ * znode_set_dirty(). */
-+ spin_unlock_jnode(node);
-+ /* reiser4 file write code calls set_page_dirty for
-+ * unformatted nodes, for formatted nodes we do it here. */
-+ reiser4_set_page_dirty_internal(page);
-+ page_cache_release(page);
-+ /* bump version counter in znode */
-+ z->version = znode_build_version(jnode_get_tree(node));
-+ } else {
-+ assert("zam-596", znode_above_root(JZNODE(node)));
-+ spin_unlock_jnode(node);
-+ }
-+
-+ assert("nikita-1900", znode_is_write_locked(z));
-+ assert("jmacd-9777", node->atom != NULL);
-+}
-+
-+int reiser4_sync_atom(txn_atom * atom)
-+{
-+ int result;
-+ txn_handle *txnh;
-+
-+ txnh = get_current_context()->trans;
-+
-+ result = 0;
-+ if (atom != NULL) {
-+ if (atom->stage < ASTAGE_PRE_COMMIT) {
-+ spin_lock_txnh(txnh);
-+ capture_assign_txnh_nolock(atom, txnh);
-+ result = force_commit_atom(txnh);
-+ } else if (atom->stage < ASTAGE_POST_COMMIT) {
-+ /* wait atom commit */
-+ reiser4_atom_wait_event(atom);
-+ /* try once more */
-+ result = RETERR(-E_REPEAT);
-+ } else
-+ spin_unlock_atom(atom);
-+ }
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* move jnode form one list to another
-+ call this after atom->capture_count is updated */
-+void
-+count_jnode(txn_atom * atom, jnode * node, atom_list old_list,
-+ atom_list new_list, int check_lists)
-+{
-+ struct list_head *pos;
-+
-+ assert("zam-1018", atom_is_protected(atom));
-+ assert_spin_locked(&(node->guard));
-+ assert("", NODE_LIST(node) == old_list);
-+
-+ switch (NODE_LIST(node)) {
-+ case NOT_CAPTURED:
-+ break;
-+ case DIRTY_LIST:
-+ assert("", atom->dirty > 0);
-+ atom->dirty--;
-+ break;
-+ case CLEAN_LIST:
-+ assert("", atom->clean > 0);
-+ atom->clean--;
-+ break;
-+ case FQ_LIST:
-+ assert("", atom->fq > 0);
-+ atom->fq--;
-+ break;
-+ case WB_LIST:
-+ assert("", atom->wb > 0);
-+ atom->wb--;
-+ break;
-+ case OVRWR_LIST:
-+ assert("", atom->ovrwr > 0);
-+ atom->ovrwr--;
-+ break;
-+ default:
-+ impossible("", "");
-+ }
-+
-+ switch (new_list) {
-+ case NOT_CAPTURED:
-+ break;
-+ case DIRTY_LIST:
-+ atom->dirty++;
-+ break;
-+ case CLEAN_LIST:
-+ atom->clean++;
-+ break;
-+ case FQ_LIST:
-+ atom->fq++;
-+ break;
-+ case WB_LIST:
-+ atom->wb++;
-+ break;
-+ case OVRWR_LIST:
-+ atom->ovrwr++;
-+ break;
-+ default:
-+ impossible("", "");
-+ }
-+ ASSIGN_NODE_LIST(node, new_list);
-+ if (0 && check_lists) {
-+ int count;
-+ tree_level level;
-+
-+ count = 0;
-+
-+ /* flush queue list */
-+ /* reiser4_check_fq(atom); */
-+
-+ /* dirty list */
-+ count = 0;
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+ list_for_each(pos, ATOM_DIRTY_LIST(atom, level))
-+ count++;
-+ }
-+ if (count != atom->dirty)
-+ warning("", "dirty counter %d, real %d\n", atom->dirty,
-+ count);
-+
-+ /* clean list */
-+ count = 0;
-+ list_for_each(pos, ATOM_CLEAN_LIST(atom))
-+ count++;
-+ if (count != atom->clean)
-+ warning("", "clean counter %d, real %d\n", atom->clean,
-+ count);
-+
-+ /* wb list */
-+ count = 0;
-+ list_for_each(pos, ATOM_WB_LIST(atom))
-+ count++;
-+ if (count != atom->wb)
-+ warning("", "wb counter %d, real %d\n", atom->wb,
-+ count);
-+
-+ /* overwrite list */
-+ count = 0;
-+ list_for_each(pos, ATOM_OVRWR_LIST(atom))
-+ count++;
-+
-+ if (count != atom->ovrwr)
-+ warning("", "ovrwr counter %d, real %d\n", atom->ovrwr,
-+ count);
-+ }
-+ assert("vs-1624", atom->num_queued == atom->fq);
-+ if (atom->capture_count !=
-+ atom->dirty + atom->clean + atom->ovrwr + atom->wb + atom->fq) {
-+ printk
-+ ("count %d, dirty %d clean %d ovrwr %d wb %d fq %d\n",
-+ atom->capture_count, atom->dirty, atom->clean, atom->ovrwr,
-+ atom->wb, atom->fq);
-+ assert("vs-1622",
-+ atom->capture_count ==
-+ atom->dirty + atom->clean + atom->ovrwr + atom->wb +
-+ atom->fq);
-+ }
-+}
-+
-+#endif
-+
-+/* Make node OVRWR and put it on atom->overwrite_nodes list, atom lock and jnode
-+ * lock should be taken before calling this function. */
-+void jnode_make_wander_nolock(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("nikita-2431", node != NULL);
-+ assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC));
-+ assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
-+
-+ atom = node->atom;
-+
-+ assert("zam-895", atom != NULL);
-+ assert("zam-894", atom_is_protected(atom));
-+
-+ JF_SET(node, JNODE_OVRWR);
-+ /* move node to atom's overwrite list */
-+ list_move_tail(&node->capture_link, ATOM_OVRWR_LIST(atom));
-+ ON_DEBUG(count_jnode(atom, node, DIRTY_LIST, OVRWR_LIST, 1));
-+}
-+
-+/* Same as jnode_make_wander_nolock, but all necessary locks are taken inside
-+ * this function. */
-+void jnode_make_wander(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ spin_lock_jnode(node);
-+ atom = jnode_get_atom(node);
-+ assert("zam-913", atom != NULL);
-+ assert("zam-914", !JF_ISSET(node, JNODE_RELOC));
-+
-+ jnode_make_wander_nolock(node);
-+ spin_unlock_atom(atom);
-+ spin_unlock_jnode(node);
-+}
-+
-+/* this just sets RELOC bit */
-+static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-916", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
-+ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
-+ jnode_set_reloc(node);
-+}
-+
-+/* Make znode RELOC and put it on flush queue */
-+void znode_make_reloc(znode * z, flush_queue_t * fq)
-+{
-+ jnode *node;
-+ txn_atom *atom;
-+
-+ node = ZJNODE(z);
-+ spin_lock_jnode(node);
-+
-+ atom = jnode_get_atom(node);
-+ assert("zam-919", atom != NULL);
-+
-+ jnode_make_reloc_nolock(fq, node);
-+ queue_jnode(fq, node);
-+
-+ spin_unlock_atom(atom);
-+ spin_unlock_jnode(node);
-+
-+}
-+
-+/* Make unformatted node RELOC and put it on flush queue */
-+void unformatted_make_reloc(jnode *node, flush_queue_t *fq)
-+{
-+ assert("vs-1479", jnode_is_unformatted(node));
-+
-+ jnode_make_reloc_nolock(fq, node);
-+ queue_jnode(fq, node);
-+}
-+
-+int reiser4_capture_super_block(struct super_block *s)
-+{
-+ int result;
-+ znode *uber;
-+ lock_handle lh;
-+
-+ init_lh(&lh);
-+ result = get_uber_znode(reiser4_get_tree(s),
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI, &lh);
-+ if (result)
-+ return result;
-+
-+ uber = lh.node;
-+ /* Grabbing one block for superblock */
-+ result = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
-+ if (result != 0)
-+ return result;
-+
-+ znode_make_dirty(uber);
-+
-+ done_lh(&lh);
-+ return 0;
-+}
-+
-+/* Wakeup every handle on the atom's WAITFOR list */
-+static void wakeup_atom_waitfor_list(txn_atom * atom)
-+{
-+ txn_wait_links *wlinks;
-+
-+ assert("umka-210", atom != NULL);
-+
-+ /* atom is locked */
-+ list_for_each_entry(wlinks, &atom->fwaitfor_list, _fwaitfor_link) {
-+ if (wlinks->waitfor_cb == NULL ||
-+ wlinks->waitfor_cb(atom, wlinks))
-+ /* Wake up. */
-+ reiser4_wake_up(wlinks->_lock_stack);
-+ }
-+}
-+
-+/* Wakeup every handle on the atom's WAITING list */
-+static void wakeup_atom_waiting_list(txn_atom * atom)
-+{
-+ txn_wait_links *wlinks;
-+
-+ assert("umka-211", atom != NULL);
-+
-+ /* atom is locked */
-+ list_for_each_entry(wlinks, &atom->fwaiting_list, _fwaiting_link) {
-+ if (wlinks->waiting_cb == NULL ||
-+ wlinks->waiting_cb(atom, wlinks))
-+ /* Wake up. */
-+ reiser4_wake_up(wlinks->_lock_stack);
-+ }
-+}
-+
-+/* helper function used by capture_fuse_wait() to avoid "spurious wake-ups" */
-+static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks)
-+{
-+ assert("nikita-3330", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* atom->txnh_count == 1 is for waking waiters up if we are releasing
-+ * last transaction handle. */
-+ return atom->stage != ASTAGE_CAPTURE_WAIT || atom->txnh_count == 1;
-+}
-+
-+/* The general purpose of this function is to wait on the first of two possible events.
-+ The situation is that a handle (and its atom atomh) is blocked trying to capture a
-+ block (i.e., node) but the node's atom (atomf) is in the CAPTURE_WAIT state. The
-+ handle's atom (atomh) is not in the CAPTURE_WAIT state. However, atomh could fuse with
-+ another atom or, due to age, enter the CAPTURE_WAIT state itself, at which point it
-+ needs to unblock the handle to avoid deadlock. When the txnh is unblocked it will
-+ proceed and fuse the two atoms in the CAPTURE_WAIT state.
-+
-+ In other words, if either atomh or atomf change state, the handle will be awakened,
-+ thus there are two lists per atom: WAITING and WAITFOR.
-+
-+ This is also called by capture_assign_txnh with (atomh == NULL) to wait for atomf to
-+ close but it is not assigned to an atom of its own.
-+
-+ Lock ordering in this method: all four locks are held: JNODE_LOCK, TXNH_LOCK,
-+ BOTH_ATOM_LOCKS. Result: all four locks are released.
-+*/
-+static int capture_fuse_wait(txn_handle * txnh, txn_atom * atomf,
-+ txn_atom * atomh, txn_capture mode)
-+{
-+ int ret;
-+ txn_wait_links wlinks;
-+
-+ assert("umka-213", txnh != NULL);
-+ assert("umka-214", atomf != NULL);
-+
-+ if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atomf);
-+
-+ if (atomh) {
-+ spin_unlock_atom(atomh);
-+ }
-+
-+ return RETERR(-E_BLOCK);
-+ }
-+
-+ /* Initialize the waiting list links. */
-+ init_wlinks(&wlinks);
-+
-+ /* Add txnh to atomf's waitfor list, unlock atomf. */
-+ list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list);
-+ wlinks.waitfor_cb = wait_for_fusion;
-+ atomic_inc(&atomf->refcount);
-+ spin_unlock_atom(atomf);
-+
-+ if (atomh) {
-+ /* Add txnh to atomh's waiting list, unlock atomh. */
-+ list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list);
-+ atomic_inc(&atomh->refcount);
-+ spin_unlock_atom(atomh);
-+ }
-+
-+ /* Go to sleep. */
-+ spin_unlock_txnh(txnh);
-+
-+ ret = reiser4_prepare_to_sleep(wlinks._lock_stack);
-+ if (ret == 0) {
-+ reiser4_go_to_sleep(wlinks._lock_stack);
-+ ret = RETERR(-E_REPEAT);
-+ }
-+
-+ /* Remove from the waitfor list. */
-+ spin_lock_atom(atomf);
-+
-+ list_del(&wlinks._fwaitfor_link);
-+ atom_dec_and_unlock(atomf);
-+
-+ if (atomh) {
-+ /* Remove from the waiting list. */
-+ spin_lock_atom(atomh);
-+ list_del(&wlinks._fwaiting_link);
-+ atom_dec_and_unlock(atomh);
-+ }
-+ return ret;
-+}
-+
-+static void lock_two_atoms(txn_atom * one, txn_atom * two)
-+{
-+ assert("zam-1067", one != two);
-+
-+ /* lock the atom with lesser address first */
-+ if (one < two) {
-+ spin_lock_atom(one);
-+ spin_lock_atom_nested(two);
-+ } else {
-+ spin_lock_atom(two);
-+ spin_lock_atom_nested(one);
-+ }
-+}
-+
-+/* Perform the necessary work to prepare for fusing two atoms, which involves
-+ * acquiring two atom locks in the proper order. If one of the node's atom is
-+ * blocking fusion (i.e., it is in the CAPTURE_WAIT stage) and the handle's
-+ * atom is not then the handle's request is put to sleep. If the node's atom
-+ * is committing, then the node can be copy-on-captured. Otherwise, pick the
-+ * atom with fewer pointers to be fused into the atom with more pointer and
-+ * call capture_fuse_into.
-+ */
-+static int capture_init_fusion(jnode *node, txn_handle *txnh, txn_capture mode)
-+{
-+ txn_atom * txnh_atom = txnh->atom;
-+ txn_atom * block_atom = node->atom;
-+
-+ atomic_inc(&txnh_atom->refcount);
-+ atomic_inc(&block_atom->refcount);
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+
-+ lock_two_atoms(txnh_atom, block_atom);
-+
-+ if (txnh->atom != txnh_atom || node->atom != block_atom ) {
-+ release_two_atoms(txnh_atom, block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ atomic_dec(&txnh_atom->refcount);
-+ atomic_dec(&block_atom->refcount);
-+
-+ assert ("zam-1066", atom_isopen(txnh_atom));
-+
-+ if (txnh_atom->stage >= block_atom->stage ||
-+ (block_atom->stage == ASTAGE_CAPTURE_WAIT && block_atom->txnh_count == 0)) {
-+ capture_fuse_into(txnh_atom, block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ spin_lock_txnh(txnh);
-+ return capture_fuse_wait(txnh, block_atom, txnh_atom, mode);
-+}
-+
-+/* This function splices together two jnode lists (small and large) and sets all jnodes in
-+ the small list to point to the large atom. Returns the length of the list. */
-+static int
-+capture_fuse_jnode_lists(txn_atom *large, struct list_head *large_head,
-+ struct list_head *small_head)
-+{
-+ int count = 0;
-+ jnode *node;
-+
-+ assert("umka-218", large != NULL);
-+ assert("umka-219", large_head != NULL);
-+ assert("umka-220", small_head != NULL);
-+ /* small atom should be locked also. */
-+ assert_spin_locked(&(large->alock));
-+
-+ /* For every jnode on small's capture list... */
-+ list_for_each_entry(node, small_head, capture_link) {
-+ count += 1;
-+
-+ /* With the jnode lock held, update atom pointer. */
-+ spin_lock_jnode(node);
-+ node->atom = large;
-+ spin_unlock_jnode(node);
-+ }
-+
-+ /* Splice the lists. */
-+ list_splice_init(small_head, large_head->prev);
-+
-+ return count;
-+}
-+
-+/* This function splices together two txnh lists (small and large) and sets all txn handles in
-+ the small list to point to the large atom. Returns the length of the list. */
-+static int
-+capture_fuse_txnh_lists(txn_atom *large, struct list_head *large_head,
-+ struct list_head *small_head)
-+{
-+ int count = 0;
-+ txn_handle *txnh;
-+
-+ assert("umka-221", large != NULL);
-+ assert("umka-222", large_head != NULL);
-+ assert("umka-223", small_head != NULL);
-+
-+ /* Adjust every txnh to the new atom. */
-+ list_for_each_entry(txnh, small_head, txnh_link) {
-+ count += 1;
-+
-+ /* With the txnh lock held, update atom pointer. */
-+ spin_lock_txnh(txnh);
-+ txnh->atom = large;
-+ spin_unlock_txnh(txnh);
-+ }
-+
-+ /* Splice the txn_handle list. */
-+ list_splice_init(small_head, large_head->prev);
-+
-+ return count;
-+}
-+
-+/* This function fuses two atoms. The captured nodes and handles belonging to SMALL are
-+ added to LARGE and their ->atom pointers are all updated. The associated counts are
-+ updated as well, and any waiting handles belonging to either are awakened. Finally the
-+ smaller atom's refcount is decremented.
-+*/
-+static void capture_fuse_into(txn_atom * small, txn_atom * large)
-+{
-+ int level;
-+ unsigned zcount = 0;
-+ unsigned tcount = 0;
-+
-+ assert("umka-224", small != NULL);
-+ assert("umka-225", small != NULL);
-+
-+ assert_spin_locked(&(large->alock));
-+ assert_spin_locked(&(small->alock));
-+
-+ assert("jmacd-201", atom_isopen(small));
-+ assert("jmacd-202", atom_isopen(large));
-+
-+ /* Splice and update the per-level dirty jnode lists */
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+ zcount +=
-+ capture_fuse_jnode_lists(large,
-+ ATOM_DIRTY_LIST(large, level),
-+ ATOM_DIRTY_LIST(small, level));
-+ }
-+
-+ /* Splice and update the [clean,dirty] jnode and txnh lists */
-+ zcount +=
-+ capture_fuse_jnode_lists(large, ATOM_CLEAN_LIST(large),
-+ ATOM_CLEAN_LIST(small));
-+ zcount +=
-+ capture_fuse_jnode_lists(large, ATOM_OVRWR_LIST(large),
-+ ATOM_OVRWR_LIST(small));
-+ zcount +=
-+ capture_fuse_jnode_lists(large, ATOM_WB_LIST(large),
-+ ATOM_WB_LIST(small));
-+ zcount +=
-+ capture_fuse_jnode_lists(large, &large->inodes, &small->inodes);
-+ tcount +=
-+ capture_fuse_txnh_lists(large, &large->txnh_list,
-+ &small->txnh_list);
-+
-+ /* Check our accounting. */
-+ assert("jmacd-1063",
-+ zcount + small->num_queued == small->capture_count);
-+ assert("jmacd-1065", tcount == small->txnh_count);
-+
-+ /* sum numbers of waiters threads */
-+ large->nr_waiters += small->nr_waiters;
-+ small->nr_waiters = 0;
-+
-+ /* splice flush queues */
-+ reiser4_fuse_fq(large, small);
-+
-+ /* update counter of jnode on every atom' list */
-+ ON_DEBUG(large->dirty += small->dirty;
-+ small->dirty = 0;
-+ large->clean += small->clean;
-+ small->clean = 0;
-+ large->ovrwr += small->ovrwr;
-+ small->ovrwr = 0;
-+ large->wb += small->wb;
-+ small->wb = 0;
-+ large->fq += small->fq;
-+ small->fq = 0;);
-+
-+ /* count flushers in result atom */
-+ large->nr_flushers += small->nr_flushers;
-+ small->nr_flushers = 0;
-+
-+ /* update counts of flushed nodes */
-+ large->flushed += small->flushed;
-+ small->flushed = 0;
-+
-+ /* Transfer list counts to large. */
-+ large->txnh_count += small->txnh_count;
-+ large->capture_count += small->capture_count;
-+
-+ /* Add all txnh references to large. */
-+ atomic_add(small->txnh_count, &large->refcount);
-+ atomic_sub(small->txnh_count, &small->refcount);
-+
-+ /* Reset small counts */
-+ small->txnh_count = 0;
-+ small->capture_count = 0;
-+
-+ /* Assign the oldest start_time, merge flags. */
-+ large->start_time = min(large->start_time, small->start_time);
-+ large->flags |= small->flags;
-+
-+ /* Merge blocknr sets. */
-+ blocknr_set_merge(&small->delete_set, &large->delete_set);
-+ blocknr_set_merge(&small->wandered_map, &large->wandered_map);
-+
-+ /* Merge allocated/deleted file counts */
-+ large->nr_objects_deleted += small->nr_objects_deleted;
-+ large->nr_objects_created += small->nr_objects_created;
-+
-+ small->nr_objects_deleted = 0;
-+ small->nr_objects_created = 0;
-+
-+ /* Merge allocated blocks counts */
-+ large->nr_blocks_allocated += small->nr_blocks_allocated;
-+
-+ large->nr_running_queues += small->nr_running_queues;
-+ small->nr_running_queues = 0;
-+
-+ /* Merge blocks reserved for overwrite set. */
-+ large->flush_reserved += small->flush_reserved;
-+ small->flush_reserved = 0;
-+
-+ if (large->stage < small->stage) {
-+ /* Large only needs to notify if it has changed state. */
-+ reiser4_atom_set_stage(large, small->stage);
-+ wakeup_atom_waiting_list(large);
-+ }
-+
-+ reiser4_atom_set_stage(small, ASTAGE_INVALID);
-+
-+ /* Notify any waiters--small needs to unload its wait lists. Waiters
-+ actually remove themselves from the list before returning from the
-+ fuse_wait function. */
-+ wakeup_atom_waiting_list(small);
-+
-+ /* Unlock atoms */
-+ spin_unlock_atom(large);
-+ atom_dec_and_unlock(small);
-+}
-+
-+/* TXNMGR STUFF */
-+
-+/* Release a block from the atom, reversing the effects of being captured,
-+ do not release atom's reference to jnode due to holding spin-locks.
-+ Currently this is only called when the atom commits.
-+
-+ NOTE: this function does not release a (journal) reference to jnode
-+ due to locking optimizations, you should call jput() somewhere after
-+ calling reiser4_uncapture_block(). */
-+void reiser4_uncapture_block(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-226", node != NULL);
-+ atom = node->atom;
-+ assert("umka-228", atom != NULL);
-+
-+ assert("jmacd-1021", node->atom == atom);
-+ assert_spin_locked(&(node->guard));
-+ assert("jmacd-1023", atom_is_protected(atom));
-+
-+ JF_CLR(node, JNODE_DIRTY);
-+ JF_CLR(node, JNODE_RELOC);
-+ JF_CLR(node, JNODE_OVRWR);
-+ JF_CLR(node, JNODE_CREATED);
-+ JF_CLR(node, JNODE_WRITEBACK);
-+ JF_CLR(node, JNODE_REPACK);
-+
-+ list_del_init(&node->capture_link);
-+ if (JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
-+ assert("zam-925", atom_isopen(atom));
-+ assert("vs-1623", NODE_LIST(node) == FQ_LIST);
-+ ON_DEBUG(atom->num_queued--);
-+ JF_CLR(node, JNODE_FLUSH_QUEUED);
-+ }
-+ atom->capture_count -= 1;
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1));
-+ node->atom = NULL;
-+
-+ spin_unlock_jnode(node);
-+ LOCK_CNT_DEC(t_refs);
-+}
-+
-+/* Unconditional insert of jnode into atom's overwrite list. Currently used in
-+ bitmap-based allocator code for adding modified bitmap blocks the
-+ transaction. @atom and @node are spin locked */
-+void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node)
-+{
-+ assert("zam-538", atom_is_protected(atom));
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-899", JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-543", node->atom == NULL);
-+ assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node));
-+
-+ list_add(&node->capture_link, ATOM_OVRWR_LIST(atom));
-+ jref(node);
-+ node->atom = atom;
-+ atom->capture_count++;
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), OVRWR_LIST, 1));
-+}
-+
-+static int count_deleted_blocks_actor(txn_atom * atom,
-+ const reiser4_block_nr * a,
-+ const reiser4_block_nr * b, void *data)
-+{
-+ reiser4_block_nr *counter = data;
-+
-+ assert("zam-995", data != NULL);
-+ assert("zam-996", a != NULL);
-+ if (b == NULL)
-+ *counter += 1;
-+ else
-+ *counter += *b;
-+ return 0;
-+}
-+
-+reiser4_block_nr txnmgr_count_deleted_blocks(void)
-+{
-+ reiser4_block_nr result;
-+ txn_mgr *tmgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+ txn_atom *atom;
-+
-+ result = 0;
-+
-+ spin_lock_txnmgr(tmgr);
-+ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-+ spin_lock_atom(atom);
-+ if (atom_isopen(atom))
-+ blocknr_set_iterator(
-+ atom, &atom->delete_set,
-+ count_deleted_blocks_actor, &result, 0);
-+ spin_unlock_atom(atom);
-+ }
-+ spin_unlock_txnmgr(tmgr);
-+
-+ return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.20.orig/fs/reiser4/txnmgr.h linux-2.6.20/fs/reiser4/txnmgr.h
---- linux-2.6.20.orig/fs/reiser4/txnmgr.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/txnmgr.h 2007-05-06 14:50:43.899038216 +0400
-@@ -0,0 +1,708 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* data-types and function declarations for transaction manager. See txnmgr.c
-+ * for details. */
-+
-+#ifndef __REISER4_TXNMGR_H__
-+#define __REISER4_TXNMGR_H__
-+
-+#include "forward.h"
-+#include "dformat.h"
-+
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <linux/wait.h>
-+
-+/* TYPE DECLARATIONS */
-+
-+/* This enumeration describes the possible types of a capture request (reiser4_try_capture).
-+ A capture request dynamically assigns a block to the calling thread's transaction
-+ handle. */
-+typedef enum {
-+ /* A READ_ATOMIC request indicates that a block will be read and that the caller's
-+ atom should fuse in order to ensure that the block commits atomically with the
-+ caller. */
-+ TXN_CAPTURE_READ_ATOMIC = (1 << 0),
-+
-+ /* A READ_NONCOM request indicates that a block will be read and that the caller is
-+ willing to read a non-committed block without causing atoms to fuse. */
-+ TXN_CAPTURE_READ_NONCOM = (1 << 1),
-+
-+ /* A READ_MODIFY request indicates that a block will be read but that the caller
-+ wishes for the block to be captured as it will be written. This capture request
-+ mode is not currently used, but eventually it will be useful for preventing
-+ deadlock in read-modify-write cycles. */
-+ TXN_CAPTURE_READ_MODIFY = (1 << 2),
-+
-+ /* A WRITE capture request indicates that a block will be modified and that atoms
-+ should fuse to make the commit atomic. */
-+ TXN_CAPTURE_WRITE = (1 << 3),
-+
-+ /* CAPTURE_TYPES is a mask of the four above capture types, used to separate the
-+ exclusive type designation from extra bits that may be supplied -- see
-+ below. */
-+ TXN_CAPTURE_TYPES = (TXN_CAPTURE_READ_ATOMIC |
-+ TXN_CAPTURE_READ_NONCOM | TXN_CAPTURE_READ_MODIFY |
-+ TXN_CAPTURE_WRITE),
-+
-+ /* A subset of CAPTURE_TYPES, CAPTURE_WTYPES is a mask of request types that
-+ indicate modification will occur. */
-+ TXN_CAPTURE_WTYPES = (TXN_CAPTURE_READ_MODIFY | TXN_CAPTURE_WRITE),
-+
-+ /* An option to reiser4_try_capture, NONBLOCKING indicates that the caller would
-+ prefer not to sleep waiting for an aging atom to commit. */
-+ TXN_CAPTURE_NONBLOCKING = (1 << 4),
-+
-+ /* An option to reiser4_try_capture to prevent atom fusion, just simple
-+ capturing is allowed */
-+ TXN_CAPTURE_DONT_FUSE = (1 << 5)
-+
-+ /* This macro selects only the exclusive capture request types, stripping out any
-+ options that were supplied (i.e., NONBLOCKING). */
-+#define CAPTURE_TYPE(x) ((x) & TXN_CAPTURE_TYPES)
-+} txn_capture;
-+
-+/* There are two kinds of transaction handle: WRITE_FUSING and READ_FUSING, the only
-+ difference is in the handling of read requests. A WRITE_FUSING transaction handle
-+ defaults read capture requests to TXN_CAPTURE_READ_NONCOM whereas a READ_FUSING
-+ transaction handle defaults to TXN_CAPTURE_READ_ATOMIC. */
-+typedef enum {
-+ TXN_WRITE_FUSING = (1 << 0),
-+ TXN_READ_FUSING = (1 << 1) | TXN_WRITE_FUSING, /* READ implies WRITE */
-+} txn_mode;
-+
-+/* Every atom has a stage, which is one of these exclusive values: */
-+typedef enum {
-+ /* Initially an atom is free. */
-+ ASTAGE_FREE = 0,
-+
-+ /* An atom begins by entering the CAPTURE_FUSE stage, where it proceeds to capture
-+ blocks and fuse with other atoms. */
-+ ASTAGE_CAPTURE_FUSE = 1,
-+
-+ /* We need to have an ASTAGE_CAPTURE_SLOW in which an atom fuses with one node for every X nodes it flushes to disk where X > 1. */
-+
-+ /* When an atom reaches a certain age it must do all it can to commit. An atom in
-+ the CAPTURE_WAIT stage refuses new transaction handles and prevents fusion from
-+ atoms in the CAPTURE_FUSE stage. */
-+ ASTAGE_CAPTURE_WAIT = 2,
-+
-+ /* Waiting for I/O before commit. Copy-on-capture (see
-+ http://namesys.com/v4/v4.html). */
-+ ASTAGE_PRE_COMMIT = 3,
-+
-+ /* Post-commit overwrite I/O. Steal-on-capture. */
-+ ASTAGE_POST_COMMIT = 4,
-+
-+ /* An atom that waits for the last reference to it to be dropped so
-+ * that it can be deleted from memory */
-+ ASTAGE_DONE = 5,
-+
-+ /* invalid atom. */
-+ ASTAGE_INVALID = 6,
-+
-+} txn_stage;
-+
-+/* Certain flags may be set in the txn_atom->flags field. */
-+typedef enum {
-+ /* Indicates that the atom should commit as soon as possible. */
-+ ATOM_FORCE_COMMIT = (1 << 0),
-+ /* to avoid endless loop, mark the atom (which was considered as too
-+ * small) after failed attempt to fuse it. */
-+ ATOM_CANCEL_FUSION = (1 << 1)
-+} txn_flags;
-+
-+/* Flags for controlling commit_txnh */
-+typedef enum {
-+ /* Wait commit atom completion in commit_txnh */
-+ TXNH_WAIT_COMMIT = 0x2,
-+ /* Don't commit atom when this handle is closed */
-+ TXNH_DONT_COMMIT = 0x4
-+} txn_handle_flags_t;
-+
-+/* TYPE DEFINITIONS */
-+
-+/* A note on lock ordering: the handle & jnode spinlock protects reading of their ->atom
-+ fields, so typically an operation on the atom through either of these objects must (1)
-+ lock the object, (2) read the atom pointer, (3) lock the atom.
-+
-+ During atom fusion, the process holds locks on both atoms at once. Then, it iterates
-+ through the list of handles and pages held by the smaller of the two atoms. For each
-+ handle and page referencing the smaller atom, the fusing process must: (1) lock the
-+ object, and (2) update the atom pointer.
-+
-+ You can see that there is a conflict of lock ordering here, so the more-complex
-+ procedure should have priority, i.e., the fusing process has priority so that it is
-+ guaranteed to make progress and to avoid restarts.
-+
-+ This decision, however, means additional complexity for acquiring the atom lock in the
-+ first place.
-+
-+ The general original procedure followed in the code was:
-+
-+ TXN_OBJECT *obj = ...;
-+ TXN_ATOM *atom;
-+
-+ spin_lock (& obj->_lock);
-+
-+ atom = obj->_atom;
-+
-+ if (! spin_trylock_atom (atom))
-+ {
-+ spin_unlock (& obj->_lock);
-+ RESTART OPERATION, THERE WAS A RACE;
-+ }
-+
-+ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
-+
-+ It has however been found that this wastes CPU a lot in a manner that is
-+ hard to profile. So, proper refcounting was added to atoms, and the new
-+ standard locking sequence is as follows:
-+
-+ TXN_OBJECT *obj = ...;
-+ TXN_ATOM *atom;
-+
-+ spin_lock (& obj->_lock);
-+
-+ atom = obj->_atom;
-+
-+ if (! spin_trylock_atom (atom))
-+ {
-+ atomic_inc (& atom->refcount);
-+ spin_unlock (& obj->_lock);
-+ spin_lock (&atom->_lock);
-+ atomic_dec (& atom->refcount);
-+ // HERE atom is locked
-+ spin_unlock (&atom->_lock);
-+ RESTART OPERATION, THERE WAS A RACE;
-+ }
-+
-+ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
-+
-+ (core of this is implemented in trylock_throttle() function)
-+
-+ See the jnode_get_atom() function for a common case.
-+
-+ As an additional (and important) optimization that avoids restarts,
-+ it is possible to re-check required pre-conditions at the HERE point in
-+ code above and proceed without restarting if they are still satisfied.
-+*/
-+
-+/* An atomic transaction: this is the underlying system representation
-+ of a transaction, not the one seen by clients.
-+
-+ Invariants involving this data-type:
-+
-+ [sb-fake-allocated]
-+*/
-+struct txn_atom {
-+ /* The spinlock protecting the atom, held during fusion and various other state
-+ changes. */
-+ spinlock_t alock;
-+
-+ /* The atom's reference counter, increasing (in case of a duplication
-+ of an existing reference or when we are sure that some other
-+ reference exists) may be done without taking spinlock, decrementing
-+ of the ref. counter requires a spinlock to be held.
-+
-+ Each transaction handle counts in ->refcount. All jnodes count as
-+ one reference acquired in atom_begin_andlock(), released in
-+ commit_current_atom().
-+ */
-+ atomic_t refcount;
-+
-+ /* The atom_id identifies the atom in persistent records such as the log. */
-+ __u32 atom_id;
-+
-+ /* Flags holding any of the txn_flags enumerated values (e.g.,
-+ ATOM_FORCE_COMMIT). */
-+ __u32 flags;
-+
-+ /* Number of open handles. */
-+ __u32 txnh_count;
-+
-+ /* The number of znodes captured by this atom. Equal to the sum of lengths of the
-+ dirty_nodes[level] and clean_nodes lists. */
-+ __u32 capture_count;
-+
-+#if REISER4_DEBUG
-+ int clean;
-+ int dirty;
-+ int ovrwr;
-+ int wb;
-+ int fq;
-+#endif
-+
-+ __u32 flushed;
-+
-+ /* Current transaction stage. */
-+ txn_stage stage;
-+
-+ /* Start time. */
-+ unsigned long start_time;
-+
-+ /* The atom's delete set. It collects block numbers of the nodes
-+ which were deleted during the transaction. */
-+ struct list_head delete_set;
-+
-+ /* The atom's wandered_block mapping. */
-+ struct list_head wandered_map;
-+
-+ /* The transaction's list of dirty captured nodes--per level. Index
-+ by (level). dirty_nodes[0] is for znode-above-root */
-+ struct list_head dirty_nodes[REAL_MAX_ZTREE_HEIGHT + 1];
-+
-+ /* The transaction's list of clean captured nodes. */
-+ struct list_head clean_nodes;
-+
-+ /* The atom's overwrite set */
-+ struct list_head ovrwr_nodes;
-+
-+ /* nodes which are being written to disk */
-+ struct list_head writeback_nodes;
-+
-+ /* list of inodes */
-+ struct list_head inodes;
-+
-+ /* List of handles associated with this atom. */
-+ struct list_head txnh_list;
-+
-+ /* Transaction list link: list of atoms in the transaction manager. */
-+ struct list_head atom_link;
-+
-+ /* List of handles waiting FOR this atom: see 'capture_fuse_wait' comment. */
-+ struct list_head fwaitfor_list;
-+
-+ /* List of this atom's handles that are waiting: see 'capture_fuse_wait' comment. */
-+ struct list_head fwaiting_list;
-+
-+ /* Numbers of objects which were deleted/created in this transaction
-+ thereby numbers of objects IDs which were released/deallocated. */
-+ int nr_objects_deleted;
-+ int nr_objects_created;
-+ /* number of blocks allocated during the transaction */
-+ __u64 nr_blocks_allocated;
-+ /* All atom's flush queue objects are on this list */
-+ struct list_head flush_queues;
-+#if REISER4_DEBUG
-+ /* number of flush queues for this atom. */
-+ int nr_flush_queues;
-+ /* Number of jnodes which were removed from atom's lists and put
-+ on flush_queue */
-+ int num_queued;
-+#endif
-+ /* number of threads who wait for this atom to complete commit */
-+ int nr_waiters;
-+ /* number of threads which do jnode_flush() over this atom */
-+ int nr_flushers;
-+ /* number of flush queues which are IN_USE and jnodes from fq->prepped
-+ are submitted to disk by the reiser4_write_fq() routine. */
-+ int nr_running_queues;
-+ /* A counter of grabbed unformatted nodes, see a description of the
-+ * reiser4 space reservation scheme at block_alloc.c */
-+ reiser4_block_nr flush_reserved;
-+#if REISER4_DEBUG
-+ void *committer;
-+#endif
-+ struct super_block *super;
-+};
-+
-+#define ATOM_DIRTY_LIST(atom, level) (&(atom)->dirty_nodes[level])
-+#define ATOM_CLEAN_LIST(atom) (&(atom)->clean_nodes)
-+#define ATOM_OVRWR_LIST(atom) (&(atom)->ovrwr_nodes)
-+#define ATOM_WB_LIST(atom) (&(atom)->writeback_nodes)
-+#define ATOM_FQ_LIST(fq) (&(fq)->prepped)
-+
-+#define NODE_LIST(node) (node)->list
-+#define ASSIGN_NODE_LIST(node, list) ON_DEBUG(NODE_LIST(node) = list)
-+ON_DEBUG(void
-+ count_jnode(txn_atom *, jnode *, atom_list old_list,
-+ atom_list new_list, int check_lists));
-+
-+typedef struct protected_jnodes {
-+ struct list_head inatom; /* link to atom's list these structures */
-+ struct list_head nodes; /* head of list of protected nodes */
-+} protected_jnodes;
-+
-+/* A transaction handle: the client obtains and commits this handle which is assigned by
-+ the system to a txn_atom. */
-+struct txn_handle {
-+ /* Spinlock protecting ->atom pointer */
-+ spinlock_t hlock;
-+
-+ /* Flags for controlling commit_txnh() behavior */
-+ /* from txn_handle_flags_t */
-+ txn_handle_flags_t flags;
-+
-+ /* Whether it is READ_FUSING or WRITE_FUSING. */
-+ txn_mode mode;
-+
-+ /* If assigned, the atom it is part of. */
-+ txn_atom *atom;
-+
-+ /* Transaction list link. Head is in txn_atom. */
-+ struct list_head txnh_link;
-+};
-+
-+/* The transaction manager: one is contained in the reiser4_super_info_data */
-+struct txn_mgr {
-+ /* A spinlock protecting the atom list, id_count, flush_control */
-+ spinlock_t tmgr_lock;
-+
-+ /* List of atoms. */
-+ struct list_head atoms_list;
-+
-+ /* Number of atoms. */
-+ int atom_count;
-+
-+ /* A counter used to assign atom->atom_id values. */
-+ __u32 id_count;
-+
-+ /* a mutex object for commit serialization */
-+ struct mutex commit_mutex;
-+
-+ /* a list of all txnmgrs served by a particular daemon. */
-+ struct list_head linkage;
-+
-+ /* description of daemon for this txnmgr */
-+ ktxnmgrd_context *daemon;
-+
-+ /* parameters. Adjustable through mount options. */
-+ unsigned int atom_max_size;
-+ unsigned int atom_max_age;
-+ unsigned int atom_min_size;
-+ /* max number of concurrent flushers for one atom, 0 - unlimited. */
-+ unsigned int atom_max_flushers;
-+ struct dentry *debugfs_atom_count;
-+ struct dentry *debugfs_id_count;
-+};
-+
-+/* FUNCTION DECLARATIONS */
-+
-+/* These are the externally (within Reiser4) visible transaction functions, therefore they
-+ are prefixed with "txn_". For comments, see txnmgr.c. */
-+
-+extern int init_txnmgr_static(void);
-+extern void done_txnmgr_static(void);
-+
-+extern void reiser4_init_txnmgr(txn_mgr *);
-+extern void reiser4_done_txnmgr(txn_mgr *);
-+
-+extern int reiser4_txn_reserve(int reserved);
-+
-+extern void reiser4_txn_begin(reiser4_context * context);
-+extern int reiser4_txn_end(reiser4_context * context);
-+
-+extern void reiser4_txn_restart(reiser4_context * context);
-+extern void reiser4_txn_restart_current(void);
-+
-+extern int txnmgr_force_commit_all(struct super_block *, int);
-+extern int current_atom_should_commit(void);
-+
-+extern jnode *find_first_dirty_jnode(txn_atom *, int);
-+
-+extern int commit_some_atoms(txn_mgr *);
-+extern int force_commit_atom(txn_handle *);
-+extern int flush_current_atom(int, long, long *, txn_atom **, jnode *);
-+
-+extern int flush_some_atom(jnode *, long *, const struct writeback_control *, int);
-+
-+extern void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage);
-+
-+extern int same_slum_check(jnode * base, jnode * check, int alloc_check,
-+ int alloc_value);
-+extern void atom_dec_and_unlock(txn_atom * atom);
-+
-+extern int reiser4_try_capture(jnode * node, znode_lock_mode mode, txn_capture flags);
-+extern int try_capture_page_to_invalidate(struct page *pg);
-+
-+extern void reiser4_uncapture_page(struct page *pg);
-+extern void reiser4_uncapture_block(jnode *);
-+extern void reiser4_uncapture_jnode(jnode *);
-+
-+extern int reiser4_capture_inode(struct inode *);
-+extern int reiser4_uncapture_inode(struct inode *);
-+
-+extern txn_atom *get_current_atom_locked_nocheck(void);
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * atom_is_protected - make sure that nobody but us can do anything with atom
-+ * @atom: atom to be checked
-+ *
-+ * This is used to assert that atom either entered commit stages or is spin
-+ * locked.
-+ */
-+static inline int atom_is_protected(txn_atom *atom)
-+{
-+ if (atom->stage >= ASTAGE_PRE_COMMIT)
-+ return 1;
-+ assert_spin_locked(&(atom->alock));
-+ return 1;
-+}
-+
-+#endif
-+
-+/* Get the current atom and spinlock it if current atom present. Must not return NULL (asserted). */
-+static inline txn_atom *get_current_atom_locked(void)
-+{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked_nocheck();
-+ assert("zam-761", atom != NULL);
-+
-+ return atom;
-+}
-+
-+extern txn_atom *jnode_get_atom(jnode *);
-+
-+extern void reiser4_atom_wait_event(txn_atom *);
-+extern void reiser4_atom_send_event(txn_atom *);
-+
-+extern void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node);
-+extern int reiser4_capture_super_block(struct super_block *s);
-+int capture_bulk(jnode **, int count);
-+
-+/* See the comment on the function blocknrset.c:blocknr_set_add for the
-+ calling convention of these three routines. */
-+extern void blocknr_set_init(struct list_head * bset);
-+extern void blocknr_set_destroy(struct list_head * bset);
-+extern void blocknr_set_merge(struct list_head * from, struct list_head * into);
-+extern int blocknr_set_add_extent(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * start,
-+ const reiser4_block_nr * len);
-+extern int blocknr_set_add_pair(txn_atom * atom, struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * a,
-+ const reiser4_block_nr * b);
-+
-+typedef int (*blocknr_set_actor_f) (txn_atom *, const reiser4_block_nr *,
-+ const reiser4_block_nr *, void *);
-+
-+extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
-+ blocknr_set_actor_f actor, void *data,
-+ int delete);
-+
-+/* flush code takes care of how to fuse flush queues */
-+extern void flush_init_atom(txn_atom * atom);
-+extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
-+
-+static inline void spin_lock_atom(txn_atom *atom)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_atom) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(atom->alock));
-+
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_lock_atom_nested(txn_atom *atom)
-+{
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock_nested(&(atom->alock), SINGLE_DEPTH_NESTING);
-+
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_atom(txn_atom *atom)
-+{
-+ if (spin_trylock(&(atom->alock))) {
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_atom(txn_atom *atom)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_atom);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(atom->alock));
-+}
-+
-+static inline void spin_lock_txnh(txn_handle *txnh)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(txnh->hlock));
-+
-+ LOCK_CNT_INC(spin_locked_txnh);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_txnh(txn_handle *txnh)
-+{
-+ if (spin_trylock(&(txnh->hlock))) {
-+ LOCK_CNT_INC(spin_locked_txnh);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_txnh(txn_handle *txnh)
-+{
-+ assert_spin_locked(&(txnh->hlock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnh));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_txnh);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(txnh->hlock));
-+}
-+
-+#define spin_ordering_pred_txnmgr(tmgr) \
-+ ( LOCK_CNT_NIL(spin_locked_atom) && \
-+ LOCK_CNT_NIL(spin_locked_txnh) && \
-+ LOCK_CNT_NIL(spin_locked_jnode) && \
-+ LOCK_CNT_NIL(rw_locked_zlock) && \
-+ LOCK_CNT_NIL(rw_locked_dk) && \
-+ LOCK_CNT_NIL(rw_locked_tree) )
-+
-+static inline void spin_lock_txnmgr(txn_mgr *mgr)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_atom) &&
-+ LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(mgr->tmgr_lock));
-+
-+ LOCK_CNT_INC(spin_locked_txnmgr);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_txnmgr(txn_mgr *mgr)
-+{
-+ if (spin_trylock(&(mgr->tmgr_lock))) {
-+ LOCK_CNT_INC(spin_locked_txnmgr);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_txnmgr(txn_mgr *mgr)
-+{
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_txnmgr);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(mgr->tmgr_lock));
-+}
-+
-+typedef enum {
-+ FQ_IN_USE = 0x1
-+} flush_queue_state_t;
-+
-+typedef struct flush_queue flush_queue_t;
-+
-+/* This is an accumulator for jnodes prepared for writing to disk. A flush queue
-+ is filled by the jnode_flush() routine, and written to disk under memory
-+ pressure or at atom commit time. */
-+/* LOCKING: fq state and fq->atom are protected by guard spinlock, fq->nr_queued
-+ field and fq->prepped list can be modified if atom is spin-locked and fq
-+ object is in "in-use" state. For read-only traversal of the fq->prepped list
-+ and reading of the fq->nr_queued field it is enough to keep fq "in-use" or
-+ only have atom spin-locked. */
-+struct flush_queue {
-+ /* linkage element is the first in this structure to make debugging
-+ easier. See field in atom struct for description of list. */
-+ struct list_head alink;
-+ /* A spinlock to protect changes of fq state and fq->atom pointer */
-+ spinlock_t guard;
-+ /* flush_queue state: [in_use | ready] */
-+ flush_queue_state_t state;
-+ /* A list which contains queued nodes, queued nodes are removed from any
-+ * atom's list and put on this ->prepped one. */
-+ struct list_head prepped;
-+ /* number of submitted i/o requests */
-+ atomic_t nr_submitted;
-+ /* number of i/o errors */
-+ atomic_t nr_errors;
-+ /* An atom this flush queue is attached to */
-+ txn_atom *atom;
-+ /* A wait queue head to wait on i/o completion */
-+ wait_queue_head_t wait;
-+#if REISER4_DEBUG
-+ /* A thread which took this fq in exclusive use, NULL if fq is free,
-+ * used for debugging. */
-+ struct task_struct *owner;
-+#endif
-+};
-+
-+extern int reiser4_fq_by_atom(txn_atom *, flush_queue_t **);
-+extern void reiser4_fq_put_nolock(flush_queue_t *);
-+extern void reiser4_fq_put(flush_queue_t *);
-+extern void reiser4_fuse_fq(txn_atom * to, txn_atom * from);
-+extern void queue_jnode(flush_queue_t *, jnode *);
-+
-+extern int reiser4_write_fq(flush_queue_t *, long *, int);
-+extern int current_atom_finish_all_fq(void);
-+extern void init_atom_fq_parts(txn_atom *);
-+
-+extern reiser4_block_nr txnmgr_count_deleted_blocks(void);
-+
-+extern void znode_make_dirty(znode * node);
-+extern void jnode_make_dirty_locked(jnode * node);
-+
-+extern int reiser4_sync_atom(txn_atom * atom);
-+
-+#if REISER4_DEBUG
-+extern int atom_fq_parts_are_clean(txn_atom *);
-+#endif
-+
-+extern void add_fq_to_bio(flush_queue_t *, struct bio *);
-+extern flush_queue_t *get_fq_for_current_atom(void);
-+
-+void protected_jnodes_init(protected_jnodes * list);
-+void protected_jnodes_done(protected_jnodes * list);
-+void reiser4_invalidate_list(struct list_head * head);
-+
-+# endif /* __REISER4_TXNMGR_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/type_safe_hash.h linux-2.6.20/fs/reiser4/type_safe_hash.h
---- linux-2.6.20.orig/fs/reiser4/type_safe_hash.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/type_safe_hash.h 2007-05-06 14:50:43.899038216 +0400
-@@ -0,0 +1,320 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* A hash table class that uses hash chains (singly-linked) and is
-+ parametrized to provide type safety. */
-+
-+#ifndef __REISER4_TYPE_SAFE_HASH_H__
-+#define __REISER4_TYPE_SAFE_HASH_H__
-+
-+#include "debug.h"
-+
-+#include <asm/errno.h>
-+/* Step 1: Use TYPE_SAFE_HASH_DECLARE() to define the TABLE and LINK objects
-+ based on the object type. You need to declare the item type before
-+ this definition, define it after this definition. */
-+#define TYPE_SAFE_HASH_DECLARE(PREFIX,ITEM_TYPE) \
-+ \
-+typedef struct PREFIX##_hash_table_ PREFIX##_hash_table; \
-+typedef struct PREFIX##_hash_link_ PREFIX##_hash_link; \
-+ \
-+struct PREFIX##_hash_table_ \
-+{ \
-+ ITEM_TYPE **_table; \
-+ __u32 _buckets; \
-+}; \
-+ \
-+struct PREFIX##_hash_link_ \
-+{ \
-+ ITEM_TYPE *_next; \
-+}
-+
-+/* Step 2: Define the object type of the hash: give it field of type
-+ PREFIX_hash_link. */
-+
-+/* Step 3: Use TYPE_SAFE_HASH_DEFINE to define the hash table interface using
-+ the type and field name used in step 2. The arguments are:
-+
-+ ITEM_TYPE The item type being hashed
-+ KEY_TYPE The type of key being hashed
-+ KEY_NAME The name of the key field within the item
-+ LINK_NAME The name of the link field within the item, which you must make type PREFIX_hash_link)
-+ HASH_FUNC The name of the hash function (or macro, takes const pointer to key)
-+ EQ_FUNC The name of the equality function (or macro, takes const pointer to two keys)
-+
-+ It implements these functions:
-+
-+ prefix_hash_init Initialize the table given its size.
-+ prefix_hash_insert Insert an item
-+ prefix_hash_insert_index Insert an item w/ precomputed hash_index
-+ prefix_hash_find Find an item by key
-+ prefix_hash_find_index Find an item w/ precomputed hash_index
-+ prefix_hash_remove Remove an item, returns 1 if found, 0 if not found
-+ prefix_hash_remove_index Remove an item w/ precomputed hash_index
-+
-+ If you'd like something to be done differently, feel free to ask me
-+ for modifications. Additional features that could be added but
-+ have not been:
-+
-+ prefix_hash_remove_key Find and remove an item by key
-+ prefix_hash_remove_key_index Find and remove an item by key w/ precomputed hash_index
-+
-+ The hash_function currently receives only the key as an argument,
-+ meaning it must somehow know the number of buckets. If this is a
-+ problem let me know.
-+
-+ This hash table uses a single-linked hash chain. This means
-+ insertion is fast but deletion requires searching the chain.
-+
-+ There is also the doubly-linked hash chain approach, under which
-+ deletion requires no search but the code is longer and it takes two
-+ pointers per item.
-+
-+ The circularly-linked approach has the shortest code but requires
-+ two pointers per bucket, doubling the size of the bucket array (in
-+ addition to two pointers per item).
-+*/
-+#define TYPE_SAFE_HASH_DEFINE(PREFIX,ITEM_TYPE,KEY_TYPE,KEY_NAME,LINK_NAME,HASH_FUNC,EQ_FUNC) \
-+ \
-+static __inline__ void \
-+PREFIX##_check_hash (PREFIX##_hash_table *table UNUSED_ARG, \
-+ __u32 hash UNUSED_ARG) \
-+{ \
-+ assert("nikita-2780", hash < table->_buckets); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_init (PREFIX##_hash_table *hash, \
-+ __u32 buckets) \
-+{ \
-+ hash->_table = (ITEM_TYPE**) KMALLOC (sizeof (ITEM_TYPE*) * buckets); \
-+ hash->_buckets = buckets; \
-+ if (hash->_table == NULL) \
-+ { \
-+ return RETERR(-ENOMEM); \
-+ } \
-+ memset (hash->_table, 0, sizeof (ITEM_TYPE*) * buckets); \
-+ ON_DEBUG(printk(#PREFIX "_hash_table: %i buckets\n", buckets)); \
-+ return 0; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_done (PREFIX##_hash_table *hash) \
-+{ \
-+ if (REISER4_DEBUG && hash->_table != NULL) { \
-+ __u32 i; \
-+ for (i = 0 ; i < hash->_buckets ; ++ i) \
-+ assert("nikita-2905", hash->_table[i] == NULL); \
-+ } \
-+ if (hash->_table != NULL) \
-+ KFREE (hash->_table, sizeof (ITEM_TYPE*) * hash->_buckets); \
-+ hash->_table = NULL; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_prefetch_next (ITEM_TYPE *item) \
-+{ \
-+ prefetch(item->LINK_NAME._next); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_prefetch_bucket (PREFIX##_hash_table *hash, \
-+ __u32 index) \
-+{ \
-+ prefetch(hash->_table[index]); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ ITEM_TYPE *item; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ for (item = hash->_table[hash_index]; \
-+ item != NULL; \
-+ item = item->LINK_NAME._next) \
-+ { \
-+ prefetch(item->LINK_NAME._next); \
-+ prefetch(item->LINK_NAME._next + offsetof(ITEM_TYPE, KEY_NAME)); \
-+ if (EQ_FUNC (& item->KEY_NAME, find_key)) \
-+ { \
-+ return item; \
-+ } \
-+ } \
-+ \
-+ return NULL; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find_index_lru (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ ITEM_TYPE ** item = &hash->_table[hash_index]; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ while (*item != NULL) { \
-+ prefetch(&(*item)->LINK_NAME._next); \
-+ if (EQ_FUNC (&(*item)->KEY_NAME, find_key)) { \
-+ ITEM_TYPE *found; \
-+ \
-+ found = *item; \
-+ *item = found->LINK_NAME._next; \
-+ found->LINK_NAME._next = hash->_table[hash_index]; \
-+ hash->_table[hash_index] = found; \
-+ return found; \
-+ } \
-+ item = &(*item)->LINK_NAME._next; \
-+ } \
-+ return NULL; \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ ITEM_TYPE ** hash_item_p = &hash->_table[hash_index]; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ while (*hash_item_p != NULL) { \
-+ prefetch(&(*hash_item_p)->LINK_NAME._next); \
-+ if (*hash_item_p == del_item) { \
-+ *hash_item_p = (*hash_item_p)->LINK_NAME._next; \
-+ return 1; \
-+ } \
-+ hash_item_p = &(*hash_item_p)->LINK_NAME._next; \
-+ } \
-+ return 0; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ ins_item->LINK_NAME._next = hash->_table[hash_index]; \
-+ hash->_table[hash_index] = ins_item; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_index_rcu (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ ins_item->LINK_NAME._next = hash->_table[hash_index]; \
-+ smp_wmb(); \
-+ hash->_table[hash_index] = ins_item; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find (PREFIX##_hash_table *hash, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ return PREFIX##_hash_find_index (hash, HASH_FUNC(hash, find_key), find_key); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find_lru (PREFIX##_hash_table *hash, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ return PREFIX##_hash_find_index_lru (hash, HASH_FUNC(hash, find_key), find_key); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ return PREFIX##_hash_remove_index (hash, \
-+ HASH_FUNC(hash, &del_item->KEY_NAME), del_item); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove_rcu (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ return PREFIX##_hash_remove (hash, del_item); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ return PREFIX##_hash_insert_index (hash, \
-+ HASH_FUNC(hash, &ins_item->KEY_NAME), ins_item); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_rcu (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ return PREFIX##_hash_insert_index_rcu (hash, HASH_FUNC(hash, &ins_item->KEY_NAME), \
-+ ins_item); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE * \
-+PREFIX##_hash_first (PREFIX##_hash_table *hash, __u32 ind) \
-+{ \
-+ ITEM_TYPE *first; \
-+ \
-+ for (first = NULL; ind < hash->_buckets; ++ ind) { \
-+ first = hash->_table[ind]; \
-+ if (first != NULL) \
-+ break; \
-+ } \
-+ return first; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE * \
-+PREFIX##_hash_next (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *item) \
-+{ \
-+ ITEM_TYPE *next; \
-+ \
-+ if (item == NULL) \
-+ return NULL; \
-+ next = item->LINK_NAME._next; \
-+ if (next == NULL) \
-+ next = PREFIX##_hash_first (hash, HASH_FUNC(hash, &item->KEY_NAME) + 1); \
-+ return next; \
-+} \
-+ \
-+typedef struct {} PREFIX##_hash_dummy
-+/* walk @head over the address of every bucket slot of @table: &_table[0] up to, but not including, &_table[_buckets] */
-+#define for_all_ht_buckets(table, head) \
-+for ((head) = &(table) -> _table[ 0 ] ; \
-+ (head) != &(table) -> _table[ (table) -> _buckets ] ; ++ (head))
-+/* walk @item over the chain that starts at *@bucket and is linked through @field._next; @next is read before the loop body runs */
-+#define for_all_in_bucket(bucket, item, next, field) \
-+for ((item) = *(bucket), (next) = (item) ? (item) -> field._next : NULL ; \
-+ (item) != NULL ; \
-+ (item) = (next), (next) = (item) ? (item) -> field._next : NULL )
-+/* walk @item over all items of @table using prefix##_hash_first()/prefix##_hash_next(); @next is computed before the loop body runs */
-+#define for_all_in_htable(table, prefix, item, next) \
-+for ((item) = prefix ## _hash_first ((table), 0), \
-+ (next) = prefix ## _hash_next ((table), (item)) ; \
-+ (item) != NULL ; \
-+ (item) = (next), \
-+ (next) = prefix ## _hash_next ((table), (item)))
-+
-+/* __REISER4_TYPE_SAFE_HASH_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/vfs_ops.c linux-2.6.20/fs/reiser4/vfs_ops.c
---- linux-2.6.20.orig/fs/reiser4/vfs_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/vfs_ops.c 2007-05-06 14:50:43.899038216 +0400
-@@ -0,0 +1,259 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Interface to VFS. Reiser4 {super|export|dentry}_operations are defined
-+ here. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/file/file.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/disk_format/disk_format.h"
-+#include "plugin/plugin.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "entd.h"
-+#include "status_flags.h"
-+#include "flush.h"
-+#include "dscale.h"
-+
-+#include <linux/profile.h>
-+#include <linux/types.h>
-+#include <linux/mount.h>
-+#include <linux/vfs.h>
-+#include <linux/mm.h>
-+#include <linux/buffer_head.h>
-+#include <linux/dcache.h>
-+#include <linux/list.h>
-+#include <linux/pagemap.h>
-+#include <linux/slab.h>
-+#include <linux/seq_file.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/writeback.h>
-+#include <linux/blkdev.h>
-+#include <linux/quotaops.h>
-+#include <linux/security.h>
-+#include <linux/reboot.h>
-+#include <linux/rcupdate.h>
-+
-+/* Update @object's stat-data through its file plugin. Nothing is written and 0
-+   is returned when @object lives on a read-only file system. */
-+int reiser4_update_sd(struct inode *object)
-+{
-+	file_plugin *plugin;
-+
-+	assert("nikita-2338", object != NULL);
-+	if (!IS_RDONLY(object)) {
-+		plugin = inode_file_plugin(object);
-+		assert("nikita-2339", plugin != NULL);
-+		return plugin->write_sd_by_inode(object);
-+	}
-+	return 0;
-+}
-+
-+/* helper function: increase inode nlink count and call plugin method to save
-+ updated stat-data.
-+
-+ Used by link/create and during creation of dot and dotdot in mkdir
-+*/
-+int reiser4_add_nlink(struct inode *object,	/* object gaining the link */
-+		      struct inode *parent,	/* parent where new entry will be */
-+		      int write_sd_p)		/* true if stat-data has to be updated */
-+{
-+	file_plugin *plugin;
-+	int err;
-+
-+	assert("nikita-1351", object != NULL);
-+
-+	plugin = inode_file_plugin(object);
-+	assert("nikita-1445", plugin != NULL);
-+
-+	/* the plugin decides whether this object can take yet another link */
-+	if (!plugin->can_add_link(object))
-+		return RETERR(-EMLINK);
-+
-+	assert("nikita-2211", plugin->add_link != NULL);
-+
-+	/* the plugin does the actual addition of the link */
-+	err = plugin->add_link(object, parent);
-+	if (err != 0)
-+		return err;
-+
-+	/* saving the stat-data is optional */
-+	if (!write_sd_p)
-+		return 0;
-+	return plugin->write_sd_by_inode(object);
-+}
-+
-+/* helper function: decrease inode nlink count and call plugin method to save
-+ updated stat-data.
-+
-+ Used by unlink/create
-+*/
-+int reiser4_del_nlink(struct inode *object,	/* object losing the link */
-+		      struct inode *parent,	/* parent where entry was */
-+		      int write_sd_p)		/* true if stat-data has to be updated */
-+{
-+	file_plugin *plugin;
-+	int err;
-+
-+	assert("nikita-1349", object != NULL);
-+
-+	plugin = inode_file_plugin(object);
-+	assert("nikita-1350", plugin != NULL);
-+	assert("nikita-1446", object->i_nlink > 0);
-+	assert("nikita-2210", plugin->rem_link != NULL);
-+
-+	/* the plugin does the actual deletion of the link */
-+	err = plugin->rem_link(object, parent);
-+	if (err != 0)
-+		return err;
-+
-+	/* saving the stat-data is optional */
-+	if (!write_sd_p)
-+		return 0;
-+	return plugin->write_sd_by_inode(object);
-+}
-+
-+/* ->d_release() method of reiser4_dentry_operations: calls reiser4_free_dentry_fsdata() on @dentry. */
-+static void reiser4_d_release(struct dentry *dentry /* dentry released */ )
-+{
-+ reiser4_free_dentry_fsdata(dentry);
-+}
-+
-+/*
-+ * Called by reiser4_sync_inodes(), during speculative write-back (through
-+ * pdflush, or balance_dirty_pages()).
-+ */
-+void reiser4_writeout(struct super_block *sb, struct writeback_control *wbc)
-+{
-+	int result;
-+	struct address_space *mapping;
-+
-+	/*
-+	 * Performs early flushing, trying to free some memory. If there is
-+	 * nothing to flush, commits some atoms.
-+	 * @sb:  super block being written out;
-+	 * @wbc: write-back control: ->nr_to_write is decreased by what
-+	 *       flush_some_atom() reports as submitted, and
-+	 *       ->encountered_congestion is set when congestion stops the loop.
-+	 */
-+
-+	/* Commit all atoms if reiser4_writepages() is called from sys_sync() or
-+	   sys_fsync(). */
-+	if (wbc->sync_mode != WB_SYNC_NONE) {
-+		txnmgr_force_commit_all(sb, 0);
-+		return;
-+	}
-+
-+	BUG_ON(reiser4_get_super_fake(sb) == NULL);
-+	mapping = reiser4_get_super_fake(sb)->i_mapping;
-+	do {
-+		long nr_submitted = 0;
-+		jnode *node = NULL;
-+
-+		/* do not put more requests to overload write queue */
-+		if (wbc->nonblocking &&
-+		    bdi_write_congested(mapping->backing_dev_info)) {
-+			blk_run_address_space(mapping);
-+			wbc->encountered_congestion = 1;
-+			break;
-+		}
-+		BUG_ON(wbc->nr_to_write <= 0);
-+
-+		if (get_current_context()->entd) {
-+			entd_context *ent = get_entd_context(sb);
-+
-+			if (ent->cur_request->node)
-+				/*
-+				 * this is ent thread and it managed to capture
-+				 * requested page itself - start flush from
-+				 * that page
-+				 */
-+				node = jref(ent->cur_request->node);
-+		}
-+
-+		result = flush_some_atom(node, &nr_submitted, wbc,
-+					 JNODE_FLUSH_WRITE_BLOCKS);
-+		if (result != 0)
-+			warning("nikita-31001", "Flush failed: %i", result);
-+		if (node)
-+			jput(node);
-+		if (!nr_submitted)
-+			break;
-+
-+		wbc->nr_to_write -= nr_submitted;
-+	} while (wbc->nr_to_write > 0);
-+}
-+/* Calls reiser4_txn_restart_current(), then lets balance_dirty_pages_ratelimited() throttle the caller on @inode's mapping. */
-+void reiser4_throttle_write(struct inode *inode)
-+{
-+ reiser4_txn_restart_current();
-+ balance_dirty_pages_ratelimited(inode->i_mapping);
-+}
-+
-+const char *REISER4_SUPER_MAGIC_STRING = "ReIsEr4"; /* the magic string located at REISER4_MAGIC_OFFSET */
-+const int REISER4_MAGIC_OFFSET = 16 * 4096; /* offset (16 * 4096 = 65536) to magic string from the
-+ * beginning of device */
-+
-+/*
-+ * Reiser4 initialization/shutdown.
-+ *
-+ * Code below performs global reiser4 initialization that is done either as
-+ * part of kernel initialization (when reiser4 is statically built-in), or
-+ * during reiser4 module load (when compiled as module).
-+ */
-+
-+void reiser4_handle_error(void)
-+{
-+	struct super_block *super = reiser4_get_current_sb();
-+
-+	/* without a current super block there is nothing to mark or remount */
-+	if (super == NULL)
-+		return;
-+	reiser4_status_write(REISER4_STATUS_DAMAGED, 0,
-+			     "Filesystem error occured");
-+
-+	/* ->onerror == 0 requests a panic; if reiser4_panic() ever returned,
-+	   control would continue with the read-only handling used for every
-+	   other ->onerror value */
-+	if (get_super_private(super)->onerror == 0)
-+		reiser4_panic("foobar-42", "Filesystem error occured\n");
-+
-+	if (!(super->s_flags & MS_RDONLY))
-+		super->s_flags |= MS_RDONLY;
-+}
-+/* reiser4 dentry operations: only ->d_release() is provided, every other method listed here is NULL */
-+struct dentry_operations reiser4_dentry_operations = {
-+ .d_revalidate = NULL,
-+ .d_hash = NULL,
-+ .d_compare = NULL,
-+ .d_delete = NULL,
-+ .d_release = reiser4_d_release,
-+ .d_iput = NULL,
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/vfs_ops.h linux-2.6.20/fs/reiser4/vfs_ops.h
---- linux-2.6.20.orig/fs/reiser4/vfs_ops.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/vfs_ops.h 2007-05-06 14:50:43.899038216 +0400
-@@ -0,0 +1,53 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* vfs_ops.c's exported symbols */
-+
-+#if !defined( __FS_REISER4_VFS_OPS_H__ )
-+#define __FS_REISER4_VFS_OPS_H__
-+
-+#include "forward.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/file/file.h"
-+#include "super.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h> /* for loff_t */
-+#include <linux/fs.h> /* for struct address_space */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/mm.h>
-+#include <linux/backing-dev.h>
-+
-+/* address space operations */
-+int reiser4_writepage(struct page *, struct writeback_control *);
-+int reiser4_set_page_dirty(struct page *);
-+void reiser4_invalidatepage(struct page *, unsigned long offset);
-+int reiser4_releasepage(struct page *, gfp_t);
-+
-+extern int reiser4_update_sd(struct inode *);
-+extern int reiser4_add_nlink(struct inode *, struct inode *, int);
-+extern int reiser4_del_nlink(struct inode *, struct inode *, int);
-+
-+extern int reiser4_start_up_io(struct page *page);
-+extern void reiser4_throttle_write(struct inode *);
-+extern int jnode_is_releasable(jnode *);
-+
-+#define CAPTURE_APAGE_BURST (1024l)
-+void reiser4_writeout(struct super_block *, struct writeback_control *);
-+
-+extern void reiser4_handle_error(void);
-+
-+/* __FS_REISER4_VFS_OPS_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/wander.c linux-2.6.20/fs/reiser4/wander.c
---- linux-2.6.20.orig/fs/reiser4/wander.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/wander.c 2007-05-06 14:50:43.903039466 +0400
-@@ -0,0 +1,1797 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Reiser4 Wandering Log */
-+
-+/* You should read http://www.namesys.com/txn-doc.html
-+
-+ That describes how filesystem operations are performed as atomic
-+ transactions, and how we try to arrange it so that we can write most of the
-+ data only once while performing the operation atomically.
-+
-+ For the purposes of this code, it is enough for it to understand that it
-+ has been told a given block should be written either once, or twice (if
-+ twice then once to the wandered location and once to the real location).
-+
-+ This code guarantees that those blocks that are defined to be part of an
-+ atom either all take effect or none of them take effect.
-+
-+ Relocate set nodes are submitted to write by the jnode_flush() routine, and
-+ the overwrite set is submitted by reiser4_write_log(). This is because with
-+ the overwrite set we seek to optimize writes, and with the relocate set we
-+ seek to cause disk order to correlate with the parent first pre-order.
-+
-+ reiser4_write_log() allocates and writes wandered blocks and maintains
-+ additional on-disk structures of the atom as wander records (each wander
-+ record occupies one block) for storing of the "wandered map" (a table which
-+ contains a relation between wandered and real block numbers) and other
-+ information which might be needed at transaction recovery time.
-+
-+ The wander records are unidirectionally linked into a circle: each wander
-+ record contains a block number of the next wander record, the last wander
-+ record points to the first one.
-+
-+ One wander record (named "tx head" in this file) has a format which is
-+ different from the other wander records. The "tx head" has a reference to the
-+ "tx head" block of the previously committed atom. Also, "tx head" contains
-+ fs information (the free blocks counter, and the oid allocator state) which
-+ is logged in a special way .
-+
-+ There are two journal control blocks, named journal header and journal
-+ footer which have fixed on-disk locations. The journal header has a
-+ reference to the "tx head" block of the last committed atom. The journal
-+ footer points to the "tx head" of the last flushed atom. The atom is
-+ "played" when all blocks from its overwrite set are written to disk the
-+ second time (i.e. written to their real locations).
-+
-+ NOTE: People who know reiserfs internals and its journal structure might be
-+ confused with these terms journal footer and journal header. There is a table
-+ with terms of similar semantics in reiserfs (reiser3) and reiser4:
-+
-+ REISER3 TERM | REISER4 TERM | DESCRIPTION
-+ --------------------+-----------------------+----------------------------
-+ commit record | journal header | atomic write of this record
-+ | | ends transaction commit
-+ --------------------+-----------------------+----------------------------
-+ journal header | journal footer | atomic write of this record
-+ | | ends post-commit writes.
-+ | | After successful
-+ | | writing of this record, journal
-+ | | blocks (in reiser3) or
-+ | | wandered blocks/records are
-+ | | free for re-use.
-+ --------------------+-----------------------+----------------------------
-+
-+ The atom commit process is the following:
-+
-+ 1. The overwrite set is taken from atom's clean list, and its size is
-+ counted.
-+
-+ 2. The number of necessary wander records (including tx head) is calculated,
-+ and the wander record blocks are allocated.
-+
-+ 3. Allocate wandered blocks and populate wander records by wandered map.
-+
-+ 4. submit write requests for wander records and wandered blocks.
-+
-+ 5. wait until submitted write requests complete.
-+
-+ 6. update journal header: change the pointer to the block number of just
-+ written tx head, submit an i/o for modified journal header block and wait
-+ for i/o completion.
-+
-+ NOTE: The special logging for bitmap blocks and some reiser4 super block
-+ fields makes processes of atom commit, flush and recovering a bit more
-+ complex (see comments in the source code for details).
-+
-+ The atom playing process is the following:
-+
-+ 1. Write atom's overwrite set in-place.
-+
-+ 2. Wait on i/o.
-+
-+ 3. Update journal footer: change the pointer to block number of tx head
-+ block of the atom we are currently flushing, submit an i/o, wait on i/o
-+ completion.
-+
-+ 4. Free disk space which was used for wandered blocks and wander records.
-+
-+ After the freeing of wandered blocks and wander records we have that journal
-+ footer points to the on-disk structure which might be overwritten soon.
-+ Neither the log writer nor the journal recovery procedure use that pointer
-+ for accessing the data. When the journal recovery procedure finds the oldest
-+ transaction it compares the journal footer pointer value with the "prev_tx"
-+ pointer value in tx head, if values are equal the oldest not flushed
-+ transaction is found.
-+
-+ NOTE on disk space leakage: the information about which blocks and how many
-+ blocks are allocated for wandered blocks and wander records is not written to
-+ the disk because of special logging for bitmaps and some super block
-+ counters. After a system crash reiser4 does not remember the allocation of
-+ those objects, thus we have no such kind of disk space leakage.
-+*/
-+
-+/* Special logging of reiser4 super block fields. */
-+
-+/* There are some reiser4 super block fields (free block count and OID allocator
-+ state (number of files and next free OID) which are logged separately from
-+ super block to avoid unnecessary atom fusion.
-+
-+ So, the reiser4 super block need not be captured by a transaction which
-+ allocates/deallocates disk blocks or creates/deletes file objects. Moreover,
-+ the reiser4 on-disk super block is not touched when such a transaction is
-+ committed and flushed. Those "counters logged specially" are logged in "tx
-+ head" blocks and in the journal footer block.
-+
-+ A step-by-step description of special logging:
-+
-+ 0. The per-atom information about deleted or created files and allocated or
-+ freed blocks is collected during the transaction. The atom's
-+ ->nr_objects_created and ->nr_objects_deleted are for object
-+ deletion/creation tracking, the numbers of allocated and freed blocks are
-+ calculated using atom's delete set and atom's capture list -- all new and
-+ relocated nodes should be on atom's clean list and should have JNODE_RELOC
-+ bit set.
-+
-+ 1. The "logged specially" reiser4 super block fields have their "committed"
-+ versions in the reiser4 in-memory super block. They get modified only at
-+ atom commit time. The atom's commit thread has an exclusive access to those
-+ "committed" fields because the log writer implementation supports only one
-+ atom commit at a time (there is a per-fs "commit" mutex). At
-+ that time "committed" counters are modified using per-atom information
-+ collected during the transaction. These counters are stored on disk as a
-+ part of tx head block when atom is committed.
-+
-+ 2. When the atom is flushed the value of the free block counter and the OID
-+ allocator state get written to the journal footer block. A special journal
-+ procedure (journal_recover_sb_data()) takes those values from the journal
-+ footer and updates the reiser4 in-memory super block.
-+
-+ NOTE: That means free block count and OID allocator state are logged
-+ separately from the reiser4 super block regardless of the fact that the
-+ reiser4 super block has fields to store both the free block counter and the
-+ OID allocator.
-+
-+ Writing the whole super block at commit time requires knowing true values of
-+ all its fields without changes made by not yet committed transactions. It is
-+ possible by having their "committed" version of the super block like the
-+ reiser4 bitmap blocks have "committed" and "working" versions. However,
-+ another scheme was implemented which stores special logged values in the
-+ unused free space inside transaction head block. In my opinion it has an
-+ advantage of not writing whole super block when only part of it was
-+ modified. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "reiser4.h"
-+#include "super.h"
-+#include "vfs_ops.h"
-+#include "writeout.h"
-+#include "inode.h"
-+#include "entd.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/pagemap.h>
-+#include <linux/bio.h> /* for struct bio */
-+#include <linux/blkdev.h>
-+
-+static int write_jnodes_to_disk_extent(
-+ jnode *, int, const reiser4_block_nr *, flush_queue_t *, int);
-+
-+/* The commit_handle is a container for objects needed at atom commit time */
-+struct commit_handle {
-+ /* A pointer to atom's list of OVRWR nodes */
-+ struct list_head *overwrite_set;
-+ /* atom's overwrite set size, counted by get_overwrite_set() */
-+ int overwrite_set_size;
-+ /* jnodes for wander record blocks; the first one is the tx head */
-+ struct list_head tx_list;
-+ /* number of wander records, tx head included (see get_tx_size()) */
-+ __u32 tx_size;
-+ /* 'committed' sb counters are saved here until atom is completely
-+ flushed; they are stored into the tx head and the journal footer */
-+ __u64 free_blocks;
-+ __u64 nr_files;
-+ __u64 next_oid;
-+ /* A pointer to the atom which is being committed */
-+ txn_atom *atom;
-+ /* A pointer to current super block */
-+ struct super_block *super;
-+ /* The counter of modified bitmaps (JNODE_BITMAP nodes of overwrite set) */
-+ reiser4_block_nr nr_bitmap;
-+};
-+
-+static void init_commit_handle(struct commit_handle *ch, txn_atom *atom)
-+{
-+	/* start from an all-zero handle with an empty list of wander records */
-+	memset(ch, 0, sizeof(*ch));
-+	INIT_LIST_HEAD(&ch->tx_list);
-+	ch->super = reiser4_get_current_sb();
-+	ch->atom = atom;
-+}
-+/* finish using @ch: asserts that the list of wander records (ch->tx_list) is empty */
-+static void done_commit_handle(struct commit_handle *ch)
-+{
-+ assert("zam-690", list_empty(&ch->tx_list));
-+}
-+/* non-zero unless REISER4_NO_WRITE_BARRIER is set for @s (tested with reiser4_is_set()) */
-+static inline int reiser4_use_write_barrier(struct super_block * s)
-+{
-+ return !reiser4_is_set(s, REISER4_NO_WRITE_BARRIER);
-+}
-+/* emit a notice naming s->s_id and set REISER4_NO_WRITE_BARRIER in the fs_flags of @s */
-+static void disable_write_barrier(struct super_block * s)
-+{
-+ notice("zam-1055", "%s does not support write barriers,"
-+ " using synchronous write instead.", s->s_id);
-+ set_bit((int)REISER4_NO_WRITE_BARRIER, &get_super_private(s)->fs_flags);
-+}
-+
-+/* fill journal header block data: ->last_committed_tx := block of the tx head */
-+static void format_journal_header(struct commit_handle *ch)
-+{
-+	struct reiser4_super_info_data *sbinfo;
-+	struct journal_header *header;
-+	jnode *txhead;
-+
-+	sbinfo = get_super_private(ch->super);
-+	assert("zam-479", sbinfo != NULL);
-+	assert("zam-480", sbinfo->journal_header != NULL);
-+
-+	txhead = list_entry(ch->tx_list.next, jnode, capture_link);
-+	/* a failed jload() must not go unnoticed, same as in
-+	   format_journal_footer(); NOTE(review): confirm the label is unused */
-+	check_me("zam-691-jh", jload(sbinfo->journal_header) == 0);
-+	header = (struct journal_header *)jdata(sbinfo->journal_header);
-+	assert("zam-484", header != NULL);
-+
-+	put_unaligned(cpu_to_le64(*jnode_get_block(txhead)),
-+		      &header->last_committed_tx);
-+
-+	jrelse(sbinfo->journal_header);
-+}
-+
-+/* fill journal footer block data: the tx head block number of @ch and the
-+   'committed' sb counters saved in @ch */
-+static void format_journal_footer(struct commit_handle *ch)
-+{
-+	struct reiser4_super_info_data *sbinfo;
-+	struct journal_footer *footer;
-+	jnode *tx_head;
-+
-+	sbinfo = get_super_private(ch->super);
-+	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
-+
-+	assert("zam-493", sbinfo != NULL);
-+	/* it is the footer, not the header, that is loaded and filled below */
-+	assert("zam-494", sbinfo->journal_footer != NULL);
-+
-+	check_me("zam-691", jload(sbinfo->journal_footer) == 0);
-+
-+	footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
-+	assert("zam-495", footer != NULL);
-+
-+	put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)),
-+		      &footer->last_flushed_tx);
-+	put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks);
-+	put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files);
-+	put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid);
-+
-+	jrelse(sbinfo->journal_footer);
-+}
-+
-+/* wander record capacity (in wander entries) depends on current block size */
-+static int wander_record_capacity(const struct super_block *super)
-+{
-+	unsigned long room = super->s_blocksize - sizeof(struct wander_record_header);
-+
-+	return room / sizeof(struct wander_entry);
-+}
-+
-+/* Fill the first wander record (tx head) from the data collected in @ch */
-+static void format_tx_head(struct commit_handle *ch)
-+{
-+	struct list_head *first = ch->tx_list.next;
-+	jnode *head = list_entry(first, jnode, capture_link);
-+	jnode *succ;
-+	struct tx_header *txh;
-+
-+	assert("zam-692", first != &ch->tx_list);
-+	/* records are linked into a circle: a lone tx head points to itself */
-+	if (first->next == &ch->tx_list)
-+		succ = head;
-+	else
-+		succ = list_entry(first->next, jnode, capture_link);
-+
-+	txh = (struct tx_header *)jdata(head);
-+	assert("zam-460", txh != NULL);
-+	assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header));
-+
-+	memset(jdata(head), 0, (size_t) ch->super->s_blocksize);
-+	memcpy(jdata(head), TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE);
-+
-+	put_unaligned(cpu_to_le32(ch->tx_size), &txh->total);
-+	put_unaligned(cpu_to_le64(get_super_private(ch->super)->last_committed_tx),
-+		      &txh->prev_tx);
-+	put_unaligned(cpu_to_le64(*jnode_get_block(succ)), &txh->next_block);
-+	put_unaligned(cpu_to_le64(ch->free_blocks), &txh->free_blocks);
-+	put_unaligned(cpu_to_le64(ch->nr_files), &txh->nr_files);
-+	put_unaligned(cpu_to_le64(ch->next_oid), &txh->next_oid);
-+}
-+
-+/* prepare ordinary wander record block @node (fill all service fields) */
-+static void
-+format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial)
-+{
-+	struct wander_record_header *rec;
-+	struct list_head *link;
-+	jnode *succ;
-+
-+	assert("zam-464", node != NULL);
-+	rec = (struct wander_record_header *)jdata(node);
-+	/* the last record on the list wraps around to the first one */
-+	link = node->capture_link.next;
-+	if (link == &ch->tx_list)
-+		link = ch->tx_list.next;
-+	succ = list_entry(link, jnode, capture_link);
-+
-+	assert("zam-465", rec != NULL);
-+	assert("zam-463",
-+	       ch->super->s_blocksize > sizeof(struct wander_record_header));
-+
-+	memset(jdata(node), 0, (size_t) ch->super->s_blocksize);
-+	memcpy(jdata(node), WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE);
-+	put_unaligned(cpu_to_le32(ch->tx_size), &rec->total);
-+	put_unaligned(cpu_to_le32(serial), &rec->serial);
-+	put_unaligned(cpu_to_le64(*jnode_get_block(succ)), &rec->next_block);
-+}
-+
-+/* add the wandered map entry (*a = original, *b = wandered) as slot @index of @node */
-+static void
-+store_entry(jnode * node, int index, const reiser4_block_nr * a,
-+	    const reiser4_block_nr * b)
-+{
-+	char *base = jdata(node);
-+	struct wander_entry *slot;
-+
-+	assert("zam-451", base != NULL);
-+
-+	/* entries follow the record header; pick slot number @index */
-+	slot = (struct wander_entry *)(base +
-+		sizeof(struct wander_record_header)) + index;
-+
-+	put_unaligned(cpu_to_le64(*a), &slot->original);
-+	put_unaligned(cpu_to_le64(*b), &slot->wandered);
-+}
-+
-+/* currently, wander records contain only the wandered map, so their number
-+   depends on the overwrite set size */
-+static void get_tx_size(struct commit_handle *ch)
-+{
-+	int capacity = wander_record_capacity(ch->super);
-+
-+	assert("zam-440", ch->overwrite_set_size != 0);
-+	assert("zam-695", ch->tx_size == 0);
-+
-+	/* ordinary wander records take
-+	   (<overwrite_set_size> - 1) / <wander_record_capacity> + 1 blocks,
-+	   and one more block is the tx head */
-+	ch->tx_size = (ch->overwrite_set_size - 1) / capacity + 2;
-+}
-+
-+/* A special structure used by store_wmap_actor() for saving its state
-+ between calls */
-+struct store_wmap_params {
-+ jnode *cur; /* jnode of current wander record to fill */
-+ int idx; /* free element index in wander record */
-+ int capacity; /* number of entries a wander record holds; reaching it moves @cur on */
-+
-+#if REISER4_DEBUG
-+ struct list_head *tx_list; /* list head @cur must never reach (checked by assertion zam-454) */
-+#endif
-+};
-+
-+/* an actor for use in blocknr_set_iterator routine which populates the list
-+   of pre-formatted wander records by wandered map info; always returns 0 */
-+static int
-+store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
-+		 const reiser4_block_nr * b, void *data)
-+{
-+	struct store_wmap_params *params = data;
-+
-+	if (params->idx >= params->capacity) {
-+		/* a new wander record should be taken from the tx_list */
-+		params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link);
-+		assert("zam-454",
-+		       params->tx_list != &params->cur->capture_link);
-+
-+		params->idx = 0;
-+	}
-+
-+	/* put the pair (*a, *b) into the first free slot of the current record */
-+	store_entry(params->cur, params->idx, a, b);
-+	params->idx++;
-+	return 0;
-+}
-+
-+/* This function is called after Relocate set gets written to disk, Overwrite
-+ set is written to wandered locations and all wander records are written
-+ also. Updated journal header blocks contains a pointer (block number) to
-+ first wander record of the just written transaction */
-+static int update_journal_header(struct commit_handle *ch, int use_barrier)
-+{
-+	struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
-+	jnode *hdr = sbinfo->journal_header;
-+	jnode *tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
-+	int flags = use_barrier ? WRITEOUT_BARRIER : 0;
-+	int err;
-+
-+	format_journal_header(ch);
-+
-+	/* submit the single journal header block ... */
-+	err = write_jnodes_to_disk_extent(hdr, 1, jnode_get_block(hdr), NULL,
-+					  flags);
-+
-+	/* ... and wait until the write completes */
-+	if (err == 0)
-+		err = jwait_io(hdr, WRITE);
-+	if (err != 0)
-+		return err;
-+
-+	/* only now the just written transaction counts as the last committed
-+	   one */
-+	sbinfo->last_committed_tx = *jnode_get_block(tx_head);
-+
-+	return 0;
-+}
-+
-+/* This function is called after write-back is finished. It fills the journal
-+   footer block (format_journal_footer()), submits it for writing and waits
-+   for that i/o to complete.
-+
-+   @use_barrier: when non-zero the block is written with WRITEOUT_BARRIER.
-+
-+   Returns 0 or the error of the write submission or of the wait. Blocks
-+   occupied by wandered blocks and wander records are not freed here (see
-+   dealloc_wmap() and dealloc_tx_list()). */
-+static int update_journal_footer(struct commit_handle *ch, int use_barrier)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(ch->super);
-+	jnode *ftr = sbinfo->journal_footer;
-+	int flags = use_barrier ? WRITEOUT_BARRIER : 0;
-+	int err;
-+
-+	format_journal_footer(ch);
-+
-+	/* submit the single journal footer block ... */
-+	err = write_jnodes_to_disk_extent(ftr, 1, jnode_get_block(ftr), NULL,
-+					  flags);
-+	if (err != 0)
-+		return err;
-+
-+	/* ... and hand back the outcome of waiting for it */
-+	return jwait_io(ftr, WRITE);
-+}
-+
-+/* free the blocks and drop the jnodes of all wander records on ch->tx_list (the transaction is already written in place) */
-+static void dealloc_tx_list(struct commit_handle *ch)
-+{
-+ while (!list_empty(&ch->tx_list)) {
-+ jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link);
-+ list_del(&cur->capture_link);
-+ ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link));
-+ reiser4_dealloc_block(jnode_get_block(cur), BLOCK_NOT_COUNTED,
-+ BA_FORMATTED);
-+
-+ unpin_jnode_data(cur);
-+ reiser4_drop_io_head(cur);
-+ }
-+}
-+
-+/* An actor for use in blocknr_set_iterator() routine which frees wandered blocks
-+ from atom's overwrite set: only @b, the wandered location, is deallocated. */
-+static int
-+dealloc_wmap_actor(txn_atom * atom UNUSED_ARG,
-+ const reiser4_block_nr * a UNUSED_ARG,
-+ const reiser4_block_nr * b, void *data UNUSED_ARG)
-+{
-+
-+ assert("zam-499", b != NULL);
-+ assert("zam-500", *b != 0);
-+ assert("zam-501", !reiser4_blocknr_is_fake(b));
-+
-+ reiser4_dealloc_block(b, BLOCK_NOT_COUNTED, BA_FORMATTED);
-+ return 0;
-+}
-+
-+/* free wandered block locations of already written in place transaction by running dealloc_wmap_actor() over the atom's wandered map */
-+static void dealloc_wmap(struct commit_handle *ch)
-+{
-+ assert("zam-696", ch->atom != NULL);
-+
-+ blocknr_set_iterator(ch->atom, &ch->atom->wandered_map,
-+ dealloc_wmap_actor, NULL, 1);
-+}
-+
-+/* helper function for alloc wandered blocks: asks the allocator for @count
-+   blocks and passes the resulting extent back through @start and @len */
-+static int
-+get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len)
-+{
-+	reiser4_blocknr_hint hint;
-+	reiser4_block_nr nr = count;	/* the allocator takes a wide length */
-+	int err;
-+
-+	/* FIXME-ZAM: A special policy needed for allocation of wandered blocks
-+	   ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed
-+	   reserved allocation area so as to get the best qualities of fixed
-+	   journals? */
-+	reiser4_blocknr_hint_init(&hint);
-+	hint.block_stage = BLOCK_GRABBED;
-+
-+	err = reiser4_alloc_blocks(&hint, start, &nr,
-+				   BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START);
-+
-+	/* *len is set whatever the allocator returned */
-+	*len = (int)nr;
-+	return err;
-+}
-+
-+/*
-+ * roll back changes made before issuing BIO in the case of IO error.
-+ */
-+static void undo_bio(struct bio *bio)
-+{
-+	struct bio_vec *vec;
-+
-+	for (vec = bio->bi_io_vec; vec < bio->bi_io_vec + bio->bi_vcnt; ++vec) {
-+		struct page *page = vec->bv_page;
-+		jnode *node;
-+
-+		/* take the page out of write-back and re-dirty its jnode */
-+		ClearPageWriteback(page);
-+		node = jprivate(page);
-+		spin_lock_jnode(node);
-+		JF_CLR(node, JNODE_WRITEBACK);
-+		JF_SET(node, JNODE_DIRTY);
-+		spin_unlock_jnode(node);
-+	}
-+	bio_put(bio);
-+}
-+
-+/* call jrelse_tail() on every jnode of the overwrite set; NOTE(review): the earlier wording "put overwrite set back to atom's clean list" is not something this code does itself - confirm */
-+static void put_overwrite_set(struct commit_handle *ch)
-+{
-+ jnode *cur;
-+
-+ list_for_each_entry(cur, ch->overwrite_set, capture_link)
-+ jrelse_tail(cur);
-+}
-+
-+/* Count overwrite set size, grab disk space for wandered blocks allocation.
-+ Since we have a separate list for atom's overwrite set we just scan the list,
-+ count bitmap and other not leaf nodes which wandered blocks allocation we
-+ have to grab space for.
-+
-+ Every jnode kept on the list is loaded with jload_gfp(). A fake "above root"
-+ znode is uncaptured and, when the disk format plugin provides log_super(),
-+ the jnode returned by that hook is put on the overwrite list in its place.
-+
-+ Returns ch->overwrite_set_size (which may be 0) on success, otherwise the
-+ error obtained from log_super() or from reiser4_grab_space_force(). */
-+static int get_overwrite_set(struct commit_handle *ch)
-+{
-+ int ret;
-+ jnode *cur;
-+ __u64 nr_not_leaves = 0;
-+#if REISER4_DEBUG
-+ __u64 nr_formatted_leaves = 0;
-+ __u64 nr_unformatted_leaves = 0;
-+#endif
-+
-+ assert("zam-697", ch->overwrite_set_size == 0);
-+
-+ ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom);
-+ cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
-+
-+ while (ch->overwrite_set != &cur->capture_link) {
-+ /* remember the successor now: @cur may be uncaptured below */
-+ jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
-+
-+ /* Count bitmap locks for getting correct statistics what number
-+ * of blocks were cleared by the transaction commit. */
-+ if (jnode_get_type(cur) == JNODE_BITMAP)
-+ ch->nr_bitmap++;
-+
-+ assert("zam-939", JF_ISSET(cur, JNODE_OVRWR)
-+ || jnode_get_type(cur) == JNODE_BITMAP);
-+
-+ if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) {
-+ /* we replace fake znode by another (real)
-+ znode which is suggested by disk_layout
-+ plugin */
-+
-+ /* FIXME: it looks like fake znode should be
-+ replaced by jnode supplied by
-+ disk_layout. */
-+
-+ struct super_block *s = reiser4_get_current_sb();
-+ reiser4_super_info_data *sbinfo =
-+ get_current_super_private();
-+
-+ if (sbinfo->df_plug->log_super) {
-+ jnode *sj = sbinfo->df_plug->log_super(s);
-+
-+ assert("zam-593", sj != NULL);
-+
-+ if (IS_ERR(sj))
-+ return PTR_ERR(sj);
-+
-+ spin_lock_jnode(sj);
-+ JF_SET(sj, JNODE_OVRWR);
-+ insert_into_atom_ovrwr_list(ch->atom, sj);
-+ spin_unlock_jnode(sj);
-+
-+ /* jload it as the rest of overwrite set */
-+ /* NOTE(review): the result of this jload_gfp() is ignored,
-+ unlike the one for @cur below which panics on failure;
-+ confirm this is intended */
-+ jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0);
-+
-+ ch->overwrite_set_size++;
-+ }
-+ /* the jnode lock is taken here and not released in this function;
-+ presumably reiser4_uncapture_block() drops it, verify against
-+ its definition */
-+ spin_lock_jnode(cur);
-+ reiser4_uncapture_block(cur);
-+ jput(cur);
-+
-+ } else {
-+ /* this declaration shadows the function-level @ret */
-+ int ret;
-+ ch->overwrite_set_size++;
-+ ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0);
-+ if (ret)
-+ reiser4_panic("zam-783",
-+ "cannot load e-flushed jnode back (ret = %d)\n",
-+ ret);
-+ }
-+
-+ /* Count not leaves here because we have to grab disk space
-+ * for wandered blocks. They were not counted as "flush
-+ * reserved". Counting should be done _after_ nodes are pinned
-+ * into memory by jload(). */
-+ if (!jnode_is_leaf(cur))
-+ nr_not_leaves++;
-+ else {
-+#if REISER4_DEBUG
-+ /* at this point @cur either has JNODE_FLUSH_RESERVED
-+ * or is eflushed. Locking is not strong enough to
-+ * write an assertion checking for this. */
-+ if (jnode_is_znode(cur))
-+ nr_formatted_leaves++;
-+ else
-+ nr_unformatted_leaves++;
-+#endif
-+ JF_CLR(cur, JNODE_FLUSH_RESERVED);
-+ }
-+
-+ cur = next;
-+ }
-+
-+ /* Grab space for writing (wandered blocks) of not leaves found in
-+ * overwrite set. */
-+ ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED);
-+ if (ret)
-+ return ret;
-+
-+ /* Disk space for allocation of wandered blocks of leaf nodes already
-+ * reserved as "flush reserved", move it to grabbed space counter. */
-+ spin_lock_atom(ch->atom);
-+ /* the two leaf counters are declared only under REISER4_DEBUG, so this
-+ assert presumably expands to nothing in non-debug builds */
-+ assert("zam-940",
-+ nr_formatted_leaves + nr_unformatted_leaves <=
-+ ch->atom->flush_reserved);
-+ flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved);
-+ spin_unlock_atom(ch->atom);
-+
-+ return ch->overwrite_set_size;
-+}
-+
-+/**
-+ * write_jnodes_to_disk_extent - submit write request
-+ * @first: first jnode of the list
-+ * @nr: number of jnodes on the list
-+ * @block_p: points to the number of the first disk block of the target region
-+ * @fq: flush queue handed to add_fq_to_bio() for every submitted bio
-+ * @flags: only WRITEOUT_BARRIER is examined here; when it is set the bios are
-+ * submitted with WRITE_BARRIER instead of WRITE
-+ *
-+ * Submits a write request for @nr jnodes beginning from the @first, other
-+ * jnodes are after the @first on the double-linked "capture" list. All jnodes
-+ * will be written to the disk region of @nr blocks starting with @block_p block
-+ * number. If @fq is not NULL it means that waiting for i/o completion will be
-+ * done more efficiently by using flush_queue_t objects.
-+ * This function is the one which writes list of jnodes in batch mode. It does
-+ * all low-level things as bio construction and page states manipulation.
-+ *
-+ * Returns 0 on success, -ENOMEM when a bio cannot be allocated, or -EOPNOTSUPP
-+ * when a submitted bio comes back flagged BIO_EOPNOTSUPP. Bios submitted
-+ * before a failure are not rolled back.
-+ *
-+ * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are
-+ * aggregated in this function instead of being left to the layers below
-+ *
-+ * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that?
-+ * Why that layer needed? Why BIOs cannot be constructed here?
-+ */
-+static int write_jnodes_to_disk_extent(
-+ jnode *first, int nr, const reiser4_block_nr *block_p,
-+ flush_queue_t *fq, int flags)
-+{
-+ struct super_block *super = reiser4_get_current_sb();
-+ int write_op = ( flags & WRITEOUT_BARRIER ) ? WRITE_BARRIER : WRITE;
-+ int max_blocks;
-+ jnode *cur = first;
-+ reiser4_block_nr block;
-+
-+ assert("zam-571", first != NULL);
-+ assert("zam-572", block_p != NULL);
-+ assert("zam-570", nr > 0);
-+
-+ block = *block_p;
-+ max_blocks = min(bio_get_nr_vecs(super->s_bdev), BIO_MAX_PAGES);
-+
-+ /* one bio per iteration, each carrying at most max_blocks pages */
-+ while (nr > 0) {
-+ struct bio *bio;
-+ int nr_blocks = min(nr, max_blocks);
-+ int i;
-+ int nr_used;
-+
-+ bio = bio_alloc(GFP_NOIO, nr_blocks);
-+ if (!bio)
-+ return RETERR(-ENOMEM);
-+
-+ bio->bi_bdev = super->s_bdev;
-+ /* convert the fs block number into 512-byte sectors */
-+ bio->bi_sector = block * (super->s_blocksize >> 9);
-+ for (nr_used = 0, i = 0; i < nr_blocks; i++) {
-+ struct page *pg;
-+
-+ pg = jnode_page(cur);
-+ assert("zam-573", pg != NULL);
-+
-+ page_cache_get(pg);
-+
-+ lock_and_wait_page_writeback(pg);
-+
-+ if (!bio_add_page(bio, pg, super->s_blocksize, 0)) {
-+ /*
-+ * underlying device is satiated. Stop adding
-+ * pages to the bio.
-+ */
-+ unlock_page(pg);
-+ page_cache_release(pg);
-+ break;
-+ }
-+
-+ spin_lock_jnode(cur);
-+ assert("nikita-3166",
-+ pg->mapping == jnode_get_mapping(cur));
-+ assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK));
-+#if REISER4_DEBUG
-+ spin_lock(&cur->load);
-+ assert("nikita-3165", !jnode_is_releasable(cur));
-+ spin_unlock(&cur->load);
-+#endif
-+ JF_SET(cur, JNODE_WRITEBACK);
-+ JF_CLR(cur, JNODE_DIRTY);
-+ ON_DEBUG(cur->written++);
-+ spin_unlock_jnode(cur);
-+
-+ ClearPageError(pg);
-+ set_page_writeback(pg);
-+
-+ if (get_current_context()->entd) {
-+ /* this is ent thread */
-+ entd_context *ent = get_entd_context(super);
-+ struct wbq *rq, *next;
-+
-+ spin_lock(&ent->guard);
-+
-+ if (pg == ent->cur_request->page) {
-+ /*
-+ * entd is called for this page. This
-+ * request is not in the todo list
-+ */
-+ ent->cur_request->written = 1;
-+ } else {
-+ /*
-+ * if we have written a page for which writepage
-+ * is called for - move request to another list.
-+ */
-+ list_for_each_entry_safe(rq, next, &ent->todo_list, link) {
-+ assert("", rq->magic == WBQ_MAGIC);
-+ if (pg == rq->page) {
-+ /*
-+ * remove request from
-+ * entd's queue, but do
-+ * not wake up a thread
-+ * which put this
-+ * request
-+ */
-+ list_del_init(&rq->link);
-+ ent->nr_todo_reqs --;
-+ list_add_tail(&rq->link, &ent->done_list);
-+ ent->nr_done_reqs ++;
-+ rq->written = 1;
-+ break;
-+ }
-+ }
-+ }
-+ spin_unlock(&ent->guard);
-+ }
-+
-+ clear_page_dirty_for_io(pg);
-+
-+ unlock_page(pg);
-+
-+ cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+ nr_used++;
-+ }
-+ if (nr_used > 0) {
-+ assert("nikita-3453",
-+ bio->bi_size == super->s_blocksize * nr_used);
-+ assert("nikita-3454", bio->bi_vcnt == nr_used);
-+
-+ /* Check if we are allowed to write at all */
-+ if (super->s_flags & MS_RDONLY)
-+ undo_bio(bio);
-+ else {
-+ int not_supported;
-+
-+ add_fq_to_bio(fq, bio);
-+ /* extra reference so the bio flags can still be read
-+ after submission */
-+ bio_get(bio);
-+ reiser4_submit_bio(write_op, bio);
-+ not_supported = bio_flagged(bio, BIO_EOPNOTSUPP);
-+ bio_put(bio);
-+ if (not_supported)
-+ return -EOPNOTSUPP;
-+ }
-+
-+ /* the hint helper is called with the last block of this bio
-+ (presumably to record it as the default search start;
-+ verify against its definition), then @block moves on to
-+ the first block of the next bio */
-+ block += nr_used - 1;
-+ update_blocknr_hint_default(super, &block);
-+ block += 1;
-+ } else {
-+ /* NOTE(review): nothing was added, so @nr is not reduced and
-+ the loop retries with a fresh bio; confirm this cannot
-+ repeat indefinitely */
-+ bio_put(bio);
-+ }
-+ nr -= nr_used;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Split the jnode list at @head into maximal runs whose disk block numbers are
-+   consecutive and hand each run to write_jnodes_to_disk_extent(). When
-+   @nr_submitted is not NULL it is increased by the length of every run that
-+   was submitted. Returns 0, or the first error reported for a run. */
-+int
-+write_jnode_list(struct list_head *head, flush_queue_t *fq,
-+		 long *nr_submitted, int flags)
-+{
-+	struct list_head *run_start = head->next;
-+
-+	while (run_start != head) {
-+		jnode *first = list_entry(run_start, jnode, capture_link);
-+		struct list_head *pos;
-+		int run_len = 1;
-+		int err;
-+
-+		/* extend the run while block numbers stay consecutive */
-+		for (pos = run_start->next; pos != head; pos = pos->next) {
-+			jnode *node = list_entry(pos, jnode, capture_link);
-+
-+			if (*jnode_get_block(node) !=
-+			    *jnode_get_block(first) + run_len)
-+				break;
-+			++run_len;
-+		}
-+
-+		err = write_jnodes_to_disk_extent(first, run_len,
-+						  jnode_get_block(first),
-+						  fq, flags);
-+		if (err)
-+			return err;
-+
-+		if (nr_submitted)
-+			*nr_submitted += run_len;
-+
-+		/* the next run begins where this one stopped */
-+		run_start = pos;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Add a wandered mapping to the current atom's wandered map.
-+
-+   For @len consecutive jnodes starting at @cur (following capture_link), the
-+   pair (jnode's own block, wandered block) is added to atom->wandered_map; the
-+   wandered blocks are the @len consecutive blocks starting at *@block_p.
-+
-+   Returns 0 on success. On failure the wandered blocks that were not added to
-+   the map, including the one whose insertion failed, are deallocated and the
-+   error is returned. */
-+static int
-+add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p)
-+{
-+	int ret;
-+	blocknr_set_entry *new_bsep = NULL;
-+	reiser4_block_nr block;
-+
-+	txn_atom *atom;
-+
-+	assert("zam-568", block_p != NULL);
-+	block = *block_p;
-+	assert("zam-569", len > 0);
-+
-+	/* @len is decremented only after a block has been mapped, so inside
-+	   the body it always counts the current block plus those after it */
-+	for (; len > 0; --len) {
-+		/* blocknr_set_add_pair() asks for a retry with -E_REPEAT; the
-+		   atom is re-acquired on every attempt */
-+		do {
-+			atom = get_current_atom_locked();
-+			assert("zam-536",
-+			       !reiser4_blocknr_is_fake(jnode_get_block(cur)));
-+			ret =
-+			    blocknr_set_add_pair(atom, &atom->wandered_map,
-+						 &new_bsep,
-+						 jnode_get_block(cur), &block);
-+		} while (ret == -E_REPEAT);
-+
-+		if (ret) {
-+			/* deallocate blocks which were not added to wandered
-+			   map: @block itself and everything after it */
-+			reiser4_block_nr wide_len = len;
-+
-+			/* NOTE(review): the atom lock state on this path
-+			   depends on blocknr_set_add_pair(); left as in the
-+			   original, which does not unlock here */
-+			reiser4_dealloc_blocks(&block, &wide_len,
-+					       BLOCK_NOT_COUNTED,
-+					       BA_FORMATTED
-+					       /* formatted, without defer */ );
-+
-+			return ret;
-+		}
-+
-+		spin_unlock_atom(atom);
-+
-+		cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+		++block;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Walk the commit handle's OVERWRITE SET and, run by run: obtain a region of
-+   wandered blocks, register the mapping in the atom's wandered map and submit
-+   the corresponding jnodes for writing into that region. The caller is
-+   expected to hold no atom lock and the atom to be at a stage where fusion can
-+   no longer happen. Returns 0, or the first error from any of the steps. */
-+static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq)
-+{
-+	reiser4_block_nr wandered;
-+	jnode *node;
-+	int remaining;
-+	int got;
-+	int err;
-+
-+	assert("zam-534", ch->overwrite_set_size > 0);
-+
-+	remaining = ch->overwrite_set_size;
-+	node = list_entry(ch->overwrite_set->next, jnode, capture_link);
-+
-+	while (&node->capture_link != ch->overwrite_set) {
-+		assert("zam-567", JF_ISSET(node, JNODE_OVRWR));
-+
-+		/* @got receives the size of the region actually allocated */
-+		err = get_more_wandered_blocks(remaining, &wandered, &got);
-+		if (err)
-+			return err;
-+		remaining -= got;
-+
-+		err = add_region_to_wmap(node, got, &wandered);
-+		if (err == 0)
-+			err = write_jnodes_to_disk_extent(node, got, &wandered,
-+							  fq, 0);
-+		if (err)
-+			return err;
-+
-+		/* step over the jnodes that were just submitted */
-+		for (; got > 0; --got) {
-+			assert("zam-604",
-+			       ch->overwrite_set != &node->capture_link);
-+			node = list_entry(node->capture_link.next, jnode,
-+					  capture_link);
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/* Allocate ch->tx_size blocks for the transaction's wander records, create a
-+   pinned jnode for each of them and link the jnodes on ch->tx_list; then
-+   format the tx head and the wander records, store the atom's wandered map in
-+   the records and submit the whole list for writing through @fq.
-+
-+   Returns 0 on success or a negative error code. On failure the jnodes already
-+   linked on ch->tx_list stay there for the caller to invalidate; blocks that
-+   were allocated but never got a jnode are deallocated here. */
-+static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq)
-+{
-+	reiser4_blocknr_hint hint;
-+	reiser4_block_nr allocated = 0;
-+	reiser4_block_nr first, len;
-+	jnode *cur;
-+	jnode *txhead;
-+	int ret;
-+	reiser4_context *ctx;
-+	reiser4_super_info_data *sbinfo;
-+
-+	assert("zam-698", ch->tx_size > 0);
-+	assert("zam-699", list_empty_careful(&ch->tx_list));
-+
-+	ctx = get_current_context();
-+	sbinfo = get_super_private(ctx->super);
-+
-+	while (allocated < (unsigned)ch->tx_size) {
-+		len = (ch->tx_size - allocated);
-+
-+		reiser4_blocknr_hint_init(&hint);
-+
-+		hint.block_stage = BLOCK_GRABBED;
-+
-+		/* FIXME: there should be some block allocation policy for
-+		   nodes which contain wander records */
-+
-+		/* We assume that disk space for wandered record blocks can be
-+		 * taken from reserved area. */
-+		ret = reiser4_alloc_blocks(&hint, &first, &len,
-+					   BA_FORMATTED | BA_RESERVED |
-+					   BA_USE_DEFAULT_SEARCH_START);
-+		reiser4_blocknr_hint_done(&hint);
-+
-+		if (ret)
-+			return ret;
-+
-+		allocated += len;
-+
-+		/* create jnodes for all wander records; @len is decremented
-+		   only after a block got its jnode, so on failure it still
-+		   counts the block at @first */
-+		for (; len > 0; len--, first++) {
-+			cur = reiser4_alloc_io_head(&first);
-+
-+			if (cur == NULL) {
-+				ret = RETERR(-ENOMEM);
-+				goto free_not_assigned;
-+			}
-+
-+			ret = jinit_new(cur, reiser4_ctx_gfp_mask_get());
-+
-+			if (ret != 0) {
-+				jfree(cur);
-+				goto free_not_assigned;
-+			}
-+
-+			pin_jnode_data(cur);
-+
-+			list_add_tail(&cur->capture_link, &ch->tx_list);
-+		}
-+	}
-+
-+	{			/* format a on-disk linked list of wander records */
-+		int serial = 1;
-+
-+		txhead = list_entry(ch->tx_list.next, jnode, capture_link);
-+		format_tx_head(ch);
-+
-+		cur = list_entry(txhead->capture_link.next, jnode, capture_link);
-+		while (&ch->tx_list != &cur->capture_link) {
-+			format_wander_record(ch, cur, serial++);
-+			cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+		}
-+	}
-+
-+	{			/* Fill wander records with Wandered Set */
-+		struct store_wmap_params params;
-+		txn_atom *atom;
-+
-+		/* the first record after the tx head receives the first entry */
-+		params.cur = list_entry(txhead->capture_link.next, jnode, capture_link);
-+
-+		params.idx = 0;
-+		params.capacity =
-+		    wander_record_capacity(reiser4_get_current_sb());
-+
-+		atom = get_current_atom_locked();
-+		blocknr_set_iterator(atom, &atom->wandered_map,
-+				     &store_wmap_actor, &params, 0);
-+		spin_unlock_atom(atom);
-+	}
-+
-+	{			/* relse all jnodes from tx_list */
-+		cur = list_entry(ch->tx_list.next, jnode, capture_link);
-+		while (&ch->tx_list != &cur->capture_link) {
-+			jrelse(cur);
-+			cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+		}
-+	}
-+
-+	ret = write_jnode_list(&ch->tx_list, fq, NULL, 0);
-+
-+	return ret;
-+
-+      free_not_assigned:
-+	/* We deallocate blocks not yet assigned to jnodes on tx_list: @len
-+	   blocks starting at @first, the first of which is the block whose
-+	   jnode could not be set up. The caller takes care about invalidating
-+	   of tx list */
-+	reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED);
-+
-+	return ret;
-+}
-+
-+/* Write the transaction out: grab space for the wander records, allocate and
-+   submit the wandered blocks and the tx records, then update the journal
-+   header. When write barriers are in use the header is written with a barrier
-+   and the flush queues are finished afterwards; if the device answers
-+   -EOPNOTSUPP, barriers are disabled and the header update is retried the
-+   non-barrier way (finish the flush queues first, then write the header).
-+   Returns 0 or the first error. */
-+static int commit_tx(struct commit_handle *ch)
-+{
-+	flush_queue_t *queue;
-+	int use_barrier;
-+	int err;
-+
-+	/* Grab more space for wandered records. */
-+	err = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED);
-+	if (err)
-+		return err;
-+
-+	queue = get_fq_for_current_atom();
-+	if (IS_ERR(queue))
-+		return PTR_ERR(queue);
-+
-+	spin_unlock_atom(queue->atom);
-+
-+	err = alloc_wandered_blocks(ch, queue);
-+	if (err == 0)
-+		err = alloc_tx(ch, queue);
-+
-+	reiser4_fq_put(queue);
-+	if (err)
-+		return err;
-+
-+	for (;;) {
-+		use_barrier = reiser4_use_write_barrier(ch->super);
-+
-+		if (!use_barrier) {
-+			/* all previously submitted i/o must complete before
-+			   the header is written */
-+			err = current_atom_finish_all_fq();
-+			if (err)
-+				return err;
-+			return update_journal_header(ch, use_barrier);
-+		}
-+
-+		err = update_journal_header(ch, use_barrier);
-+		if (err == 0)
-+			return current_atom_finish_all_fq();
-+		if (err != -EOPNOTSUPP)
-+			return err;
-+
-+		/* barriers are not supported: switch them off and retry */
-+		disable_write_barrier(ch->super);
-+	}
-+}
-+
-+/* Run the post-commit hook, submit the jnodes of the overwrite set for writing
-+ to the block numbers recorded in them (write_jnode_list()), then update the
-+ journal footer. A barrier write of the footer is used when
-+ reiser4_use_write_barrier() allows it; on -EOPNOTSUPP barriers are disabled
-+ and the footer update is repeated after finishing the flush queues first.
-+ On success the post-write-back hook is run and 0 is returned; otherwise the
-+ first error is returned. */
-+static int write_tx_back(struct commit_handle * ch)
-+{
-+ flush_queue_t *fq;
-+ int ret;
-+ int barrier;
-+
-+ reiser4_post_commit_hook();
-+ fq = get_fq_for_current_atom();
-+ if (IS_ERR(fq))
-+ return PTR_ERR(fq);
-+ spin_unlock_atom(fq->atom);
-+ ret = write_jnode_list(
-+ ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM);
-+ reiser4_fq_put(fq);
-+ if (ret)
-+ return ret;
-+ repeat_wo_barrier:
-+ barrier = reiser4_use_write_barrier(ch->super);
-+ if (!barrier) {
-+ /* without a barrier, all submitted i/o has to finish before the
-+ footer is written */
-+ ret = current_atom_finish_all_fq();
-+ if (ret)
-+ return ret;
-+ }
-+ ret = update_journal_footer(ch, barrier);
-+ if (barrier) {
-+ if (ret) {
-+ if (ret == -EOPNOTSUPP) {
-+ /* device rejected the barrier: fall back for good */
-+ disable_write_barrier(ch->super);
-+ goto repeat_wo_barrier;
-+ }
-+ return ret;
-+ }
-+ ret = current_atom_finish_all_fq();
-+ }
-+ if (ret)
-+ return ret;
-+ reiser4_post_write_back_hook();
-+ return 0;
-+}
-+
-+/* We assume that at this moment all captured blocks are marked as RELOC or
-+   WANDER (belong to Relocate or Overwrite set), all nodes from Relocate set
-+   are submitted to write.
-+
-+   Commits the current atom: builds the overwrite set, writes it to wandered
-+   locations together with the transaction records (commit_tx()), moves the
-+   atom to ASTAGE_POST_COMMIT and writes the overwrite set back
-+   (write_tx_back()).
-+
-+   *@nr_submitted is increased by the number of overwrite set nodes that are
-+   not bitmaps. Returns 0 on success (also when the overwrite set is empty) or
-+   a negative error code. Writeout mode is enabled for the duration of the call
-+   and disabled again on every return path.
-+*/
-+
-+int reiser4_write_logs(long *nr_submitted)
-+{
-+	txn_atom *atom;
-+	struct super_block *super = reiser4_get_current_sb();
-+	reiser4_super_info_data *sbinfo = get_super_private(super);
-+	struct commit_handle ch;
-+	int ret;
-+
-+	writeout_mode_enable();
-+
-+	/* block allocator may add j-nodes to the clean_list */
-+	ret = reiser4_pre_commit_hook();
-+	if (ret) {
-+		/* the commit handle is not initialized yet, so the common
-+		   exit path cannot be used; only writeout mode needs to be
-+		   undone */
-+		writeout_mode_disable();
-+		return ret;
-+	}
-+
-+	/* No locks are required if we take atom which stage >=
-+	 * ASTAGE_PRE_COMMIT */
-+	atom = get_current_context()->trans->atom;
-+	assert("zam-965", atom != NULL);
-+
-+	/* relocate set is on the atom->clean_nodes list after
-+	 * current_atom_complete_writes() finishes. It can be safely
-+	 * uncaptured after commit_mutex is locked, because any atom that
-+	 * captures these nodes is guaranteed to commit after current one.
-+	 *
-+	 * This can only be done after reiser4_pre_commit_hook(), because it is where
-+	 * early flushed jnodes with CREATED bit are transferred to the
-+	 * overwrite list. */
-+	reiser4_invalidate_list(ATOM_CLEAN_LIST(atom));
-+	spin_lock_atom(atom);
-+	/* There might be waiters for the relocate nodes which we have
-+	 * released, wake them up. */
-+	reiser4_atom_send_event(atom);
-+	spin_unlock_atom(atom);
-+
-+	if (REISER4_DEBUG) {
-+		int level;
-+
-+		for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level)
-+			assert("nikita-3352",
-+			       list_empty_careful(ATOM_DIRTY_LIST(atom, level)));
-+	}
-+
-+	sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created;
-+	sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted;
-+
-+	init_commit_handle(&ch, atom);
-+
-+	/* values that go into the transaction head */
-+	ch.free_blocks = sbinfo->blocks_free_committed;
-+	ch.nr_files = sbinfo->nr_files_committed;
-+	/* ZAM-FIXME-HANS: email me what the contention level is for the super
-+	 * lock. */
-+	ch.next_oid = oid_next(super);
-+
-+	/* count overwrite set and place it in a separate list */
-+	ret = get_overwrite_set(&ch);
-+
-+	if (ret <= 0) {
-+		/* It is possible that overwrite set is empty here, it means
-+		   all captured nodes are clean */
-+		goto up_and_ret;
-+	}
-+
-+	/* Inform the caller about what number of dirty pages will be
-+	 * submitted to disk. */
-+	*nr_submitted += ch.overwrite_set_size - ch.nr_bitmap;
-+
-+	/* count all records needed for storing of the wandered set */
-+	get_tx_size(&ch);
-+
-+	ret = commit_tx(&ch);
-+	if (ret)
-+		goto up_and_ret;
-+
-+	spin_lock_atom(atom);
-+	reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
-+	spin_unlock_atom(atom);
-+
-+	ret = write_tx_back(&ch);
-+	/* NOTE(review): write_tx_back() already runs this hook on success, so
-+	   it runs twice in that case; kept as in the original */
-+	reiser4_post_write_back_hook();
-+
-+      up_and_ret:
-+	if (ret) {
-+		/* there could be fq attached to current atom; the only way to
-+		   remove them is: */
-+		current_atom_finish_all_fq();
-+	}
-+
-+	/* free blocks of flushed transaction */
-+	dealloc_tx_list(&ch);
-+	dealloc_wmap(&ch);
-+
-+	put_overwrite_set(&ch);
-+
-+	done_commit_handle(&ch);
-+
-+	writeout_mode_disable();
-+
-+	return ret;
-+}
-+
-+/* consistency checks for journal data/control blocks: header, footer, log
-+ records, transactions head blocks. All functions return zero on success. */
-+
-+static int check_journal_header(const jnode * node UNUSED_ARG)
-+{
-+ /* FIXME: journal header has no magic field yet. */
-+ /* Placeholder: @node is not inspected and the check always succeeds. */
-+ return 0;
-+}
-+
-+/* Wait until writeback has finished for the page of every jnode on @head.
-+   jnodes without a page are skipped. Returns the number of pages that have
-+   the error flag set afterwards (0 when all writes succeeded). */
-+static int wait_on_jnode_list(struct list_head *head)
-+{
-+	int nr_failed = 0;
-+	jnode *node;
-+
-+	list_for_each_entry(node, head, capture_link) {
-+		struct page *page = jnode_page(node);
-+
-+		if (page == NULL)
-+			continue;
-+
-+		if (PageWriteback(page))
-+			wait_on_page_writeback(page);
-+
-+		if (PageError(page))
-+			nr_failed++;
-+	}
-+
-+	return nr_failed;
-+}
-+
-+static int check_journal_footer(const jnode * node UNUSED_ARG)
-+{
-+ /* FIXME: journal footer has no magic field yet. */
-+ /* Placeholder: @node is not inspected and the check always succeeds. */
-+ return 0;
-+}
-+
-+/* Verify that the loaded data of @node starts with TX_HEADER_MAGIC.
-+   Returns 0 if it does; otherwise logs a warning and returns -EIO. */
-+static int check_tx_head(const jnode * node)
-+{
-+	struct tx_header *txh;
-+
-+	txh = (struct tx_header *)jdata(node);
-+
-+	if (memcmp(&txh->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) == 0)
-+		return 0;
-+
-+	warning("zam-627", "tx head at block %s corrupted\n",
-+		sprint_address(jnode_get_block(node)));
-+	return RETERR(-EIO);
-+}
-+
-+/* Verify that the loaded data of @node starts with WANDER_RECORD_MAGIC.
-+   Returns 0 if it does; otherwise logs a warning and returns -EIO. */
-+static int check_wander_record(const jnode * node)
-+{
-+	struct wander_record_header *rec;
-+
-+	rec = (struct wander_record_header *)jdata(node);
-+
-+	if (memcmp(&rec->magic, WANDER_RECORD_MAGIC,
-+		   WANDER_RECORD_MAGIC_SIZE) == 0)
-+		return 0;
-+
-+	warning("zam-628", "wander record at block %s corrupted\n",
-+		sprint_address(jnode_get_block(node)));
-+	return RETERR(-EIO);
-+}
-+
-+/* Load @tx_head and copy the free block counter, file counter and next OID
-+   stored in it into @ch (the values update_journal_footer() needs), then link
-+   @tx_head on ch->tx_list. Returns 0, or the jload() error, in which case
-+   @tx_head is not linked. */
-+static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head)
-+{
-+	struct tx_header *txh;
-+	int err = jload(tx_head);
-+
-+	if (err)
-+		return err;
-+
-+	txh = (struct tx_header *)jdata(tx_head);
-+
-+	/* on-disk fields are little-endian and may be unaligned */
-+	ch->free_blocks = le64_to_cpu(get_unaligned(&txh->free_blocks));
-+	ch->nr_files = le64_to_cpu(get_unaligned(&txh->nr_files));
-+	ch->next_oid = le64_to_cpu(get_unaligned(&txh->next_oid));
-+
-+	jrelse(tx_head);
-+
-+	list_add(&tx_head->capture_link, &ch->tx_list);
-+
-+	return 0;
-+}
-+
-+/* replay one transaction: restore and write overwrite set in place
-+
-+   @s: super block of the file system being recovered
-+   @tx_head: jnode of the transaction head block
-+   @log_rec_block_p: block number of the first wander record
-+   @end_block: block number that terminates the on-disk list of wander records
-+   @nr_wander_records: number of wander records the tx head says to expect
-+
-+   Every wander record is loaded and checked, each (original, wandered) entry
-+   is turned into a jnode that is loaded from the wandered block and retargeted
-+   to the original block, the resulting list is written and waited for, and
-+   finally the journal footer is updated.
-+
-+   Returns 0 on success or a negative error code. All jnodes created here are
-+   released and @tx_head is unlinked from the local commit handle on every
-+   return path. */
-+static int replay_transaction(const struct super_block *s,
-+			      jnode * tx_head,
-+			      const reiser4_block_nr * log_rec_block_p,
-+			      const reiser4_block_nr * end_block,
-+			      unsigned int nr_wander_records)
-+{
-+	reiser4_block_nr log_rec_block = *log_rec_block_p;
-+	struct commit_handle ch;
-+	LIST_HEAD(overwrite_set);
-+	jnode *log;
-+	int ret;
-+
-+	init_commit_handle(&ch, NULL);
-+	ch.overwrite_set = &overwrite_set;
-+
-+	ret = restore_commit_handle(&ch, tx_head);
-+	if (ret) {
-+		/* @tx_head was not linked on ch.tx_list and the overwrite set
-+		   is still empty, so there is nothing to unwind */
-+		done_commit_handle(&ch);
-+		return ret;
-+	}
-+
-+	while (log_rec_block != *end_block) {
-+		struct wander_record_header *header;
-+		struct wander_entry *entry;
-+
-+		int i;
-+
-+		if (nr_wander_records == 0) {
-+			warning("zam-631",
-+				"number of wander records in the linked list"
-+				" greater than number stored in tx head.\n");
-+			ret = RETERR(-EIO);
-+			goto free_ow_set;
-+		}
-+
-+		log = reiser4_alloc_io_head(&log_rec_block);
-+		if (log == NULL) {
-+			ret = RETERR(-ENOMEM);
-+			goto free_ow_set;
-+		}
-+
-+		ret = jload(log);
-+		if (ret < 0) {
-+			reiser4_drop_io_head(log);
-+			goto free_ow_set;
-+		}
-+
-+		ret = check_wander_record(log);
-+		if (ret) {
-+			jrelse(log);
-+			reiser4_drop_io_head(log);
-+			goto free_ow_set;
-+		}
-+
-+		header = (struct wander_record_header *)jdata(log);
-+		log_rec_block = le64_to_cpu(get_unaligned(&header->next_block));
-+
-+		/* log entries start right after the record header */
-+		entry = (struct wander_entry *)(header + 1);
-+
-+		/* restore overwrite set from wander record content */
-+		for (i = 0; i < wander_record_capacity(s); i++) {
-+			reiser4_block_nr block;
-+			jnode *node;
-+
-+			/* a zero wandered location marks the unused tail of
-+			   the record */
-+			block = le64_to_cpu(get_unaligned(&entry->wandered));
-+			if (block == 0)
-+				break;
-+
-+			node = reiser4_alloc_io_head(&block);
-+			if (node == NULL) {
-+				ret = RETERR(-ENOMEM);
-+				/* the record is still loaded: release it
-+				   before unwinding the overwrite set */
-+				jrelse(log);
-+				reiser4_drop_io_head(log);
-+				goto free_ow_set;
-+			}
-+
-+			ret = jload(node);
-+
-+			if (ret < 0) {
-+				reiser4_drop_io_head(node);
-+				/* the record is still loaded: release it
-+				   before unwinding the overwrite set */
-+				jrelse(log);
-+				reiser4_drop_io_head(log);
-+				goto free_ow_set;
-+			}
-+
-+			/* data was read from the wandered block; from now on
-+			   the jnode refers to the original location */
-+			block = le64_to_cpu(get_unaligned(&entry->original));
-+
-+			assert("zam-603", block != 0);
-+
-+			jnode_set_block(node, &block);
-+
-+			list_add_tail(&node->capture_link, ch.overwrite_set);
-+
-+			++entry;
-+		}
-+
-+		jrelse(log);
-+		reiser4_drop_io_head(log);
-+
-+		--nr_wander_records;
-+	}
-+
-+	if (nr_wander_records != 0) {
-+		warning("zam-632", "number of wander records in the linked list"
-+			" less than number stored in tx head.\n");
-+		ret = RETERR(-EIO);
-+		goto free_ow_set;
-+	}
-+
-+	/* write wandered set in place */
-+	ret = write_jnode_list(ch.overwrite_set, NULL, NULL, 0);
-+
-+	/* wait even when submission failed part-way: some pages may already
-+	   be under writeback and must not be released before it ends */
-+	if (wait_on_jnode_list(ch.overwrite_set) != 0 && ret == 0)
-+		ret = RETERR(-EIO);
-+
-+	/* the footer may only be updated once everything is on disk */
-+	if (ret)
-+		goto free_ow_set;
-+
-+	ret = update_journal_footer(&ch, 0);
-+
-+      free_ow_set:
-+
-+	while (!list_empty(ch.overwrite_set)) {
-+		jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link);
-+		list_del_init(&cur->capture_link);
-+		jrelse(cur);
-+		reiser4_drop_io_head(cur);
-+	}
-+
-+	list_del_init(&tx_head->capture_link);
-+
-+	done_commit_handle(&ch);
-+
-+	return ret;
-+}
-+
-+/* find oldest committed and not played transaction and play it. The transaction
-+ * was committed and journal header block was updated but the blocks from the
-+ * process of writing the atom's overwrite set in-place and updating of journal
-+ * footer block were not completed. This function completes the process by
-+ * recovering the atom's overwrite set from their wandered locations and writes
-+ * them in-place and updating the journal footer.
-+ *
-+ * Returns 0 when the footer's last_flushed_tx equals sbinfo->last_committed_tx
-+ * (nothing left to replay), -E_REPEAT after one transaction has been replayed
-+ * successfully (the caller is expected to call again), or another negative
-+ * error code on failure. */
-+static int replay_oldest_transaction(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ jnode *jf = sbinfo->journal_footer;
-+ unsigned int total;
-+ struct journal_footer *F;
-+ struct tx_header *T;
-+
-+ reiser4_block_nr prev_tx;
-+ reiser4_block_nr last_flushed_tx;
-+ reiser4_block_nr log_rec_block = 0;
-+
-+ jnode *tx_head;
-+
-+ int ret;
-+
-+ if ((ret = jload(jf)) < 0)
-+ return ret;
-+
-+ F = (struct journal_footer *)jdata(jf);
-+
-+ last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx));
-+
-+ jrelse(jf);
-+
-+ if (sbinfo->last_committed_tx == last_flushed_tx) {
-+ /* all transactions are replayed */
-+ return 0;
-+ }
-+
-+ prev_tx = sbinfo->last_committed_tx;
-+
-+ /* searching for oldest not flushed transaction */
-+ /* walks the prev_tx links backwards from the last committed tx head;
-+ NOTE(review): the loop ends only when a tx head points at
-+ last_flushed_tx or a load/check fails */
-+ while (1) {
-+ tx_head = reiser4_alloc_io_head(&prev_tx);
-+ if (!tx_head)
-+ return RETERR(-ENOMEM);
-+
-+ ret = jload(tx_head);
-+ if (ret < 0) {
-+ reiser4_drop_io_head(tx_head);
-+ return ret;
-+ }
-+
-+ ret = check_tx_head(tx_head);
-+ if (ret) {
-+ jrelse(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+ return ret;
-+ }
-+
-+ T = (struct tx_header *)jdata(tx_head);
-+
-+ prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx));
-+
-+ /* on break, tx_head stays loaded and T stays valid for the reads
-+ below */
-+ if (prev_tx == last_flushed_tx)
-+ break;
-+
-+ jrelse(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+ }
-+
-+ total = le32_to_cpu(get_unaligned(&T->total));
-+ log_rec_block = le64_to_cpu(get_unaligned(&T->next_block));
-+
-+ pin_jnode_data(tx_head);
-+ jrelse(tx_head);
-+
-+ /* the tx head's own block number ends the record list; the tx head
-+ itself is excluded from the record count.
-+ NOTE(review): total - 1 wraps around if the on-disk total is 0 */
-+ ret =
-+ replay_transaction(s, tx_head, &log_rec_block,
-+ jnode_get_block(tx_head), total - 1);
-+
-+ unpin_jnode_data(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+
-+ if (ret)
-+ return ret;
-+ return -E_REPEAT;
-+}
-+
-+/* The reiser4 journal current implementation was optimized to not to capture
-+ super block if certain super blocks fields are modified. Currently, the set
-+ is (<free block count>, <OID allocator>). These fields are logged by
-+ special way which includes storing them in each transaction head block at
-+ atom commit time and writing that information to journal footer block at
-+ atom flush time. For getting info from journal footer block to the
-+ in-memory super block there is a special function
-+ reiser4_journal_recover_sb_data() which should be called after disk format
-+ plugin re-reads super block after journal replaying.
-+*/
-+
-+/* Copy the counters logged in the journal footer into the in-memory super
-+   block: if the footer records a flushed transaction, the free block counter
-+   is set and the OID allocator is initialized from the footer's values.
-+   Returns 0, or the error from loading or checking the footer. */
-+int reiser4_journal_recover_sb_data(struct super_block *s)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(s);
-+	struct journal_footer *footer;
-+	int err;
-+
-+	assert("zam-673", sbinfo->journal_footer != NULL);
-+
-+	err = jload(sbinfo->journal_footer);
-+	if (err != 0)
-+		return err;
-+
-+	err = check_journal_footer(sbinfo->journal_footer);
-+	if (err == 0) {
-+		footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
-+
-+		/* was there at least one flushed transaction? */
-+		if (footer->last_flushed_tx) {
-+			/* restore free block counter logged in this
-+			   transaction */
-+			reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&footer->free_blocks)));
-+
-+			/* restore oid allocator state */
-+			oid_init_allocator(s,
-+					   le64_to_cpu(get_unaligned(&footer->nr_files)),
-+					   le64_to_cpu(get_unaligned(&footer->next_oid)));
-+		}
-+	}
-+
-+	jrelse(sbinfo->journal_footer);
-+	return err;
-+}
-+
-+/* Replay the journal of @s: check the journal footer and header blocks, take
-+   the last committed transaction from the header, then replay transactions
-+   one at a time for as long as replay_oldest_transaction() reports -E_REPEAT.
-+   Returns 0 when the journal control blocks are absent (a warning is logged)
-+   or when nothing is left to replay, otherwise a negative error code. */
-+int reiser4_journal_replay(struct super_block *s)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(s);
-+	jnode *header_node, *footer_node;
-+	struct journal_header *jh_data;
-+	int replayed = 0;
-+	int err;
-+
-+	assert("zam-582", sbinfo != NULL);
-+
-+	header_node = sbinfo->journal_header;
-+	footer_node = sbinfo->journal_footer;
-+
-+	if (header_node == NULL || footer_node == NULL) {
-+		/* it is possible that disk layout does not support journal
-+		   structures, we just warn about this */
-+		warning("zam-583",
-+			"journal control blocks were not loaded by disk layout plugin. "
-+			"journal replaying is not possible.\n");
-+		return 0;
-+	}
-+
-+	/* the footer describes the state as of the last flushed transaction;
-+	   here it is only loaded and checked */
-+	err = jload(footer_node);
-+	if (err < 0)
-+		return err;
-+
-+	err = check_journal_footer(footer_node);
-+	jrelse(footer_node);
-+	if (err)
-+		return err;
-+
-+	/* store last committed transaction info in reiser4 in-memory super
-+	   block */
-+	err = jload(header_node);
-+	if (err < 0)
-+		return err;
-+
-+	err = check_journal_header(header_node);
-+	if (err) {
-+		jrelse(header_node);
-+		return err;
-+	}
-+
-+	jh_data = (struct journal_header *)jdata(header_node);
-+	sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&jh_data->last_committed_tx));
-+
-+	jrelse(header_node);
-+
-+	/* replay committed transactions; the counter is kept for parity with
-+	   the original but is not reported anywhere */
-+	for (;;) {
-+		err = replay_oldest_transaction(s);
-+		if (err != -E_REPEAT)
-+			break;
-+		replayed++;
-+	}
-+
-+	return err;
-+}
-+
-+/* Create an io-head jnode for @block (a journal header or journal footer
-+   block), load it once and keep its data pinned. On success *@node is the new
-+   jnode and 0 is returned; on failure *@node is NULL and the error is
-+   returned. */
-+static int
-+load_journal_control_block(jnode ** node, const reiser4_block_nr * block)
-+{
-+	jnode *head;
-+	int err;
-+
-+	head = reiser4_alloc_io_head(block);
-+	*node = head;
-+	if (head == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	err = jload(head);
-+	if (err) {
-+		reiser4_drop_io_head(head);
-+		*node = NULL;
-+		return err;
-+	}
-+
-+	/* pin the data before dropping the jload() reference */
-+	pin_jnode_data(head);
-+	jrelse(head);
-+
-+	return 0;
-+}
-+
-+/* Unpin and drop the journal header or footer jnode at *@node and reset the
-+   pointer to NULL. Does nothing when *@node is already NULL. */
-+static void unload_journal_control_block(jnode ** node)
-+{
-+	jnode *head = *node;
-+
-+	if (head == NULL)
-+		return;
-+
-+	unpin_jnode_data(head);
-+	reiser4_drop_io_head(head);
-+	*node = NULL;
-+}
-+
-+/* release journal control blocks: unload the journal header and the journal
-+ footer jnodes of @s (each pointer is reset to NULL), then wait in
-+ rcu_barrier() */
-+void reiser4_done_journal_info(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("zam-476", sbinfo != NULL);
-+
-+ unload_journal_control_block(&sbinfo->journal_header);
-+ unload_journal_control_block(&sbinfo->journal_footer);
-+ /* presumably the dropped jnodes are freed through RCU callbacks and this
-+ waits for them to run; verify against reiser4_drop_io_head() */
-+ rcu_barrier();
-+}
-+
-+/* Load both journal control blocks of @s from the locations kept in
-+   sbinfo->jloc: the header first, then the footer. If the footer cannot be
-+   loaded the header is unloaded again. Returns 0 or the first error. */
-+int reiser4_init_journal_info(struct super_block *s)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(s);
-+	journal_location *where = &sbinfo->jloc;
-+	int err;
-+
-+	assert("zam-651", where != NULL);
-+	assert("zam-652", where->header != 0);
-+	assert("zam-653", where->footer != 0);
-+
-+	err = load_journal_control_block(&sbinfo->journal_header,
-+					 &where->header);
-+	if (err)
-+		return err;
-+
-+	err = load_journal_control_block(&sbinfo->journal_footer,
-+					 &where->footer);
-+	if (err == 0)
-+		return 0;
-+
-+	/* do not leave a half-initialized pair behind */
-+	unload_journal_control_block(&sbinfo->journal_header);
-+	return err;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/wander.h linux-2.6.20/fs/reiser4/wander.h
---- linux-2.6.20.orig/fs/reiser4/wander.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/wander.h 2007-05-06 14:50:43.903039466 +0400
-@@ -0,0 +1,135 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__FS_REISER4_WANDER_H__)
-+#define __FS_REISER4_WANDER_H__
-+
-+#include "dformat.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+/* REISER4 JOURNAL ON-DISK DATA STRUCTURES */
-+
-+#define TX_HEADER_MAGIC "TxMagic4"
-+#define WANDER_RECORD_MAGIC "LogMagc4"
-+
-+#define TX_HEADER_MAGIC_SIZE (8)
-+#define WANDER_RECORD_MAGIC_SIZE (8)
-+
-+/* journal header block format */
-+struct journal_header {
-+ /* last written transaction head location (block number of the tx head
-+ of the most recently committed transaction) */
-+ d64 last_committed_tx;
-+};
-+
-+/* block numbers of the two journal control blocks */
-+typedef struct journal_location {
-+ reiser4_block_nr footer; /* journal footer block */
-+ reiser4_block_nr header; /* journal header block */
-+} journal_location;
-+
-+/* The wander.c head comment describes usage and semantic of all these structures */
-+/* journal footer block format */
-+struct journal_footer {
-+ /* last flushed transaction location. */
-+ /* This block number is no more valid after the transaction it points
-+ to gets flushed, this number is used only at journal replaying time
-+ for detection of the end of on-disk list of committed transactions
-+ which were not flushed completely */
-+ d64 last_flushed_tx;
-+
-+ /* free block counter is written in journal footer at transaction
-+ flushing , not in super block because free blocks counter is logged
-+ by another way than super block fields (root pointer, for
-+ example). */
-+ d64 free_blocks;
-+
-+ /* number of used OIDs and maximal used OID are logged separately from
-+ super block */
-+ d64 nr_files; /* number of used OIDs */
-+ d64 next_oid; /* next OID for the allocator */
-+};
-+
-+/* Each wander record (except the first one) has unified format with wander
-+ record header followed by an array of log entries */
-+struct wander_record_header {
-+ /* when there is no predefined location for wander records, this magic
-+ string should help reiser4fsck. */
-+ char magic[WANDER_RECORD_MAGIC_SIZE];
-+
-+ /* transaction id */
-+ d64 id;
-+
-+ /* total number of wander records in current transaction */
-+ d32 total;
-+
-+ /* this block number in transaction */
-+ d32 serial;
-+
-+ /* block number of the next wander record in commit */
-+ d64 next_block;
-+};
-+
-+/* The first wander record (transaction head) of written transaction has the
-+ special format */
-+struct tx_header {
-+ /* magic string makes first block in transaction different from other
-+ logged blocks, it should help fsck. */
-+ char magic[TX_HEADER_MAGIC_SIZE];
-+
-+ /* transaction id */
-+ d64 id;
-+
-+ /* total number of records (including this first tx head) in the
-+ transaction */
-+ d32 total;
-+
-+ /* align next field to 8-byte boundary; this field always is zero */
-+ d32 padding;
-+
-+ /* block number of previous transaction head */
-+ d64 prev_tx;
-+
-+ /* next wander record location */
-+ d64 next_block;
-+
-+ /* committed versions of free blocks counter */
-+ d64 free_blocks;
-+
-+ /* number of used OIDs (nr_files) and maximal used OID are logged
-+ separately from super block */
-+ d64 nr_files;
-+ d64 next_oid;
-+};
-+
-+/* A transaction gets written to disk as a set of wander records (each wander
-+ record size is fs block) */
-+
-+/* As stated above, a wander record begins with a wander record header. The
-+ rest of the record is filled by these log entries, unused space by zeroes */
-+struct wander_entry {
-+ d64 original; /* block original location */
-+ d64 wandered; /* block wandered location */
-+};
-+
-+/* REISER4 JOURNAL WRITER FUNCTIONS */
-+
-+extern int reiser4_write_logs(long *);
-+extern int reiser4_journal_replay(struct super_block *);
-+extern int reiser4_journal_recover_sb_data(struct super_block *);
-+
-+extern int reiser4_init_journal_info(struct super_block *);
-+extern void reiser4_done_journal_info(struct super_block *);
-+
-+extern int write_jnode_list(struct list_head *, flush_queue_t *, long *, int);
-+
-+#endif /* __FS_REISER4_WANDER_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/writeout.h linux-2.6.20/fs/reiser4/writeout.h
---- linux-2.6.20.orig/fs/reiser4/writeout.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/writeout.h 2007-05-06 14:50:43.907040716 +0400
-@@ -0,0 +1,21 @@
-+/* Copyright 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__FS_REISER4_WRITEOUT_H__)
-+#define __FS_REISER4_WRITEOUT_H__ /* arm the guard so re-inclusion is skipped */
-+#define WRITEOUT_SINGLE_STREAM (0x1)
-+#define WRITEOUT_FOR_PAGE_RECLAIM (0x2)
-+#define WRITEOUT_BARRIER (0x4)
-+
-+extern int reiser4_get_writeout_flags(void);
-+
-+#endif /* __FS_REISER4_WRITEOUT_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/znode.c linux-2.6.20/fs/reiser4/znode.c
---- linux-2.6.20.orig/fs/reiser4/znode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/znode.c 2007-05-06 14:50:43.907040716 +0400
-@@ -0,0 +1,1029 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Znode manipulation functions. */
-+/* Znode is the in-memory header for a tree node. It is stored
-+ separately from the node itself so that it does not get written to
-+ disk. In this respect znode is like buffer head or page head. We
-+ also use znodes for additional reiser4 specific purposes:
-+
-+ . they are organized into tree structure which is a part of whole
-+ reiser4 tree.
-+ . they are used to implement node grained locking
-+ . they are used to keep additional state associated with a
-+ node
-+ . they contain links to lists used by the transaction manager
-+
-+ Znode is attached to some variable "block number" which is instance of
-+ fs/reiser4/tree.h:reiser4_block_nr type. Znode can exist without
-+ appropriate node being actually loaded in memory. Existence of znode itself
-+ is regulated by reference count (->x_count) in it. Each time thread
-+ acquires reference to znode through call to zget(), ->x_count is
-+ incremented and decremented on call to zput(). Data (content of node) are
-+ brought in memory through call to zload(), which also increments ->d_count
-+ reference counter. zload can block waiting on IO. Call to zrelse()
-+ decreases this counter. Also, ->c_count keeps track of number of child
-+ znodes and prevents parent znode from being recycled until all of its
-+ children are. ->c_count is decremented whenever child goes out of existence
-+ (being actually recycled in zdestroy()) which can be some time after last
-+ reference to this child dies if we support some form of LRU cache for
-+ znodes.
-+
-+*/
-+/* EVERY ZNODE'S STORY
-+
-+ 1. His infancy.
-+
-+ Once upon a time, the znode was born deep inside of zget() by call to
-+ zalloc(). At the return from zget() znode had:
-+
-+ . reference counter (x_count) of 1
-+ . assigned block number, marked as used in bitmap
-+ . pointer to parent znode. Root znode parent pointer points
-+ to its father: "fake" znode. This, in turn, has NULL parent pointer.
-+ . hash table linkage
-+ . no data loaded from disk
-+ . no node plugin
-+ . no sibling linkage
-+
-+ 2. His childhood
-+
-+ Each node is either brought into memory as a result of tree traversal, or
-+ created afresh, creation of the root being a special case of the latter. In
-+ either case it's inserted into sibling list. This will typically require
-+ some ancillary tree traversing, but ultimately both sibling pointers will
-+ exist and JNODE_LEFT_CONNECTED and JNODE_RIGHT_CONNECTED will be true in
-+ zjnode.state.
-+
-+ 3. His youth.
-+
-+ If znode is bound to already existing node in a tree, its content is read
-+ from the disk by call to zload(). At that moment, JNODE_LOADED bit is set
-+ in zjnode.state and zdata() function starts to return non null for this
-+ znode. zload() further calls zparse() that determines which node layout
-+ this node is rendered in, and sets ->nplug on success.
-+
-+ If znode is for new node just created, memory for it is allocated and
-+ zinit_new() function is called to initialise data, according to selected
-+ node layout.
-+
-+ 4. His maturity.
-+
-+ After this point, znode lingers in memory for some time. Threads can
-+ acquire references to znode either by blocknr through call to zget(), or by
-+ following a pointer to unallocated znode from internal item. Each time
-+ reference to znode is obtained, x_count is increased. Thread can read/write
-+ lock znode. Znode data can be loaded through calls to zload(), d_count will
-+ be increased appropriately. If all references to znode are released
-+ (x_count drops to 0), znode is not recycled immediately. Rather, it is
-+ still cached in the hash table in the hope that it will be accessed
-+ shortly.
-+
-+ There are two ways in which znode existence can be terminated:
-+
-+ . sudden death: node bound to this znode is removed from the tree
-+ . overpopulation: znode is purged out of memory due to memory pressure
-+
-+ 5. His death.
-+
-+ Death is complex process.
-+
-+ When we irrevocably commit ourselves to decision to remove node from the
-+ tree, JNODE_HEARD_BANSHEE bit is set in zjnode.state of corresponding
-+ znode. This is done either in ->kill_hook() of internal item or in
-+ reiser4_kill_root() function when tree root is removed.
-+
-+ At this moment znode still has:
-+
-+ . locks held on it, necessarily write ones
-+ . references to it
-+ . disk block assigned to it
-+ . data loaded from the disk
-+ . pending requests for lock
-+
-+ But once JNODE_HEARD_BANSHEE bit set, last call to unlock_znode() does node
-+ deletion. Node deletion includes two phases. First all ways to get
-+ references to that znode (sibling and parent links and hash lookup using
-+ block number stored in parent node) should be deleted -- it is done through
-+ sibling_list_remove(), also we assume that nobody uses down link from
-+ parent node due to its nonexistence or proper parent node locking and
-+ nobody uses parent pointers from children due to absence of them. Second we
-+ invalidate all pending lock requests which still are on znode's lock
-+ request queue, this is done by reiser4_invalidate_lock(). Another
-+ JNODE_IS_DYING znode status bit is used to invalidate pending lock requests.
-+ Once it set all requesters are forced to return -EINVAL from
-+ longterm_lock_znode(). Future locking attempts are not possible because all
-+ ways to get references to that znode are removed already. Last, node is
-+ uncaptured from transaction.
-+
-+ When last reference to the dying znode is just about to be released,
-+ block number for this lock is released and znode is removed from the
-+ hash table.
-+
-+ Now znode can be recycled.
-+
-+ [it's possible to free bitmap block and remove znode from the hash
-+ table when last lock is released. This will result in having
-+ referenced but completely orphaned znode]
-+
-+ 6. Limbo
-+
-+ As has been mentioned above, znodes with reference counter 0 are
-+ still cached in a hash table. Once memory pressure increases they are
-+ purged out of there [this requires something like LRU list for
-+ efficient implementation. LRU list would also greatly simplify
-+ implementation of coord cache that would in this case morph to just
-+ scanning some initial segment of LRU list]. Data loaded into
-+ unreferenced znode are flushed back to the durable storage if
-+ necessary and memory is freed. Znodes themselves can be recycled at
-+ this point too.
-+
-+*/
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/plugin_header.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/pagemap.h>
-+#include <linux/spinlock.h>
-+#include <linux/slab.h>
-+#include <linux/err.h>
-+
-+static z_hash_table *get_htable(reiser4_tree *,
-+ const reiser4_block_nr * const blocknr);
-+static z_hash_table *znode_get_htable(const znode *);
-+static void zdrop(znode *);
-+
-+/* hash table support */
-+
-+/* compare two block numbers for equality. Used by hash-table macros */
-+static inline int
-+blknreq(const reiser4_block_nr * b1, const reiser4_block_nr * b2)
-+{
-+ assert("nikita-534", b1 != NULL);
-+ assert("nikita-535", b2 != NULL);
-+
-+ return *b1 == *b2;
-+}
-+
-+/* Hash znode by block number. Used by hash-table macros */
-+/* Audited by: umka (2002.06.11) */
-+static inline __u32
-+blknrhashfn(z_hash_table * table, const reiser4_block_nr * b)
-+{
-+ assert("nikita-536", b != NULL);
-+
-+ return *b & (REISER4_ZNODE_HASH_TABLE_SIZE - 1);
-+}
-+
-+/* The hash table definition */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(z, znode, reiser4_block_nr, zjnode.key.z, zjnode.link.z,
-+ blknrhashfn, blknreq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/* slab for znodes */
-+static struct kmem_cache *znode_cache;
-+
-+int znode_shift_order;
-+
-+/**
-+ * init_znodes - create znode cache
-+ *
-+ * Initializes slab cache of znodes. It is part of reiser4 module initialization.
-+ */
-+int init_znodes(void)
-+{
-+ znode_cache = kmem_cache_create("znode", sizeof(znode), 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL, NULL);
-+ if (znode_cache == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ for (znode_shift_order = 0; (1 << znode_shift_order) < sizeof(znode);
-+ ++znode_shift_order);
-+ --znode_shift_order;
-+ return 0;
-+}
-+
-+/**
-+ * done_znodes - delete znode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_znodes(void)
-+{
-+ destroy_reiser4_cache(&znode_cache);
-+}
-+
-+/* call this to initialise tree of znodes */
-+int znodes_tree_init(reiser4_tree * tree /* tree to initialise znodes for */ )
-+{
-+ int result;
-+ assert("umka-050", tree != NULL);
-+
-+ rwlock_init(&tree->dk_lock);
-+
-+ result = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE);
-+ if (result != 0)
-+ return result;
-+ result = z_hash_init(&tree->zfake_table, REISER4_ZNODE_HASH_TABLE_SIZE);
-+ return result;
-+}
-+
-+/* free this znode */
-+void zfree(znode * node /* znode to free */ )
-+{
-+ assert("nikita-465", node != NULL);
-+ assert("nikita-2120", znode_page(node) == NULL);
-+ assert("nikita-2301", list_empty_careful(&node->lock.owners));
-+ assert("nikita-2302", list_empty_careful(&node->lock.requestors));
-+ assert("nikita-2663", (list_empty_careful(&ZJNODE(node)->capture_link) &&
-+ NODE_LIST(ZJNODE(node)) == NOT_CAPTURED));
-+ assert("nikita-3220", list_empty(&ZJNODE(node)->jnodes));
-+ assert("nikita-3293", !znode_is_right_connected(node));
-+ assert("nikita-3294", !znode_is_left_connected(node));
-+ assert("nikita-3295", node->left == NULL);
-+ assert("nikita-3296", node->right == NULL);
-+
-+ /* not yet phash_jnode_destroy(ZJNODE(node)); */
-+
-+ kmem_cache_free(znode_cache, node);
-+}
-+
-+/* call this to free tree of znodes */
-+void znodes_tree_done(reiser4_tree * tree /* tree to finish with znodes of */ )
-+{
-+ znode *node;
-+ znode *next;
-+ z_hash_table *ztable;
-+
-+ /* scan znode hash-tables and kill all znodes, then free hash tables
-+ * themselves. */
-+
-+ assert("nikita-795", tree != NULL);
-+
-+ ztable = &tree->zhash_table;
-+
-+ if (ztable->_table != NULL) {
-+ for_all_in_htable(ztable, z, node, next) {
-+ node->c_count = 0;
-+ node->in_parent.node = NULL;
-+ assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
-+ zdrop(node);
-+ }
-+
-+ z_hash_done(&tree->zhash_table);
-+ }
-+
-+ ztable = &tree->zfake_table;
-+
-+ if (ztable->_table != NULL) {
-+ for_all_in_htable(ztable, z, node, next) {
-+ node->c_count = 0;
-+ node->in_parent.node = NULL;
-+ assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
-+ zdrop(node);
-+ }
-+
-+ z_hash_done(&tree->zfake_table);
-+ }
-+}
-+
-+/* ZNODE STRUCTURES */
-+
-+/* allocate fresh znode */
-+znode *zalloc(gfp_t gfp_flag /* allocation flag */ )
-+{
-+ znode *node;
-+
-+ node = kmem_cache_alloc(znode_cache, gfp_flag);
-+ return node;
-+}
-+
-+/* Initialize fields of znode
-+ @node: znode to initialize;
-+ @parent: parent znode;
-+ @tree: tree we are in. */
-+void zinit(znode * node, const znode * parent, reiser4_tree * tree)
-+{
-+ assert("nikita-466", node != NULL);
-+ assert("umka-268", current_tree != NULL);
-+
-+ memset(node, 0, sizeof *node);
-+
-+ assert("umka-051", tree != NULL);
-+
-+ jnode_init(&node->zjnode, tree, JNODE_FORMATTED_BLOCK);
-+ reiser4_init_lock(&node->lock);
-+ init_parent_coord(&node->in_parent, parent);
-+}
-+
-+/*
-+ * remove znode from indices. This is called by jput() when last reference on
-+ * znode is released.
-+ */
-+void znode_remove(znode * node /* znode to remove */ , reiser4_tree * tree)
-+{
-+ assert("nikita-2108", node != NULL);
-+ assert("nikita-470", node->c_count == 0);
-+ assert_rw_write_locked(&(tree->tree_lock));
-+
-+ /* remove reference to this znode from cbk cache */
-+ cbk_cache_invalidate(node, tree);
-+
-+ /* update c_count of parent */
-+ if (znode_parent(node) != NULL) {
-+ assert("nikita-472", znode_parent(node)->c_count > 0);
-+ /* father, onto your hands I forward my spirit... */
-+ znode_parent(node)->c_count--;
-+ node->in_parent.node = NULL;
-+ } else {
-+ /* orphaned znode?! Root? */
-+ }
-+
-+ /* remove znode from hash-table */
-+ z_hash_remove_rcu(znode_get_htable(node), node);
-+}
-+
-+/* zdrop() -- Remove znode from the tree.
-+
-+ This is called when znode is removed from the memory. */
-+static void zdrop(znode * node /* znode to finish with */ )
-+{
-+ jdrop(ZJNODE(node));
-+}
-+
-+/*
-+ * put znode into right place in the hash table. This is called by relocate
-+ * code.
-+ */
-+int znode_rehash(znode * node /* node to rehash */ ,
-+ const reiser4_block_nr * new_block_nr /* new block number */ )
-+{
-+ z_hash_table *oldtable;
-+ z_hash_table *newtable;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-2018", node != NULL);
-+
-+ tree = znode_get_tree(node);
-+ oldtable = znode_get_htable(node);
-+ newtable = get_htable(tree, new_block_nr);
-+
-+ write_lock_tree(tree);
-+ /* remove znode from hash-table */
-+ z_hash_remove_rcu(oldtable, node);
-+
-+ /* assertion no longer valid due to RCU */
-+ /* assert("nikita-2019", z_hash_find(newtable, new_block_nr) == NULL); */
-+
-+ /* update blocknr */
-+ znode_set_block(node, new_block_nr);
-+ node->zjnode.key.z = *new_block_nr;
-+
-+ /* insert it into hash */
-+ z_hash_insert_rcu(newtable, node);
-+ write_unlock_tree(tree);
-+ return 0;
-+}
-+
-+/* ZNODE LOOKUP, GET, PUT */
-+
-+/* zlook() - get znode with given block_nr in a hash table or return NULL
-+
-+ If result is non-NULL then the znode's x_count is incremented. Internal version
-+ accepts pre-computed hash index. The hash table is searched inside an
-+ rcu_read_lock()/rcu_read_unlock() section taken by this function.
-+*/
-+znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const blocknr)
-+{
-+ znode *result;
-+ __u32 hash;
-+ z_hash_table *htable;
-+
-+ assert("jmacd-506", tree != NULL);
-+ assert("jmacd-507", blocknr != NULL);
-+
-+ htable = get_htable(tree, blocknr);
-+ hash = blknrhashfn(htable, blocknr);
-+
-+ rcu_read_lock();
-+ result = z_hash_find_index(htable, hash, blocknr);
-+
-+ if (result != NULL) {
-+ add_x_ref(ZJNODE(result));
-+ result = znode_rip_check(tree, result);
-+ }
-+ rcu_read_unlock();
-+
-+ return result;
-+}
-+
-+/* return hash table where znode with block @blocknr is (or should be)
-+ * stored */
-+static z_hash_table *get_htable(reiser4_tree * tree,
-+ const reiser4_block_nr * const blocknr)
-+{
-+ z_hash_table *table;
-+ if (is_disk_addr_unallocated(blocknr))
-+ table = &tree->zfake_table;
-+ else
-+ table = &tree->zhash_table;
-+ return table;
-+}
-+
-+/* return hash table where znode @node is (or should be) stored */
-+static z_hash_table *znode_get_htable(const znode * node)
-+{
-+ return get_htable(znode_get_tree(node), znode_get_block(node));
-+}
-+
-+/* zget() - get znode from hash table, allocating it if necessary.
-+
-+ First a call to zlook, locating a x-referenced znode if one
-+ exists. If znode is not found, allocate new one and return. Result
-+ is returned with x_count reference increased.
-+
-+ LOCKS TAKEN: TREE_LOCK, ZNODE_LOCK
-+ LOCK ORDERING: NONE
-+*/
-+znode *zget(reiser4_tree * tree,
-+ const reiser4_block_nr * const blocknr,
-+ znode * parent, tree_level level, gfp_t gfp_flag)
-+{
-+ znode *result;
-+ __u32 hashi;
-+
-+ z_hash_table *zth;
-+
-+ assert("jmacd-512", tree != NULL);
-+ assert("jmacd-513", blocknr != NULL);
-+ assert("jmacd-514", level < REISER4_MAX_ZTREE_HEIGHT);
-+
-+ zth = get_htable(tree, blocknr);
-+ hashi = blknrhashfn(zth, blocknr);
-+
-+ /* NOTE-NIKITA address-as-unallocated-blocknr still is not
-+ implemented. */
-+
-+ z_hash_prefetch_bucket(zth, hashi);
-+
-+ rcu_read_lock();
-+ /* Find a matching BLOCKNR in the hash table. If the znode is found,
-+ we obtain an reference (x_count) but the znode remains unlocked.
-+ Have to worry about race conditions later. */
-+ result = z_hash_find_index(zth, hashi, blocknr);
-+ /* According to the current design, the hash table lock protects new
-+ znode references. */
-+ if (result != NULL) {
-+ add_x_ref(ZJNODE(result));
-+ /* NOTE-NIKITA it should be so, but special case during
-+ creation of new root makes such assertion highly
-+ complicated. */
-+ assert("nikita-2131", 1 || znode_parent(result) == parent ||
-+ (ZF_ISSET(result, JNODE_ORPHAN)
-+ && (znode_parent(result) == NULL)));
-+ result = znode_rip_check(tree, result);
-+ }
-+
-+ rcu_read_unlock();
-+
-+ if (!result) {
-+ znode *shadow;
-+
-+ result = zalloc(gfp_flag);
-+ if (!result) {
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ }
-+
-+ zinit(result, parent, tree);
-+ ZJNODE(result)->blocknr = *blocknr;
-+ ZJNODE(result)->key.z = *blocknr;
-+ result->level = level;
-+
-+ write_lock_tree(tree);
-+
-+ shadow = z_hash_find_index(zth, hashi, blocknr);
-+ if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) {
-+ jnode_list_remove(ZJNODE(result));
-+ zfree(result);
-+ result = shadow;
-+ } else {
-+ result->version = znode_build_version(tree);
-+ z_hash_insert_index_rcu(zth, hashi, result);
-+
-+ if (parent != NULL)
-+ ++parent->c_count;
-+ }
-+
-+ add_x_ref(ZJNODE(result));
-+
-+ write_unlock_tree(tree);
-+ }
-+#if REISER4_DEBUG
-+ if (!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0)
-+ reiser4_check_block(blocknr, 1);
-+#endif
-+ /* Check for invalid tree level, return -EIO */
-+ if (unlikely(znode_get_level(result) != level)) {
-+ warning("jmacd-504",
-+ "Wrong level for cached block %llu: %i expecting %i",
-+ (unsigned long long)(*blocknr), znode_get_level(result),
-+ level);
-+ zput(result);
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ assert("nikita-1227", znode_invariant(result));
-+
-+ return result;
-+}
-+
-+/* ZNODE PLUGINS/DATA */
-+
-+/* "guess" plugin for node loaded from the disk. Plugin id of node plugin is
-+ stored at the fixed offset from the beginning of the node. */
-+static node_plugin *znode_guess_plugin(const znode * node /* znode to guess
-+ * plugin of */ )
-+{
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1053", node != NULL);
-+ assert("nikita-1055", zdata(node) != NULL);
-+
-+ tree = znode_get_tree(node);
-+ assert("umka-053", tree != NULL);
-+
-+ if (reiser4_is_set(tree->super, REISER4_ONE_NODE_PLUGIN)) {
-+ return tree->nplug;
-+ } else {
-+ return node_plugin_by_disk_id
-+ (tree, &((common_node_header *) zdata(node))->plugin_id);
-+#ifdef GUESS_EXISTS
-+ reiser4_plugin *plugin;
-+
-+ /* NOTE-NIKITA add locking here when dynamic plugins will be
-+ * implemented */
-+ for_all_plugins(REISER4_NODE_PLUGIN_TYPE, plugin) {
-+ if ((plugin->u.node.guess != NULL)
-+ && plugin->u.node.guess(node))
-+ return plugin;
-+ }
-+ warning("nikita-1057", "Cannot guess node plugin");
-+ print_znode("node", node);
-+ return NULL;
-+#endif
-+ }
-+}
-+
-+/* parse node header and install ->node_plugin */
-+int zparse(znode * node /* znode to parse */ )
-+{
-+ int result;
-+
-+ assert("nikita-1233", node != NULL);
-+ assert("nikita-2370", zdata(node) != NULL);
-+
-+ if (node->nplug == NULL) {
-+ node_plugin *nplug;
-+
-+ nplug = znode_guess_plugin(node);
-+ if (likely(nplug != NULL)) {
-+ result = nplug->parse(node);
-+ if (likely(result == 0))
-+ node->nplug = nplug;
-+ } else {
-+ result = RETERR(-EIO);
-+ }
-+ } else
-+ result = 0;
-+ return result;
-+}
-+
-+/* zload with readahead */
-+int zload_ra(znode * node /* znode to load */ , ra_info_t * info)
-+{
-+ int result;
-+
-+ assert("nikita-484", node != NULL);
-+ assert("nikita-1377", znode_invariant(node));
-+ assert("jmacd-7771", !znode_above_root(node));
-+ assert("nikita-2125", atomic_read(&ZJNODE(node)->x_count) > 0);
-+ assert("nikita-3016", reiser4_schedulable());
-+
-+ if (info)
-+ formatted_readahead(node, info);
-+
-+ result = jload(ZJNODE(node));
-+ assert("nikita-1378", znode_invariant(node));
-+ return result;
-+}
-+
-+/* load content of node into memory */
-+int zload(znode * node)
-+{
-+ return zload_ra(node, NULL);
-+}
-+
-+/* call node plugin to initialise newly allocated node. */
-+int zinit_new(znode * node /* znode to initialise */ , gfp_t gfp_flags)
-+{
-+ return jinit_new(ZJNODE(node), gfp_flags);
-+}
-+
-+/* drop reference to node data. When last reference is dropped, data are
-+ unloaded. */
-+void zrelse(znode * node /* znode to release references to */ )
-+{
-+ assert("nikita-1381", znode_invariant(node));
-+
-+ jrelse(ZJNODE(node));
-+}
-+
-+/* returns free space in node */
-+unsigned znode_free_space(znode * node /* znode to query */ )
-+{
-+ assert("nikita-852", node != NULL);
-+ return node_plugin_by_node(node)->free_space(node);
-+}
-+
-+/* right delimiting key of znode */
-+reiser4_key *znode_get_rd_key(znode * node /* znode to query */ )
-+{
-+ assert("nikita-958", node != NULL);
-+ assert_rw_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-30671", node->rd_key_version != 0);
-+ return &node->rd_key;
-+}
-+
-+/* left delimiting key of znode */
-+reiser4_key *znode_get_ld_key(znode * node /* znode to query */ )
-+{
-+ assert("nikita-974", node != NULL);
-+ assert_rw_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-30681", node->ld_key_version != 0);
-+ return &node->ld_key;
-+}
-+
-+ON_DEBUG(atomic_t delim_key_version = ATOMIC_INIT(0);
-+ )
-+
-+/* update right-delimiting key of @node */
-+reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key)
-+{
-+ assert("nikita-2937", node != NULL);
-+ assert("nikita-2939", key != NULL);
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3069", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-2944",
-+ znode_is_any_locked(node) ||
-+ znode_get_level(node) != LEAF_LEVEL ||
-+ keyge(key, &node->rd_key) ||
-+ keyeq(&node->rd_key, reiser4_min_key()) ||
-+ ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ node->rd_key = *key;
-+ ON_DEBUG(node->rd_key_version = atomic_inc_return(&delim_key_version));
-+ return &node->rd_key;
-+}
-+
-+/* update left-delimiting key of @node */
-+reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key)
-+{
-+ assert("nikita-2940", node != NULL);
-+ assert("nikita-2941", key != NULL);
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-2943",
-+ znode_is_any_locked(node) || keyeq(&node->ld_key,
-+ reiser4_min_key()));
-+
-+ node->ld_key = *key;
-+ ON_DEBUG(node->ld_key_version = atomic_inc_return(&delim_key_version));
-+ return &node->ld_key;
-+}
-+
-+/* true if @key is inside key range for @node */
-+int znode_contains_key(znode * node /* znode to look in */ ,
-+ const reiser4_key * key /* key to look for */ )
-+{
-+ assert("nikita-1237", node != NULL);
-+ assert("nikita-1238", key != NULL);
-+
-+ /* left_delimiting_key <= key <= right_delimiting_key */
-+ return keyle(znode_get_ld_key(node), key)
-+ && keyle(key, znode_get_rd_key(node));
-+}
-+
-+/* same as znode_contains_key(), but lock dk lock */
-+int znode_contains_key_lock(znode * node /* znode to look in */ ,
-+ const reiser4_key * key /* key to look for */ )
-+{
-+ int result;
-+
-+ assert("umka-056", node != NULL);
-+ assert("umka-057", key != NULL);
-+
-+ read_lock_dk(znode_get_tree(node));
-+ result = znode_contains_key(node, key);
-+ read_unlock_dk(znode_get_tree(node));
-+ return result;
-+}
-+
-+/* get parent pointer, assuming tree is not locked */
-+znode *znode_parent_nolock(const znode * node /* child znode */ )
-+{
-+ assert("nikita-1444", node != NULL);
-+ return node->in_parent.node;
-+}
-+
-+/* get parent pointer of znode */
-+znode *znode_parent(const znode * node /* child znode */ )
-+{
-+ assert("nikita-1226", node != NULL);
-+ assert("nikita-1406", LOCK_CNT_GTZ(rw_locked_tree));
-+ return znode_parent_nolock(node);
-+}
-+
-+/* detect uber znode used to protect in-superblock tree root pointer */
-+int znode_above_root(const znode * node /* znode to query */ )
-+{
-+ assert("umka-059", node != NULL);
-+
-+ return disk_addr_eq(&ZJNODE(node)->blocknr, &UBER_TREE_ADDR);
-+}
-+
-+/* check that @node is root---that its block number is recorded in the tree as
-+ that of root node */
-+#if REISER4_DEBUG
-+static int znode_is_true_root(const znode * node /* znode to query */ )
-+{
-+ assert("umka-060", node != NULL);
-+ assert("umka-061", current_tree != NULL);
-+
-+ return disk_addr_eq(znode_get_block(node),
-+ &znode_get_tree(node)->root_block);
-+}
-+#endif
-+
-+/* check that @node is root */
-+int znode_is_root(const znode * node /* znode to query */ )
-+{
-+ assert("nikita-1206", node != NULL);
-+
-+ return znode_get_level(node) == znode_get_tree(node)->height;
-+}
-+
-+/* Returns true if @node was just created by zget() and wasn't ever loaded
-+ into memory. */
-+/* NIKITA-HANS: yes */
-+int znode_just_created(const znode * node)
-+{
-+ assert("nikita-2188", node != NULL);
-+ return (znode_page(node) == NULL);
-+}
-+
-+/* obtain updated ->znode_epoch. See seal.c for description. */
-+__u64 znode_build_version(reiser4_tree * tree)
-+{
-+ __u64 result;
-+
-+ spin_lock(&tree->epoch_lock);
-+ result = ++tree->znode_epoch;
-+ spin_unlock(&tree->epoch_lock);
-+ return result;
-+}
-+
-+void init_load_count(load_count * dh)
-+{
-+ assert("nikita-2105", dh != NULL);
-+ memset(dh, 0, sizeof *dh);
-+}
-+
-+void done_load_count(load_count * dh)
-+{
-+ assert("nikita-2106", dh != NULL);
-+ if (dh->node != NULL) {
-+ for (; dh->d_ref > 0; --dh->d_ref)
-+ zrelse(dh->node);
-+ dh->node = NULL;
-+ }
-+}
-+
-+static int incr_load_count(load_count * dh)
-+{
-+ int result;
-+
-+ assert("nikita-2110", dh != NULL);
-+ assert("nikita-2111", dh->node != NULL);
-+
-+ result = zload(dh->node);
-+ if (result == 0)
-+ ++dh->d_ref;
-+ return result;
-+}
-+
-+int incr_load_count_znode(load_count * dh, znode * node)
-+{
-+ assert("nikita-2107", dh != NULL);
-+ assert("nikita-2158", node != NULL);
-+ assert("nikita-2109",
-+ ergo(dh->node != NULL, (dh->node == node) || (dh->d_ref == 0)));
-+
-+ dh->node = node;
-+ return incr_load_count(dh);
-+}
-+
-+int incr_load_count_jnode(load_count * dh, jnode * node)
-+{
-+ if (jnode_is_znode(node)) {
-+ return incr_load_count_znode(dh, JZNODE(node));
-+ }
-+ return 0;
-+}
-+
-+void copy_load_count(load_count * new, load_count * old)
-+{
-+ int ret = 0;
-+ done_load_count(new);
-+ new->node = old->node;
-+ new->d_ref = 0;
-+
-+ while ((new->d_ref < old->d_ref) && (ret = incr_load_count(new)) == 0) {
-+ }
-+
-+ assert("jmacd-87589", ret == 0);
-+}
-+
-+void move_load_count(load_count * new, load_count * old)
-+{
-+ done_load_count(new);
-+ new->node = old->node;
-+ new->d_ref = old->d_ref;
-+ old->node = NULL;
-+ old->d_ref = 0;
-+}
-+
-+/* convert parent pointer into coord */
-+void parent_coord_to_coord(const parent_coord_t * pcoord, coord_t * coord)
-+{
-+ assert("nikita-3204", pcoord != NULL);
-+ assert("nikita-3205", coord != NULL);
-+
-+ coord_init_first_unit_nocheck(coord, pcoord->node);
-+ coord_set_item_pos(coord, pcoord->item_pos);
-+ coord->between = AT_UNIT;
-+}
-+
-+/* pack coord into parent_coord_t */
-+void coord_to_parent_coord(const coord_t * coord, parent_coord_t * pcoord)
-+{
-+ assert("nikita-3206", pcoord != NULL);
-+ assert("nikita-3207", coord != NULL);
-+
-+ pcoord->node = coord->node;
-+ pcoord->item_pos = coord->item_pos;
-+}
-+
-+/* Initialize a parent hint pointer. (parent hint pointer is a field in znode,
-+ look for comments there) */
-+void init_parent_coord(parent_coord_t * pcoord, const znode * node)
-+{
-+ pcoord->node = (znode *) node;
-+ pcoord->item_pos = (unsigned short)~0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* debugging aid: znode invariant; every _ergo/_equi/_check stores its own text in *msg before it is evaluated */
-+static int znode_invariant_f(const znode * node /* znode to check */ ,
-+ char const **msg /* where to store error
-+ * message, if any */ )
-+{
-+#define _ergo(ant, con) \
-+ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
-+
-+#define _equi(e1, e2) \
-+ ((*msg) = "{" #e1 "} <=> {" #e2 "}", equi((e1), (e2)))
-+
-+#define _check(exp) ((*msg) = #exp, (exp))
-+/* && short-circuits, so when a check below fails *msg still holds the text of that check */
-+ return jnode_invariant_f(ZJNODE(node), msg) &&
-+ /* [znode-fake] invariant */
-+ /* fake znode doesn't have a parent, and */
-+ _ergo(znode_get_level(node) == 0, znode_parent(node) == NULL) &&
-+ /* there is another way to express this very check, and */
-+ _ergo(znode_above_root(node), znode_parent(node) == NULL) &&
-+ /* it has special block number, and */
-+ _ergo(znode_get_level(node) == 0,
-+ disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
-+ /* it is the only znode with such block number, and */
-+ _ergo(!znode_above_root(node) && znode_is_loaded(node),
-+ !disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
-+ /* it is parent of the tree root node */
-+ _ergo(znode_is_true_root(node),
-+ znode_above_root(znode_parent(node))) &&
-+ /* [znode-level] invariant */
-+ /* level of parent znode is one larger than that of child,
-+ except for the fake znode, and */
-+ _ergo(znode_parent(node) && !znode_above_root(znode_parent(node)),
-+ znode_get_level(znode_parent(node)) ==
-+ znode_get_level(node) + 1) &&
-+ /* left neighbor is at the same level, and */
-+ _ergo(znode_is_left_connected(node) && node->left != NULL,
-+ znode_get_level(node) == znode_get_level(node->left)) &&
-+ /* right neighbor is at the same level */
-+ _ergo(znode_is_right_connected(node) && node->right != NULL,
-+ znode_get_level(node) == znode_get_level(node->right)) &&
-+ /* [znode-connected] invariant */
-+ _ergo(node->left != NULL, znode_is_left_connected(node)) &&
-+ _ergo(node->right != NULL, znode_is_right_connected(node)) &&
-+ _ergo(!znode_is_root(node) && node->left != NULL,
-+ znode_is_right_connected(node->left) &&
-+ node->left->right == node) &&
-+ _ergo(!znode_is_root(node) && node->right != NULL,
-+ znode_is_left_connected(node->right) &&
-+ node->right->left == node) &&
-+ /* [znode-c_count] invariant */
-+ /* for any znode, c_count of its parent is greater than 0 */
-+ _ergo(znode_parent(node) != NULL &&
-+ !znode_above_root(znode_parent(node)),
-+ znode_parent(node)->c_count > 0) &&
-+ /* leaves don't have children */
-+ _ergo(znode_get_level(node) == LEAF_LEVEL,
-+ node->c_count == 0) &&
-+ _check(node->zjnode.jnodes.prev != NULL) &&
-+ _check(node->zjnode.jnodes.next != NULL) &&
-+ /* orphan doesn't have a parent */
-+ _ergo(ZF_ISSET(node, JNODE_ORPHAN), znode_parent(node) == 0) &&
-+ /* [znode-modify] invariant */
-+ /* if znode is not write-locked, its checksum remains
-+ * invariant */
-+ /* unfortunately, zlock is unordered w.r.t. jnode_lock, so we
-+ * cannot check this. */
-+ /* [znode-refs] invariant */
-+ /* only referenced znode can be long-term locked */
-+ _ergo(znode_is_locked(node),
-+ atomic_read(&ZJNODE(node)->x_count) != 0);
-+}
-+
-+/* debugging aid: check znode invariant; warn (naming the failed condition) and return 0 if it doesn't hold */
-+int znode_invariant(znode * node /* znode to check */ )
-+{
-+ char const *failed_msg;
-+ int result;
-+
-+ assert("umka-063", node != NULL);
-+ assert("umka-064", current_tree != NULL);
-+ /* the checks run under the znode spin lock and the tree read lock */
-+ spin_lock_znode(node);
-+ read_lock_tree(znode_get_tree(node));
-+ result = znode_invariant_f(node, &failed_msg);
-+ if (!result) {
-+ /* print_znode("corrupted node", node); */
-+ warning("jmacd-555", "Condition %s failed", failed_msg);
-+ }
-+ read_unlock_tree(znode_get_tree(node));
-+ spin_unlock_znode(node);
-+ return result;
-+}
-+
-+/* return non-0 iff data are loaded into znode */
-+int znode_is_loaded(const znode * node /* znode to query */ )
-+{
-+ assert("nikita-497", node != NULL);
-+ return jnode_is_loaded(ZJNODE(node));
-+}
-+
-+unsigned long znode_times_locked(const znode * z)
-+{
-+ return z->times_locked;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/fs/reiser4/znode.h linux-2.6.20/fs/reiser4/znode.h
---- linux-2.6.20.orig/fs/reiser4/znode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.20/fs/reiser4/znode.h 2007-05-06 14:50:43.907040716 +0400
-@@ -0,0 +1,434 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of znode (Zam's node). See znode.c for more details. */
-+
-+#ifndef __ZNODE_H__
-+#define __ZNODE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "lock.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-+#include <asm/atomic.h>
-+#include <asm/semaphore.h>
-+
-+/* znode tracks its position within parent (internal item in a parent node,
-+ * that contains znode's block number). */
-+typedef struct parent_coord {
-+ znode *node;
-+ pos_in_node_t item_pos;
-+} parent_coord_t;
-+
-+/* &znode - node in a reiser4 tree.
-+
-+ NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce
-+ cacheline pressure.
-+
-+ Locking:
-+
-+ Long term: data in a disk node attached to this znode are protected
-+ by long term, deadlock aware lock ->lock;
-+
-+ Spin lock: the following fields are protected by the spin lock:
-+
-+ ->lock
-+
-+ Following fields are protected by the global tree lock:
-+
-+ ->left
-+ ->right
-+ ->in_parent
-+ ->c_count
-+
-+ Following fields are protected by the global delimiting key lock (dk_lock):
-+
-+ ->ld_key (to update ->ld_key long-term lock on the node is also required)
-+ ->rd_key
-+
-+ Following fields are protected by the long term lock:
-+
-+ ->nr_items
-+
-+ ->node_plugin is never changed once set. This means that after code made
-+ itself sure that field is valid it can be accessed without any additional
-+ locking.
-+
-+ ->level is immutable.
-+
-+ Invariants involving this data-type:
-+
-+ [znode-fake]
-+ [znode-level]
-+ [znode-connected]
-+ [znode-c_count]
-+ [znode-refs]
-+ [jnode-refs]
-+ [jnode-queued]
-+ [znode-modify]
-+
-+ For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks.
-+ Suggestions for how to do that are desired.*/
-+struct znode {
-+ /* Embedded jnode. */
-+ jnode zjnode;
-+
-+ /* contains two subfields, node and item_pos (see parent_coord_t).
-+
-+ item_pos is only a hint that is cached to
-+ speed up lookups during balancing. It is not required to be up to
-+ date. Synched in find_child_ptr().
-+
-+ This value allows us to avoid expensive binary searches.
-+
-+ in_parent.node points to the parent of this node, and is NOT a
-+ hint.
-+ */
-+ parent_coord_t in_parent;
-+
-+ /*
-+ * sibling list pointers
-+ */
-+
-+ /* left-neighbor */
-+ znode *left;
-+ /* right-neighbor */
-+ znode *right;
-+
-+ /* long term lock on node content. This lock supports deadlock
-+ detection. See lock.c
-+ */
-+ zlock lock;
-+
-+ /* You cannot remove from memory a node that has children in
-+ memory. This is because we rely on the fact that parent of given
-+ node can always be reached without blocking for io. When reading a
-+ node into memory you must increase the c_count of its parent, when
-+ removing it from memory you must decrease the c_count. This makes
-+ the code simpler, and the cases where it is suboptimal are truly
-+ obscure.
-+ */
-+ int c_count;
-+
-+ /* plugin of node attached to this znode. NULL if znode is not
-+ loaded. */
-+ node_plugin *nplug;
-+
-+ /* version of znode data. This is increased on each modification. This
-+ * is necessary to implement seals (see seal.[ch]) efficiently. */
-+ __u64 version;
-+
-+ /* left delimiting key. Necessary to efficiently perform
-+ balancing with node-level locking. Kept in memory only. */
-+ reiser4_key ld_key;
-+ /* right delimiting key. */
-+ reiser4_key rd_key;
-+
-+ /* znode's tree level */
-+ __u16 level;
-+ /* number of items in this node. This field is modified by node
-+ * plugin. */
-+ __u16 nr_items;
-+
-+#if REISER4_DEBUG
-+ void *creator;
-+ reiser4_key first_key;
-+ unsigned long times_locked; /* value reported by znode_times_locked() */
-+ int left_version; /* when node->left was updated */
-+ int right_version; /* when node->right was updated */
-+ int ld_key_version; /* when node->ld_key was updated */
-+ int rd_key_version; /* when node->rd_key was updated */
-+#endif
-+
-+} __attribute__ ((aligned(16)));
-+
-+ON_DEBUG(extern atomic_t delim_key_version;
-+ )
-+
-+/* In general I think these macros should not be exposed. */
-+#define znode_is_locked(node) (lock_is_locked(&node->lock))
-+#define znode_is_rlocked(node) (lock_is_rlocked(&node->lock))
-+#define znode_is_wlocked(node) (lock_is_wlocked(&node->lock))
-+#define znode_is_wlocked_once(node) (lock_is_wlocked_once(&node->lock))
-+#define znode_can_be_rlocked(node) (lock_can_be_rlocked(&node->lock))
-+#define is_lock_compatible(node, mode) (lock_mode_compatible(&node->lock, mode))
-+/* Macros for accessing the znode state. */
-+#define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f))
-+#define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f))
-+#define ZF_SET(p,f) JF_SET (ZJNODE(p), (f))
-+extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block,
-+ znode * parent, tree_level level, gfp_t gfp_flag);
-+extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block);
-+extern int zload(znode * node);
-+extern int zload_ra(znode * node, ra_info_t * info);
-+extern int zinit_new(znode * node, gfp_t gfp_flags);
-+extern void zrelse(znode * node);
-+extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block);
-+
-+/* size of data in znode: PAGE_CACHE_SIZE regardless of @node, which is only checked for NULL */
-+static inline unsigned
-+znode_size(const znode * node UNUSED_ARG /* znode to query */ )
-+{
-+ assert("nikita-1416", node != NULL);
-+ return PAGE_CACHE_SIZE;
-+}
-+
-+extern void parent_coord_to_coord(const parent_coord_t * pcoord,
-+ coord_t * coord);
-+extern void coord_to_parent_coord(const coord_t * coord,
-+ parent_coord_t * pcoord);
-+extern void init_parent_coord(parent_coord_t * pcoord, const znode * node);
-+
-+extern unsigned znode_free_space(znode * node);
-+
-+extern reiser4_key *znode_get_rd_key(znode * node);
-+extern reiser4_key *znode_get_ld_key(znode * node);
-+
-+extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key);
-+extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key);
-+
-+/* `connected' state checks */
-+static inline int znode_is_right_connected(const znode * node)
-+{
-+ return ZF_ISSET(node, JNODE_RIGHT_CONNECTED);
-+}
-+
-+static inline int znode_is_left_connected(const znode * node)
-+{
-+ return ZF_ISSET(node, JNODE_LEFT_CONNECTED);
-+}
-+
-+static inline int znode_is_connected(const znode * node)
-+{
-+ return znode_is_right_connected(node) && znode_is_left_connected(node);
-+}
-+
-+extern int znode_shift_order;
-+extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr);
-+extern void znode_remove(znode *, reiser4_tree *);
-+extern znode *znode_parent(const znode * node);
-+extern znode *znode_parent_nolock(const znode * node);
-+extern int znode_above_root(const znode * node);
-+extern int init_znodes(void);
-+extern void done_znodes(void);
-+extern int znodes_tree_init(reiser4_tree * ztree);
-+extern void znodes_tree_done(reiser4_tree * ztree);
-+extern int znode_contains_key(znode * node, const reiser4_key * key);
-+extern int znode_contains_key_lock(znode * node, const reiser4_key * key);
-+extern unsigned znode_save_free_space(znode * node);
-+extern unsigned znode_recover_free_space(znode * node);
-+extern znode *zalloc(gfp_t gfp_flag);
-+extern void zinit(znode *, const znode * parent, reiser4_tree *);
-+extern int zparse(znode * node);
-+
-+extern int znode_just_created(const znode * node);
-+
-+extern void zfree(znode * node);
-+
-+#if REISER4_DEBUG
-+extern void print_znode(const char *prefix, const znode * node);
-+#else
-+#define print_znode( p, n ) noop
-+#endif
-+
-+/* Make it look like various znode functions exist instead of treating znodes as
-+ jnodes in znode-specific code. */
-+#define znode_page(x) jnode_page ( ZJNODE(x) )
-+#define zdata(x) jdata ( ZJNODE(x) )
-+#define znode_get_block(x) jnode_get_block ( ZJNODE(x) )
-+#define znode_created(x) jnode_created ( ZJNODE(x) )
-+#define znode_set_created(x) jnode_set_created ( ZJNODE(x) )
-+#define znode_convertible(x) jnode_convertible (ZJNODE(x))
-+#define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x))
-+
-+#define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) )
-+#define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) )
-+#define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) )
-+#define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) )
-+
-+#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) )
-+#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) )
-+#define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) )
-+#define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) )
-+#define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) )
-+
-+#if REISER4_DEBUG
-+extern int znode_x_count_is_protected(const znode * node);
-+extern int znode_invariant(znode * node);
-+#endif
-+
-+/* acquire reference to @node */
-+static inline znode *zref(znode * node)
-+{
-+ /* change of x_count from 0 to 1 is protected by tree spin-lock */
-+ return JZNODE(jref(ZJNODE(node)));
-+}
-+
-+/* release reference to @node */
-+static inline void zput(znode * node)
-+{
-+ assert("nikita-3564", znode_invariant(node));
-+ jput(ZJNODE(node));
-+}
-+
-+/* get the level field for a znode */
-+static inline tree_level znode_get_level(const znode * node)
-+{
-+ return node->level;
-+}
-+
-+/* get the level field for a jnode */
-+static inline tree_level jnode_get_level(const jnode * node)
-+{
-+	/* unformatted nodes are all at the LEAF_LEVEL and for
-+	   "semi-formatted" nodes like bitmaps, level doesn't matter. */
-+	if (!jnode_is_znode(node))
-+		return LEAF_LEVEL;
-+	/* formatted node: defer to the embedding znode */
-+	return znode_get_level(JZNODE(node));
-+}
-+
-+/* true (1) if jnode is on leaf level, 0 otherwise */
-+static inline int jnode_is_leaf(const jnode * node)
-+{
-+	/* formatted node: compare its tree level */
-+	if (jnode_is_znode(node))
-+		return znode_get_level(JZNODE(node)) == LEAF_LEVEL;
-+	/* of the remaining jnode types only unformatted blocks are leaves */
-+	return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
-+}
-+
-+/* return znode's tree */
-+static inline reiser4_tree *znode_get_tree(const znode * node)
-+{
-+ assert("nikita-2692", node != NULL);
-+ return jnode_get_tree(ZJNODE(node));
-+}
-+
-+/* resolve race with zput: pass @node through jnode_rip_sync() and return the
-+   znode it hands back, or NULL when it hands back nothing */
-+static inline znode *znode_rip_check(reiser4_tree * tree, znode * node)
-+{
-+	jnode *synced;
-+
-+	synced = jnode_rip_sync(tree, ZJNODE(node));
-+	/* expected case: a jnode came back */
-+	if (likely(synced != NULL))
-+		return JZNODE(synced);
-+	return NULL;
-+}
-+
-+#if defined(REISER4_DEBUG)
-+int znode_is_loaded(const znode * node /* znode to query */ );
-+#endif
-+
-+extern __u64 znode_build_version(reiser4_tree * tree);
-+
-+/* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We
-+ must load the data for a node in many places. We could do this by simply calling
-+ zload() everywhere, the difficulty arises when we must release the loaded data by
-+ calling zrelse. In a function with many possible error/return paths, it requires extra
-+ work to figure out which exit paths must call zrelse and those which do not. The data
-+ handle automatically calls zrelse for every zload that it is responsible for. In that
-+ sense, it acts much like a lock_handle.
-+*/
-+typedef struct load_count {
-+ znode *node; /* node this handle refers to; NULL when the handle is empty */
-+ int d_ref; /* loads held through this handle (copy_load_count() re-takes this many) */
-+} load_count;
-+
-+extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */
-+extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */
-+extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). */
-+extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as
-+ * incr_load_count_znode, otherwise do nothing (unformatted nodes
-+ * don't require zload/zrelse treatment). */
-+extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */
-+extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */
-+
-+/* Variable initializers for load_count: both expand to a compound literal of type load_count (not a pointer). */
-+#define INIT_LOAD_COUNT ( load_count ){ .node = NULL, .d_ref = 0 }
-+#define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 }
-+/* A convenience macro for use in assertions or debug-only code, where loaded
-+ data is only required to perform the debugging check. This macro
-+ encapsulates an expression inside a pair of calls to zload()/zrelse(). */
-+#define WITH_DATA( node, exp ) \
-+({ \
-+ long __with_dh_result; \
-+ znode *__with_dh_node; \
-+ \
-+ __with_dh_node = ( node ); \
-+ __with_dh_result = zload( __with_dh_node ); \
-+ if( __with_dh_result == 0 ) { \
-+ __with_dh_result = ( long )( exp ); \
-+ zrelse( __with_dh_node ); \
-+ } \
-+ __with_dh_result; \
-+})
-+
-+/* Same as above, but accepts a return value in case zload fails. */
-+#define WITH_DATA_RET( node, ret, exp ) \
-+({ \
-+ int __with_dh_result; \
-+ znode *__with_dh_node; \
-+ \
-+ __with_dh_node = ( node ); \
-+ __with_dh_result = zload( __with_dh_node ); \
-+ if( __with_dh_result == 0 ) { \
-+ __with_dh_result = ( int )( exp ); \
-+ zrelse( __with_dh_node ); \
-+ } else \
-+ __with_dh_result = ( ret ); \
-+ __with_dh_result; \
-+})
-+
-+#define WITH_COORD(coord, exp) \
-+({ \
-+ coord_t *__coord; \
-+ \
-+ __coord = (coord); \
-+ coord_clear_iplug(__coord); \
-+ WITH_DATA(__coord->node, exp); \
-+})
-+
-+#if REISER4_DEBUG
-+#define STORE_COUNTERS \
-+ reiser4_lock_counters_info __entry_counters = \
-+ *reiser4_lock_counters()
-+#define CHECK_COUNTERS \
-+ON_DEBUG_CONTEXT( \
-+({ \
-+ __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \
-+ __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \
-+ __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \
-+ assert("nikita-2159", \
-+ !memcmp(&__entry_counters, reiser4_lock_counters(), \
-+ sizeof __entry_counters)); \
-+}) )
-+
-+#else
-+#define STORE_COUNTERS
-+#define CHECK_COUNTERS noop
-+#endif
-+
-+/* __ZNODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.20.orig/include/linux/fs.h linux-2.6.20/include/linux/fs.h
---- linux-2.6.20.orig/include/linux/fs.h 2007-05-06 15:04:41.352625543 +0400
-+++ linux-2.6.20/include/linux/fs.h 2007-05-06 14:50:43.911041966 +0400
-@@ -1165,6 +1165,8 @@
- void (*clear_inode) (struct inode *);
- void (*umount_begin) (struct vfsmount *, int);
-
-+ void (*sync_inodes) (struct super_block *sb,
-+ struct writeback_control *wbc);
- int (*show_options)(struct seq_file *, struct vfsmount *);
- int (*show_stats)(struct seq_file *, struct vfsmount *);
- #ifdef CONFIG_QUOTA
-@@ -1583,6 +1585,7 @@
- extern int invalidate_inode_pages2_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
- extern int write_inode_now(struct inode *, int);
-+extern void generic_sync_sb_inodes(struct super_block *, struct writeback_control *);
- extern int filemap_fdatawrite(struct address_space *);
- extern int filemap_flush(struct address_space *);
- extern int filemap_fdatawait(struct address_space *);
-diff -urN linux-2.6.20.orig/lib/radix-tree.c linux-2.6.20/lib/radix-tree.c
---- linux-2.6.20.orig/lib/radix-tree.c 2007-05-06 15:04:42.096858012 +0400
-+++ linux-2.6.20/lib/radix-tree.c 2007-05-06 14:50:43.915043216 +0400
-@@ -151,6 +151,7 @@
- out:
- return ret;
- }
-+EXPORT_SYMBOL(radix_tree_preload);
-
- static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
- int offset)
-diff -urN linux-2.6.20.orig/mm/filemap.c linux-2.6.20/mm/filemap.c
---- linux-2.6.20.orig/mm/filemap.c 2007-05-06 15:04:42.108861762 +0400
-+++ linux-2.6.20/mm/filemap.c 2007-05-06 14:50:43.919044465 +0400
-@@ -121,6 +121,7 @@
- mapping->nrpages--;
- __dec_zone_page_state(page, NR_FILE_PAGES);
- }
-+EXPORT_SYMBOL(__remove_from_page_cache);
-
- void remove_from_page_cache(struct page *page)
- {
-@@ -132,6 +133,7 @@
- __remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
- }
-+EXPORT_SYMBOL(remove_from_page_cache);
-
- static int sync_page(void *word)
- {
-@@ -738,6 +740,7 @@
- read_unlock_irq(&mapping->tree_lock);
- return ret;
- }
-+EXPORT_SYMBOL(add_to_page_cache_lru);
-
- /**
- * find_get_pages_contig - gang contiguous pagecache lookup
-@@ -798,6 +801,7 @@
- read_unlock_irq(&mapping->tree_lock);
- return ret;
- }
-+EXPORT_SYMBOL(find_get_pages);
-
- /**
- * grab_cache_page_nowait - returns locked page at given index in given cache
-@@ -855,6 +859,7 @@
-
- ra->ra_pages /= 4;
- }
-+EXPORT_SYMBOL(find_get_pages_tag);
-
- /**
- * do_generic_mapping_read - generic file read routine
-diff -urN linux-2.6.20.orig/mm/readahead.c linux-2.6.20/mm/readahead.c
---- linux-2.6.20.orig/mm/readahead.c 2007-05-06 15:04:42.144873010 +0400
-+++ linux-2.6.20/mm/readahead.c 2007-05-06 14:50:43.919044465 +0400
-@@ -568,6 +568,7 @@
- ra->flags &= ~RA_FLAG_INCACHE;
- ra->cache_hit = 0;
- }
-+EXPORT_SYMBOL_GPL(handle_ra_miss);
-
- /*
- * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
-Files linux-2.6.20.orig/scripts/kconfig/mconf and linux-2.6.20/scripts/kconfig/mconf differ
+++ /dev/null
-diff -urN linux-2.6.24.orig/arch/x86/lib/usercopy_32.c linux-2.6.24/arch/x86/lib/usercopy_32.c
---- linux-2.6.24.orig/arch/x86/lib/usercopy_32.c 2008-01-25 14:24:08.234127530 +0300
-+++ linux-2.6.24/arch/x86/lib/usercopy_32.c 2008-01-25 11:39:06.872191202 +0300
-@@ -817,6 +817,7 @@
- #endif
- return n;
- }
-+EXPORT_SYMBOL(__copy_from_user_ll_nocache);
-
- unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
- unsigned long n)
-@@ -831,6 +832,7 @@
- #endif
- return n;
- }
-+EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
- /**
- * copy_to_user: - Copy a block of data into user space.
-diff -urN linux-2.6.24.orig/Documentation/Changes linux-2.6.24/Documentation/Changes
---- linux-2.6.24.orig/Documentation/Changes 2007-10-10 00:31:38.000000000 +0400
-+++ linux-2.6.24/Documentation/Changes 2008-01-25 11:39:06.876192233 +0300
-@@ -36,6 +36,7 @@
- o e2fsprogs 1.29 # tune2fs
- o jfsutils 1.1.3 # fsck.jfs -V
- o reiserfsprogs 3.6.3 # reiserfsck -V 2>&1|grep reiserfsprogs
-+o reiser4progs 1.0.0 # fsck.reiser4 -V
- o xfsprogs 2.6.0 # xfs_db -V
- o pcmciautils 004 # pccardctl -V
- o quota-tools 3.09 # quota -V
-@@ -145,6 +146,13 @@
- versions of mkreiserfs, resize_reiserfs, debugreiserfs and
- reiserfsck. These utils work on both i386 and alpha platforms.
-
-+Reiser4progs
-+------------
-+
-+The reiser4progs package contains utilities for the reiser4 file system.
-+Detailed instructions are provided in the README file located at:
-+<ftp://ftp.namesys.com/pub/reiser4progs/README>.
-+
- Xfsprogs
- --------
-
-@@ -323,6 +331,10 @@
- -------------
- o <http://www.namesys.com/pub/reiserfsprogs/reiserfsprogs-3.6.3.tar.gz>
-
-+Reiser4progs
-+------------
-+o <ftp://ftp.namesys.com/pub/reiser4progs/>
-+
- Xfsprogs
- --------
- o <ftp://oss.sgi.com/projects/xfs/download/>
-diff -urN linux-2.6.24.orig/Documentation/filesystems/reiser4.txt linux-2.6.24/Documentation/filesystems/reiser4.txt
---- linux-2.6.24.orig/Documentation/filesystems/reiser4.txt 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/Documentation/filesystems/reiser4.txt 2008-01-25 11:39:06.876192233 +0300
-@@ -0,0 +1,75 @@
-+Reiser4 filesystem
-+==================
-+Reiser4 is a file system based on dancing tree algorithms, and is
-+described at http://www.namesys.com
-+
-+
-+References
-+==========
-+web page http://namesys.com/v4/v4.html
-+source code ftp://ftp.namesys.com/pub/reiser4-for-2.6/
-+userland tools ftp://ftp.namesys.com/pub/reiser4progs/
-+install page http://www.namesys.com/install_v4.html
-+
-+Compile options
-+===============
-+Enable reiser4 debug mode
-+ This checks everything imaginable while reiser4
-+ runs
-+
-+Mount options
-+=============
-+tmgr.atom_max_size=N
-+ Atoms containing more than N blocks will be forced to commit.
-+ N is decimal.
-+ Default is nr_free_pagecache_pages() / 2 at mount time.
-+
-+tmgr.atom_max_age=N
-+ Atoms older than N seconds will be forced to commit. N is decimal.
-+ Default is 600.
-+
-+tmgr.atom_max_flushers=N
-+ Limit of concurrent flushers for one atom. 0 means no limit.
-+ Default is 0.
-+
-+tree.cbk_cache.nr_slots=N
-+ Number of slots in the cbk cache.
-+
-+flush.relocate_threshold=N
-+ If flush finds more than N adjacent dirty leaf-level blocks it
-+ will force them to be relocated.
-+ Default is 64.
-+
-+flush.relocate_distance=N
-+ If flush can find a block allocation closer than at most
-+ N from the preceder it will relocate to that position.
-+ Default is 64.
-+
-+flush.scan_maxnodes=N
-+ The maximum number of nodes to scan left on a level during
-+ flush.
-+ Default is 10000.
-+
-+optimal_io_size=N
-+ Preferred IO size. This value is used to set st_blksize of
-+ struct stat.
-+ Default is 65536.
-+
-+bsdgroups
-+ Turn on BSD-style gid assignment.
-+
-+32bittimes
-+ By default files in reiser4 have 64 bit timestamps. Files
-+ created when filesystem is mounted with 32bittimes mount
-+ option will get 32 bit timestamps.
-+
-+mtflush
-+ Turn off concurrent flushing.
-+
-+nopseudo
-+ Disable pseudo files support. See
-+ http://namesys.com/v4/pseudo.html for more about pseudo files.
-+
-+dont_load_bitmap
-+ Don't load all bitmap blocks at mount time, it is useful for
-+ machines with tiny RAM and large disks.
-diff -urN linux-2.6.24.orig/fs/fs-writeback.c linux-2.6.24/fs/fs-writeback.c
---- linux-2.6.24.orig/fs/fs-writeback.c 2008-01-25 14:24:18.344724018 +0300
-+++ linux-2.6.24/fs/fs-writeback.c 2008-01-25 11:39:06.876192233 +0300
-@@ -386,8 +386,6 @@
- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
- * that it can be located for waiting on in __writeback_single_inode().
- *
-- * Called under inode_lock.
-- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched. For other superblocks,
-@@ -403,11 +401,13 @@
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on inode_sync_wait.
- */
--static void
--sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
-+void
-+generic_sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
- {
- const unsigned long start = jiffies; /* livelock avoidance */
-
-+ spin_lock(&inode_lock);
-+
- if (!wbc->for_kupdate || list_empty(&sb->s_io))
- queue_io(sb, wbc->older_than_this);
-
-@@ -482,8 +482,19 @@
- if (wbc->nr_to_write <= 0)
- break;
- }
-+ spin_unlock(&inode_lock);
- return; /* Leave any unwritten inodes on s_io */
- }
-+EXPORT_SYMBOL(generic_sync_sb_inodes);
-+
-+static void
-+sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
-+{
-+ if (sb->s_op->sync_inodes)
-+ sb->s_op->sync_inodes(sb, wbc);
-+ else
-+ generic_sync_sb_inodes(sb, wbc);
-+}
-
- /*
- * Start writeback of dirty pagecache data against all unlocked inodes.
-@@ -524,11 +535,8 @@
- * be unmounted by the time it is released.
- */
- if (down_read_trylock(&sb->s_umount)) {
-- if (sb->s_root) {
-- spin_lock(&inode_lock);
-+ if (sb->s_root)
- sync_sb_inodes(sb, wbc);
-- spin_unlock(&inode_lock);
-- }
- up_read(&sb->s_umount);
- }
- spin_lock(&sb_lock);
-@@ -566,9 +574,7 @@
- (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
- nr_dirty + nr_unstable;
- wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
-- spin_lock(&inode_lock);
- sync_sb_inodes(sb, &wbc);
-- spin_unlock(&inode_lock);
- }
-
- /*
-diff -urN linux-2.6.24.orig/fs/Kconfig linux-2.6.24/fs/Kconfig
---- linux-2.6.24.orig/fs/Kconfig 2008-01-25 14:24:17.976629488 +0300
-+++ linux-2.6.24/fs/Kconfig 2008-01-25 11:39:06.880193263 +0300
-@@ -273,6 +273,8 @@
- default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
- default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
-
-+source "fs/reiser4/Kconfig"
-+
- config REISERFS_FS
- tristate "Reiserfs support"
- help
-diff -urN linux-2.6.24.orig/fs/Makefile linux-2.6.24/fs/Makefile
---- linux-2.6.24.orig/fs/Makefile 2008-01-25 14:24:17.980630515 +0300
-+++ linux-2.6.24/fs/Makefile 2008-01-25 11:39:06.884194294 +0300
-@@ -66,6 +66,7 @@
-
- # Do not add any filesystems before this line
- obj-$(CONFIG_REISERFS_FS) += reiserfs/
-+obj-$(CONFIG_REISER4_FS) += reiser4/
- obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
- obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev
- obj-$(CONFIG_JBD) += jbd/
-diff -urN linux-2.6.24.orig/fs/reiser4/as_ops.c linux-2.6.24/fs/reiser4/as_ops.c
---- linux-2.6.24.orig/fs/reiser4/as_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/as_ops.c 2008-01-25 11:39:06.884194294 +0300
-@@ -0,0 +1,377 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Interface to VFS. Reiser4 address_space_operations are defined here. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/file/file.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/disk_format/disk_format.h"
-+#include "plugin/plugin.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "entd.h"
-+
-+#include <linux/profile.h>
-+#include <linux/types.h>
-+#include <linux/mount.h>
-+#include <linux/vfs.h>
-+#include <linux/mm.h>
-+#include <linux/buffer_head.h>
-+#include <linux/dcache.h>
-+#include <linux/list.h>
-+#include <linux/pagemap.h>
-+#include <linux/slab.h>
-+#include <linux/seq_file.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/writeback.h>
-+#include <linux/backing-dev.h>
-+#include <linux/quotaops.h>
-+#include <linux/security.h>
-+
-+/* address space operations */
-+
-+/**
-+ * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting
-+ * @page: page to be dirtied
-+ *
-+ * An operation of struct address_space_operations, used by the unix and
-+ * cryptcompress file plugins. It is called when a reiser4 page gets dirtied
-+ * outside of reiser4, for example, when the dirty bit is moved from a pte to
-+ * the physical page.
-+ *
-+ * A page that was not dirty before is tagged in the mapping's page tree with
-+ * PAGECACHE_TAG_REISER4_MOVED, so that the reiser4 specific work wrt dirty
-+ * pages (jnode creation, capturing by an atom) can be done later; it can not
-+ * be done in the contexts where set_page_dirty is called.
-+ * Always returns 0.
-+ */
-+int reiser4_set_page_dirty(struct page *page)
-+{
-+	struct address_space *mapping;
-+
-+	/* this page can be unformatted only */
-+	assert("vs-1734", (page->mapping &&
-+			   page->mapping->host &&
-+			   reiser4_get_super_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host
-+			   && reiser4_get_cc_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host
-+			   && reiser4_get_bitmap_fake(page->mapping->host->i_sb) !=
-+			   page->mapping->host));
-+
-+	/* dirty bit was already set: nothing to tag or account */
-+	if (TestSetPageDirty(page))
-+		return 0;
-+	mapping = page->mapping;
-+	if (mapping == NULL)
-+		return 0;
-+
-+	write_lock_irq(&mapping->tree_lock);
-+	/* check for race with truncate */
-+	if (page->mapping) {
-+		assert("vs-1652", page->mapping == mapping);
-+		if (mapping_cap_account_dirty(mapping))
-+			inc_zone_page_state(page, NR_FILE_DIRTY);
-+		radix_tree_tag_set(&mapping->page_tree, page->index,
-+				   PAGECACHE_TAG_REISER4_MOVED);
-+	}
-+	write_unlock_irq(&mapping->tree_lock);
-+	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-+	return 0;
-+}
-+
-+/* ->invalidatepage method for reiser4 */
-+
-+/*
-+ * this is called for each truncated page from
-+ * truncate_inode_pages()->truncate_{complete,partial}_page().
-+ *
-+ * At the moment of call, page is under lock, and outstanding io (if any) has
-+ * completed.
-+ */
-+
-+/**
-+ * reiser4_invalidatepage - ->invalidatepage() method for reiser4
-+ * @page: page to invalidate; asserted to be locked and not under writeback
-+ * @offset: starting offset for partial invalidation; on the capture path the
-+ * jnode is detached from the page only when this is 0
-+ *
-+ * Nothing is done for pages of the fake inodes (super, cc, bitmap) or when a
-+ * reiser4 context cannot be initialized.
-+ */
-+void reiser4_invalidatepage(struct page *page, unsigned long offset)
-+{
-+ int ret = 0;
-+ reiser4_context *ctx;
-+ struct inode *inode;
-+ jnode *node;
-+
-+ /*
-+ * This is called to truncate file's page.
-+ *
-+ * Originally, reiser4 implemented truncate in a standard way
-+ * (vmtruncate() calls ->invalidatepage() on all truncated pages
-+ * first, then file system ->truncate() call-back is invoked).
-+ *
-+ * This lead to the problem when ->invalidatepage() was called on a
-+ * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT
-+ * process. That is, truncate was bypassing transactions. To avoid
-+ * this, try_capture_page_to_invalidate() call was added here.
-+ *
-+ * After many troubles with vmtruncate() based truncate (including
-+ * races with flush, tail conversion, etc.) it was re-written in the
-+ * top-to-bottom style: items are killed in reiser4_cut_tree_object()
-+ * and pages belonging to extent are invalidated in kill_hook_extent().
-+ * So probably now additional call to capture is not needed here.
-+ */
-+
-+ assert("nikita-3137", PageLocked(page));
-+ assert("nikita-3138", !PageWriteback(page));
-+ inode = page->mapping->host;
-+
-+ /*
-+ * ->invalidatepage() should only be called for the unformatted
-+ * jnodes. Destruction of all other types of jnodes is performed
-+ * separately. But, during some corner cases (like handling errors
-+ * during mount) it is simpler to let ->invalidatepage to be called on
-+ * them. Check for this, and do nothing.
-+ */
-+ if (reiser4_get_super_fake(inode->i_sb) == inode)
-+ return;
-+ if (reiser4_get_cc_fake(inode->i_sb) == inode)
-+ return;
-+ if (reiser4_get_bitmap_fake(inode->i_sb) == inode)
-+ return;
-+ assert("vs-1426", PagePrivate(page));
-+ assert("vs-1427",
-+ page->mapping == jnode_get_mapping(jnode_by_page(page)));
-+ assert("", jprivate(page) != NULL);
-+ /* only the cryptcompress file plugin may ask for a partial
-+ * invalidation (offset != 0) */
-+ assert("", ergo(inode_file_plugin(inode) !=
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID),
-+ offset == 0));
-+
-+ /* without a context nothing below can run; the page is left as is */
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return;
-+
-+ node = jprivate(page);
-+ spin_lock_jnode(node);
-+ if (!(node->state & ((1 << JNODE_DIRTY) | (1<< JNODE_FLUSH_QUEUED) |
-+ (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) {
-+ /* there is not need to capture */
-+ /* NOTE(review): this branch has no visible spin_unlock_jnode();
-+ * presumably reiser4_uncapture_jnode() releases the jnode lock
-+ * -- confirm against its definition */
-+ jref(node);
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ page_clear_jnode(page, node);
-+ reiser4_uncapture_jnode(node);
-+ unhash_unformatted_jnode(node);
-+ jput(node);
-+ reiser4_exit_context(ctx);
-+ return;
-+ }
-+ spin_unlock_jnode(node);
-+
-+ /* capture page being truncated. */
-+ /* a capture failure is only logged; processing continues */
-+ ret = try_capture_page_to_invalidate(page);
-+ if (ret != 0)
-+ warning("nikita-3141", "Cannot capture: %i", ret);
-+
-+ if (offset == 0) {
-+ /* remove jnode from transaction and detach it from page. */
-+ jref(node);
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ /* page cannot be detached from jnode concurrently, because it
-+ * is locked */
-+ reiser4_uncapture_page(page);
-+
-+ /* this detaches page from jnode, so that jdelete will not try
-+ * to lock page which is already locked */
-+ spin_lock_jnode(node);
-+ page_clear_jnode(page, node);
-+ spin_unlock_jnode(node);
-+ unhash_unformatted_jnode(node);
-+
-+ jput(node);
-+ }
-+
-+ reiser4_exit_context(ctx);
-+}
-+
-+/* Helper of reiser4_releasepage(): tells whether @node can be detached from
-+ * its page so that the page can be released. Returns 1 if so, 0 otherwise.
-+ * Both node->guard and node->load must be held by the caller. */
-+int jnode_is_releasable(jnode * node /* node to check */ )
-+{
-+	assert("nikita-2781", node != NULL);
-+	assert_spin_locked(&(node->guard));
-+	assert_spin_locked(&(node->load));
-+
-+	/* somebody is using the jnode page right now: it has to stay */
-+	if (atomic_read(&node->d_count) != 0)
-+		return 0;
-+
-+	assert("vs-1214", !jnode_is_loaded(node));
-+
-+	/*
-+	 * A real block number is required. Looking at ->atom alone is not
-+	 * enough: a node may be clean and not yet in an atom while still
-+	 * carrying a fake block number (e.g. one just created in
-+	 * jinit_new()).
-+	 */
-+	if (reiser4_blocknr_is_fake(jnode_get_block(node)))
-+		return 0;
-+
-+	/*
-+	 * Any of these states pins the page:
-+	 *  - WRITE_PREPARED: page prepared for write cannot be released;
-+	 *  - DIRTY: may only go to disk via (early) flush after flush-prep;
-+	 *  - OVRWR: overwrite set is written by the log writer only;
-+	 *  - WRITEBACK: jnode is already under writeback.
-+	 */
-+	if (JF_ISSET(node, JNODE_WRITE_PREPARED) ||
-+	    JF_ISSET(node, JNODE_DIRTY) ||
-+	    JF_ISSET(node, JNODE_OVRWR) ||
-+	    JF_ISSET(node, JNODE_WRITEBACK))
-+		return 0;
-+
-+	/* bitmaps and journal records are never released from here */
-+	return jnode_is_znode(node) || jnode_is_unformatted(node);
-+}
-+
-+/*
-+ * ->releasepage method for reiser4
-+ *
-+ * This is called by VM scanner when it comes across clean page. What we have
-+ * to do here is to check whether page can really be released (freed that is)
-+ * and if so, detach jnode from it and remove page from the page cache.
-+ *
-+ * Check for releasability is done by jnode_is_releasable().
-+ *
-+ * Returns 1 if the jnode was detached from @page, 0 if the page has to stay
-+ * (it is dirty, has too many references, or its jnode is not releasable).
-+ * @gfp is not used.
-+ */
-+int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG)
-+{
-+	jnode *node;
-+
-+	assert("nikita-2257", PagePrivate(page));
-+	assert("nikita-2259", PageLocked(page));
-+	assert("nikita-2892", !PageWriteback(page));
-+	assert("nikita-3019", reiser4_schedulable());
-+
-+	/* NOTE-NIKITA: this can be called in the context of reiser4 call. It
-+	   is not clear what to do in this case. A lot of deadlocks seems be
-+	   possible. */
-+
-+	node = jnode_by_page(page);
-+	assert("nikita-2258", node != NULL);
-+	assert("reiser4-4", page->mapping != NULL);
-+	assert("reiser4-5", page->mapping->host != NULL);
-+
-+	if (PageDirty(page))
-+		return 0;
-+
-+	/* extra page reference is used by reiser4 to protect
-+	 * jnode<->page link from this ->releasepage(). */
-+	if (page_count(page) > 3)
-+		return 0;
-+
-+	/* jnode_is_releasable() needs jnode lock, because it looks at the
-+	 * jnode fields and we need jload_lock here to avoid races with
-+	 * jload(). */
-+	spin_lock_jnode(node);
-+	spin_lock(&(node->load));
-+	if (jnode_is_releasable(node)) {
-+		/* hold the jnode across the unlocks below */
-+		jref(node);
-+		/* there is no need to synchronize against
-+		 * jnode_extent_write() here, because pages seen by
-+		 * jnode_extent_write() are not releasable. */
-+		page_clear_jnode(page, node);
-+		spin_unlock(&(node->load));
-+		spin_unlock_jnode(node);
-+
-+		/* we are under memory pressure so release jnode also. */
-+		jput(node);
-+
-+		return 1;
-+	} else {
-+		spin_unlock(&(node->load));
-+		spin_unlock_jnode(node);
-+		assert("nikita-3020", reiser4_schedulable());
-+		return 0;
-+	}
-+}
-+
-+/* ->readpage(): pass the locked, not yet up-to-date @page to the ->readpage()
-+ * method of the file plugin of the inode that owns it. */
-+int reiser4_readpage(struct file *file, struct page *page)
-+{
-+	struct inode *host;
-+
-+	assert("edward-1533", PageLocked(page));
-+	assert("edward-1534", !PageUptodate(page));
-+	assert("edward-1535", page->mapping && page->mapping->host);
-+
-+	host = page->mapping->host;
-+	return inode_file_plugin(host)->readpage(file, page);
-+}
-+
-+/* ->readpages(): forwarded to the file plugin of the inode hosting
-+ * @mapping. */
-+int reiser4_readpages(struct file *file, struct address_space *mapping,
-+		      struct list_head *pages, unsigned nr_pages)
-+{
-+	struct inode *host = mapping->host;
-+
-+	return inode_file_plugin(host)->readpages(file, mapping, pages,
-+						  nr_pages);
-+}
-+
-+/* ->writepages(): forwarded to the file plugin of the inode hosting
-+ * @mapping. */
-+int reiser4_writepages(struct address_space *mapping,
-+		       struct writeback_control *wbc)
-+{
-+	struct inode *host = mapping->host;
-+
-+	return inode_file_plugin(host)->writepages(mapping, wbc);
-+}
-+
-+/* ->prepare_write(): forwarded to the file plugin of the inode behind
-+ * @file. */
-+int reiser4_prepare_write(struct file *file, struct page *page,
-+			  unsigned from, unsigned to)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+
-+	return inode_file_plugin(inode)->prepare_write(file, page, from, to);
-+}
-+
-+/* ->commit_write(): forwarded to the file plugin of the inode behind
-+ * @file. */
-+int reiser4_commit_write(struct file *file, struct page *page,
-+			 unsigned from, unsigned to)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+
-+	return inode_file_plugin(inode)->commit_write(file, page, from, to);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/block_alloc.c linux-2.6.24/fs/reiser4/block_alloc.c
---- linux-2.6.24.orig/fs/reiser4/block_alloc.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/block_alloc.c 2008-01-25 11:39:06.888195324 +0300
-@@ -0,0 +1,1137 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+
-+/* THE REISER4 DISK SPACE RESERVATION SCHEME. */
-+
-+/* We need to be able to reserve enough disk space to ensure that an atomic
-+ operation will have enough disk space to flush (see flush.c and
-+ http://namesys.com/v4/v4.html) and commit it once it is started.
-+
-+ In our design a call for reserving disk space may fail but not an actual
-+ block allocation.
-+
-+ All free blocks, already allocated blocks, and all kinds of reserved blocks
-+ are counted in different per-fs block counters.
-+
-+ A reiser4 super block's set of block counters currently is:
-+
-+ free -- free blocks,
-+ used -- already allocated blocks,
-+
-+ grabbed -- initially reserved for performing an fs operation, those blocks
-+ are taken from free blocks, then grabbed disk space leaks from grabbed
-+ blocks counter to other counters like "fake allocated", "flush
-+ reserved", "used", the rest of not used grabbed space is returned to
-+ free space at the end of fs operation;
-+
-+ fake allocated -- counts all nodes without real disk block numbers assigned,
-+ we have separate accounting for formatted and unformatted
-+ nodes (for easier debugging);
-+
-+ flush reserved -- disk space needed for flushing and committing an atom.
-+ Each dirty already allocated block could be written as a
-+ part of atom's overwrite set or as a part of atom's
-+ relocate set. In both cases one additional block is needed,
-+ it is used as a wandered block if we do overwrite or as a
-+ new location for a relocated block.
-+
-+ In addition, blocks in some states are counted on per-thread and per-atom
-+ basis. A reiser4 context has a counter of blocks grabbed by this transaction
-+ and the sb's grabbed blocks counter is a sum of grabbed blocks counter values
-+ of each reiser4 context. Each reiser4 atom has a counter of "flush reserved"
-+ blocks, which are reserved for flush processing and atom commit. */
-+
-+/* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate
-+ number of blocks to grab for most expensive case of balancing when the leaf
-+ node we insert new item to gets split and new leaf node is allocated.
-+
-+ So, we need to grab blocks for
-+
-+ 1) one block for possible dirtying the node we insert an item to. That block
-+ would be used for node relocation at flush time or for allocating of a
-+ wandered one, it depends what will be a result (what set, relocate or
-+ overwrite the node gets assigned to) of the node processing by the flush
-+ algorithm.
-+
-+ 2) one block for either allocating a new node, or dirtying of right or left
-+ clean neighbor, only one case may happen.
-+
-+ VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying of left neighbor, right neighbor, current
-+ node, and creation of new node. have I forgotten something? email me.
-+
-+ These grabbed blocks are counted in both reiser4 context "grabbed blocks"
-+ counter and in the fs-wide one (both ctx->grabbed_blocks and
-+ sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is
-+ decremented by 2.
-+
-+ Suppose both two blocks were spent for dirtying of an already allocated clean
-+ node (one block went from "grabbed" to "flush reserved") and for new block
-+ allocating (one block went from "grabbed" to "fake allocated formatted").
-+
-+ Inserting of a child pointer to the parent node caused parent node to be
-+ split, the balancing code takes care about this grabbing necessary space
-+ immediately by calling reiser4_grab with BA_RESERVED flag set which means
-+ "can use the 5% reserved disk space".
-+
-+ At this moment insertion completes and grabbed blocks (if they were not used)
-+ should be returned to the free space counter.
-+
-+ However the atom life-cycle is not completed. The atom had one "flush
-+ reserved" block added by our insertion and the new fake allocated node is
-+ counted as a "fake allocated formatted" one. The atom has to be fully
-+ processed by flush before commit. Suppose that the flush moved the first,
-+ already allocated node to the atom's overwrite list, the new fake allocated
-+ node, obviously, went into the atom relocate set. The reiser4 flush
-+ allocates the new node using one unit from "fake allocated formatted"
-+ counter, the log writer uses one from "flush reserved" for wandered block
-+ allocation.
-+
-+ And, it is not the end. When the wandered block is deallocated after the
-+ atom gets fully played (see wander.c for term description), the disk space
-+ occupied for it is returned to free blocks. */
-+
-+/* BLOCK NUMBERS */
-+
-+/* Any reiser4 node has a block number assigned to it. We use these numbers for
-+ indexing in hash tables, so if a block has not yet been assigned a location
-+ on disk we need to give it a temporary fake block number.
-+
-+ Current implementation of reiser4 uses 64-bit integers for block numbers. We
-+ use highest bit in 64-bit block number to distinguish fake and real block
-+ numbers. So, only 63 bits may be used for addressing real device
-+ blocks. That "fake" block numbers space is divided into subspaces of fake
-+ block numbers for data blocks and for shadow (working) bitmap blocks.
-+
-+ Fake block numbers for data blocks are generated by a counter, which
-+ gets advanced each time fake block numbers are handed out. We assume that it
-+ is impossible to overflow this counter during one transaction life. */
-+
-+/* Initialize a blocknr hint: all of @hint is zero-filled. */
-+void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint)
-+{
-+	memset(hint, 0, sizeof(*hint));
-+}
-+
-+/* Release any resources of a blocknr hint. Counterpart of
-+   reiser4_blocknr_hint_init(); @hint is not touched. */
-+void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG)
-+{
-+	/* the current blocknr_hint implementation owns nothing to free */
-+}
-+
-+/* Returns 1 if *@da is a fake block number (see above for explanation of
-+   fake block numbers), 0 if it is a real one. */
-+/* Audited by: green(2002.06.11) */
-+int reiser4_blocknr_is_fake(const reiser4_block_nr * da)
-+{
-+	/* The masked value must not be returned directly: the return type is
-+	   a (possibly 32bit) int while reiser4_block_nr is at least 64 bits
-+	   long, so the high bit (the only bit that can survive the masking)
-+	   would be stripped off. Comparing against zero gives a plain 0/1. */
-+	return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) != 0;
-+}
-+
-+/* Static functions for <reiser4 super block>/<reiser4 context> block counters
-+ arithmetic. Mostly, they are isolated to not to code same assertions in
-+ several places. None of them takes a lock itself. */
-+
-+/* Take @count blocks out of the per-context grabbed counter. Underflow is a
-+ BUG. */
-+static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count)
-+{
-+ BUG_ON(ctx->grabbed_blocks < count);
-+ /* same condition as the BUG_ON() above, so it cannot fail after it */
-+ assert("zam-527", ctx->grabbed_blocks >= count);
-+ ctx->grabbed_blocks -= count;
-+}
-+
-+/* Add @count blocks to the per-context grabbed counter. */
-+static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count)
-+{
-+ ctx->grabbed_blocks += count;
-+}
-+
-+/* Take @count blocks out of the super block's grabbed counter. */
-+static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("zam-525", sbinfo->blocks_grabbed >= count);
-+ sbinfo->blocks_grabbed -= count;
-+}
-+
-+/* Decrease the counter of block reserved for flush in super block. */
-+static void
-+sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("vpf-291", sbinfo->blocks_flush_reserved >= count);
-+ sbinfo->blocks_flush_reserved -= count;
-+}
-+
-+/* Take @count blocks out of one of the super block's fake allocated counters:
-+ the formatted one if BA_FORMATTED is set in @flags, the unformatted one
-+ otherwise. */
-+static void
-+sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
-+ reiser4_ba_flags_t flags)
-+{
-+ if (flags & BA_FORMATTED) {
-+ assert("zam-806", sbinfo->blocks_fake_allocated >= count);
-+ sbinfo->blocks_fake_allocated -= count;
-+ } else {
-+ assert("zam-528",
-+ sbinfo->blocks_fake_allocated_unformatted >= count);
-+ sbinfo->blocks_fake_allocated_unformatted -= count;
-+ }
-+}
-+
-+/* Take @count blocks out of the super block's used counter. The counter is
-+ asserted not to drop below sbinfo->min_blocks_used. */
-+static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("zam-530",
-+ sbinfo->blocks_used >= count + sbinfo->min_blocks_used);
-+ sbinfo->blocks_used -= count;
-+}
-+
-+/* Take @count blocks out of the super block's clustered counter. */
-+static void
-+sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ assert("edward-501", sbinfo->blocks_clustered >= count);
-+ sbinfo->blocks_clustered -= count;
-+}
-+
-+/* Increase the counter of block reserved for flush in atom. The caller must
-+ hold atom->alock ("nolock": no lock is taken here). Note that @count is
-+ 32-bit here, unlike the __u64 counts of the super block helpers. */
-+static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
-+{
-+ assert("zam-772", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ atom->flush_reserved += count;
-+}
-+
-+/* Decrease the counter of block reserved for flush in atom. The caller must
-+ hold atom->alock; the counter is asserted not to underflow. */
-+static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
-+{
-+ assert("zam-774", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-2790", atom->flush_reserved >= count);
-+ atom->flush_reserved -= count;
-+}
-+
-+/* super block has 7 counters: free, used, grabbed, fake allocated
-+   (formatted and unformatted), flush reserved and clustered. Their sum must
-+   be number of blocks on a device. This function checks this.
-+
-+   Returns 1 if the counters add up and 0 (after printing all of them) if
-+   they do not, so the result can be used directly as an assert() condition. */
-+int reiser4_check_block_counters(const struct super_block *super)
-+{
-+	/* every counter is read exactly once, so that the values printed on
-+	   a mismatch are the very values that were summed */
-+	const __u64 used = reiser4_data_blocks(super);
-+	const __u64 nr_free = reiser4_free_blocks(super);
-+	const __u64 grabbed = reiser4_grabbed_blocks(super);
-+	const __u64 fake_formatted = reiser4_fake_allocated(super);
-+	const __u64 fake_unformatted =
-+	    reiser4_fake_allocated_unformatted(super);
-+	const __u64 flush_reserved = reiser4_flush_reserved(super);
-+	const __u64 clustered = reiser4_clustered_blocks(super);
-+	const __u64 block_count = reiser4_block_count(super);
-+	const __u64 sum = grabbed + nr_free + used + fake_formatted +
-+	    fake_unformatted + flush_reserved + clustered;
-+
-+	if (block_count == sum)
-+		return 1;
-+
-+	printk("super block counters: "
-+	       "used %llu, free %llu, "
-+	       "grabbed %llu, fake allocated (formatted %llu, unformatted %llu), "
-+	       "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n",
-+	       (unsigned long long)used,
-+	       (unsigned long long)nr_free,
-+	       (unsigned long long)grabbed,
-+	       (unsigned long long)fake_formatted,
-+	       (unsigned long long)fake_unformatted,
-+	       (unsigned long long)flush_reserved,
-+	       (unsigned long long)clustered,
-+	       (unsigned long long)sum,
-+	       (unsigned long long)block_count);
-+	return 0;
-+}
-+
-+/* Adjust "working" free blocks counter for number of blocks we are going to
-+ allocate. Record number of grabbed blocks in fs-wide and per-thread
-+ counters. This function should be called before bitmap scanning or
-+ allocating fake block numbers
-+
-+ @ctx -- reiser4 context of the caller, asserted to be the current one; its
-+ ->super is the super block the space is taken from;
-+ @count -- number of blocks we reserve;
-+ @flags -- only BA_RESERVED is looked at here: when set, the request may
-+ also consume sbinfo->blocks_reserved;
-+
-+ @return -- 0 if success, -ENOSPC, if all
-+ free blocks are preserved or already allocated.
-+
-+ On success, and also on a read-only fs (where nothing is grabbed and 0 is
-+ returned), further grabbing is disabled in @ctx. On -ENOSPC the grab_enabled
-+ flag is left unchanged.
-+*/
-+
-+static int
-+reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags)
-+{
-+ __u64 free_blocks;
-+ int ret = 0, use_reserved = flags & BA_RESERVED;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("vs-1276", ctx == get_current_context());
-+
-+ /* Do not grab anything on ro-mounted fs. */
-+ if (rofs_super(ctx->super)) {
-+ ctx->grab_enabled = 0;
-+ return 0;
-+ }
-+
-+ sbinfo = get_super_private(ctx->super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ free_blocks = sbinfo->blocks_free;
-+
-+ /* ordinary requests must leave sbinfo->blocks_reserved untouched,
-+ * BA_RESERVED requests may use every free block */
-+ if ((use_reserved && free_blocks < count) ||
-+ (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) {
-+ ret = RETERR(-ENOSPC);
-+ goto unlock_and_ret;
-+ }
-+
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ /* move @count blocks from "free" to "grabbed" */
-+ sbinfo->blocks_grabbed += count;
-+ sbinfo->blocks_free -= count;
-+
-+#if REISER4_DEBUG
-+ /* debugging aid: remember the size of the first non-zero grab */
-+ if (ctx->grabbed_initially == 0)
-+ ctx->grabbed_initially = count;
-+#endif
-+
-+ assert("nikita-2986", reiser4_check_block_counters(ctx->super));
-+
-+ /* disable grab space in current context */
-+ ctx->grab_enabled = 0;
-+
-+ unlock_and_ret:
-+ spin_unlock_reiser4_super(sbinfo);
-+
-+ return ret;
-+}
-+
-+/* Grab @count blocks for the current context.
-+
-+   Returns 0 without grabbing anything when grabbing is disabled in the
-+   context and BA_FORCE is not set in @flags. If the first attempt fails with
-+   -ENOSPC and BA_CAN_COMMIT is set, all transactions are force-committed and
-+   the grab is retried once. Returns the result of the last reiser4_grab(). */
-+int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags)
-+{
-+	reiser4_context *ctx;
-+	int ret;
-+
-+	/* committing is only allowed with a clean lock stack */
-+	assert("nikita-2964", ergo(flags & BA_CAN_COMMIT,
-+				   lock_stack_isclean(get_current_lock_stack())));
-+
-+	ctx = get_current_context();
-+	if (!(flags & BA_FORCE) && !is_grab_enabled(ctx))
-+		return 0;
-+
-+	ret = reiser4_grab(ctx, count, flags);
-+	if (ret == -ENOSPC && (flags & BA_CAN_COMMIT)) {
-+		/* commit everything, re-enable grabbing and try again */
-+		txnmgr_force_commit_all(ctx->super, 0);
-+		ctx->grab_enabled = 1;
-+		ret = reiser4_grab(ctx, count, flags);
-+	}
-+
-+	/* allocation from reserved pool cannot fail. This is severe error. */
-+	assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0));
-+	return ret;
-+}
-+
-+/*
-+ * SPACE RESERVED FOR UNLINK/TRUNCATE
-+ *
-+ * Unlink and truncate require space in transaction (to update stat data, at
-+ * least). But we don't want rm(1) to fail with "No space on device" error.
-+ *
-+ * Solution is to reserve 5% of disk space for truncates and
-+ * unlinks. Specifically, normal space grabbing requests don't grab space from
-+ * reserved area. Only requests with BA_RESERVED bit in flags are allowed to
-+ * drain it. Per super block delete mutex is used to allow only one
-+ * thread at a time to grab from reserved area.
-+ *
-+ * Grabbing from reserved area should always be performed with BA_CAN_COMMIT
-+ * flag.
-+ *
-+ */
-+
-+/* Grab @count blocks, falling back to the reserved area if an ordinary grab
-+ * fails. @flags must contain BA_CAN_COMMIT.
-+ *
-+ * Returns 0 on success and -ENOSPC on failure. When the fallback to the
-+ * reserved area succeeds, 0 is returned with sbinfo->delete_mutex still held
-+ * and delete_mutex_owner set to current; reiser4_release_reserved() drops it.
-+ * On both failure paths reiser4_release_reserved() is called before
-+ * returning. */
-+int reiser4_grab_reserved(struct super_block *super,
-+ __u64 count, reiser4_ba_flags_t flags)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+ assert("nikita-3175", flags & BA_CAN_COMMIT);
-+
-+ /* Check the delete mutex already taken by us, we assume that
-+ * reading of machine word is atomic. */
-+ if (sbinfo->delete_mutex_owner == current) {
-+ /* nested call: grab straight from the reserved area, with
-+ * BA_CAN_COMMIT masked out of the flags */
-+ if (reiser4_grab_space
-+ (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) {
-+ warning("zam-1003",
-+ "nested call of grab_reserved fails count=(%llu)",
-+ (unsigned long long)count);
-+ reiser4_release_reserved(super);
-+ return RETERR(-ENOSPC);
-+ }
-+ return 0;
-+ }
-+
-+ /* first try an ordinary grab; only if that fails serialize on the
-+ * delete mutex and retry with BA_RESERVED */
-+ if (reiser4_grab_space(count, flags)) {
-+ mutex_lock(&sbinfo->delete_mutex);
-+ assert("nikita-2929", sbinfo->delete_mutex_owner == NULL);
-+ sbinfo->delete_mutex_owner = current;
-+
-+ if (reiser4_grab_space(count, flags | BA_RESERVED)) {
-+ warning("zam-833",
-+ "reserved space is not enough (%llu)",
-+ (unsigned long long)count);
-+ reiser4_release_reserved(super);
-+ return RETERR(-ENOSPC);
-+ }
-+ }
-+ return 0;
-+}
-+
-+/* Drop the per super block delete mutex if, and only if, the current task is
-+   recorded as its owner; otherwise do nothing. */
-+void reiser4_release_reserved(struct super_block *super)
-+{
-+	reiser4_super_info_data *info = get_super_private(super);
-+
-+	if (info->delete_mutex_owner != current)
-+		return;
-+
-+	/* ownership is cleared before the mutex is released */
-+	info->delete_mutex_owner = NULL;
-+	mutex_unlock(&info->delete_mutex);
-+}
-+
-+/* Common first half of the grabbed -> fake allocated transitions: takes
-+ @count blocks out of the grabbed counters of the current context and of its
-+ super block.
-+
-+ Returns the super block info WITH ITS SPIN LOCK HELD; the caller updates the
-+ destination counter and must call spin_unlock_reiser4_super(). */
-+static reiser4_super_info_data *grabbed2fake_allocated_head(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ /* return sbinfo locked */
-+ return sbinfo;
-+}
-+
-+/* is called after a fake block number is allocated and pointer to
-+ that block is inserted into tree. Moves exactly one block from "grabbed"
-+ to "fake allocated" (formatted). */
-+static void grabbed2fake_allocated_formatted(void)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ /* sbinfo comes back locked */
-+ sbinfo = grabbed2fake_allocated_head(1);
-+ sbinfo->blocks_fake_allocated++;
-+
-+ assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/**
-+ * grabbed2fake_allocated_unformatted
-+ * @count: number of blocks to move
-+ *
-+ * Moves @count blocks from "grabbed" to "fake allocated unformatted".
-+ */
-+static void grabbed2fake_allocated_unformatted(int count)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ /* sbinfo comes back locked */
-+ sbinfo = grabbed2fake_allocated_head(count);
-+ sbinfo->blocks_fake_allocated_unformatted += count;
-+
-+ assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* Move @count blocks from "grabbed" (both the current context's and the super
-+ block's counter) to the super block's "clustered" counter. */
-+void grabbed2cluster_reserved(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_clustered += count;
-+
-+ assert("edward-504", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* Inverse of grabbed2cluster_reserved(): move @count blocks from the super
-+ block's "clustered" counter back to "grabbed" (super block and current
-+ context). */
-+void cluster_reserved2grabbed(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+
-+ sbinfo = get_super_private(ctx->super);
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_cluster_reserved(sbinfo, count);
-+ sbinfo->blocks_grabbed += count;
-+
-+ assert("edward-505", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+ /* the per-context counter is updated after the super lock is dropped */
-+ add_to_ctx_grabbed(ctx, count);
-+}
-+
-+/* Return @count "clustered" blocks to free space in two steps: first back to
-+ "grabbed", then grabbed2free() (defined elsewhere in this file). */
-+void cluster_reserved2free(int count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ cluster_reserved2grabbed(count);
-+ grabbed2free(ctx, sbinfo, count);
-+}
-+
-+/* generator of fake block numbers and the lock protecting it; both are
-+ file-scope statics, not per super block */
-+static DEFINE_SPINLOCK(fake_lock);
-+static reiser4_block_nr fake_gen = 0;
-+
-+/**
-+ * assign_fake_blocknr
-+ * @blocknr: out parameter, receives the first fake block number of the range
-+ * @count: how many consecutive generator values to consume
-+ *
-+ * Obtain a fake block number for new node which will be used to refer to
-+ * this newly allocated node until real allocation is done.
-+ */
-+static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count)
-+{
-+ spin_lock(&fake_lock);
-+ *blocknr = fake_gen;
-+ fake_gen += count;
-+ spin_unlock(&fake_lock);
-+
-+ /* the generator value must not have reached the status bits */
-+ BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK);
-+ /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/
-+ /* mark the number as unallocated */
-+ *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE;
-+ assert("zam-394", zlook(current_tree, blocknr) == NULL);
-+}
-+
-+/* Store a new fake block number in *@blocknr and account one block as moved
-+ from "grabbed" to "fake allocated" (formatted). Always returns 0. */
-+int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr)
-+{
-+ assign_fake_blocknr(blocknr, 1);
-+ grabbed2fake_allocated_formatted();
-+ return 0;
-+}
-+
-+/**
-+ * fake_blocknr_unformatted
-+ * @count: number of fake numbers to get
-+ *
-+ * Allocates @count fake block numbers which will be assigned to jnodes and
-+ * accounts @count blocks as moved from "grabbed" to "fake allocated
-+ * unformatted". Returns the first number of the range.
-+ */
-+reiser4_block_nr fake_blocknr_unformatted(int count)
-+{
-+ reiser4_block_nr blocknr;
-+
-+ assign_fake_blocknr(&blocknr, count);
-+ grabbed2fake_allocated_unformatted(count);
-+
-+ return blocknr;
-+}
-+
-+/* adjust sb block counters, if real (on-disk) block allocation immediately
-+ follows grabbing of free disk space: @count blocks go from "grabbed" (in
-+ both @ctx and @sbinfo) to "used". */
-+static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
-+ __u64 count)
-+{
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_used += count;
-+
-+ assert("nikita-2679", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* adjust sb block counters when @count unallocated blocks get mapped to disk;
-+ BA_FORMATTED in @flags selects which fake allocated counter is decreased */
-+static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count,
-+ reiser4_ba_flags_t flags)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_fake_allocated(sbinfo, count, flags);
-+ sbinfo->blocks_used += count;
-+
-+ assert("nikita-2680",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* Move @count blocks from "flush reserved" (in @atom and in the current super
-+ block) to "used". The caller must hold atom->alock. */
-+static void flush_reserved2used(txn_atom * atom, __u64 count)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("zam-787", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* NOTE(review): @count is narrowed to 32 bits for the atom counter
-+ * while the full 64-bit value is applied to the super block below */
-+ sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ sbinfo = get_current_super_private();
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_flush_reserved(sbinfo, count);
-+ sbinfo->blocks_used += count;
-+
-+ assert("zam-789",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* update the per fs blocknr hint default value. *@block must be a real (not
-+ fake) block number; a value not below sbinfo->block_count is rejected with
-+ a warning and a stack dump, and the stored hint is left unchanged. */
-+void
-+update_blocknr_hint_default(const struct super_block *s,
-+ const reiser4_block_nr * block)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("nikita-3342", !reiser4_blocknr_is_fake(block));
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ if (*block < sbinfo->block_count) {
-+ sbinfo->blocknr_hint_default = *block;
-+ } else {
-+ warning("zam-676",
-+ "block number %llu is too large to be used in a blocknr hint\n",
-+ (unsigned long long)*block);
-+ dump_stack();
-+ DEBUGON(1);
-+ }
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* get current value of the default blocknr hint of the current super block;
-+ it is copied to *@result under the super block spin lock. */
-+void get_blocknr_hint_default(reiser4_block_nr * result)
-+{
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ *result = sbinfo->blocknr_hint_default;
-+ assert("zam-677", *result < sbinfo->block_count);
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* Allocate "real" disk blocks by calling a proper space allocation plugin
-+ * method. Blocks are allocated in one contiguous disk region. The plugin
-+ * independent part accounts blocks by subtracting allocated amount from grabbed
-+ * or fake block counter and add the same amount to the counter of allocated
-+ * blocks.
-+ *
-+ * @hint -- a reiser4 blocknr hint object which contains further block
-+ * allocation hints and parameters (search start, a stage of block
-+ * which will be mapped to disk, etc.),
-+ * @blk -- an out parameter for the beginning of the allocated region,
-+ * @len -- in/out parameter, it should contain the maximum number of allocated
-+ * blocks, after block allocation completes, it contains the length of
-+ * allocated disk region.
-+ * @flags -- see reiser4_ba_flags_t description.
-+ *
-+ * @return -- 0 if success, error code otherwise.
-+ */
-+int
-+reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk,
-+ reiser4_block_nr * len, reiser4_ba_flags_t flags)
-+{
-+ /* requested length, remembered because the allocator overwrites *len */
-+ __u64 needed = *len;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+ int ret;
-+
-+ assert("zam-986", hint != NULL);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ /* For write-optimized data we use default search start value, which is
-+ * close to last write location. */
-+ if (flags & BA_USE_DEFAULT_SEARCH_START) {
-+ get_blocknr_hint_default(&hint->blk);
-+ }
-+
-+ /* VITALY: allocator should grab this for internal/tx-lists/similar only. */
-+/* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)? */
-+ /* blocks that were not accounted anywhere yet are grabbed here, so that
-+ * they can be treated like BLOCK_GRABBED below */
-+ if (hint->block_stage == BLOCK_NOT_COUNTED) {
-+ ret = reiser4_grab_space_force(*len, flags);
-+ if (ret != 0)
-+ return ret;
-+ }
-+
-+ /* NOTE(review): the 64-bit request is narrowed to int for the space
-+ * allocator plugin */
-+ ret =
-+ sa_alloc_blocks(reiser4_get_space_allocator(ctx->super),
-+ hint, (int)needed, blk, len);
-+
-+ if (!ret) {
-+ assert("zam-680", *blk < reiser4_block_count(ctx->super));
-+ assert("zam-681",
-+ *blk + *len <= reiser4_block_count(ctx->super));
-+
-+ if (flags & BA_PERMANENT) {
-+ /* we assume that current atom exists at this moment */
-+ txn_atom *atom = get_current_atom_locked();
-+ atom->nr_blocks_allocated += *len;
-+ spin_unlock_atom(atom);
-+ }
-+
-+ /* move the *len allocated blocks to "used" from the counter
-+ * selected by the block stage */
-+ switch (hint->block_stage) {
-+ case BLOCK_NOT_COUNTED:
-+ case BLOCK_GRABBED:
-+ grabbed2used(ctx, sbinfo, *len);
-+ break;
-+ case BLOCK_UNALLOCATED:
-+ fake_allocated2used(sbinfo, *len, flags);
-+ break;
-+ case BLOCK_FLUSH_RESERVED:
-+ {
-+ txn_atom *atom = get_current_atom_locked();
-+ flush_reserved2used(atom, *len);
-+ spin_unlock_atom(atom);
-+ }
-+ break;
-+ default:
-+ impossible("zam-531", "wrong block stage");
-+ }
-+ } else {
-+ assert("zam-821",
-+ ergo(hint->max_dist == 0
-+ && !hint->backward, ret != -ENOSPC));
-+ /* undo the grab made above for BLOCK_NOT_COUNTED */
-+ if (hint->block_stage == BLOCK_NOT_COUNTED)
-+ grabbed2free(ctx, sbinfo, needed);
-+ }
-+
-+ return ret;
-+}
-+
-+/* used -> fake_allocated -> grabbed -> free */
-+
-+/* adjust sb block counters when @count unallocated blocks get unmapped from
-+ disk */
-+static void
-+used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
-+ int formatted)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ if (formatted)
-+ sbinfo->blocks_fake_allocated += count;
-+ else
-+ sbinfo->blocks_fake_allocated_unformatted += count;
-+
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2681",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+static void
-+used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom,
-+ __u64 count, reiser4_ba_flags_t flags UNUSED_ARG)
-+{
-+ assert("nikita-2791", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ add_to_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_flush_reserved += count;
-+ /*add_to_sb_flush_reserved(sbinfo, count); */
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2681",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* disk space, virtually used by fake block numbers is counted as "grabbed" again. */
-+static void
-+fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
-+ __u64 count, reiser4_ba_flags_t flags)
-+{
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ assert("nikita-2682", reiser4_check_block_counters(ctx->super));
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED);
-+
-+ assert("nikita-2683", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ fake_allocated2grabbed(ctx, sbinfo, count, flags);
-+ grabbed2free(ctx, sbinfo, count);
-+}
-+
-+void grabbed2free_mark(__u64 mark)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ assert("nikita-3007", (__s64) mark >= 0);
-+ assert("nikita-3006", ctx->grabbed_blocks >= mark);
-+ grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark);
-+}
-+
-+/**
-+ * grabbed2free - adjust grabbed and free block counters
-+ * @ctx: context to update grabbed block counter of
-+ * @sbinfo: super block to update grabbed and free block counters of
-+ * @count: number of blocks to adjust counters by
-+ *
-+ * Decreases context's and per filesystem's counters of grabbed
-+ * blocks. Increases per filesystem's counter of free blocks.
-+ */
-+void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
-+ __u64 count)
-+{
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sub_from_sb_grabbed(sbinfo, count);
-+ sbinfo->blocks_free += count;
-+ assert("nikita-2684", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("vs-1095", atom);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ sub_from_ctx_grabbed(ctx, count);
-+
-+ add_to_atom_flush_reserved_nolock(atom, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_flush_reserved += count;
-+ sub_from_sb_grabbed(sbinfo, count);
-+
-+ assert("vpf-292", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+void grabbed2flush_reserved(__u64 count)
-+{
-+ txn_atom *atom = get_current_atom_locked();
-+
-+ grabbed2flush_reserved_nolock(atom, count);
-+
-+ spin_unlock_atom(atom);
-+}
-+
-+void flush_reserved2grabbed(txn_atom * atom, __u64 count)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("nikita-2788", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_flush_reserved(sbinfo, count);
-+
-+ assert("vpf-292", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/**
-+ * all_grabbed2free - releases all blocks grabbed in context
-+ *
-+ * Decreases context's and super block's grabbed block counters by number of
-+ * blocks grabbed by current context and increases super block's free block
-+ * counter correspondingly.
-+ */
-+void all_grabbed2free(void)
-+{
-+ reiser4_context *ctx = get_current_context();
-+
-+ grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks);
-+}
-+
-+/* adjust sb block counters if real (on-disk) blocks do not become unallocated
-+ after freeing, @count blocks become "grabbed". */
-+static void
-+used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
-+ __u64 count)
-+{
-+ add_to_ctx_grabbed(ctx, count);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_grabbed += count;
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2685", reiser4_check_block_counters(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+/* this used to be done through used2grabbed and grabbed2free*/
-+static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
-+{
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ sbinfo->blocks_free += count;
-+ sub_from_sb_used(sbinfo, count);
-+
-+ assert("nikita-2685",
-+ reiser4_check_block_counters(reiser4_get_current_sb()));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* check "allocated" state of given block range */
-+static void
-+reiser4_check_blocks(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len, int desired)
-+{
-+ sa_check_blocks(start, len, desired);
-+}
-+
-+/* check "allocated" state of given block */
-+void reiser4_check_block(const reiser4_block_nr * block, int desired)
-+{
-+ const reiser4_block_nr one = 1;
-+
-+ reiser4_check_blocks(block, &one, desired);
-+}
-+
-+#endif
-+
-+/* Blocks deallocation function may do an actual deallocation through space
-+ plugin allocation or store deleted block numbers in atom's delete_set data
-+ structure depend on @defer parameter. */
-+
-+/* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks which
-+ will be deleted from WORKING bitmap. They might be just unmapped from disk, or
-+ freed but disk space is still grabbed by current thread, or these blocks must
-+ not be counted in any reiser4 sb block counters, see block_stage_t comment */
-+
-+/* BA_FORMATTED bit is only used when BA_DEFER in not present: it is used to
-+ distinguish blocks allocated for unformatted and formatted nodes */
-+
-+int
-+reiser4_dealloc_blocks(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len,
-+ block_stage_t target_stage, reiser4_ba_flags_t flags)
-+{
-+ txn_atom *atom = NULL;
-+ int ret;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ if (REISER4_DEBUG) {
-+ assert("zam-431", *len != 0);
-+ assert("zam-432", *start != 0);
-+ assert("zam-558", !reiser4_blocknr_is_fake(start));
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ assert("zam-562", *start < sbinfo->block_count);
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+
-+ if (flags & BA_DEFER) {
-+ blocknr_set_entry *bsep = NULL;
-+
-+ /* storing deleted block numbers in a blocknr set
-+ datastructure for further actual deletion */
-+ do {
-+ atom = get_current_atom_locked();
-+ assert("zam-430", atom != NULL);
-+
-+ ret =
-+ blocknr_set_add_extent(atom, &atom->delete_set,
-+ &bsep, start, len);
-+
-+ if (ret == -ENOMEM)
-+ return ret;
-+
-+ /* This loop might spin at most two times */
-+ } while (ret == -E_REPEAT);
-+
-+ assert("zam-477", ret == 0);
-+ assert("zam-433", atom != NULL);
-+
-+ spin_unlock_atom(atom);
-+
-+ } else {
-+ assert("zam-425", get_current_super_private() != NULL);
-+ sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super),
-+ *start, *len);
-+
-+ if (flags & BA_PERMANENT) {
-+ /* These blocks were counted as allocated, we have to revert it
-+ * back if allocation is discarded. */
-+ txn_atom *atom = get_current_atom_locked();
-+ atom->nr_blocks_allocated -= *len;
-+ spin_unlock_atom(atom);
-+ }
-+
-+ switch (target_stage) {
-+ case BLOCK_NOT_COUNTED:
-+ assert("vs-960", flags & BA_FORMATTED);
-+ /* VITALY: This is what was grabbed for internal/tx-lists/similar only */
-+ used2free(sbinfo, *len);
-+ break;
-+
-+ case BLOCK_GRABBED:
-+ used2grabbed(ctx, sbinfo, *len);
-+ break;
-+
-+ case BLOCK_UNALLOCATED:
-+ used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED);
-+ break;
-+
-+ case BLOCK_FLUSH_RESERVED:{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ used2flush_reserved(sbinfo, atom, *len,
-+ flags & BA_FORMATTED);
-+ spin_unlock_atom(atom);
-+ break;
-+ }
-+ default:
-+ impossible("zam-532", "wrong block stage");
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* wrappers for block allocator plugin methods */
-+int reiser4_pre_commit_hook(void)
-+{
-+ assert("zam-502", get_current_super_private() != NULL);
-+ sa_pre_commit_hook();
-+ return 0;
-+}
-+
-+/* an actor which applies delete set to block allocator data */
-+static int
-+apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b, void *data UNUSED_ARG)
-+{
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ __u64 len = 1;
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT);
-+ assert("zam-552", sbinfo != NULL);
-+
-+ if (b != NULL)
-+ len = *b;
-+
-+ if (REISER4_DEBUG) {
-+ spin_lock_reiser4_super(sbinfo);
-+
-+ assert("zam-554", *a < reiser4_block_count(ctx->super));
-+ assert("zam-555", *a + len <= reiser4_block_count(ctx->super));
-+
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+
-+ sa_dealloc_blocks(&sbinfo->space_allocator, *a, len);
-+ /* adjust sb block counters */
-+ used2free(sbinfo, len);
-+ return 0;
-+}
-+
-+void reiser4_post_commit_hook(void)
-+{
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked();
-+ assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
-+ spin_unlock_atom(atom);
-+
-+ /* do the block deallocation which was deferred
-+ until commit is done */
-+ blocknr_set_iterator(atom, &atom->delete_set, apply_dset, NULL, 1);
-+
-+ assert("zam-504", get_current_super_private() != NULL);
-+ sa_post_commit_hook();
-+}
-+
-+void reiser4_post_write_back_hook(void)
-+{
-+ assert("zam-504", get_current_super_private() != NULL);
-+
-+ sa_post_commit_hook();
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/block_alloc.h linux-2.6.24/fs/reiser4/block_alloc.h
---- linux-2.6.24.orig/fs/reiser4/block_alloc.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/block_alloc.h 2008-01-25 11:39:06.888195324 +0300
-@@ -0,0 +1,175 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__FS_REISER4_BLOCK_ALLOC_H__)
-+#define __FS_REISER4_BLOCK_ALLOC_H__
-+
-+#include "dformat.h"
-+#include "forward.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h>
-+
-+/* Mask when is applied to given block number shows is that block number is a fake one */
-+#define REISER4_FAKE_BLOCKNR_BIT_MASK 0x8000000000000000ULL
-+/* Mask which isolates a type of object this fake block number was assigned to */
-+#define REISER4_BLOCKNR_STATUS_BIT_MASK 0xC000000000000000ULL
-+
-+/*result after applying the REISER4_BLOCKNR_STATUS_BIT_MASK should be compared
-+ against these two values to understand is the object unallocated or bitmap
-+ shadow object (WORKING BITMAP block, look at the plugin/space/bitmap.c) */
-+#define REISER4_UNALLOCATED_STATUS_VALUE 0xC000000000000000ULL
-+#define REISER4_BITMAP_BLOCKS_STATUS_VALUE 0x8000000000000000ULL
-+
-+/* specification how block allocation was counted in sb block counters */
-+typedef enum {
-+ BLOCK_NOT_COUNTED = 0, /* reiser4 has no info about this block yet */
-+ BLOCK_GRABBED = 1, /* free space grabbed for further allocation
-+ of this block */
-+ BLOCK_FLUSH_RESERVED = 2, /* block is reserved for flush needs. */
-+ BLOCK_UNALLOCATED = 3, /* block is used for existing in-memory object
-+ ( unallocated formatted or unformatted
-+ node) */
-+ BLOCK_ALLOCATED = 4 /* block is mapped to disk, real on-disk block
-+ number assigned */
-+} block_stage_t;
-+
-+/* a hint for block allocator */
-+struct reiser4_blocknr_hint {
-+ /* FIXME: I think we want to add a longterm lock on the bitmap block here. This
-+ is to prevent jnode_flush() calls from interleaving allocations on the same
-+ bitmap, once a hint is established. */
-+
-+ /* search start hint */
-+ reiser4_block_nr blk;
-+ /* if not zero, it is a region size we search for free blocks in */
-+ reiser4_block_nr max_dist;
-+ /* level for allocation, may be useful have branch-level and higher
-+ write-optimized. */
-+ tree_level level;
-+ /* block allocator assumes that blocks, which will be mapped to disk,
-+ are in this specified block_stage */
-+ block_stage_t block_stage;
-+ /* If direction = 1 allocate blocks in backward direction from the end
-+ * of disk to the beginning of disk. */
-+ unsigned int backward:1;
-+
-+};
-+
-+/* These flags control block allocation/deallocation behavior */
-+enum reiser4_ba_flags {
-+ /* do allocatations from reserved (5%) area */
-+ BA_RESERVED = (1 << 0),
-+
-+ /* block allocator can do commit trying to recover free space */
-+ BA_CAN_COMMIT = (1 << 1),
-+
-+ /* if operation will be applied to formatted block */
-+ BA_FORMATTED = (1 << 2),
-+
-+ /* defer actual block freeing until transaction commit */
-+ BA_DEFER = (1 << 3),
-+
-+ /* allocate blocks for permanent fs objects (formatted or unformatted), not
-+ wandered of log blocks */
-+ BA_PERMANENT = (1 << 4),
-+
-+ /* grab space even it was disabled */
-+ BA_FORCE = (1 << 5),
-+
-+ /* use default start value for free blocks search. */
-+ BA_USE_DEFAULT_SEARCH_START = (1 << 6)
-+};
-+
-+typedef enum reiser4_ba_flags reiser4_ba_flags_t;
-+
-+extern void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint);
-+extern void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint);
-+extern void update_blocknr_hint_default(const struct super_block *,
-+ const reiser4_block_nr *);
-+extern void get_blocknr_hint_default(reiser4_block_nr *);
-+
-+extern reiser4_block_nr reiser4_fs_reserved_space(struct super_block *super);
-+
-+int assign_fake_blocknr_formatted(reiser4_block_nr *);
-+reiser4_block_nr fake_blocknr_unformatted(int);
-+
-+/* free -> grabbed -> fake_allocated -> used */
-+
-+int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags);
-+void all_grabbed2free(void);
-+void grabbed2free(reiser4_context *, reiser4_super_info_data *, __u64 count);
-+void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags);
-+void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count);
-+void grabbed2flush_reserved(__u64 count);
-+int reiser4_alloc_blocks(reiser4_blocknr_hint * hint,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len, reiser4_ba_flags_t flags);
-+int reiser4_dealloc_blocks(const reiser4_block_nr *,
-+ const reiser4_block_nr *,
-+ block_stage_t, reiser4_ba_flags_t flags);
-+
-+static inline int reiser4_alloc_block(reiser4_blocknr_hint * hint,
-+ reiser4_block_nr * start,
-+ reiser4_ba_flags_t flags)
-+{
-+ reiser4_block_nr one = 1;
-+ return reiser4_alloc_blocks(hint, start, &one, flags);
-+}
-+
-+static inline int reiser4_dealloc_block(const reiser4_block_nr * block,
-+ block_stage_t stage,
-+ reiser4_ba_flags_t flags)
-+{
-+ const reiser4_block_nr one = 1;
-+ return reiser4_dealloc_blocks(block, &one, stage, flags);
-+}
-+
-+#define reiser4_grab_space_force(count, flags) \
-+ reiser4_grab_space(count, flags | BA_FORCE)
-+
-+extern void grabbed2free_mark(__u64 mark);
-+extern int reiser4_grab_reserved(struct super_block *,
-+ __u64, reiser4_ba_flags_t);
-+extern void reiser4_release_reserved(struct super_block *super);
-+
-+/* grabbed -> fake_allocated */
-+
-+/* fake_allocated -> used */
-+
-+/* used -> fake_allocated -> grabbed -> free */
-+
-+extern void flush_reserved2grabbed(txn_atom * atom, __u64 count);
-+
-+extern int reiser4_blocknr_is_fake(const reiser4_block_nr * da);
-+
-+extern void grabbed2cluster_reserved(int count);
-+extern void cluster_reserved2grabbed(int count);
-+extern void cluster_reserved2free(int count);
-+
-+extern int reiser4_check_block_counters(const struct super_block *);
-+
-+#if REISER4_DEBUG
-+
-+extern void reiser4_check_block(const reiser4_block_nr *, int);
-+
-+#else
-+
-+# define reiser4_check_block(beg, val) noop
-+
-+#endif
-+
-+extern int reiser4_pre_commit_hook(void);
-+extern void reiser4_post_commit_hook(void);
-+extern void reiser4_post_write_back_hook(void);
-+
-+#endif /* __FS_REISER4_BLOCK_ALLOC_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/blocknrset.c linux-2.6.24/fs/reiser4/blocknrset.c
---- linux-2.6.24.orig/fs/reiser4/blocknrset.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/blocknrset.c 2008-01-25 11:39:06.892196354 +0300
-@@ -0,0 +1,368 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This file contains code for various block number sets used by the atom to
-+ track the deleted set and wandered block mappings. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "txnmgr.h"
-+#include "context.h"
-+
-+#include <linux/slab.h>
-+
-+/* The proposed data structure for storing unordered block number sets is a
-+ list of elements, each of which contains an array of block number or/and
-+ array of block number pairs. That element called blocknr_set_entry is used
-+ to store block numbers from the beginning and for extents from the end of
-+ the data field (char data[...]). The ->nr_blocks and ->nr_pairs fields
-+ count numbers of blocks and extents.
-+
-+ +------------------- blocknr_set_entry->data ------------------+
-+ |block1|block2| ... <free space> ... |pair3|pair2|pair1|
-+ +------------------------------------------------------------+
-+
-+ When current blocknr_set_entry is full, allocate a new one. */
-+
-+/* Usage examples: blocknr sets are used in reiser4 for storing atom's delete
-+ * set (single blocks and block extents), in that case blocknr pair represent an
-+ * extent; atom's wandered map is also stored as a blocknr set, blocknr pairs
-+ * there represent a (real block) -> (wandered block) mapping. */
-+
-+/* Protection: blocknr sets belong to reiser4 atom, and
-+ * their modifications are performed with the atom lock held */
-+
-+/* The total size of a blocknr_set_entry. */
-+#define BLOCKNR_SET_ENTRY_SIZE 128
-+
-+/* The number of blocks that can fit the blocknr data area. */
-+#define BLOCKNR_SET_ENTRIES_NUMBER \
-+ ((BLOCKNR_SET_ENTRY_SIZE - \
-+ 2 * sizeof (unsigned) - \
-+ sizeof(struct list_head)) / \
-+ sizeof(reiser4_block_nr))
-+
-+/* An entry of the blocknr_set */
-+struct blocknr_set_entry {
-+ unsigned nr_singles;
-+ unsigned nr_pairs;
-+ struct list_head link;
-+ reiser4_block_nr entries[BLOCKNR_SET_ENTRIES_NUMBER];
-+};
-+
-+/* A pair of blocks as recorded in the blocknr_set_entry data. */
-+struct blocknr_pair {
-+ reiser4_block_nr a;
-+ reiser4_block_nr b;
-+};
-+
-+/* Return the number of blocknr slots available in a blocknr_set_entry. */
-+/* Audited by: green(2002.06.11) */
-+static unsigned bse_avail(blocknr_set_entry * bse)
-+{
-+ unsigned used = bse->nr_singles + 2 * bse->nr_pairs;
-+
-+ assert("jmacd-5088", BLOCKNR_SET_ENTRIES_NUMBER >= used);
-+ cassert(sizeof(blocknr_set_entry) == BLOCKNR_SET_ENTRY_SIZE);
-+
-+ return BLOCKNR_SET_ENTRIES_NUMBER - used;
-+}
-+
-+/* Initialize a blocknr_set_entry. */
-+static void bse_init(blocknr_set_entry *bse)
-+{
-+ bse->nr_singles = 0;
-+ bse->nr_pairs = 0;
-+ INIT_LIST_HEAD(&bse->link);
-+}
-+
-+/* Allocate and initialize a blocknr_set_entry. */
-+/* Audited by: green(2002.06.11) */
-+static blocknr_set_entry *bse_alloc(void)
-+{
-+ blocknr_set_entry *e;
-+
-+ if ((e = (blocknr_set_entry *) kmalloc(sizeof(blocknr_set_entry),
-+ reiser4_ctx_gfp_mask_get())) == NULL)
-+ return NULL;
-+
-+ bse_init(e);
-+
-+ return e;
-+}
-+
-+/* Free a blocknr_set_entry. */
-+/* Audited by: green(2002.06.11) */
-+static void bse_free(blocknr_set_entry * bse)
-+{
-+ kfree(bse);
-+}
-+
-+/* Add a block number to a blocknr_set_entry */
-+/* Audited by: green(2002.06.11) */
-+static void
-+bse_put_single(blocknr_set_entry * bse, const reiser4_block_nr * block)
-+{
-+ assert("jmacd-5099", bse_avail(bse) >= 1);
-+
-+ bse->entries[bse->nr_singles++] = *block;
-+}
-+
-+/* Get a pair of block numbers */
-+/* Audited by: green(2002.06.11) */
-+static inline struct blocknr_pair *bse_get_pair(blocknr_set_entry * bse,
-+ unsigned pno)
-+{
-+ assert("green-1", BLOCKNR_SET_ENTRIES_NUMBER >= 2 * (pno + 1));
-+
-+ return (struct blocknr_pair *) (bse->entries +
-+ BLOCKNR_SET_ENTRIES_NUMBER -
-+ 2 * (pno + 1));
-+}
-+
-+/* Add a pair of block numbers to a blocknr_set_entry */
-+/* Audited by: green(2002.06.11) */
-+static void
-+bse_put_pair(blocknr_set_entry * bse, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b)
-+{
-+ struct blocknr_pair *pair;
-+
-+ assert("jmacd-5100", bse_avail(bse) >= 2 && a != NULL && b != NULL);
-+
-+ pair = bse_get_pair(bse, bse->nr_pairs++);
-+
-+ pair->a = *a;
-+ pair->b = *b;
-+}
-+
-+/* Add either a block or pair of blocks to the block number set. The first
-+ blocknr (@a) must be non-NULL. If @b is NULL a single blocknr is added, if
-+ @b is non-NULL a pair is added. The block number set belongs to atom, and
-+ the call is made with the atom lock held. There may not be enough space in
-+ the current blocknr_set_entry. If new_bsep points to a non-NULL
-+ blocknr_set_entry then it will be added to the blocknr_set and new_bsep
-+ will be set to NULL. If new_bsep contains NULL then the atom lock will be
-+ released and a new bse will be allocated in new_bsep. E_REPEAT will be
-+ returned with the atom unlocked for the operation to be tried again. If
-+ the operation succeeds, 0 is returned. If new_bsep is non-NULL and not
-+ used during the call, it will be freed automatically. */
-+static int blocknr_set_add(txn_atom *atom, struct list_head *bset,
-+ blocknr_set_entry **new_bsep, const reiser4_block_nr *a,
-+ const reiser4_block_nr *b)
-+{
-+ blocknr_set_entry *bse;
-+ unsigned entries_needed;
-+
-+ assert("jmacd-5101", a != NULL);
-+
-+ entries_needed = (b == NULL) ? 1 : 2;
-+ if (list_empty(bset) ||
-+ bse_avail(list_entry(bset->next, blocknr_set_entry, link)) < entries_needed) {
-+ /* See if a bse was previously allocated. */
-+ if (*new_bsep == NULL) {
-+ spin_unlock_atom(atom);
-+ *new_bsep = bse_alloc();
-+ return (*new_bsep != NULL) ? -E_REPEAT :
-+ RETERR(-ENOMEM);
-+ }
-+
-+ /* Put it on the head of the list. */
-+ list_add(&((*new_bsep)->link), bset);
-+
-+ *new_bsep = NULL;
-+ }
-+
-+ /* Add the single or pair. */
-+ bse = list_entry(bset->next, blocknr_set_entry, link);
-+ if (b == NULL) {
-+ bse_put_single(bse, a);
-+ } else {
-+ bse_put_pair(bse, a, b);
-+ }
-+
-+ /* If new_bsep is non-NULL then there was an allocation race, free this copy. */
-+ if (*new_bsep != NULL) {
-+ bse_free(*new_bsep);
-+ *new_bsep = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Add an extent to the block set. If the length is 1, it is treated as a
-+ single block (e.g., reiser4_set_add_block). */
-+/* Audited by: green(2002.06.11) */
-+/* Auditor note: Entire call chain cannot hold any spinlocks, because
-+ kmalloc might schedule. The only exception is atom spinlock, which is
-+ properly freed. */
-+int
-+blocknr_set_add_extent(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * start,
-+ const reiser4_block_nr * len)
-+{
-+ assert("jmacd-5102", start != NULL && len != NULL && *len > 0);
-+ return blocknr_set_add(atom, bset, new_bsep, start,
-+ *len == 1 ? NULL : len);
-+}
-+
-+/* Add a block pair to the block set. It adds exactly a pair, which is checked
-+ * by an assertion that both arguments are not null.*/
-+/* Audited by: green(2002.06.11) */
-+/* Auditor note: Entire call chain cannot hold any spinlocks, because
-+ kmalloc might schedule. The only exception is atom spinlock, which is
-+ properly freed. */
-+int
-+blocknr_set_add_pair(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep, const reiser4_block_nr * a,
-+ const reiser4_block_nr * b)
-+{
-+ assert("jmacd-5103", a != NULL && b != NULL);
-+ return blocknr_set_add(atom, bset, new_bsep, a, b);
-+}
-+
-+/* Initialize a blocknr_set. */
-+void blocknr_set_init(struct list_head *bset)
-+{
-+ INIT_LIST_HEAD(bset);
-+}
-+
-+/* Release the entries of a blocknr_set. */
-+void blocknr_set_destroy(struct list_head *bset)
-+{
-+ blocknr_set_entry *bse;
-+
-+ while (!list_empty(bset)) {
-+ bse = list_entry(bset->next, blocknr_set_entry, link);
-+ list_del_init(&bse->link);
-+ bse_free(bse);
-+ }
-+}
-+
-+/* Merge blocknr_set entries out of @from into @into. */
-+/* Audited by: green(2002.06.11) */
-+/* Auditor comments: This merge does not know if merged sets contain
-+ blocks pairs (As for wandered sets) or extents, so it cannot really merge
-+ overlapping ranges if there is some. So I believe it may lead to
-+ some blocks being presented several times in one blocknr_set. To help
-+ debugging such problems it might help to check for duplicate entries on
-+ actual processing of this set. Testing this kind of stuff right here is
-+ also complicated by the fact that these sets are not sorted and going
-+ through whole set on each element addition is going to be CPU-heavy task */
-+void blocknr_set_merge(struct list_head * from, struct list_head * into)
-+{
-+ blocknr_set_entry *bse_into = NULL;
-+
-+ /* If @from is empty, no work to perform. */
-+ if (list_empty(from))
-+ return;
-+ /* If @into is not empty, try merging partial-entries. */
-+ if (!list_empty(into)) {
-+
-+ /* Neither set is empty, pop the front to members and try to combine them. */
-+ blocknr_set_entry *bse_from;
-+ unsigned into_avail;
-+
-+ bse_into = list_entry(into->next, blocknr_set_entry, link);
-+ list_del_init(&bse_into->link);
-+ bse_from = list_entry(from->next, blocknr_set_entry, link);
-+ list_del_init(&bse_from->link);
-+
-+ /* Combine singles. */
-+ for (into_avail = bse_avail(bse_into);
-+ into_avail != 0 && bse_from->nr_singles != 0;
-+ into_avail -= 1) {
-+ bse_put_single(bse_into,
-+ &bse_from->entries[--bse_from->
-+ nr_singles]);
-+ }
-+
-+ /* Combine pairs. */
-+ for (; into_avail > 1 && bse_from->nr_pairs != 0;
-+ into_avail -= 2) {
-+ struct blocknr_pair *pair =
-+ bse_get_pair(bse_from, --bse_from->nr_pairs);
-+ bse_put_pair(bse_into, &pair->a, &pair->b);
-+ }
-+
-+ /* If bse_from is empty, delete it now. */
-+ if (bse_avail(bse_from) == BLOCKNR_SET_ENTRIES_NUMBER) {
-+ bse_free(bse_from);
-+ } else {
-+ /* Otherwise, bse_into is full or nearly full (e.g.,
-+ it could have one slot avail and bse_from has one
-+ pair left). Push it back onto the list. bse_from
-+ becomes bse_into, which will be the new partial. */
-+ list_add(&bse_into->link, into);
-+ bse_into = bse_from;
-+ }
-+ }
-+
-+ /* Splice lists together. */
-+ list_splice_init(from, into->prev);
-+
-+ /* Add the partial entry back to the head of the list. */
-+ if (bse_into != NULL)
-+ list_add(&bse_into->link, into);
-+}
-+
-+/* Iterate over all blocknr set elements. */
-+int blocknr_set_iterator(txn_atom *atom, struct list_head *bset,
-+ blocknr_set_actor_f actor, void *data, int delete)
-+{
-+
-+ blocknr_set_entry *entry;
-+
-+ assert("zam-429", atom != NULL);
-+ assert("zam-430", atom_is_protected(atom));
-+ assert("zam-431", bset != 0);
-+ assert("zam-432", actor != NULL);
-+
-+ entry = list_entry(bset->next, blocknr_set_entry, link);
-+ while (bset != &entry->link) {
-+ blocknr_set_entry *tmp = list_entry(entry->link.next, blocknr_set_entry, link);
-+ unsigned int i;
-+ int ret;
-+
-+ for (i = 0; i < entry->nr_singles; i++) {
-+ ret = actor(atom, &entry->entries[i], NULL, data);
-+
-+ /* We can't break a loop if delete flag is set. */
-+ if (ret != 0 && !delete)
-+ return ret;
-+ }
-+
-+ for (i = 0; i < entry->nr_pairs; i++) {
-+ struct blocknr_pair *ab;
-+
-+ ab = bse_get_pair(entry, i);
-+
-+ ret = actor(atom, &ab->a, &ab->b, data);
-+
-+ if (ret != 0 && !delete)
-+ return ret;
-+ }
-+
-+ if (delete) {
-+ list_del(&entry->link);
-+ bse_free(entry);
-+ }
-+
-+ entry = tmp;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/carry.c linux-2.6.24/fs/reiser4/carry.c
---- linux-2.6.24.orig/fs/reiser4/carry.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/carry.c 2008-01-25 11:39:06.896197385 +0300
-@@ -0,0 +1,1391 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Functions to "carry" tree modification(s) upward. */
-+/* Tree is modified one level at a time. As we modify a level we accumulate a
-+ set of changes that need to be propagated to the next level. We manage
-+ node locking such that any searches that collide with carrying are
-+ restarted, from the root if necessary.
-+
-+ Insertion of a new item may result in items being moved among nodes and
-+ this requires the delimiting key to be updated at the least common parent
-+ of the nodes modified to preserve search tree invariants. Also, insertion
-+ may require allocation of a new node. A pointer to the new node has to be
-+ inserted into some node on the parent level, etc.
-+
-+ Tree carrying is meant to be analogous to arithmetic carrying.
-+
-+ A carry operation is always associated with some node (&carry_node).
-+
-+ Carry process starts with some initial set of operations to be performed
-+ and an initial set of already locked nodes. Operations are performed one
-+ by one. Performing each single operation has following possible effects:
-+
-+ - content of carry node associated with operation is modified
-+ - new carry nodes are locked and involved into carry process on this level
-+ - new carry operations are posted to the next level
-+
-+ After all carry operations on this level are done, process is repeated for
-+ the accumulated sequence on carry operations for the next level. This
-+ starts by trying to lock (in left to right order) all carry nodes
-+ associated with carry operations on the parent level. After this, we decide
-+ whether more nodes are required on the left of already locked set. If so,
-+ all locks taken on the parent level are released, new carry nodes are
-+ added, and locking process repeats.
-+
-+ It may happen that balancing process fails owing to unrecoverable error on
-+ some of upper levels of a tree (possible causes are io error, failure to
-+ allocate new node, etc.). In this case we should unmount the filesystem,
-+ rebooting if it is the root, and possibly advise the use of fsck.
-+
-+ USAGE:
-+
-+ int some_tree_operation( znode *node, ... )
-+ {
-+ // Allocate on a stack pool of carry objects: operations and nodes.
-+ // Most carry processes will only take objects from here, without
-+ // dynamic allocation.
-+
-+I feel uneasy about this pool. It adds to code complexity, I understand why it exists, but.... -Hans
-+
-+ carry_pool pool;
-+ carry_level lowest_level;
-+ carry_op *op;
-+
-+ init_carry_pool( &pool );
-+ init_carry_level( &lowest_level, &pool );
-+
-+ // operation may be one of:
-+ // COP_INSERT --- insert new item into node
-+ // COP_CUT --- remove part of or whole node
-+ // COP_PASTE --- increase size of item
-+ // COP_DELETE --- delete pointer from parent node
-+ // COP_UPDATE --- update delimiting key in least
-+ // common ancestor of two
-+
-+ op = reiser4_post_carry( &lowest_level, operation, node, 0 );
-+ if( IS_ERR( op ) || ( op == NULL ) ) {
-+ handle error
-+ } else {
-+ // fill in remaining fields in @op, according to carry.h:carry_op
-+ result = carry( &lowest_level, NULL );
-+ }
-+ done_carry_pool( &pool );
-+ }
-+
-+ When you are implementing node plugin method that participates in carry
-+ (shifting, insertion, deletion, etc.), do the following:
-+
-+ int foo_node_method( znode *node, ..., carry_level *todo )
-+ {
-+ carry_op *op;
-+
-+ ....
-+
-+ // note that the last argument to node_post_carry() is non-zero
-+ // here, because @op is to be applied to the parent of @node, rather
-+ // than to the @node itself as in the previous case.
-+
-+ op = node_post_carry( todo, operation, node, 1 );
-+ // fill in remaining fields in @op, according to carry.h:carry_op
-+
-+ ....
-+
-+ }
-+
-+ BATCHING:
-+
-+ One of the main advantages of level-by-level balancing implemented here is
-+ the ability to batch updates on a parent level and to perform them more
-+ efficiently as a result.
-+
-+ Description To Be Done (TBD).
-+
-+ DIFFICULTIES AND SUBTLE POINTS:
-+
-+ 1. complex plumbing is required, because:
-+
-+ a. effective allocation through pools is needed
-+
-+ b. target of operation is not exactly known when operation is
-+ posted. This is worked around through bitfields in &carry_node and
-+ logic in lock_carry_node()
-+
-+ c. of interaction with locking code: node should be added into sibling
-+ list when pointer to it is inserted into its parent, which is some time
-+ after node was created. Between these moments, node is somewhat in
-+ suspended state and is only registered in the carry lists
-+
-+ 2. whole balancing logic is implemented here, in particular, insertion
-+ logic is coded in make_space().
-+
-+ 3. special cases like insertion (reiser4_add_tree_root()) or deletion
-+ (reiser4_kill_tree_root()) of tree root and morphing of paste into insert
-+ (insert_paste()) have to be handled.
-+
-+ 4. there is non-trivial interdependency between allocation of new nodes
-+ and almost everything else. This is mainly due to the (1.c) above. I shall
-+ write about this later.
-+
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/item/extent.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_mod.h"
-+#include "tree_walk.h"
-+#include "block_alloc.h"
-+#include "pool.h"
-+#include "tree.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h>
-+
-+/* level locking/unlocking; @failure / @fail is non-zero when unlocking after an error */
-+static int lock_carry_level(carry_level * level);
-+static void unlock_carry_level(carry_level * level, int failure);
-+static void done_carry_level(carry_level * level);
-+static void unlock_carry_node(carry_level * level, carry_node * node, int fail);
-+
-+int lock_carry_node(carry_level * level, carry_node * node); /* not static, unlike the helpers above */
-+int lock_carry_node_tail(carry_node * node); /* not static either */
-+
-+/* carry processing proper */
-+static int carry_on_level(carry_level * doing, carry_level * todo);
-+
-+static carry_op *add_op(carry_level * level, pool_ordering order,
-+ carry_op * reference);
-+
-+/* helpers for carry operations: fatal error handling and creation of a new tree root. */
-+
-+static void fatal_carry_error(carry_level * doing, int ecode);
-+static int add_new_root(carry_level * level, carry_node * node, znode * fake);
-+
-+static void print_level(const char *prefix, carry_level * level);
-+
-+#if REISER4_DEBUG
-+typedef enum {
-+ CARRY_TODO, /* state passed by lock_carry_level() */
-+ CARRY_DOING /* state passed by carry_on_level() */
-+} carry_queue_state;
-+static int carry_level_invariant(carry_level * level, carry_queue_state state);
-+#endif
-+
-+/* main entry point for tree balancing.
-+
-+ Tree carry performs operations from @doing and while doing so accumulates
-+ information about operations to be performed on the next level ("carried"
-+ to the parent level). Carried operations are performed, causing possibly
-+ more operations to be carried upward etc. carry() takes care about
-+ locking and pinning znodes while operating on them.
-+
-+ @doing must be followed in memory by two more carry_level slots: the
-+ function uses @doing + 1 as the "todo" queue and @doing + 2 as the "done"
-+ queue, and rotates the three queues after every processed level.
-+
-+ Returns 0 once no operations are left. Any failure other than a
-+ restartable -E_REPEAT is handed to fatal_carry_error() and then returned.
-+
-+ For usage, see comment at the top of fs/reiser4/carry.c
-+
-+*/
-+int reiser4_carry(carry_level * doing /* set of carry operations to be
-+ * performed */ ,
-+ carry_level * done /* must be NULL on entry (enforced by
-+ * BUG_ON() below); reused as the queue
-+ * of the previously processed level */)
-+{
-+ int result = 0;
-+ /* queue of new requests */
-+ carry_level *todo;
-+ ON_DEBUG(STORE_COUNTERS);
-+
-+ assert("nikita-888", doing != NULL);
-+ BUG_ON(done != NULL);
-+
-+ todo = doing + 1;
-+ init_carry_level(todo, doing->pool);
-+
-+ /* queue of requests performed on the previous level */
-+ done = todo + 1;
-+ init_carry_level(done, doing->pool);
-+
-+ /* iterate until there is nothing more to do */
-+ while (result == 0 && doing->ops_num > 0) {
-+ carry_level *tmp;
-+
-+ /* at this point @done is locked. */
-+ /* repeat lock/do/unlock while
-+
-+ (1) lock_carry_level() fails due to deadlock avoidance, or
-+
-+ (2) carry_on_level() decides that more nodes have to
-+ be involved.
-+
-+ (3) some unexpected error occurred while balancing on the
-+ upper levels. In this case all changes are rolled back.
-+
-+ */
-+ while (1) {
-+ result = lock_carry_level(doing);
-+ if (result == 0) {
-+ /* perform operations from @doing and
-+ accumulate new requests in @todo */
-+ result = carry_on_level(doing, todo);
-+ if (result == 0)
-+ break;
-+ else if (result != -E_REPEAT ||
-+ !doing->restartable) {
-+ warning("nikita-1043",
-+ "Fatal error during carry: %i",
-+ result);
-+ print_level("done", done);
-+ print_level("doing", doing);
-+ print_level("todo", todo);
-+ /* do some rough stuff like aborting
-+ all pending transcrashes and thus
-+ pushing tree back to the consistent
-+ state. Alternatively, just panic.
-+ */
-+ fatal_carry_error(doing, result);
-+ return result;
-+ }
-+ } else if (result != -E_REPEAT) {
-+ fatal_carry_error(doing, result);
-+ return result;
-+ }
-+ unlock_carry_level(doing, 1); /* only reached with -E_REPEAT: drop locks and retry this level */
-+ }
-+ /* at this point @done can be safely unlocked */
-+ done_carry_level(done);
-+
-+ /* cyclically shift queues */
-+ tmp = done;
-+ done = doing;
-+ doing = todo;
-+ todo = tmp;
-+ init_carry_level(todo, doing->pool);
-+
-+ /* give other threads chance to run */
-+ reiser4_preempt_point();
-+ }
-+ done_carry_level(done);
-+
-+ /* all counters, but x_refs should remain the same. x_refs can change
-+ owing to transaction manager */
-+ ON_DEBUG(CHECK_COUNTERS);
-+ return result;
-+}
-+
-+/* perform carry operations on given level.
-+
-+   Runs every operation queued in @doing through its handler from
-+   op_dispatch_table[]; handlers may post follow-up operations to @todo.
-+   If all handlers succeed, every carry node of @doing whose real znode is
-+   empty is handed to its node plugin's ->prepare_removal() method.
-+
-+   Returns 0 on success, otherwise the first non-zero result of a handler or
-+   of ->prepare_removal(); processing stops at that point.
-+
-+   Optimizations proposed by pooh:
-+
-+   (1) don't lock all nodes from queue at the same time. Lock nodes lazily as
-+   required;
-+
-+   (2) unlock node if there are no more operations to be performed upon it and
-+   node didn't add any operation to @todo. This can be implemented by
-+   attaching to each node two counters: counter of operations working on this
-+   node and counter of operations carried upward from this node.
-+
-+*/
-+static int carry_on_level(carry_level * doing /* queue of carry operations to
-+						 * do on this level */ ,
-+			  carry_level * todo /* queue where new carry
-+					      * operations to be performed on
-+					      * the parent level are
-+					      * accumulated during @doing
-+					      * processing. */ )
-+{
-+	int result;
-+	int (*f) (carry_op *, carry_level *, carry_level *);
-+	carry_op *op;
-+	carry_op *tmp_op;
-+
-+	assert("nikita-1034", doing != NULL);
-+	assert("nikita-1035", todo != NULL);
-+
-+	/* @doing->nodes are locked. */
-+
-+	/* This function can be split into two phases: analysis and modification.
-+
-+	   Analysis calculates precisely what items should be moved between
-+	   nodes. This information is gathered in some structures attached to
-+	   each carry_node in a @doing queue. Analysis also determines whether
-+	   new nodes are to be allocated etc.
-+
-+	   After analysis is completed, actual modification is performed. Here
-+	   we can take advantage of "batch modification": if there are several
-+	   operations acting on the same node, modifications can be performed
-+	   more efficiently when batched together.
-+
-+	   Above is an optimization left for the future.
-+	 */
-+	/* Important, but delayed optimization: it's possible to batch
-+	   operations together and perform them more efficiently as a
-+	   result. For example, deletion of several neighboring items from a
-+	   node can be converted to a single ->cut() operation.
-+
-+	   Before processing queue, it should be scanned and "mergeable"
-+	   operations merged.
-+	 */
-+	result = 0;
-+	for_all_ops(doing, op, tmp_op) {
-+		carry_opcode opcode;
-+
-+		assert("nikita-1041", op != NULL);
-+		/* read the opcode once and use it for both the range check
-+		   and the dispatch, so the local is never set-but-unused when
-+		   assertions are compiled out */
-+		opcode = op->op;
-+		assert("nikita-1042", opcode < COP_LAST_OP);
-+		f = op_dispatch_table[opcode].handler;
-+		result = f(op, doing, todo);
-+		/* locking can fail with -E_REPEAT. Any different error is fatal
-+		   and will be handled by fatal_carry_error() sledgehammer.
-+		 */
-+		if (result != 0)
-+			break;
-+	}
-+	if (result == 0) {
-+		carry_plugin_info info;
-+		carry_node *scan;
-+		carry_node *tmp_scan;
-+
-+		info.doing = doing;
-+		info.todo = todo;
-+
-+		assert("nikita-3002",
-+		       carry_level_invariant(doing, CARRY_DOING));
-+		/* let node plugins prepare removal of nodes left empty */
-+		for_all_nodes(doing, scan, tmp_scan) {
-+			znode *node;
-+
-+			node = reiser4_carry_real(scan);
-+			assert("nikita-2547", node != NULL);
-+			if (node_is_empty(node)) {
-+				result =
-+				    node_plugin_by_node(node)->
-+				    prepare_removal(node, &info);
-+				if (result != 0)
-+					break;
-+			}
-+		}
-+	}
-+	return result;
-+}
-+
-+/* post carry operation
-+
-+   This is main function used by external carry clients: node layout plugins
-+   and tree operations to create new carry operation to be performed on some
-+   level.
-+
-+   New operation will be included in the @level queue, together with a new
-+   carry node referring to @node. To actually perform it, call
-+   reiser4_carry() on @level. @node must already be write locked by the
-+   caller (asserted below); carry manages all its other locks by itself.
-+
-+   This function adds operation and node at the end of the queue. It is up to
-+   caller to guarantee proper ordering of node queue.
-+
-+   Returns the new operation (the caller fills in its remaining fields), an
-+   ERR_PTR() value when an allocation failed, or NULL if the pool returned
-+   NULL. On failure the half-built operation is returned to the pool and the
-+   operation count of @level is restored.
-+
-+*/
-+carry_op *reiser4_post_carry(carry_level * level /* queue where new operation
-+						    * is to be posted at */ ,
-+			     carry_opcode op /* opcode of operation */ ,
-+			     znode * node /* node on which this operation
-+					   * will operate */ ,
-+			     int apply_to_parent_p /* whether operation will
-+						    * operate directly on @node
-+						    * or on its parent. */)
-+{
-+	carry_op *result;
-+	carry_node *child;
-+
-+	assert("nikita-1046", level != NULL);
-+	assert("nikita-1788", znode_is_write_locked(node));
-+
-+	result = add_op(level, POOLO_LAST, NULL);
-+	/* add_op() itself allows for a NULL result; never dereference it */
-+	if (IS_ERR(result) || result == NULL)
-+		return result;
-+	child = reiser4_add_carry(level, POOLO_LAST, NULL);
-+	if (IS_ERR(child) || child == NULL) {
-+		/* give the operation back and undo the ++ops_num done by
-+		   add_op(), so that @level does not claim an operation it
-+		   no longer owns */
-+		reiser4_pool_free(&level->pool->op_pool, &result->header);
-+		--level->ops_num;
-+		return (carry_op *) child;
-+	}
-+	result->node = child;
-+	result->op = op;
-+	child->parent = apply_to_parent_p;
-+	/* an orphan has no parent pointer yet: lock_carry_node() will start
-+	   from a carry node on its left instead */
-+	if (ZF_ISSET(node, JNODE_ORPHAN))
-+		child->left_before = 1;
-+	child->node = node;
-+	return result;
-+}
-+
-+/* initialize carry queue
-+
-+   Zeroes @level, sets up its empty node and operation lists and binds it
-+   to @pool.
-+*/
-+void init_carry_level(carry_level * level /* queue to set up */ ,
-+		      carry_pool * pool /* pool that objects of @level
-+					 * are allocated from */ )
-+{
-+	assert("nikita-1045", level != NULL);
-+	assert("nikita-967", pool != NULL);
-+
-+	memset(level, 0, sizeof(*level));
-+
-+	INIT_LIST_HEAD(&level->ops);
-+	INIT_LIST_HEAD(&level->nodes);
-+	level->pool = pool;
-+}
-+
-+/* allocate carry pool and initialize pools within queue
-+
-+   @size is the byte size of the whole allocation; the assertion requires it
-+   to cover a carry_pool plus three carry_level structures (presumably the
-+   three queues reiser4_carry() rotates live right behind the pool -- confirm
-+   against callers).
-+
-+   Returns the new pool, or an ERR_PTR()-encoded -ENOMEM when kmalloc() fails.
-+*/
-+carry_pool *init_carry_pool(int size)
-+{
-+	carry_pool *fresh;
-+
-+	assert("", size >= sizeof(carry_pool) + 3 * sizeof(carry_level));
-+
-+	fresh = kmalloc(size, reiser4_ctx_gfp_mask_get());
-+	if (fresh != NULL) {
-+		/* operations and nodes each get their own object pool, backed
-+		   by the arrays embedded in the carry_pool */
-+		reiser4_init_pool(&fresh->op_pool, sizeof(carry_op),
-+				  CARRIES_POOL_SIZE, (char *)fresh->op);
-+		reiser4_init_pool(&fresh->node_pool, sizeof(carry_node),
-+				  NODES_LOCKED_POOL_SIZE, (char *)fresh->node);
-+		return fresh;
-+	}
-+	return ERR_PTR(RETERR(-ENOMEM));
-+}
-+
-+/* finish with queue pools
-+
-+   Shuts down the operation pool and the node pool embedded in @pool and then
-+   releases @pool itself with kfree().
-+*/
-+void done_carry_pool(carry_pool * pool /* pool to tear down and free */ )
-+{
-+	reiser4_done_pool(&pool->op_pool);
-+	reiser4_done_pool(&pool->node_pool);
-+
-+	kfree(pool);
-+}
-+
-+/* add new carry node to the @level, adjusting the insertion point first.
-+
-+   Returns pointer to the new carry node allocated from pool. It's up to
-+   callers to maintain proper order in the @level. Assumption is that if carry
-+   nodes on one level are already sorted and modifications are performed from
-+   left to right, carry nodes added on the parent level will be ordered
-+   automatically. To control ordering use @order and @reference parameters.
-+
-+   For POOLO_BEFORE / POOLO_AFTER the reference is replaced by a carry node
-+   chosen via find_left_carry() / find_right_carry(); the debugging assertion
-+   checks that the replacement still refers to the same real znode as the
-+   original @reference. Other orderings use @reference unchanged.
-+
-+*/
-+carry_node *reiser4_add_carry_skip(carry_level * level /* &carry_level to add
-+							  * node to */ ,
-+				   pool_ordering order /* where to insert:
-+							* at the beginning of
-+							* @level,
-+							* before @reference,
-+							* after @reference,
-+							* at the end of @level
-+							*/ ,
-+				   carry_node * reference /* reference node for
-+							   * insertion */)
-+{
-+	struct list_head *link;
-+	carry_node *edge;
-+	ON_DEBUG(carry_node * orig_ref = reference);
-+
-+	switch (order) {
-+	case POOLO_BEFORE:
-+		edge = find_left_carry(reference, level);
-+		/* nothing found: first node of @level, else successor of @edge */
-+		link = (edge == NULL) ?
-+		    level->nodes.next : edge->header.level_linkage.next;
-+		reference = list_entry(link, carry_node, header.level_linkage);
-+		break;
-+	case POOLO_AFTER:
-+		edge = find_right_carry(reference, level);
-+		/* nothing found: last node of @level, else predecessor of @edge */
-+		link = (edge == NULL) ?
-+		    level->nodes.prev : edge->header.level_linkage.prev;
-+		reference = list_entry(link, carry_node, header.level_linkage);
-+		break;
-+	default:
-+		break;
-+	}
-+	assert("nikita-2209",
-+	       ergo(orig_ref != NULL,
-+		    reiser4_carry_real(reference) ==
-+		    reiser4_carry_real(orig_ref)));
-+	return reiser4_add_carry(level, order, reference);
-+}
-+
-+/* add new carry node to the @level.
-+
-+   Allocates a carry node from the node pool of @level and links it into
-+   @level->nodes at the position described by @order and @reference.
-+   @reference may be NULL (callers in this file pass NULL together with
-+   POOLO_LAST); in that case a NULL header is handed to reiser4_add_obj()
-+   explicitly instead of taking the address of a member of a NULL pointer.
-+
-+   Returns whatever reiser4_add_obj() returns; @level->nodes_num is
-+   incremented only when that is a valid, non-NULL object.
-+*/
-+carry_node *reiser4_add_carry(carry_level * level /* &carry_level to add node
-+						     * to */ ,
-+			      pool_ordering order /* where to insert: at the
-+						   * beginning of @level, before
-+						   * @reference, after @reference,
-+						   * at the end of @level */ ,
-+			      carry_node * reference /* reference node for
-+						      * insertion, may be
-+						      * NULL */ )
-+{
-+	carry_node *result;
-+
-+	result =
-+	    (carry_node *) reiser4_add_obj(&level->pool->node_pool,
-+					   &level->nodes,
-+					   order,
-+					   reference != NULL ?
-+					   &reference->header : NULL);
-+	if (!IS_ERR(result) && (result != NULL))
-+		++level->nodes_num;
-+	return result;
-+}
-+
-+/* add new carry operation to the @level.
-+
-+   Returns pointer to the new carry operations allocated from pool. It's up to
-+   callers to maintain proper order in the @level. To control ordering use
-+   @order and @reference parameters.
-+
-+   @reference may be NULL (callers in this file pass NULL together with
-+   POOLO_LAST); a NULL header is then passed to reiser4_add_obj() explicitly
-+   rather than derived from a NULL pointer. @level->ops_num is incremented
-+   only when a valid, non-NULL object was obtained.
-+
-+*/
-+static carry_op *add_op(carry_level * level /* &carry_level to add node to */ ,
-+			pool_ordering order /* where to insert: at the beginning of
-+					     * @level, before @reference, after
-+					     * @reference, at the end of @level */ ,
-+			carry_op *
-+			reference /* reference node for insertion, may be NULL */ )
-+{
-+	carry_op *result;
-+
-+	result =
-+	    (carry_op *) reiser4_add_obj(&level->pool->op_pool, &level->ops,
-+					 order,
-+					 reference != NULL ?
-+					 &reference->header : NULL);
-+	if (!IS_ERR(result) && (result != NULL))
-+		++level->ops_num;
-+	return result;
-+}
-+
-+/* Return node on the right of which @node was created.
-+
-+   Each node is created on the right of some existing node (or it is new root,
-+   which is special case not handled here).
-+
-+   @node is new node created on some level, but not yet inserted into its
-+   parent, it has corresponding bit (JNODE_ORPHAN) set in zstate.
-+
-+   The search walks backwards through the carry list starting at @node and
-+   stops at the first carry node that refers to a different znode which is
-+   not an orphan. Running off the head of @kin's list trips an assertion.
-+
-+*/
-+static carry_node *find_begetting_brother(carry_node * node /* node to start
-+							       * search from */ ,
-+					   carry_level * kin UNUSED_ARG /* level to
-+									 * scan */ )
-+{
-+	carry_node *scan;
-+
-+	assert("nikita-1614", node != NULL);
-+	assert("nikita-1615", kin != NULL);
-+	assert("nikita-1616", LOCK_CNT_GTZ(rw_locked_tree));
-+	assert("nikita-1619", ergo(reiser4_carry_real(node) != NULL,
-+				   ZF_ISSET(reiser4_carry_real(node),
-+					    JNODE_ORPHAN)));
-+	scan = node;
-+	while (1) {
-+		/* must never step onto the list head of @kin */
-+		assert("nikita-1617", &kin->nodes != &scan->header.level_linkage);
-+		if ((scan->node != node->node) &&
-+		    !ZF_ISSET(scan->node, JNODE_ORPHAN))
-+			break;
-+		scan = list_entry(scan->header.level_linkage.prev, carry_node,
-+				  header.level_linkage);
-+	}
-+	assert("nikita-1618", reiser4_carry_real(scan) != NULL);
-+	return scan;
-+}
-+
-+/* compare positions of two carry nodes within @level
-+
-+   Returns EQUAL_TO when @n1 and @n2 are the same carry node, LESS_THAN when
-+   @n2 is reached by walking forward from @n1, and GREATER_THAN when the end
-+   of @level is reached first. Every path returns a value; the former
-+   unreachable impossible() call after the endless loop is gone.
-+*/
-+static cmp_t
-+carry_node_cmp(carry_level * level, carry_node * n1, carry_node * n2)
-+{
-+	carry_node *walk;
-+
-+	assert("nikita-2199", n1 != NULL);
-+	assert("nikita-2200", n2 != NULL);
-+
-+	if (n1 == n2)
-+		return EQUAL_TO;
-+	for (walk = carry_node_next(n1); !carry_node_end(level, walk);
-+	     walk = carry_node_next(walk)) {
-+		if (walk == n2)
-+			return LESS_THAN;
-+	}
-+	return GREATER_THAN;
-+}
-+
-+/* look up the carry node of @level whose real (locked) znode is @node
-+
-+   Returns the first matching carry node, or NULL if @level has none.
-+*/
-+carry_node *find_carry_node(carry_level * level, const znode * node)
-+{
-+	carry_node *cursor;
-+	carry_node *next;
-+	carry_node *found;
-+
-+	assert("nikita-2202", level != NULL);
-+	assert("nikita-2203", node != NULL);
-+
-+	found = NULL;
-+	for_all_nodes(level, cursor, next) {
-+		if (reiser4_carry_real(cursor) == node) {
-+			found = cursor;
-+			break;
-+		}
-+	}
-+	return found;
-+}
-+
-+/* return the znode recorded in the lock handle of carry node @node */
-+znode *reiser4_carry_real(const carry_node * node)
-+{
-+	const lock_handle *handle;
-+
-+	assert("nikita-3061", node != NULL);
-+
-+	handle = &node->lock_handle;
-+	return handle->node;
-+}
-+
-+/* find the position in @todo that corresponds to @node's place in @doing
-+
-+   Scans @todo and returns its first carry node whose base znode maps to a
-+   carry node of @doing that does not precede the carry node of @node.
-+   NOTE(review): when no such node exists the loop cursor is returned as left
-+   by for_all_nodes() -- presumably the list-head pseudo entry, so that
-+   inserting "before" it appends to @todo; confirm against the macro.
-+*/
-+carry_node *insert_carry_node(carry_level * doing, carry_level * todo,
-+			      const znode * node)
-+{
-+	carry_node *anchor;
-+	carry_node *cursor;
-+	carry_node *next;
-+
-+	anchor = find_carry_node(doing, node);
-+	assert("nikita-2204", anchor != NULL);
-+
-+	for_all_nodes(todo, cursor, next) {
-+		carry_node *projection;
-+
-+		projection = find_carry_node(doing, cursor->node);
-+		assert("nikita-2205", projection != NULL);
-+		if (carry_node_cmp(doing, projection, anchor) != LESS_THAN)
-+			break;
-+	}
-+	return cursor;
-+}
-+
-+/* add a new carry node to @todo at the position matching @node's place in
-+   @doing: the insertion point comes from insert_carry_node() and the new
-+   node is placed before it. Returns the result of reiser4_add_carry(). */
-+static carry_node *add_carry_atplace(carry_level * doing, carry_level * todo,
-+				     znode * node)
-+{
-+	carry_node *anchor;
-+
-+	assert("nikita-2994", doing != NULL);
-+	assert("nikita-2995", todo != NULL);
-+	assert("nikita-2996", node != NULL);
-+
-+	anchor = insert_carry_node(doing, todo, node);
-+	assert("nikita-2997", anchor != NULL);
-+
-+	return reiser4_add_carry(todo, POOLO_BEFORE, anchor);
-+}
-+
-+/* like reiser4_post_carry(), but designed to be called from node plugin methods.
-+   This function is different from reiser4_post_carry() in that it finds proper
-+   place to insert node in the queue. When @info->doing is NULL it simply
-+   falls back to reiser4_post_carry() on @info->todo.
-+
-+   Returns the new operation, an ERR_PTR() value when an allocation failed,
-+   or NULL if the pool returned NULL. On failure the half-built operation is
-+   returned to the pool and the operation count of the todo level is
-+   restored. */
-+carry_op *node_post_carry(carry_plugin_info * info /* carry parameters
-+						      * passed down to node
-+						      * plugin */ ,
-+			  carry_opcode op /* opcode of operation */ ,
-+			  znode * node /* node on which this
-+					* operation will operate */ ,
-+			  int apply_to_parent_p /* whether operation will
-+						 * operate directly on @node
-+						 * or on its parent. */ )
-+{
-+	carry_op *result;
-+	carry_node *child;
-+
-+	assert("nikita-2207", info != NULL);
-+	assert("nikita-2208", info->todo != NULL);
-+
-+	if (info->doing == NULL)
-+		return reiser4_post_carry(info->todo, op, node,
-+					  apply_to_parent_p);
-+
-+	result = add_op(info->todo, POOLO_LAST, NULL);
-+	/* add_op() itself allows for a NULL result; never dereference it */
-+	if (IS_ERR(result) || result == NULL)
-+		return result;
-+	child = add_carry_atplace(info->doing, info->todo, node);
-+	if (IS_ERR(child) || child == NULL) {
-+		/* give the operation back and undo the ++ops_num done by
-+		   add_op() */
-+		reiser4_pool_free(&info->todo->pool->op_pool, &result->header);
-+		--info->todo->ops_num;
-+		return (carry_op *) child;
-+	}
-+	result->node = child;
-+	result->op = op;
-+	child->parent = apply_to_parent_p;
-+	/* an orphan has no parent pointer yet: lock_carry_node() will start
-+	   from a carry node on its left instead */
-+	if (ZF_ISSET(node, JNODE_ORPHAN))
-+		child->left_before = 1;
-+	child->node = node;
-+	return result;
-+}
-+
-+/* lock all carry nodes in @level
-+
-+   Nodes are locked in list order. Returns 0 when every node was locked, or
-+   the first non-zero result of lock_carry_node(); nodes after the failing one
-+   are not touched. */
-+static int lock_carry_level(carry_level * level /* level to lock */ )
-+{
-+	carry_node *cursor;
-+	carry_node *next;
-+
-+	assert("nikita-881", level != NULL);
-+	assert("nikita-2229", carry_level_invariant(level, CARRY_TODO));
-+
-+	/* lock nodes from left to right */
-+	for_all_nodes(level, cursor, next) {
-+		int rc;
-+
-+		rc = lock_carry_node(level, cursor);
-+		if (rc != 0)
-+			return rc;
-+	}
-+	return 0;
-+}
-+
-+/* Synchronize delimiting keys between @spot and its left neighbor.
-+
-+ To reduce contention on dk key and simplify carry code, we synchronize
-+ delimiting keys only when carry ultimately leaves tree level (carrying
-+ changes upward) and unlocks nodes at this level.
-+
-+ This function first sets the left delimiting key of @spot to the key of
-+ its leftmost item (or to its own right delimiting key when @spot is
-+ empty). It then walks left through connected neighbors, updating each
-+ neighbor's right delimiting key to coincide with that key; the walk goes
-+ on only through write-locked neighbors marked JNODE_HEARD_BANSHEE, whose
-+ left delimiting keys are updated as well.
-+
-+ The whole update runs under the tree read lock and the dk write lock.
-+
-+*/
-+
-+ON_DEBUG(extern atomic_t delim_key_version;
-+ )
-+
-+static void sync_dkeys(znode * spot /* node to update */ )
-+{
-+ reiser4_key pivot;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1610", spot != NULL);
-+ assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk));
-+
-+ tree = znode_get_tree(spot);
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+
-+ assert("nikita-2192", znode_is_loaded(spot));
-+
-+ /* sync left delimiting key of @spot with key in its leftmost item;
-+ an empty node has no items, so its right delimiting key is used */
-+ if (node_is_empty(spot))
-+ pivot = *znode_get_rd_key(spot);
-+ else
-+ leftmost_key_in_node(spot, &pivot);
-+
-+ znode_set_ld_key(spot, &pivot);
-+
-+ /* there can be sequence of empty nodes pending removal on the left of
-+ @spot. Scan them and update their left and right delimiting keys to
-+ match left delimiting key of @spot. Also, update right delimiting
-+ key of first non-empty left neighbor.
-+ */
-+ while (1) {
-+ if (!ZF_ISSET(spot, JNODE_LEFT_CONNECTED))
-+ break;
-+
-+ spot = spot->left;
-+ if (spot == NULL)
-+ break;
-+
-+ znode_set_rd_key(spot, &pivot);
-+ /* don't sink into the domain of another balancing */
-+ if (!znode_is_write_locked(spot))
-+ break;
-+ if (ZF_ISSET(spot, JNODE_HEARD_BANSHEE))
-+ znode_set_ld_key(spot, &pivot);
-+ else
-+ break;
-+ }
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/* unlock all carry nodes in @level
-+
-+ When @failure is zero, delimiting keys are first synchronized with
-+ sync_dkeys() for every run of carry nodes sharing one real znode. Then
-+ every carry node is released through unlock_carry_node(), walking the
-+ list backwards, and @level->new_root is reset to NULL. */
-+static void unlock_carry_level(carry_level * level /* level to unlock */ ,
-+ int failure /* true if unlocking owing to
-+ * failure */ )
-+{
-+ carry_node *node;
-+ carry_node *tmp_node;
-+
-+ assert("nikita-889", level != NULL);
-+
-+ if (!failure) {
-+ znode *spot;
-+
-+ spot = NULL;
-+ /* update delimiting keys; consecutive carry nodes with the
-+ same real znode are synchronized only once */
-+ for_all_nodes(level, node, tmp_node) {
-+ if (reiser4_carry_real(node) != spot) {
-+ spot = reiser4_carry_real(node);
-+ sync_dkeys(spot);
-+ }
-+ }
-+ }
-+
-+ /* nodes can be unlocked in arbitrary order. In preemptible
-+ environment it's better to unlock in reverse order of locking,
-+ though.
-+ */
-+ for_all_nodes_back(level, node, tmp_node) {
-+ /* all allocated nodes should be already linked to their
-+ parents at this moment. */
-+ assert("nikita-1631",
-+ ergo(!failure, !ZF_ISSET(reiser4_carry_real(node),
-+ JNODE_ORPHAN)));
-+ ON_DEBUG(check_dkeys(reiser4_carry_real(node)));
-+ unlock_carry_node(level, node, failure);
-+ }
-+ level->new_root = NULL;
-+}
-+
-+/* finish with @level
-+
-+   Unlocks all nodes of @level (as a non-failure unlock) and then returns
-+   every carry node and every carry operation of the level to its pool. */
-+static void done_carry_level(carry_level * level /* level to finish */ )
-+{
-+	carry_node *cursor;
-+	carry_node *next_node;
-+	carry_op *cur_op;
-+	carry_op *next_op;
-+
-+	assert("nikita-1076", level != NULL);
-+
-+	unlock_carry_level(level, 0);
-+
-+	/* carry nodes go back to the node pool; their lock handles must
-+	   no longer be linked anywhere */
-+	for_all_nodes(level, cursor, next_node) {
-+		assert("nikita-2113", list_empty_careful(&cursor->lock_handle.locks_link));
-+		assert("nikita-2114", list_empty_careful(&cursor->lock_handle.owners_link));
-+		reiser4_pool_free(&level->pool->node_pool, &cursor->header);
-+	}
-+
-+	/* carry operations go back to the operation pool */
-+	for_all_ops(level, cur_op, next_op) {
-+		reiser4_pool_free(&level->pool->op_pool, &cur_op->header);
-+	}
-+}
-+
-+/* helper function to complete locking of carry node
-+
-+   There are several ways in which a new carry node can be added into a carry
-+   level and locked: normally through lock_carry_node(), but also from
-+   find_{left|right}_neighbor(). This function factors out the common final
-+   part of all of them. It expects @node->lock_handle to already hold the
-+   lock just taken (reiser4_carry_real(@node) must be non-NULL), marks the
-+   carry node as needing unlock and loads the znode.
-+
-+   Returns the result of zload().
-+
-+*/
-+int lock_carry_node_tail(carry_node * node /* node to complete locking of */ )
-+{
-+	int result;
-+
-+	assert("nikita-1052", node != NULL);
-+	assert("nikita-1187", reiser4_carry_real(node) != NULL);
-+	assert("nikita-1188", !node->unlock);
-+
-+	node->unlock = 1;
-+	/* Bring node content into memory and install the node plugin by
-+	   looking at the node header. Usually cheap, because the node is
-+	   already in memory.
-+
-+	   The matching zrelse() is in unlock_carry_node().
-+	 */
-+	result = zload(reiser4_carry_real(node));
-+	return result;
-+}
-+
-+/* lock carry node
-+
-+ "Resolve" node to real znode, lock it and mark as locked.
-+ This requires recursive locking of znodes.
-+
-+ When operation is posted to the parent level, node it will be applied to is
-+ not yet known. For example, when shifting data between two nodes,
-+ delimiting has to be updated in parent or parents of nodes involved. But
-+ their parents is not yet locked and, moreover said nodes can be reparented
-+ by concurrent balancing.
-+
-+ To work around this, carry operation is applied to special "carry node"
-+ rather than to the znode itself. Carry node consists of some "base" or
-+ "reference" znode and flags indicating how to get to the target of carry
-+ operation from base. In this file the resolved target is kept in
-+ node->lock_handle and read back through reiser4_carry_real().
-+
-+ Resolution steps, driven by the flags of @node: ->left_before starts from
-+ the node found by find_begetting_brother(); ->parent moves to the parent
-+ (creating a new root when the parent is the node above the root); ->left
-+ moves to the left neighbor; with none of the flags set the base znode
-+ itself is locked.
-+
-+ Returns 0 on success, otherwise the non-zero result of the first failing
-+ step (-EIO when this level has already created a different new root).
-+
-+*/
-+int lock_carry_node(carry_level * level /* level @node is in */ ,
-+ carry_node * node /* node to lock */ )
-+{
-+ int result;
-+ znode *reference_point;
-+ lock_handle lh;
-+ lock_handle tmp_lh;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-887", level != NULL);
-+ assert("nikita-882", node != NULL);
-+
-+ result = 0;
-+ reference_point = node->node;
-+ init_lh(&lh);
-+ init_lh(&tmp_lh);
-+ if (node->left_before) {
-+ /* handling of new nodes, allocated on the previous level:
-+
-+ some carry ops were probably posted from the new node, but
-+ this node neither has parent pointer set, nor is
-+ connected. This will be done in ->create_hook() for
-+ internal item.
-+
-+ Nonetheless, parent of new node has to be locked. To do
-+ this, first go to the "left" in the carry order. This
-+ depends on the decision to always allocate new node on the
-+ right of existing one.
-+
-+ Loop handles case when multiple nodes, all orphans, were
-+ inserted.
-+
-+ Strictly speaking, taking tree lock is not necessary here,
-+ because all nodes scanned by loop in
-+ find_begetting_brother() are write-locked by this thread,
-+ and thus, their sibling linkage cannot change.
-+
-+ */
-+ tree = znode_get_tree(reference_point);
-+ read_lock_tree(tree);
-+ reference_point = find_begetting_brother(node, level)->node;
-+ read_unlock_tree(tree);
-+ assert("nikita-1186", reference_point != NULL);
-+ }
-+ if (node->parent && (result == 0)) {
-+ result =
-+ reiser4_get_parent(&tmp_lh, reference_point,
-+ ZNODE_WRITE_LOCK);
-+ if (result != 0) {
-+ ; /* nothing */
-+ } else if (znode_get_level(tmp_lh.node) == 0) {
-+ assert("nikita-1347", znode_above_root(tmp_lh.node));
-+ result = add_new_root(level, node, tmp_lh.node);
-+ if (result == 0) {
-+ reference_point = level->new_root;
-+ move_lh(&lh, &node->lock_handle); /* add_new_root() locked the new root into node->lock_handle */
-+ }
-+ } else if ((level->new_root != NULL)
-+ && (level->new_root !=
-+ znode_parent_nolock(reference_point))) {
-+ /* parent of node exists, but this level already
-+ created different new root, so */
-+ warning("nikita-1109",
-+ /* it should be "radicis", but tradition is
-+ tradition. do banshees read latin? */
-+ "hodie natus est radici frater");
-+ result = -EIO;
-+ } else {
-+ move_lh(&lh, &tmp_lh);
-+ reference_point = lh.node;
-+ }
-+ }
-+ if (node->left && (result == 0)) {
-+ assert("nikita-1183", node->parent);
-+ assert("nikita-883", reference_point != NULL);
-+ result =
-+ reiser4_get_left_neighbor(&tmp_lh, reference_point,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == 0) {
-+ done_lh(&lh);
-+ move_lh(&lh, &tmp_lh);
-+ reference_point = lh.node;
-+ }
-+ }
-+ if (!node->parent && !node->left && !node->left_before) {
-+ result =
-+ longterm_lock_znode(&lh, reference_point, ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_HIPRI);
-+ }
-+ if (result == 0) {
-+ move_lh(&node->lock_handle, &lh);
-+ result = lock_carry_node_tail(node);
-+ }
-+ done_lh(&tmp_lh);
-+ done_lh(&lh);
-+ return result;
-+}
-+
-+/* release a lock on &carry_node.
-+
-+   Release if necessary lock on @node. This operation is pair of
-+   lock_carry_node(): it drops the reference taken by zload() in
-+   lock_carry_node_tail() and releases the long term lock held in
-+   @node->lock_handle.
-+
-+   A carry node that was never locked (reiser4_carry_real() returns NULL, as
-+   happens for nodes behind the one on which lock_carry_level() failed) is
-+   left alone: neither zrelse() nor the unlock is attempted on it.
-+   NOTE(review): repeated calls on the same node are only harmless if
-+   longterm_unlock_znode() clears the handle's node pointer -- confirm.
-+
-+   When @failure is non-zero, a node marked ->deallocate is additionally
-+   flagged JNODE_HEARD_BANSHEE and a node marked ->free is returned to the
-+   node pool of @level.
-+
-+*/
-+static void
-+unlock_carry_node(carry_level * level,
-+		  carry_node * node /* node to be released */ ,
-+		  int failure /* non-zero if node is unlocked due
-+			       * to some error */ )
-+{
-+	znode *real_node;
-+
-+	assert("nikita-884", node != NULL);
-+
-+	real_node = reiser4_carry_real(node);
-+	if (real_node != NULL) {
-+		/* pair to zload() in lock_carry_node_tail() */
-+		zrelse(real_node);
-+		if (node->unlock) {
-+			assert("nikita-899", real_node == node->lock_handle.node);
-+			longterm_unlock_znode(&node->lock_handle);
-+		}
-+	}
-+	if (failure) {
-+		if (node->deallocate && (real_node != NULL)) {
-+			/* free node in bitmap
-+
-+			   Prepare node for removal. Last zput() will finish
-+			   with it.
-+			 */
-+			ZF_SET(real_node, JNODE_HEARD_BANSHEE);
-+		}
-+		if (node->free) {
-+			assert("nikita-2177",
-+			       list_empty_careful(&node->lock_handle.locks_link));
-+			assert("nikita-2112",
-+			       list_empty_careful(&node->lock_handle.owners_link));
-+			reiser4_pool_free(&level->pool->node_pool,
-+					  &node->header);
-+		}
-+	}
-+}
-+
-+/* fatal_carry_error() - all-catching error handling function
-+
-+ It is possible that carry faces unrecoverable error, like inability to
-+ insert pointer at the internal level. Our simple solution is just panic in
-+ this situation. More sophisticated things like attempt to remount
-+ file-system as read-only can be implemented without much difficulty.
-+
-+ It is believed, that:
-+
-+ 1. instead of panicking, all current transactions can be aborted rolling
-+ system back to the consistent state.
-+
-+Umm, if you simply panic without doing anything more at all, then all current
-+transactions are aborted and the system is rolled back to a consistent state,
-+by virtue of the design of the transactional mechanism. Well, wait, let's be
-+precise. If an internal node is corrupted on disk due to hardware failure,
-+then there may be no consistent state that can be rolled back to, so instead
-+we should say that it will rollback the transactions, which barring other
-+factors means rolling back to a consistent state.
-+
-+# Nikita: there is a subtle difference between panic and aborting
-+# transactions: machine doesn't reboot. Processes aren't killed. Processes
-+# don't using reiser4 (not that we care about such processes), or using other
-+# reiser4 mounts (about them we do care) will simply continue to run. With
-+# some luck, even application using aborted file system can survive: it will
-+# get some error, like EBADF, from each file descriptor on failed file system,
-+# but applications that do care about tolerance will cope with this (squid
-+# will).
-+
-+It would be a nice feature though to support rollback without rebooting
-+followed by remount, but this can wait for later versions.
-+
-+ 2. once isolated transactions will be implemented it will be possible to
-+ roll back offending transaction.
-+
-+2. is additional code complexity of inconsistent value (it implies that a broken tree should be kept in operation), so we must think about
-+it more before deciding if it should be done. -Hans
-+
-+*/
-+static void fatal_carry_error(carry_level * doing UNUSED_ARG /* carry level
-+ * where
-+ * unrecoverable
-+ * error
-+ * occurred */ ,
-+ int ecode /* error code */ )
-+{
-+ assert("nikita-1230", doing != NULL);
-+ assert("nikita-1231", ecode < 0);
-+
-+ reiser4_panic("nikita-1232", "Carry failed: %i", ecode);
-+}
-+
-+/* add new root to the tree
-+
-+ This function itself only manages changes in carry structures and delegates
-+ all hard work (allocation of znode for new root, changes of parent and
-+ sibling pointers) to reiser4_add_tree_root().
-+
-+ Locking: old tree root is locked by carry at this point. Fake znode is also
-+ locked.
-+
-+*/
-+static int add_new_root(carry_level * level /* carry level in context of which
-+ * operation is performed */ ,
-+ carry_node * node /* carry node for existing root */ ,
-+ znode * fake /* "fake" znode already locked by
-+ * us */ )
-+{
-+ int result;
-+
-+ assert("nikita-1104", level != NULL);
-+ assert("nikita-1105", node != NULL);
-+
-+ assert("nikita-1403", znode_is_write_locked(node->node));
-+ assert("nikita-1404", znode_is_write_locked(fake));
-+
-+ /* trying to create new root. */
-+ /* @node is root and it's already locked by us. This
-+ means that nobody else can be trying to add/remove
-+ tree root right now.
-+ */
-+ if (level->new_root == NULL)
-+ level->new_root = reiser4_add_tree_root(node->node, fake);
-+ if (!IS_ERR(level->new_root)) {
-+ assert("nikita-1210", znode_is_root(level->new_root));
-+ node->deallocate = 1;
-+ result =
-+ longterm_lock_znode(&node->lock_handle, level->new_root,
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
-+ if (result == 0)
-+ zput(level->new_root);
-+ } else {
-+ result = PTR_ERR(level->new_root);
-+ level->new_root = NULL;
-+ }
-+ return result;
-+}
-+
-+/* allocate new znode and add the operation that inserts the
-+ pointer to it into the parent node into the todo level
-+
-+ Allocate new znode, add it into carry queue and post into @todo queue
-+ request to add pointer to new node into its parent.
-+
-+ This is a carry-related routine that calls reiser4_new_node() to allocate the
-+ new node.
-+*/
-+carry_node *add_new_znode(znode * brother /* existing left neighbor of new
-+ * node */ ,
-+ carry_node * ref /* carry node after which new
-+ * carry node is to be inserted
-+ * into queue. This affects
-+ * locking. */ ,
-+ carry_level * doing /* carry queue where new node is
-+ * to be added */ ,
-+ carry_level * todo /* carry queue where COP_INSERT
-+ * operation to add pointer to
-+ * new node will be added */ )
-+{
-+ carry_node *fresh;
-+ znode *new_znode;
-+ carry_op *add_pointer;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1048", brother != NULL);
-+ assert("nikita-1049", todo != NULL);
-+
-+ /* There is a lot of possible variations here: to what parent
-+ new node will be attached and where. For simplicity, always
-+ do the following:
-+
-+ (1) new node and @brother will have the same parent.
-+
-+ (2) new node is added on the right of @brother
-+
-+ */
-+
-+ fresh = reiser4_add_carry_skip(doing,
-+ ref ? POOLO_AFTER : POOLO_LAST, ref);
-+ if (IS_ERR(fresh))
-+ return fresh;
-+
-+ fresh->deallocate = 1;
-+ fresh->free = 1;
-+
-+ new_znode = reiser4_new_node(brother, znode_get_level(brother));
-+ if (IS_ERR(new_znode))
-+ /* @fresh will be deallocated automatically by error
-+ handling code in the caller. */
-+ return (carry_node *) new_znode;
-+
-+ /* new_znode returned znode with x_count 1. Caller has to decrease
-+ it. make_space() does. */
-+
-+ ZF_SET(new_znode, JNODE_ORPHAN);
-+ fresh->node = new_znode;
-+
-+ while (ZF_ISSET(reiser4_carry_real(ref), JNODE_ORPHAN)) {
-+ ref = carry_node_prev(ref);
-+ assert("nikita-1606", !carry_node_end(doing, ref));
-+ }
-+
-+ info.todo = todo;
-+ info.doing = doing;
-+ add_pointer = node_post_carry(&info, COP_INSERT,
-+ reiser4_carry_real(ref), 1);
-+ if (IS_ERR(add_pointer)) {
-+ /* no need to deallocate @new_znode here: it will be
-+ deallocated during carry error handling. */
-+ return (carry_node *) add_pointer;
-+ }
-+
-+ add_pointer->u.insert.type = COPT_CHILD;
-+ add_pointer->u.insert.child = fresh;
-+ add_pointer->u.insert.brother = brother;
-+ /* initially new node spans empty key range */
-+ write_lock_dk(znode_get_tree(brother));
-+ znode_set_ld_key(new_znode,
-+ znode_set_rd_key(new_znode,
-+ znode_get_rd_key(brother)));
-+ write_unlock_dk(znode_get_tree(brother));
-+ return fresh;
-+}
-+
-+/* DEBUGGING FUNCTIONS.
-+
-+ Probably we also should leave them on even when
-+ debugging is turned off to print dumps at errors.
-+*/
-+#if REISER4_DEBUG
-+static int carry_level_invariant(carry_level * level, carry_queue_state state)
-+{
-+ carry_node *node;
-+ carry_node *tmp_node;
-+
-+ if (level == NULL)
-+ return 0;
-+
-+ if (level->track_type != 0 &&
-+ level->track_type != CARRY_TRACK_NODE &&
-+ level->track_type != CARRY_TRACK_CHANGE)
-+ return 0;
-+
-+ /* check that nodes are in ascending order */
-+ for_all_nodes(level, node, tmp_node) {
-+ znode *left;
-+ znode *right;
-+
-+ reiser4_key lkey;
-+ reiser4_key rkey;
-+
-+ if (node != carry_node_front(level)) {
-+ if (state == CARRY_TODO) {
-+ right = node->node;
-+ left = carry_node_prev(node)->node;
-+ } else {
-+ right = reiser4_carry_real(node);
-+ left = reiser4_carry_real(carry_node_prev(node));
-+ }
-+ if (right == NULL || left == NULL)
-+ continue;
-+ if (node_is_empty(right) || node_is_empty(left))
-+ continue;
-+ if (!keyle(leftmost_key_in_node(left, &lkey),
-+ leftmost_key_in_node(right, &rkey))) {
-+ warning("", "wrong key order");
-+ return 0;
-+ }
-+ }
-+ }
-+ return 1;
-+}
-+#endif
-+
-+/* get symbolic name for boolean */
-+static const char *tf(int boolean /* truth value */ )
-+{
-+ return boolean ? "t" : "f";
-+}
-+
-+/* symbolic name for carry operation; unknown opcodes go to a static buffer */
-+static const char *carry_op_name(carry_opcode op /* carry opcode */ )
-+{
-+ switch (op) {
-+ case COP_INSERT:
-+ return "COP_INSERT";
-+ case COP_DELETE:
-+ return "COP_DELETE";
-+ case COP_CUT:
-+ return "COP_CUT";
-+ case COP_PASTE:
-+ return "COP_PASTE";
-+ case COP_UPDATE:
-+ return "COP_UPDATE";
-+ case COP_EXTENT:
-+ return "COP_EXTENT";
-+ case COP_INSERT_FLOW:
-+ return "COP_INSERT_FLOW";
-+ default:{
-+ /* not mt safe, but who cares? 12 chars of text + 8 hex digits + NUL */
-+ static char buf[32];
-+
-+ snprintf(buf, sizeof(buf), "unknown op: %x", op);
-+ return buf;
-+ }
-+ }
-+}
-+
-+/* dump information about carry node */
-+static void print_carry(const char *prefix /* prefix to print */ ,
-+ carry_node * node /* node to print */ )
-+{
-+ if (node == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+ printk
-+ ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n",
-+ prefix, node, tf(node->parent), tf(node->left), tf(node->unlock),
-+ tf(node->free), tf(node->deallocate));
-+}
-+
-+/* dump information about carry operation: opcode, node, per-opcode arguments */
-+static void print_op(const char *prefix /* prefix to print */ ,
-+ carry_op * op /* operation to print */ )
-+{
-+ if (op == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+ printk("%s: %p carry_opcode: %s\n", prefix, op, carry_op_name(op->op));
-+ print_carry("\tnode", op->node);
-+ switch (op->op) {
-+ case COP_INSERT:
-+ case COP_PASTE:
-+ print_coord("\tcoord",
-+ op->u.insert.d ? op->u.insert.d->coord : NULL, 0);
-+ reiser4_print_key("\tkey",
-+ op->u.insert.d ? op->u.insert.d->key : NULL);
-+ print_carry("\tchild", op->u.insert.child);
-+ break;
-+ case COP_DELETE:
-+ print_carry("\tchild", op->u.delete.child);
-+ break;
-+ case COP_CUT:
-+ if (op->u.cut_or_kill.is_cut) {
-+ print_coord("\tfrom",
-+ op->u.cut_or_kill.u.cut->params.from, 0);
-+ print_coord("\tto", op->u.cut_or_kill.u.cut->params.to,
-+ 0);
-+ } else {
-+ print_coord("\tfrom",
-+ op->u.cut_or_kill.u.kill->params.from, 0);
-+ print_coord("\tto", op->u.cut_or_kill.u.kill->params.to,
-+ 0);
-+ }
-+ break;
-+ case COP_UPDATE:
-+ print_carry("\tleft", op->u.update.left);
-+ break;
-+ default:
-+ /* other opcodes have no extra arguments printed here */
-+ break;
-+ }
-+}
-+
-+/* dump information about all nodes and operations in a @level */
-+static void print_level(const char *prefix /* prefix to print */ ,
-+ carry_level * level /* level to print */ )
-+{
-+ carry_node *node;
-+ carry_node *tmp_node;
-+ carry_op *op;
-+ carry_op *tmp_op;
-+
-+ if (level == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+ printk("%s: %p, restartable: %s\n",
-+ prefix, level, tf(level->restartable));
-+
-+ for_all_nodes(level, node, tmp_node)
-+ print_carry("\tcarry node", node);
-+ for_all_ops(level, op, tmp_op)
-+ print_op("\tcarry op", op);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/carry.h linux-2.6.24/fs/reiser4/carry.h
---- linux-2.6.24.orig/fs/reiser4/carry.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/carry.h 2008-01-25 11:39:06.896197385 +0300
-@@ -0,0 +1,442 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Functions and data types to "carry" tree modification(s) upward.
-+ See fs/reiser4/carry.c for details. */
-+
-+#if !defined( __FS_REISER4_CARRY_H__ )
-+#define __FS_REISER4_CARRY_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "pool.h"
-+#include "znode.h"
-+
-+#include <linux/types.h>
-+
-+/* &carry_node - "location" of carry node.
-+
-+ "location" of node that is involved or going to be involved into
-+ carry process. Node where operation will be carried to on the
-+ parent level cannot be recorded explicitly. Operation will be carried
-+ usually to the parent of some node (where changes are performed at
-+ the current level) or, to the left neighbor of its parent. But while
-+ modifications are performed at the current level, parent may
-+ change. So, we have to allow some indirection (or, positively,
-+ flexibility) in locating carry nodes.
-+
-+*/
-+typedef struct carry_node {
-+ /* pool linkage */
-+ struct reiser4_pool_header header;
-+
-+ /* base node from which real_node is calculated. See
-+ fs/reiser4/carry.c:lock_carry_node(). */
-+ znode *node;
-+
-+ /* how to get ->real_node */
-+ /* to get ->real_node obtain parent of ->node */
-+ __u32 parent:1;
-+ /* to get ->real_node obtain left neighbor of parent of
-+ ->node */
-+ __u32 left:1;
-+ __u32 left_before:1; /* NOTE(review): undocumented here; confirm use in carry.c */
-+
-+ /* locking */
-+
-+ /* this node was locked by carry process and should be
-+ unlocked when carry leaves a level */
-+ __u32 unlock:1;
-+
-+ /* disk block for this node was allocated by carry process and
-+ should be deallocated when carry leaves a level */
-+ __u32 deallocate:1;
-+ /* this carry node was allocated by carry process and should be
-+ freed when carry leaves a level */
-+ __u32 free:1;
-+
-+ /* lock handle through which carry takes its lock on this node */
-+ lock_handle lock_handle;
-+} carry_node;
-+
-+/* &carry_opcode - elementary operations that can be carried upward
-+
-+ Operations that carry() can handle. This list is supposed to be
-+ expanded.
-+
-+ Each carry operation (cop) is handled by appropriate function defined
-+ in fs/reiser4/carry.c. For example COP_INSERT is handled by
-+ fs/reiser4/carry.c:carry_insert() etc. These functions in turn
-+ call plugins of nodes affected by operation to modify nodes' content
-+ and to gather operations to be performed on the next level.
-+
-+*/
-+typedef enum {
-+ /* insert new item into node. */
-+ COP_INSERT,
-+ /* delete pointer from parent node */
-+ COP_DELETE,
-+ /* remove part of or whole node. */
-+ COP_CUT,
-+ /* increase size of item. */
-+ COP_PASTE,
-+ /* insert extent (that is sequence of unformatted nodes). */
-+ COP_EXTENT,
-+ /* update delimiting key in least common ancestor of two
-+ nodes. This is performed when items are moved between two
-+ nodes.
-+ */
-+ COP_UPDATE,
-+ /* insert flow */
-+ COP_INSERT_FLOW,
-+ COP_LAST_OP,
-+} carry_opcode;
-+
-+#define CARRY_FLOW_NEW_NODES_LIMIT 20
-+
-+/* mode (or subtype) of COP_{INSERT|PASTE} operation. Specifies how target
-+ item is determined. */
-+typedef enum {
-+ /* target item is one containing pointer to the ->child node */
-+ COPT_CHILD,
-+ /* target item is given explicitly by @coord */
-+ COPT_ITEM_DATA,
-+ /* target item is given by key */
-+ COPT_KEY,
-+ /* see insert_paste_common() for more comments on this. */
-+ COPT_PASTE_RESTARTED,
-+} cop_insert_pos_type;
-+
-+/* flags to cut and delete */
-+typedef enum {
-+ /* don't kill node even if it became completely empty as results of
-+ * cut. This is needed for eottl handling. See carry_extent() for
-+ * details. */
-+ DELETE_RETAIN_EMPTY = (1 << 0)
-+} cop_delete_flag;
-+
-+/*
-+ * carry() implements "lock handle tracking" feature.
-+ *
-+ * Callers supply carry with node where to perform initial operation and lock
-+ * handle on this node. Trying to optimize node utilization carry may actually
-+ * move insertion point to different node. Callers expect that lock handle
-+ * will be transferred to the new node also.
-+ *
-+ */
-+typedef enum {
-+ /* transfer lock handle along with insertion point */
-+ CARRY_TRACK_CHANGE = 1,
-+ /* acquire new lock handle to the node where insertion point is. This
-+ * is used when carry() client doesn't initially possess lock handle
-+ * on the insertion point node, for example, by extent insertion
-+ * code. See carry_extent(). */
-+ CARRY_TRACK_NODE = 2
-+} carry_track_type;
-+
-+/* data supplied to COP_{INSERT|PASTE} by callers */
-+typedef struct carry_insert_data {
-+ /* position where new item is to be inserted */
-+ coord_t *coord;
-+ /* new item description */
-+ reiser4_item_data *data;
-+ /* key of new item */
-+ const reiser4_key *key;
-+} carry_insert_data;
-+
-+/* cut and kill are similar, so carry_cut_data and carry_kill_data share the below structure of parameters */
-+struct cut_kill_params {
-+ /* coord where cut starts (inclusive) */
-+ coord_t *from;
-+ /* coord where cut stops (inclusive, this item/unit will also be
-+ * cut) */
-+ coord_t *to;
-+ /* starting key. This is necessary when item and unit pos don't
-+ * uniquely identify what portion of tree to remove. For example, this
-+ * indicates what portion of extent unit will be affected. */
-+ const reiser4_key *from_key;
-+ /* exclusive stop key */
-+ const reiser4_key *to_key;
-+ /* if this is not NULL, smallest actually removed key is stored
-+ * here. */
-+ reiser4_key *smallest_removed;
-+ /* kill_node_content() is called for file truncate */
-+ int truncate;
-+};
-+
-+struct carry_cut_data {
-+ struct cut_kill_params params;
-+};
-+
-+struct carry_kill_data {
-+ struct cut_kill_params params;
-+ /* parameter to be passed to the ->kill_hook() method of item
-+ * plugin */
-+ /*void *iplug_params; *//* FIXME: unused currently */
-+ /* if not NULL---inode whose items are being removed. This is needed
-+ * for ->kill_hook() of extent item to update VM structures when
-+ * removing pages. */
-+ struct inode *inode;
-+ /* sibling list maintenance is complicated by existence of eottl. When
-+ * eottl whose left and right neighbors are formatted leaves is
-+ * removed, one has to connect said leaves in the sibling list. This
-+ * cannot be done when extent removal is just started as locking rules
-+ * require sibling list update to happen atomically with removal of
-+ * extent item. Therefore: 1. pointers to left and right neighbors
-+ * have to be passed down to the ->kill_hook() of extent item, and
-+ * 2. said neighbors have to be locked. */
-+ lock_handle *left;
-+ lock_handle *right;
-+ /* flags modifying behavior of kill. Currently, it may have DELETE_RETAIN_EMPTY set. */
-+ unsigned flags;
-+ char *buf;
-+};
-+
-+/* &carry_op - operation to "carry" upward.
-+
-+ Description of an operation we want to "carry" to the upper level of
-+ a tree: e.g, when we insert something and there is not enough space
-+ we allocate a new node and "carry" the operation of inserting a
-+ pointer to the new node to the upper level, on removal of empty node,
-+ we carry up operation of removing appropriate entry from parent.
-+
-+ There are two types of carry ops: when adding or deleting a node, the
-+ node at the parent level where appropriate modification has to be
-+ performed is known in advance. When shifting items between nodes
-+ (split, merge), delimiting key should be changed in the least common
-+ parent of the nodes involved, which is not known in advance.
-+
-+ For the operations of the first type we store in &carry_op pointer to
-+ the &carry_node at the parent level. For the operation of the second
-+ type we store &carry_node of parents of the left and right nodes
-+ modified and keep track of them upward until they coincide.
-+
-+*/
-+typedef struct carry_op {
-+ /* pool linkage */
-+ struct reiser4_pool_header header;
-+ carry_opcode op;
-+ /* node on which operation is to be performed:
-+
-+ for insert, paste: node where new item is to be inserted
-+
-+ for delete: node where pointer is to be deleted
-+
-+ for cut: node to cut from
-+
-+ for update: node where delimiting key is to be modified
-+
-+ for modify: parent of modified node
-+
-+ */
-+ carry_node *node;
-+ union {
-+ struct {
-+ /* (sub-)type of insertion/paste. Taken from
-+ cop_insert_pos_type. */
-+ __u8 type;
-+ /* various operation flags. Taken from
-+ cop_insert_flag. */
-+ __u8 flags;
-+ carry_insert_data *d;
-+ carry_node *child;
-+ znode *brother;
-+ } insert, paste, extent;
-+
-+ struct {
-+ int is_cut;
-+ union {
-+ carry_kill_data *kill;
-+ carry_cut_data *cut;
-+ } u;
-+ } cut_or_kill;
-+
-+ struct {
-+ carry_node *left;
-+ } update;
-+ struct {
-+ /* changed child */
-+ carry_node *child;
-+ /* bitmask of changes. See &cop_modify_flag */
-+ __u32 flag;
-+ } modify;
-+ struct {
-+ /* flags to deletion operation. Are taken from
-+ cop_delete_flag */
-+ __u32 flags;
-+ /* child to delete from parent. If this is
-+ NULL, delete op->node. */
-+ carry_node *child;
-+ } delete;
-+ struct {
-+ /* various operation flags. Taken from
-+ cop_insert_flag. */
-+ __u32 flags;
-+ flow_t *flow;
-+ coord_t *insert_point;
-+ reiser4_item_data *data;
-+ /* flow insertion is limited by number of new blocks
-+ added in that operation which do not get any data
-+ but part of flow. This limit is set by macro
-+ CARRY_FLOW_NEW_NODES_LIMIT. This field stores number
-+ of nodes added already during one carry_flow */
-+ int new_nodes;
-+ } insert_flow;
-+ } u;
-+} carry_op;
-+
-+/* &carry_pool - preallocated pool of carry operations, and nodes */
-+typedef struct carry_pool {
-+ carry_op op[CARRIES_POOL_SIZE];
-+ struct reiser4_pool op_pool;
-+ carry_node node[NODES_LOCKED_POOL_SIZE];
-+ struct reiser4_pool node_pool;
-+} carry_pool;
-+
-+/* &carry_level - carry process on given level
-+
-+ Description of balancing process on the given level.
-+
-+ No need for locking here, as carry_level is essentially per
-+ thread thing (for now).
-+
-+*/
-+struct carry_level {
-+ /* this level may be restarted */
-+ __u32 restartable:1;
-+ /* list of carry nodes on this level, ordered by key order */
-+ struct list_head nodes;
-+ struct list_head ops; /* list of carry operations on this level */
-+ /* pool where new objects are allocated from */
-+ carry_pool *pool;
-+ int ops_num;
-+ int nodes_num;
-+ /* new root created on this level, if any */
-+ znode *new_root;
-+ /* This is set by caller (insert_by_key(), reiser4_resize_item(), etc.)
-+ when they want ->tracked to automagically wander to the node where
-+ insertion point moved after insert or paste.
-+ */
-+ carry_track_type track_type;
-+ /* lock handle supplied by user that we are tracking. See
-+ above. */
-+ lock_handle *tracked;
-+};
-+
-+/* information carry passes to plugin methods that may add new operations to
-+ the @todo queue */
-+struct carry_plugin_info {
-+ carry_level *doing;
-+ carry_level *todo;
-+};
-+
-+int reiser4_carry(carry_level * doing, carry_level * done);
-+
-+carry_node *reiser4_add_carry(carry_level * level, pool_ordering order,
-+ carry_node * reference);
-+carry_node *reiser4_add_carry_skip(carry_level * level, pool_ordering order,
-+ carry_node * reference);
-+
-+extern carry_node *insert_carry_node(carry_level * doing,
-+ carry_level * todo, const znode * node);
-+
-+extern carry_pool *init_carry_pool(int);
-+extern void done_carry_pool(carry_pool * pool);
-+
-+extern void init_carry_level(carry_level * level, carry_pool * pool);
-+
-+extern carry_op *reiser4_post_carry(carry_level * level, carry_opcode op,
-+ znode * node, int apply_to_parent);
-+extern carry_op *node_post_carry(carry_plugin_info * info, carry_opcode op,
-+ znode * node, int apply_to_parent_p);
-+
-+carry_node *add_new_znode(znode * brother, carry_node * reference,
-+ carry_level * doing, carry_level * todo);
-+
-+carry_node *find_carry_node(carry_level * level, const znode * node);
-+
-+extern znode *reiser4_carry_real(const carry_node * node);
-+
-+/* helper macros to iterate over carry queues */
-+
-+#define carry_node_next( node ) \
-+ list_entry((node)->header.level_linkage.next, carry_node, \
-+ header.level_linkage)
-+
-+#define carry_node_prev( node ) \
-+ list_entry((node)->header.level_linkage.prev, carry_node, \
-+ header.level_linkage)
-+
-+#define carry_node_front( level ) \
-+ list_entry((level)->nodes.next, carry_node, header.level_linkage)
-+
-+#define carry_node_back( level ) \
-+ list_entry((level)->nodes.prev, carry_node, header.level_linkage)
-+
-+#define carry_node_end( level, node ) \
-+ (&(level)->nodes == &(node)->header.level_linkage)
-+
-+/* macro to iterate over all operations in a @level; arguments are parenthesized */
-+#define for_all_ops( level /* carry level (of type carry_level *) */, \
-+ op /* pointer to carry operation, modified by loop (of \
-+ * type carry_op *) */, \
-+ tmp /* pointer to carry operation (of type carry_op *), \
-+ * used to make iterator stable in the face of \
-+ * deletions from the level */ ) \
-+for (op = list_entry((level)->ops.next, carry_op, header.level_linkage), \
-+ tmp = list_entry((op)->header.level_linkage.next, carry_op, header.level_linkage); \
-+ &(op)->header.level_linkage != &(level)->ops; \
-+ op = tmp, \
-+ tmp = list_entry((op)->header.level_linkage.next, carry_op, header.level_linkage))
-+
-+#if 0
-+for( op = ( carry_op * ) pool_level_list_front( &level -> ops ), \
-+ tmp = ( carry_op * ) pool_level_list_next( &op -> header ) ; \
-+ ! pool_level_list_end( &level -> ops, &op -> header ) ; \
-+ op = tmp, tmp = ( carry_op * ) pool_level_list_next( &op -> header ) )
-+#endif
-+
-+/* macro to iterate over all nodes in a @level; arguments are parenthesized */
-+#define for_all_nodes( level /* carry level (of type carry_level *) */, \
-+ node /* pointer to carry node, modified by loop (of \
-+ * type carry_node *) */, \
-+ tmp /* pointer to carry node (of type carry_node *), \
-+ * used to make iterator stable in the face of \
-+ * deletions from the level */ ) \
-+for (node = list_entry((level)->nodes.next, carry_node, header.level_linkage), \
-+ tmp = list_entry((node)->header.level_linkage.next, carry_node, header.level_linkage); \
-+ &(node)->header.level_linkage != &(level)->nodes; \
-+ node = tmp, \
-+ tmp = list_entry((node)->header.level_linkage.next, carry_node, header.level_linkage))
-+
-+#if 0
-+for( node = carry_node_front( level ), \
-+ tmp = carry_node_next( node ) ; ! carry_node_end( level, node ) ; \
-+ node = tmp, tmp = carry_node_next( node ) )
-+#endif
-+
-+/* macro to iterate over all nodes in a @level in reverse order
-+
-+ This is used, because nodes are unlocked in reversed order of locking */
-+#define for_all_nodes_back( level /* carry level (of type carry_level *) */, \
-+ node /* pointer to carry node, modified by loop \
-+ * (of type carry_node *) */, \
-+ tmp /* pointer to carry node (of type carry_node \
-+ * *), used to make iterator stable in the \
-+ * face of deletions from the level */ ) \
-+for( node = carry_node_back( level ), \
-+ tmp = carry_node_prev( node ) ; ! carry_node_end( level, node ) ; \
-+ node = tmp, tmp = carry_node_prev( node ) )
-+
-+/* __FS_REISER4_CARRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/carry_ops.c linux-2.6.24/fs/reiser4/carry_ops.c
---- linux-2.6.24.orig/fs/reiser4/carry_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/carry_ops.c 2008-01-25 11:39:06.900198415 +0300
-@@ -0,0 +1,2131 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* implementation of carry operations */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "pool.h"
-+#include "tree_mod.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "tree.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h>
-+#include <linux/err.h>
-+
-+static int carry_shift_data(sideof side, coord_t * insert_coord, znode * node,
-+ carry_level * doing, carry_level * todo,
-+ unsigned int including_insert_coord_p);
-+
-+extern int lock_carry_node(carry_level * level, carry_node * node);
-+extern int lock_carry_node_tail(carry_node * node);
-+
-+/* find left neighbor of a carry node
-+
-+ Look for left neighbor of @node and add it to the @doing queue. See
-+ comments in the body.
-+
-+*/
-+static carry_node *find_left_neighbor(carry_op * op /* operation on whose node
-+ * left neighbor is sought */ ,
-+ carry_level * doing /* level to scan */ )
-+{
-+ int result;
-+ carry_node *node;
-+ carry_node *left;
-+ int flags;
-+ reiser4_tree *tree;
-+
-+ node = op->node;
-+
-+ tree = current_tree;
-+ read_lock_tree(tree);
-+ /* first, check whether left neighbor is already in a @doing queue */
-+ if (reiser4_carry_real(node)->left != NULL) {
-+ /* NOTE: there is locking subtlety here. Look into
-+ * find_right_neighbor() for more info */
-+ if (find_carry_node(doing,
-+ reiser4_carry_real(node)->left) != NULL) {
-+ read_unlock_tree(tree);
-+ left = node;
-+ do {
-+ left = list_entry(left->header.level_linkage.prev,
-+ carry_node, header.level_linkage);
-+ assert("nikita-3408", !carry_node_end(doing,
-+ left));
-+ } while (reiser4_carry_real(left) ==
-+ reiser4_carry_real(node));
-+ return left;
-+ }
-+ }
-+ read_unlock_tree(tree);
-+
-+ left = reiser4_add_carry_skip(doing, POOLO_BEFORE, node);
-+ if (IS_ERR(left))
-+ return left;
-+
-+ left->node = node->node;
-+ left->free = 1;
-+
-+ flags = GN_TRY_LOCK;
-+ if (!(op->u.insert.flags & COPI_LOAD_LEFT)) /* '!' binds tighter than '&' */
-+ flags |= GN_NO_ALLOC;
-+
-+ /* then, feeling lucky, peek left neighbor in the cache. */
-+ result = reiser4_get_left_neighbor(&left->lock_handle,
-+ reiser4_carry_real(node),
-+ ZNODE_WRITE_LOCK, flags);
-+ if (result == 0) {
-+ /* ok, node found and locked. */
-+ result = lock_carry_node_tail(left);
-+ if (result != 0)
-+ left = ERR_PTR(result);
-+ } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) {
-+ /* node is leftmost node in a tree, or neighbor wasn't in
-+ cache, or there is an extent on the left. */
-+ reiser4_pool_free(&doing->pool->node_pool, &left->header);
-+ left = NULL;
-+ } else if (doing->restartable) {
-+ /* if left neighbor is locked, and level is restartable, add
-+ new node to @doing and restart. */
-+ assert("nikita-913", node->parent != 0);
-+ assert("nikita-914", node->node != NULL);
-+ left->left = 1;
-+ left->free = 0;
-+ left = ERR_PTR(-E_REPEAT);
-+ } else {
-+ /* left neighbor is locked, level cannot be restarted. Just
-+ ignore left neighbor. */
-+ reiser4_pool_free(&doing->pool->node_pool, &left->header);
-+ left = NULL;
-+ }
-+ return left;
-+}
-+
-+/* find right neighbor of a carry node
-+
-+ Look for right neighbor of @node and add it to the @doing queue. See
-+ comments in the body.
-+
-+*/
-+static carry_node *find_right_neighbor(carry_op * op /* operation on whose node
-+ * right neighbor is sought */ ,
-+ carry_level * doing /* level to scan */ )
-+{
-+ int result;
-+ carry_node *node;
-+ carry_node *right;
-+ lock_handle lh;
-+ int flags;
-+ reiser4_tree *tree;
-+
-+ init_lh(&lh);
-+
-+ node = op->node;
-+
-+ tree = current_tree;
-+ read_lock_tree(tree);
-+ /* first, check whether right neighbor is already in a @doing queue */
-+ if (reiser4_carry_real(node)->right != NULL) {
-+ /*
-+ * Tree lock is taken here anyway, because, even if _outcome_
-+ * of (find_carry_node() != NULL) doesn't depend on
-+ * concurrent updates to ->right, find_carry_node() cannot
-+ * work with second argument NULL. Hence, following comment is
-+ * of historic importance only.
-+ *
-+ * Subtle:
-+ *
-+ * Q: why don't we need tree lock here, looking for the right
-+ * neighbor?
-+ *
-+ * A: even if value of node->real_node->right were changed
-+ * during find_carry_node() execution, outcome of execution
-+ * wouldn't change, because (in short) other thread cannot add
-+ * elements to the @doing, and if node->real_node->right
-+ * already was in @doing, value of node->real_node->right
-+ * couldn't change, because node cannot be inserted between
-+ * locked neighbors.
-+ */
-+ if (find_carry_node(doing,
-+ reiser4_carry_real(node)->right) != NULL) {
-+ read_unlock_tree(tree);
-+ /*
-+ * What we are doing here (this is also applicable to
-+ * the find_left_neighbor()).
-+ *
-+ * tree_walk.c code requires that insertion of a
-+ * pointer to a child, modification of parent pointer
-+ * in the child, and insertion of the child into
-+ * sibling list are atomic (see
-+ * plugin/item/internal.c:create_hook_internal()).
-+ *
-+ * carry allocates new node long before pointer to it
-+ * is inserted into parent and, actually, long before
-+ * parent is even known. Such allocated-but-orphaned
-+ * nodes are only trackable through carry level lists.
-+ *
-+ * Situation that is handled here is following: @node
-+ * has valid ->right pointer, but there is
-+ * allocated-but-orphaned node in the carry queue that
-+ * is logically between @node and @node->right. Here
-+ * we are searching for it. Critical point is that
-+ * this is only possible if @node->right is also in
-+ * the carry queue (this is checked above), because
-+ * this is the only way new orphaned node could be
-+ * inserted between them (before inserting new node,
-+ * make_space() first tries to shift to the right, so,
-+ * right neighbor will be locked and queued).
-+ *
-+ */
-+ right = node;
-+ do {
-+ right = list_entry(right->header.level_linkage.next,
-+ carry_node, header.level_linkage);
-+ assert("nikita-3408", !carry_node_end(doing,
-+ right));
-+ } while (reiser4_carry_real(right) ==
-+ reiser4_carry_real(node));
-+ return right;
-+ }
-+ }
-+ read_unlock_tree(tree);
-+
-+ flags = GN_CAN_USE_UPPER_LEVELS;
-+ if (!(op->u.insert.flags & COPI_LOAD_RIGHT)) /* '!' binds tighter than '&' */
-+ flags = GN_NO_ALLOC;
-+
-+ /* then, try to lock right neighbor */
-+ init_lh(&lh);
-+ result = reiser4_get_right_neighbor(&lh,
-+ reiser4_carry_real(node),
-+ ZNODE_WRITE_LOCK, flags);
-+ if (result == 0) {
-+ /* ok, node found and locked. */
-+ right = reiser4_add_carry_skip(doing, POOLO_AFTER, node);
-+ if (!IS_ERR(right)) {
-+ right->node = lh.node;
-+ move_lh(&right->lock_handle, &lh);
-+ right->free = 1;
-+ result = lock_carry_node_tail(right);
-+ if (result != 0)
-+ right = ERR_PTR(result);
-+ }
-+ } else if ((result == -E_NO_NEIGHBOR) || (result == -ENOENT)) {
-+ /* node is rightmost node in a tree, or neighbor wasn't in
-+ cache, or there is an extent on the right. */
-+ right = NULL;
-+ } else
-+ right = ERR_PTR(result);
-+ done_lh(&lh);
-+ return right;
-+}
-+
-+/* how much free space in a @node is needed for @op
-+
-+ How much space in @node is required for completion of @op, where @op is
-+ insert or paste operation.
-+*/
-+static unsigned int space_needed_for_op(znode * node /* znode data are
-+ * inserted or
-+ * pasted in */ ,
-+ carry_op * op /* carry
-+ operation */ )
-+{
-+ assert("nikita-919", op != NULL);
-+
-+ switch (op->op) {
-+ default:
-+ impossible("nikita-1701", "Wrong opcode"); /* no break: if this returns, control continues into COP_INSERT */
-+ case COP_INSERT:
-+ return space_needed(node, NULL, op->u.insert.d->data, 1);
-+ case COP_PASTE:
-+ return space_needed(node, op->u.insert.d->coord,
-+ op->u.insert.d->data, 0);
-+ }
-+}
-+
-+/* Space required in @node to insert or paste @data at @coord.
-+
-+   @node: node the data go into.
-+   @coord: position of the paste; must be NULL when @insertion is non-zero.
-+   @data: description of the item data being inserted or pasted.
-+   @insertion: non-zero for insertion of a new item, 0 for paste.
-+
-+   The item plugin's estimate() method is used when it is provided, otherwise
-+   the raw data length is taken. For insertion the per-item overhead of the
-+   node plugin is added when the node plugin provides item_overhead().
-+*/
-+unsigned int space_needed(const znode * node, const coord_t * coord,
-+			  const reiser4_item_data * data, int insertion)
-+{
-+	int total;
-+	item_plugin *item;
-+	node_plugin *layout;
-+
-+	assert("nikita-917", node != NULL);
-+	assert("nikita-918", node_plugin_by_node(node) != NULL);
-+	assert("vs-230", !insertion || (coord == NULL));
-+
-+	item = data->iplug;
-+	if (item->b.estimate == NULL)
-+		/* no estimator: fall back to the length of the data */
-+		total = data->length;
-+	else
-+		/* let the item plugin say how much space it wants */
-+		total = item->b.estimate(insertion ? NULL : coord, data);
-+
-+	if (!insertion)
-+		return total;
-+
-+	/* a new item also costs whatever the node layout charges per item */
-+	layout = node->nplug;
-+	if (layout->item_overhead != NULL)
-+		total += layout->item_overhead(node, NULL);
-+	return total;
-+}
-+
-+/* Look up the place in the parent where the pointer to a new child belongs.
-+
-+   @op: COP_INSERT carry operation that inserts the pointer to the new child.
-+
-+   find_new_child_ptr() is handed @op->u.insert.d->coord, and
-+   build_child_ptr_data() is handed @op->u.insert.d->data for the child.
-+   Returns whatever find_new_child_ptr() returned.
-+*/
-+static int find_new_child_coord(carry_op * op)
-+{
-+	carry_insert_data *d;
-+	znode *parent;
-+	znode *new_child;
-+	int lookup;
-+
-+	assert("nikita-941", op != NULL);
-+	assert("nikita-942", op->op == COP_INSERT);
-+
-+	parent = reiser4_carry_real(op->node);
-+	assert("nikita-943", parent != NULL);
-+	assert("nikita-944", node_plugin_by_node(parent) != NULL);
-+
-+	d = op->u.insert.d;
-+	new_child = reiser4_carry_real(op->u.insert.child);
-+	lookup = find_new_child_ptr(parent, new_child, op->u.insert.brother,
-+				    d->coord);
-+
-+	build_child_ptr_data(new_child, d->data);
-+	return lookup;
-+}
-+
-+/* How much more free space @node needs before @op can be completed.
-+
-+   @node: node to check.
-+   @op: operation being performed.
-+
-+   A positive result means space is missing, zero or negative means @op fits.
-+   For COP_EXTENT no byte count is computed: +1 is returned while the
-+   insertion coord is inside the node (so data keep being shifted around) and
-+   -1 otherwise. An unexpected opcode is reported through impossible() and
-+   then handled like insert/paste.
-+*/
-+static int free_space_shortage(znode * node, carry_op * op)
-+{
-+	assert("nikita-1061", node != NULL);
-+	assert("nikita-1062", op != NULL);
-+
-+	if (op->op == COP_EXTENT) {
-+		/* extent goes in only once the insertion point is utmost in
-+		   the node */
-+		if (coord_wrt(op->u.insert.d->coord) == COORD_INSIDE)
-+			return +1;
-+		return -1;
-+	}
-+
-+	if (op->op != COP_INSERT && op->op != COP_PASTE) {
-+		impossible("nikita-1702", "Wrong opcode");
-+	}
-+	return space_needed_for_op(node, op) - znode_free_space(node);
-+}
-+
-+/* Re-synchronize @op with its insertion coord after a shift.
-+
-+   @op: carry operation whose insertion coord may have been moved.
-+   @target: carry node of the neighbor the data were shifted into.
-+
-+   If the coord no longer points into the node of @op->node, @op->node is
-+   switched to @target. Returns the znode the insertion coord points into.
-+*/
-+static znode *sync_op(carry_op * op, carry_node * target)
-+{
-+	/* the shift may have carried the insertion coord over to the
-+	   neighbor, so take the node from the coord */
-+	znode *landed = op->u.insert.d->coord->node;
-+
-+	if (reiser4_carry_real(op->node) != landed) {
-+		/* insertion point left the old node: follow it */
-+		op->node = target;
-+		assert("nikita-2540",
-+		       reiser4_carry_real(target) == landed);
-+	}
-+	assert("nikita-2541",
-+	       reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
-+	return landed;
-+}
-+
-+/*
-+ * Final step of make_space(): re-point the tracked lock handle when the
-+ * tracking mode of @doing asks for it. See comments for
-+ * fs/reiser4/carry.h:carry_track_type
-+ *
-+ * @op: operation whose insertion coord names the node to lock.
-+ * @doing: current carry level; supplies track_type and the tracked handle.
-+ * @orig_node: node the insertion coord pointed into before space was made.
-+ *
-+ * Returns 0 when nothing has to be tracked, otherwise the result of
-+ * longterm_lock_znode().
-+ */
-+static int
-+make_space_tail(carry_op * op, carry_level * doing, znode * orig_node)
-+{
-+	carry_track_type mode;
-+	znode *current_node;
-+
-+	mode = doing->track_type;
-+	current_node = op->u.insert.d->coord->node;
-+
-+	/* nothing to do unless the node is always tracked, or changes are
-+	   tracked and the insertion point really moved to another node */
-+	if (mode != CARRY_TRACK_NODE &&
-+	    (mode != CARRY_TRACK_CHANGE || current_node == orig_node))
-+		return 0;
-+
-+	/* drop the lock handle supplied by the caller and take it again on
-+	   the node we are actually inserting or pasting into */
-+	assert("nikita-1417", doing->tracked != NULL);
-+	done_lh(doing->tracked);
-+	init_lh(doing->tracked);
-+	return longterm_lock_znode(doing->tracked, current_node,
-+				   ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
-+}
-+
-+/* This is insertion policy function. It shifts data to the left and right
-+ neighbors of insertion coord and allocates new nodes until there is enough
-+ free space to complete @op.
-+
-+ See comments in the body.
-+
-+ Assumes that the node format favors insertions at the right end of the node
-+ as node40 does.
-+
-+ See carry_flow() on detail about flow insertion
-+
-+ Steps, in order: (1) return early if @op already fits, (2) shift to the left
-+ neighbor unless COPI_DONT_SHIFT_LEFT, (3) shift to the right neighbor unless
-+ COPI_DONT_SHIFT_RIGHT, (4) allocate up to two new nodes unless
-+ COPI_DONT_ALLOCATE, (5) finish through make_space_tail().
-+
-+ Returns the result of make_space_tail() on success, -E_REPEAT when the left
-+ neighbor lookup asks for a restart, -E_NODE_FULL when space is still missing
-+ after all attempts, or the error of a failed node allocation, lock or shift.
-+*/
-+static int make_space(carry_op * op /* carry operation, insert or paste */ ,
-+ carry_level * doing /* current carry queue */ ,
-+ carry_level * todo /* carry queue on the parent level */ )
-+{
-+ znode *node;
-+ int result;
-+ int not_enough_space;
-+ int blk_alloc;
-+ znode *orig_node;
-+ __u32 flags;
-+
-+ coord_t *coord;
-+
-+ assert("nikita-890", op != NULL);
-+ assert("nikita-891", todo != NULL);
-+ assert("nikita-892",
-+ op->op == COP_INSERT ||
-+ op->op == COP_PASTE || op->op == COP_EXTENT);
-+ assert("nikita-1607",
-+ reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
-+
-+ flags = op->u.insert.flags;
-+
-+ /* NOTE check that new node can only be allocated after checking left
-+ * and right neighbors. This is necessary for proper work of
-+ * find_{left,right}_neighbor(). */
-+ assert("nikita-3410", ergo(flags & COPI_DONT_ALLOCATE,
-+ flags & COPI_DONT_SHIFT_LEFT));
-+ assert("nikita-3411", ergo(flags & COPI_DONT_ALLOCATE,
-+ flags & COPI_DONT_SHIFT_RIGHT));
-+
-+ coord = op->u.insert.d->coord;
-+ /* @orig_node is remembered for make_space_tail(), @node follows the
-+ insertion point as it moves */
-+ orig_node = node = coord->node;
-+
-+ assert("nikita-908", node != NULL);
-+ assert("nikita-909", node_plugin_by_node(node) != NULL);
-+
-+ result = 0;
-+ /* If there is not enough space in a node, try to shift something to
-+ the left neighbor. This is a bit tricky, as locking to the left is
-+ low priority. This is handled by restart logic in carry().
-+ */
-+ not_enough_space = free_space_shortage(node, op);
-+ if (not_enough_space <= 0)
-+ /* it is possible that carry was called when there actually
-+ was enough space in the node. For example, when inserting
-+ leftmost item so that delimiting keys have to be updated.
-+ */
-+ return make_space_tail(op, doing, orig_node);
-+ if (!(flags & COPI_DONT_SHIFT_LEFT)) {
-+ carry_node *left;
-+ /* make note in statistics of an attempt to move
-+ something into the left neighbor */
-+ left = find_left_neighbor(op, doing);
-+ if (unlikely(IS_ERR(left))) {
-+ if (PTR_ERR(left) == -E_REPEAT)
-+ return -E_REPEAT;
-+ else {
-+ /* some error other than restart request
-+ occurred. This shouldn't happen. Issue a
-+ warning and continue as if left neighbor
-+ weren't existing.
-+ */
-+ warning("nikita-924",
-+ "Error accessing left neighbor: %li",
-+ PTR_ERR(left));
-+ }
-+ } else if (left != NULL) {
-+
-+ /* shift everything possible on the left of and
-+ including insertion coord into the left neighbor */
-+ /* NOTE(review): this result is overwritten by the right
-+ shift below when that one runs */
-+ result = carry_shift_data(LEFT_SIDE, coord,
-+ reiser4_carry_real(left),
-+ doing, todo,
-+ flags & COPI_GO_LEFT);
-+
-+ /* reget node from coord: shift_left() might move
-+ insertion coord to the left neighbor */
-+ node = sync_op(op, left);
-+
-+ not_enough_space = free_space_shortage(node, op);
-+ /* There is not enough free space in @node, but
-+ may be, there is enough free space in
-+ @left. Various balancing decisions are valid here.
-+ The same for the shifting to the right.
-+ */
-+ }
-+ }
-+ /* If there still is not enough space, shift to the right */
-+ if (not_enough_space > 0 && !(flags & COPI_DONT_SHIFT_RIGHT)) {
-+ carry_node *right;
-+
-+ right = find_right_neighbor(op, doing);
-+ if (IS_ERR(right)) {
-+ /* unlike the left side, any error here is only reported
-+ and the right neighbor is treated as absent */
-+ warning("nikita-1065",
-+ "Error accessing right neighbor: %li",
-+ PTR_ERR(right));
-+ } else if (right != NULL) {
-+ /* node containing insertion point, and its right
-+ neighbor node are write locked by now.
-+
-+ shift everything possible on the right of but
-+ excluding insertion coord into the right neighbor
-+ */
-+ result = carry_shift_data(RIGHT_SIDE, coord,
-+ reiser4_carry_real(right),
-+ doing, todo,
-+ flags & COPI_GO_RIGHT);
-+ /* reget node from coord: shift_right() might move
-+ insertion coord to the right neighbor */
-+ node = sync_op(op, right);
-+ not_enough_space = free_space_shortage(node, op);
-+ }
-+ }
-+ /* If there is still not enough space, allocate new node(s).
-+
-+ We try to allocate new blocks if COPI_DONT_ALLOCATE is not set in
-+ the carry operation flags (currently this is needed during flush
-+ only).
-+ */
-+ /* at most two nodes are allocated, and only while the previous step
-+ succeeded (result == 0) */
-+ for (blk_alloc = 0;
-+ not_enough_space > 0 && result == 0 && blk_alloc < 2 &&
-+ !(flags & COPI_DONT_ALLOCATE); ++blk_alloc) {
-+ carry_node *fresh; /* new node we are allocating */
-+ coord_t coord_shadow; /* remembered insertion point before
-+ * shifting data into new node */
-+ carry_node *node_shadow; /* remembered insertion node before
-+ * shifting */
-+ unsigned int gointo; /* whether insertion point should move
-+ * into newly allocated node */
-+
-+ /* allocate new node on the right of @node. Znode and disk
-+ fake block number for new node are allocated.
-+
-+ add_new_znode() posts carry operation COP_INSERT with
-+ COPT_CHILD option to the parent level to add
-+ pointer to newly created node to its parent.
-+
-+ Subtle point: if several new nodes are required to complete
-+ insertion operation at this level, they will be inserted
-+ into their parents in the order of creation, which means
-+ that @node will be valid "cookie" at the time of insertion.
-+
-+ */
-+ fresh = add_new_znode(node, op->node, doing, todo);
-+ if (IS_ERR(fresh))
-+ return PTR_ERR(fresh);
-+
-+ /* Try to shift into new node. */
-+ result = lock_carry_node(doing, fresh);
-+ /* zput() is done whether or not the lock succeeded */
-+ zput(reiser4_carry_real(fresh));
-+ if (result != 0) {
-+ warning("nikita-947",
-+ "Cannot lock new node: %i", result);
-+ return result;
-+ }
-+
-+ /* both nodes are write locked by now.
-+
-+ shift everything possible on the right of and
-+ including insertion coord into the right neighbor.
-+ */
-+ coord_dup(&coord_shadow, op->u.insert.d->coord);
-+ node_shadow = op->node;
-+ /* move insertion point into newly created node if:
-+
-+ . insertion point is rightmost in the source node, or
-+ . this is not the first node we are allocating in a row.
-+ */
-+ gointo =
-+ (blk_alloc > 0) ||
-+ coord_is_after_rightmost(op->u.insert.d->coord);
-+
-+ if (gointo &&
-+ op->op == COP_PASTE &&
-+ coord_is_existing_item(op->u.insert.d->coord) &&
-+ is_solid_item((item_plugin_by_coord(op->u.insert.d->coord)))) {
-+ /* paste into solid (atomic) item, which can contain
-+ only one unit, so we need to shift it right, where
-+ insertion point supposed to be */
-+
-+ assert("edward-1444", op->u.insert.d->data->iplug ==
-+ item_plugin_by_id(STATIC_STAT_DATA_ID));
-+ assert("edward-1445",
-+ op->u.insert.d->data->length >
-+ node_plugin_by_node(coord->node)->free_space
-+ (coord->node));
-+
-+ op->u.insert.d->coord->between = BEFORE_UNIT;
-+ }
-+
-+ result = carry_shift_data(RIGHT_SIDE, coord,
-+ reiser4_carry_real(fresh),
-+ doing, todo, gointo);
-+ /* if insertion point was actually moved into new node,
-+ update carry node pointer in operation. */
-+ node = sync_op(op, fresh);
-+ not_enough_space = free_space_shortage(node, op);
-+ if ((not_enough_space > 0) && (node != coord_shadow.node)) {
-+ /* there is not enough free in new node. Shift
-+ insertion point back to the @shadow_node so that
-+ next new node would be inserted between
-+ @shadow_node and @fresh.
-+ */
-+ coord_normalize(&coord_shadow);
-+ coord_dup(coord, &coord_shadow);
-+ node = coord->node;
-+ op->node = node_shadow;
-+ /* the "1 ||" makes this condition always true, so the
-+ COPI_STEP_BACK flag is not actually consulted */
-+ if (1 || (flags & COPI_STEP_BACK)) {
-+ /* still not enough space?! Maybe there is
-+ enough space in the source node (i.e., node
-+ data are moved from) now.
-+ */
-+ not_enough_space =
-+ free_space_shortage(node, op);
-+ }
-+ }
-+ }
-+ if (not_enough_space > 0) {
-+ /* the warning is suppressed when allocation was forbidden by
-+ the caller; the error code is returned either way */
-+ if (!(flags & COPI_DONT_ALLOCATE))
-+ warning("nikita-948", "Cannot insert new item");
-+ result = -E_NODE_FULL;
-+ }
-+ assert("nikita-1622", ergo(result == 0,
-+ reiser4_carry_real(op->node) == coord->node));
-+ assert("nikita-2616", coord == op->u.insert.d->coord);
-+ if (result == 0)
-+ result = make_space_tail(op, doing, orig_node);
-+ return result;
-+}
-+
-+/* insert_paste_common() - common part of insert and paste operations
-+
-+ This function performs common part of COP_INSERT and COP_PASTE.
-+
-+ There are three ways in which insertion/paste can be requested:
-+
-+ . by directly supplying reiser4_item_data. In this case, op ->
-+ u.insert.type is set to COPT_ITEM_DATA.
-+
-+ . by supplying child pointer to which is to inserted into parent. In this
-+ case op -> u.insert.type == COPT_CHILD.
-+
-+ . by supplying key of new item/unit (op -> u.insert.type == COPT_KEY).
-+ This is currently only used during extent insertion
-+
-+ This is required, because when new node is allocated we don't know at what
-+ position pointer to it is to be stored in the parent. Actually, we don't
-+ even know what its parent will be, because parent can be re-balanced
-+ concurrently and new node re-parented, and because parent can be full and
-+ pointer to the new node will go into some other node.
-+
-+ insert_paste_common() resolves pointer to child node into position in the
-+ parent by calling find_new_child_coord(), that fills
-+ reiser4_item_data. After this, insertion/paste proceeds uniformly.
-+
-+ Another complication is with finding free space during pasting. It may
-+ happen that while shifting items to the neighbors and newly allocated
-+ nodes, insertion coord can no longer be in the item we wanted to paste
-+ into. At this point, paste becomes (morphs) into insert. Moreover free
-+ space analysis has to be repeated, because amount of space required for
-+ insertion is different from that of paste (item header overhead, etc).
-+
-+ This function "unifies" different insertion modes (by resolving child
-+ pointer or key into insertion coord), and then calls make_space() to free
-+ enough space in the node by shifting data to the left and right and by
-+ allocating new nodes if necessary. Carry operation knows amount of space
-+ required for its completion. After enough free space is obtained, caller of
-+ this function (carry_{insert,paste,etc.}) performs actual insertion/paste
-+ by calling item plugin method.
-+
-+ @cdata, @coord and @data are caller-provided storage: @coord is used by
-+ the COPT_KEY and COPT_CHILD modes, @cdata and @data by COPT_CHILD only.
-+
-+ Returns the result of make_space(), or an earlier lookup, allocation or
-+ locking result when one of the mode-specific steps bails out.
-+
-+*/
-+static int insert_paste_common(carry_op * op /* carry operation being
-+ * performed */ ,
-+ carry_level * doing /* current carry level */ ,
-+ carry_level * todo /* next carry level */ ,
-+ carry_insert_data * cdata /* pointer to
-+ * cdata */ ,
-+ coord_t * coord /* insertion/paste coord */ ,
-+ reiser4_item_data * data /* data to be
-+ * inserted/pasted */ )
-+{
-+ assert("nikita-981", op != NULL);
-+ assert("nikita-980", todo != NULL);
-+ assert("nikita-979", (op->op == COP_INSERT) || (op->op == COP_PASTE)
-+ || (op->op == COP_EXTENT));
-+
-+ if (op->u.insert.type == COPT_PASTE_RESTARTED) {
-+ /* nothing to do. Fall through to make_space(). */
-+ ;
-+ } else if (op->u.insert.type == COPT_KEY) {
-+ node_search_result intra_node;
-+ znode *node;
-+ /* Problem with doing batching at the lowest level, is that
-+ operations here are given by coords where modification is
-+ to be performed, and one modification can invalidate coords
-+ of all following operations.
-+
-+ So, we are implementing yet another type for operation that
-+ will use (the only) "locator" stable across shifting of
-+ data between nodes, etc.: key (COPT_KEY).
-+
-+ This clause resolves key to the coord in the node.
-+
-+ But node can change also. Probably some pieces have to be
-+ added to the lock_carry_node(), to lock node by its key.
-+
-+ */
-+ /* NOTE-NIKITA Lookup bias is fixed to FIND_EXACT. Complain
-+ if you need something else. */
-+ op->u.insert.d->coord = coord;
-+ node = reiser4_carry_real(op->node);
-+ intra_node = node_plugin_by_node(node)->lookup
-+ (node, op->u.insert.d->key, FIND_EXACT,
-+ op->u.insert.d->coord);
-+ /* both "found" and "not found" are acceptable lookup outcomes;
-+ anything else is reported and returned */
-+ if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) {
-+ warning("nikita-1715", "Intra node lookup failure: %i",
-+ intra_node);
-+ return intra_node;
-+ }
-+ } else if (op->u.insert.type == COPT_CHILD) {
-+ /* if we are asked to insert pointer to the child into
-+ internal node, first convert pointer to the child into
-+ coord within parent node.
-+ */
-+ znode *child;
-+ int result;
-+
-+ /* wire the caller-provided storage into the operation */
-+ op->u.insert.d = cdata;
-+ op->u.insert.d->coord = coord;
-+ op->u.insert.d->data = data;
-+ op->u.insert.d->coord->node = reiser4_carry_real(op->node);
-+ result = find_new_child_coord(op);
-+ child = reiser4_carry_real(op->u.insert.child);
-+ /* NS_NOT_FOUND is the only outcome that lets insertion proceed;
-+ any other result is reported and returned to the caller as
-+ is */
-+ if (result != NS_NOT_FOUND) {
-+ warning("nikita-993",
-+ "Cannot find a place for child pointer: %i",
-+ result);
-+ return result;
-+ }
-+ /* This only happens when we did multiple insertions at
-+ the previous level, trying to insert single item and
-+ it so happened, that insertion of pointers to all new
-+ nodes before this one already caused parent node to
-+ split (may be several times).
-+
-+ I am going to come up with better solution.
-+
-+ You are not expected to understand this.
-+ -- v6root/usr/sys/ken/slp.c
-+
-+ Basically, what happens here is the following: carry came
-+ to the parent level and is about to insert internal item
-+ pointing to the child node that it just inserted in the
-+ level below. Position where internal item is to be inserted
-+ was found by find_new_child_coord() above, but node of the
-+ current carry operation (that is, parent node of child
-+ inserted on the previous level), was determined earlier in
-+ the lock_carry_level/lock_carry_node. It could so happen
-+ that other carry operations already performed on the parent
-+ level already split parent node, so that insertion point
-+ moved into another node. Handle this by creating new carry
-+ node for insertion point if necessary.
-+ */
-+ if (reiser4_carry_real(op->node) !=
-+ op->u.insert.d->coord->node) {
-+ pool_ordering direction;
-+ znode *z1;
-+ znode *z2;
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ /*
-+ * determine in what direction insertion point
-+ * moved. Do this by comparing delimiting keys.
-+ */
-+ z1 = op->u.insert.d->coord->node;
-+ z2 = reiser4_carry_real(op->node);
-+ if (keyle(leftmost_key_in_node(z1, &k1),
-+ leftmost_key_in_node(z2, &k2)))
-+ /* insertion point moved to the left */
-+ direction = POOLO_BEFORE;
-+ else
-+ /* insertion point moved to the right */
-+ direction = POOLO_AFTER;
-+
-+ /* new carry node is placed next to the old one and bound
-+ to the node the insertion coord points into */
-+ op->node = reiser4_add_carry_skip(doing,
-+ direction, op->node);
-+ if (IS_ERR(op->node))
-+ return PTR_ERR(op->node);
-+ op->node->node = op->u.insert.d->coord->node;
-+ op->node->free = 1;
-+ result = lock_carry_node(doing, op->node);
-+ if (result != 0)
-+ return result;
-+ }
-+
-+ /*
-+ * set up key of an item being inserted: we are inserting
-+ * internal item and its key is (by the very definition of
-+ * search tree) is leftmost key in the child node.
-+ */
-+ write_lock_dk(znode_get_tree(child));
-+ op->u.insert.d->key = leftmost_key_in_node(child,
-+ znode_get_ld_key(child));
-+ write_unlock_dk(znode_get_tree(child));
-+ op->u.insert.d->data->arg = op->u.insert.brother;
-+ } else {
-+ /* remaining mode: item data were supplied directly, only the
-+ node of the coord has to be refreshed */
-+ assert("vs-243", op->u.insert.d->coord != NULL);
-+ op->u.insert.d->coord->node = reiser4_carry_real(op->node);
-+ }
-+
-+ /* find free space. */
-+ return make_space(op, doing, todo);
-+}
-+
-+/* Carry handler for COP_INSERT: insert a new item into a node.
-+
-+   The item to insert is described in one of two ways:
-+
-+   - by &tree_coord and &reiser4_item_data passed as part of @op. This is
-+   only applicable at the leaf/twig level.
-+
-+   - by a child node, pointer to which is to be inserted by this operation.
-+
-+   @op: operation to perform.
-+   @doing: queue of operations @op is part of.
-+   @todo: queue where new operations are accumulated.
-+
-+   Returns the error of insert_paste_common() if preparing the insertion
-+   failed, otherwise the result of the node plugin's create_item() method.
-+*/
-+static int carry_insert(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+	/* storage handed to insert_paste_common() */
-+	carry_insert_data cdata;
-+	reiser4_item_data data;
-+	coord_t coord;
-+
-+	carry_plugin_info info;
-+	carry_insert_data *d;
-+	znode *target;
-+	int result;
-+
-+	assert("nikita-1036", op != NULL);
-+	assert("nikita-1037", todo != NULL);
-+	assert("nikita-1038", op->op == COP_INSERT);
-+
-+	coord_init_zero(&coord);
-+
-+	/* shared insert/paste preparation, including making free space */
-+	result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
-+	if (result != 0)
-+		return result;
-+
-+	d = op->u.insert.d;
-+	target = d->coord->node;
-+	assert("nikita-1039", target != NULL);
-+	assert("nikita-1040", node_plugin_by_node(target) != NULL);
-+
-+	assert("nikita-949",
-+	       space_needed_for_op(target, op) <= znode_free_space(target));
-+
-+	/* let the node layout create the item */
-+	info.doing = doing;
-+	info.todo = todo;
-+	result = node_plugin_by_node(target)->create_item(d->coord, d->key,
-+							  d->data, &info);
-+	/* done regardless of what create_item() returned */
-+	doing->restartable = 0;
-+	znode_make_dirty(target);
-+
-+	return result;
-+}
-+
-+/*
-+ * Flow insertion code. COP_INSERT_FLOW is special tree operation that is
-+ * supplied with a "flow" (that is, a stream of data) and inserts it into tree
-+ * by slicing into multiple items.
-+ */
-+
-+/* Shorthand accessors for the insert_flow part of carry operation @op. */
-+/* coord where the next piece of the flow is to be placed */
-+#define flow_insert_point(op) ( ( op ) -> u.insert_flow.insert_point )
-+/* the flow itself; the code below uses its ->key, ->data and ->length */
-+#define flow_insert_flow(op) ( ( op ) -> u.insert_flow.flow )
-+/* item data describing the piece being inserted or pasted */
-+#define flow_insert_data(op) ( ( op ) -> u.insert_flow.data )
-+
-+/* Extra bytes the item plugin needs on top of the raw data length when the
-+   flow data of @op are inserted as a new item. Returns 0 when the item
-+   plugin has no estimate() method. */
-+static size_t item_data_overhead(carry_op * op)
-+{
-+	reiser4_item_data *piece = flow_insert_data(op);
-+
-+	if (piece->iplug->b.estimate == NULL)
-+		return 0;
-+	/* NULL coord: estimate insertion rather than paste */
-+	return (piece->iplug->b.estimate(NULL, piece) - piece->length);
-+}
-+
-+/* Overhead, in bytes, of starting a new item for the flow of @op at its
-+   insert point. Returns 0 when the node plugin has no item_overhead()
-+   method or when the flow can be pasted into an existing item.
-+
-+   FIXME-VS: this is called several times during one
-+   make_flow_for_insertion and it will always return the same result. Some
-+   optimization could be made by calculating this value once at the
-+   beginning and passing it around. That would reduce some flexibility in
-+   future changes
-+*/
-+static int can_paste(coord_t *, const reiser4_key *, const reiser4_item_data *);
-+static size_t flow_insertion_overhead(carry_op * op)
-+{
-+	znode *target = flow_insert_point(op)->node;
-+
-+	if (!target->nplug->item_overhead)
-+		return 0;
-+	if (can_paste(flow_insert_point(op), &flow_insert_flow(op)->key,
-+		      flow_insert_data(op)))
-+		/* appending to an existing item costs nothing extra */
-+		return 0;
-+	return target->nplug->item_overhead(target, NULL) +
-+	    item_data_overhead(op);
-+}
-+
-+/* Number of bytes of the flow of @op that fit into the insert point node:
-+   0 when the free space does not exceed the insertion overhead, otherwise
-+   the smaller of the remaining free space and the flow length. */
-+static int what_can_fit_into_node(carry_op * op)
-+{
-+	size_t cost;
-+	size_t room;
-+
-+	cost = flow_insertion_overhead(op);
-+	room = znode_free_space(flow_insert_point(op)->node);
-+	if (room <= cost)
-+		return 0;
-+	room -= cost;
-+	/* FIXME: flow->length is loff_t only to not get overflowed in case
-+	   of expanding truncate */
-+	if (room < flow_insert_flow(op)->length)
-+		return room;
-+	return (int)flow_insert_flow(op)->length;
-+}
-+
-+/* make_space_for_flow_insertion() has to know either whether the whole flow
-+   fits into a node or whether a minimal fraction of it does. This is the
-+   first check: non-zero when everything left in the flow of @op fits into
-+   the insert point node. */
-+static int enough_space_for_whole_flow(carry_op * op)
-+{
-+	unsigned int fits;
-+
-+	fits = (unsigned)what_can_fit_into_node(op);
-+	return fits == flow_insert_flow(op)->length;
-+}
-+
-+/* smallest number of flow bytes worth placing into a node */
-+#define MIN_FLOW_FRACTION 1
-+/* Non-zero when at least MIN_FLOW_FRACTION bytes of the flow of @op fit into
-+   the insert point node. The insert point must be after the rightmost item
-+   of its node. */
-+static int enough_space_for_min_flow_fraction(carry_op * op)
-+{
-+	int fits;
-+
-+	assert("vs-902", coord_is_after_rightmost(flow_insert_point(op)));
-+
-+	fits = what_can_fit_into_node(op);
-+	return fits >= MIN_FLOW_FRACTION;
-+}
-+
-+/* Try to make room for the flow of @op by shifting into the left neighbor.
-+
-+   Returns 0 only if the left neighbor was obtained, everything up to and
-+   including the insertion point was shifted into it, and the neighbor still
-+   has room for a minimal fraction of the flow; @op->node is then switched to
-+   the neighbor. Returns 1 in every other case. */
-+static int
-+make_space_by_shift_left(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+	carry_node *neighbor;
-+	znode *home;
-+
-+	neighbor = find_left_neighbor(op, doing);
-+	if (unlikely(IS_ERR(neighbor))) {
-+		warning("vs-899",
-+			"make_space_by_shift_left: "
-+			"error accessing left neighbor: %li",
-+			PTR_ERR(neighbor));
-+		return 1;
-+	}
-+	/* left neighbor either does not exist or is unformatted node */
-+	if (neighbor == NULL)
-+		return 1;
-+
-+	home = flow_insert_point(op)->node;
-+	/* move the head of @home, up to and including the insertion point,
-+	   into the left neighbor */
-+	carry_shift_data(LEFT_SIDE, flow_insert_point(op),
-+			 reiser4_carry_real(neighbor), doing, todo,
-+			 1 /* including insert point */);
-+	/* insertion point stayed where it was: nothing gained */
-+	if (flow_insert_point(op)->node != reiser4_carry_real(neighbor))
-+		return 1;
-+
-+	/* insertion point now sits after the last item of the neighbor */
-+	assert("vs-900", coord_is_after_rightmost(flow_insert_point(op)));
-+
-+	if (enough_space_for_min_flow_fraction(op)) {
-+		/* part of flow is to be written to the end of the neighbor */
-+		op->node = neighbor;
-+		return 0;
-+	}
-+
-+	/* not even a minimal portion of the flow fits: put the insertion
-+	   point back into @home, before its first item */
-+	coord_init_before_first_item(flow_insert_point(op), home);
-+	return 1;
-+}
-+
-+/* Try to make room for the flow of @op by shifting into the right neighbor.
-+
-+   @op: COP_INSERT_FLOW operation.
-+   @doing: current carry level.
-+   @todo: carry level where new operations are accumulated.
-+
-+   Returns 0 if, after shifting everything to the right of the insertion
-+   point into the right neighbor (when there is one), the insertion point is
-+   after the rightmost item and the node has room for a minimal fraction of
-+   the flow. Returns 1 otherwise, including when the right neighbor could not
-+   be accessed; the caller then falls back to allocating new nodes. */
-+static int
-+make_space_by_shift_right(carry_op * op, carry_level * doing,
-+			  carry_level * todo)
-+{
-+	carry_node *right;
-+
-+	right = find_right_neighbor(op, doing);
-+	if (unlikely(IS_ERR(right))) {
-+		/* the message names this function, matching what
-+		   make_space_by_shift_left() does for the left side */
-+		warning("nikita-1065", "make_space_by_shift_right: "
-+			"error accessing right neighbor: %li", PTR_ERR(right));
-+		return 1;
-+	}
-+	if (right) {
-+		/* shift everything possible on the right of but excluding
-+		   insertion coord into the right neighbor */
-+		carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
-+				 reiser4_carry_real(right), doing, todo,
-+				 0 /* not including insert point */);
-+	}
-+	/* else: right neighbor either does not exist or is unformatted
-+	   node; the check below still applies to the node as it is */
-+
-+	if (coord_is_after_rightmost(flow_insert_point(op)) &&
-+	    enough_space_for_min_flow_fraction(op)) {
-+		/* part of flow is to be written to the end of node */
-+		return 0;
-+	}
-+
-+	/* new node is to be added if insert point node did not get enough
-+	   space for whole flow */
-+	return 1;
-+}
-+
-+/* Make room for the flow of @op by allocating new node(s) to the right of the
-+   insert point node.
-+
-+   Returns 0 when the insertion coord is either at the end of its node with
-+   room for a fraction of the flow, or at the start of a freshly added node.
-+   Returns -E_NODE_FULL once CARRY_FLOW_NEW_NODES_LIMIT nodes were added for
-+   this operation, or the error of add_new_znode()/lock_carry_node(). */
-+static int
-+make_space_by_new_nodes(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+	znode *base;
-+	carry_node *fresh;
-+	int err;
-+
-+	base = flow_insert_point(op)->node;
-+
-+	if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
-+		return RETERR(-E_NODE_FULL);
-+
-+	/* first new node goes right after the insert point node */
-+	fresh = add_new_znode(base, op->node, doing, todo);
-+	if (unlikely(IS_ERR(fresh)))
-+		return PTR_ERR(fresh);
-+	err = lock_carry_node(doing, fresh);
-+	zput(reiser4_carry_real(fresh));
-+	if (unlikely(err))
-+		return err;
-+	op->u.insert_flow.new_nodes += 1;
-+
-+	if (!coord_is_after_rightmost(flow_insert_point(op))) {
-+		/* push everything behind the insertion point into the new
-+		   node, so that the insertion point becomes rightmost */
-+		carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
-+				 reiser4_carry_real(fresh), doing, todo,
-+				 0 /* not including insert point */);
-+		assert("vs-901",
-+		       coord_is_after_rightmost(flow_insert_point(op)));
-+
-+		if (enough_space_for_min_flow_fraction(op))
-+			return 0;
-+		if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
-+			return RETERR(-E_NODE_FULL);
-+
-+		/* still no room: add one more new node */
-+		fresh = add_new_znode(base, op->node, doing, todo);
-+		if (unlikely(IS_ERR(fresh)))
-+			return PTR_ERR(fresh);
-+		err = lock_carry_node(doing, fresh);
-+		zput(reiser4_carry_real(fresh));
-+		if (unlikely(err))
-+			return err;
-+		op->u.insert_flow.new_nodes += 1;
-+	}
-+
-+	/* continue inserting at the start of the most recently added node */
-+	coord_init_before_first_item(flow_insert_point(op),
-+				     reiser4_carry_real(fresh));
-+	op->node = fresh;
-+	return 0;
-+}
-+
-+/* Make room for the next piece of the flow of @op.
-+
-+   Tries, in order: the insert point node as it is, shifting to the left
-+   neighbor (unless COPI_DONT_SHIFT_LEFT), shifting to the right neighbor
-+   (unless COPI_DONT_SHIFT_RIGHT), and finally allocating new nodes. The
-+   "whole flow fits" check is repeated after every shift attempt.
-+
-+   Returns 0 when there is room for the whole flow or for a fraction of it,
-+   otherwise the result of make_space_by_new_nodes(). */
-+static int
-+make_space_for_flow_insertion(carry_op * op, carry_level * doing,
-+			      carry_level * todo)
-+{
-+	const __u32 flags = op->u.insert_flow.flags;
-+	const int try_left = !(flags & COPI_DONT_SHIFT_LEFT);
-+	const int try_right = !(flags & COPI_DONT_SHIFT_RIGHT);
-+
-+	/* whole flow fits into insert point node */
-+	if (enough_space_for_whole_flow(op))
-+		return 0;
-+
-+	/* on success the insert point is in the left neighbor of the
-+	   original node, after its last unit, with room for at least a
-+	   minimal fraction of flow */
-+	if (try_left && make_space_by_shift_left(op, doing, todo) == 0)
-+		return 0;
-+
-+	/* the shift may have freed enough space in the insert point node */
-+	if (enough_space_for_whole_flow(op))
-+		return 0;
-+
-+	/* on success the insert point is still in the same node, but
-+	   nothing is left to the right of it */
-+	if (try_right && make_space_by_shift_right(op, doing, todo) == 0)
-+		return 0;
-+
-+	if (enough_space_for_whole_flow(op))
-+		return 0;
-+
-+	return make_space_by_new_nodes(op, doing, todo);
-+}
-+
-+/* implements COP_INSERT_FLOW operation
-+
-+ Repeats, until the flow is empty: make room at the insert point, then
-+ either append to the item at the insert point (paste) or create a new item
-+ there, and advance the flow by the number of bytes placed.
-+
-+ If the insert point ends up in a node other than the one it started in,
-+ the lock handle tracked by @doing is moved to that node.
-+
-+ Returns 0, the error of make_space_for_flow_insertion(), or the result of
-+ re-taking the tracked lock. */
-+static int
-+carry_insert_flow(carry_op * op, carry_level * doing, carry_level * todo)
-+{
-+ int result;
-+ flow_t *f;
-+ coord_t *insert_point;
-+ node_plugin *nplug;
-+ carry_plugin_info info;
-+ znode *orig_node;
-+ lock_handle *orig_lh;
-+
-+ f = op->u.insert_flow.flow;
-+ result = 0;
-+
-+ /* carry system needs this to work */
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ /* remembered to detect, after the loop, that the insert point moved */
-+ orig_node = flow_insert_point(op)->node;
-+ orig_lh = doing->tracked;
-+
-+ while (f->length) {
-+ result = make_space_for_flow_insertion(op, doing, todo);
-+ if (result)
-+ break;
-+
-+ /* re-read: making space may have moved the insert point */
-+ insert_point = flow_insert_point(op);
-+ nplug = node_plugin_by_node(insert_point->node);
-+
-+ /* compose item data for insertion/pasting */
-+ flow_insert_data(op)->data = f->data;
-+ flow_insert_data(op)->length = what_can_fit_into_node(op);
-+
-+ if (can_paste(insert_point, &f->key, flow_insert_data(op))) {
-+ /* insert point is set to item of file we are writing to and we have to append to it */
-+ assert("vs-903", insert_point->between == AFTER_UNIT);
-+ nplug->change_item_size(insert_point,
-+ flow_insert_data(op)->length);
-+ flow_insert_data(op)->iplug->b.paste(insert_point,
-+ flow_insert_data
-+ (op), &info);
-+ } else {
-+ /* new item must be inserted */
-+ pos_in_node_t new_pos;
-+ /* length temporarily includes the item plugin overhead;
-+ the same value is later passed to move_flow_forward() */
-+ flow_insert_data(op)->length += item_data_overhead(op);
-+
-+ /* FIXME-VS: this is because node40_create_item changes
-+ insert_point for obscure reasons */
-+ switch (insert_point->between) {
-+ case AFTER_ITEM:
-+ new_pos = insert_point->item_pos + 1;
-+ break;
-+ case EMPTY_NODE:
-+ new_pos = 0;
-+ break;
-+ case BEFORE_ITEM:
-+ assert("vs-905", insert_point->item_pos == 0);
-+ new_pos = 0;
-+ break;
-+ default:
-+ impossible("vs-906",
-+ "carry_insert_flow: invalid coord");
-+ new_pos = 0;
-+ break;
-+ }
-+
-+ /* return value of create_item() is not checked here */
-+ nplug->create_item(insert_point, &f->key,
-+ flow_insert_data(op), &info);
-+ coord_set_item_pos(insert_point, new_pos);
-+ }
-+ /* next piece goes right after what was just written */
-+ coord_init_after_item_end(insert_point);
-+ doing->restartable = 0;
-+ znode_make_dirty(insert_point->node);
-+
-+ move_flow_forward(f, (unsigned)flow_insert_data(op)->length);
-+ }
-+
-+ if (orig_node != flow_insert_point(op)->node) {
-+ /* move lock to new insert point */
-+ /* NOTE(review): this assignment replaces any error the loop
-+ left in @result; confirm callers do not rely on that error */
-+ done_lh(orig_lh);
-+ init_lh(orig_lh);
-+ result =
-+ longterm_lock_znode(orig_lh, flow_insert_point(op)->node,
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
-+ }
-+
-+ return result;
-+}
-+
-+/* implements COP_DELETE operation
-+
-+ Remove pointer to @op -> u.delete.child from its parent.
-+
-+ This function also handles killing of a tree root if the last pointer from
-+ it was removed. This is complicated by our handling of "twig" level: root on
-+ twig level is never killed.
-+
-+*/
-+static int carry_delete(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing UNUSED_ARG /* current carry
-+ * level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ int result;
-+ coord_t coord;
-+ coord_t coord2;
-+ znode *parent;
-+ znode *child;
-+ carry_plugin_info info;
-+ reiser4_tree *tree;
-+
-+ /*
-+ * This operation is called to delete internal item pointing to the
-+ * child node that was removed by carry from the tree on the previous
-+ * tree level.
-+ */
-+
-+ assert("nikita-893", op != NULL);
-+ assert("nikita-894", todo != NULL);
-+ assert("nikita-895", op->op == COP_DELETE);
-+
-+ coord_init_zero(&coord);
-+ coord_init_zero(&coord2);
-+
-+ parent = reiser4_carry_real(op->node);
-+ child = op->u.delete.child ?
-+ reiser4_carry_real(op->u.delete.child) : op->node->node;
-+ tree = znode_get_tree(child);
-+ read_lock_tree(tree);
-+
-+ /*
-+ * @parent was determined when carry entered parent level
-+ * (lock_carry_level/lock_carry_node). Since then, actual parent of
-+ * @child node could change due to other carry operations performed on
-+ * the parent level. Check for this.
-+ */
-+
-+ if (znode_parent(child) != parent) {
-+ /* NOTE-NIKITA add stat counter for this. */
-+ parent = znode_parent(child);
-+ assert("nikita-2581", find_carry_node(doing, parent));
-+ }
-+ read_unlock_tree(tree);
-+
-+ assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL);
-+
-+ /* Twig level horrors: tree should be of height at least 2. So, last
-+ pointer from the root at twig level is preserved even if child is
-+ empty. This is ugly, but so it was architectured.
-+ */
-+
-+ if (znode_is_root(parent) &&
-+ znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT &&
-+ node_num_items(parent) == 1) {
-+ /* Delimiting key manipulations. */
-+ write_lock_dk(tree);
-+ znode_set_ld_key(child, znode_set_ld_key(parent, reiser4_min_key()));
-+ znode_set_rd_key(child, znode_set_rd_key(parent, reiser4_max_key()));
-+ ZF_SET(child, JNODE_DKSET);
-+ write_unlock_dk(tree);
-+
-+ /* @child escaped imminent death! */
-+ ZF_CLR(child, JNODE_HEARD_BANSHEE);
-+ return 0;
-+ }
-+
-+ /* convert child pointer to the coord_t */
-+ result = find_child_ptr(parent, child, &coord);
-+ if (result != NS_FOUND) {
-+ warning("nikita-994", "Cannot find child pointer: %i", result);
-+ print_coord_content("coord", &coord);
-+ return result;
-+ }
-+
-+ coord_dup(&coord2, &coord);
-+ info.doing = doing;
-+ info.todo = todo;
-+ {
-+ /*
-+ * Actually kill internal item: prepare structure with
-+ * arguments for ->cut_and_kill() method...
-+ */
-+
-+ struct carry_kill_data kdata;
-+ kdata.params.from = &coord;
-+ kdata.params.to = &coord2;
-+ kdata.params.from_key = NULL;
-+ kdata.params.to_key = NULL;
-+ kdata.params.smallest_removed = NULL;
-+ kdata.params.truncate = 1;
-+ kdata.flags = op->u.delete.flags;
-+ kdata.inode = NULL;
-+ kdata.left = NULL;
-+ kdata.right = NULL;
-+ kdata.buf = NULL;
-+ /* ... and call it. */
-+ result = node_plugin_by_node(parent)->cut_and_kill(&kdata,
-+ &info);
-+ }
-+ doing->restartable = 0;
-+
-+ /* check whether root should be killed violently */
-+ if (znode_is_root(parent) &&
-+ /* don't kill roots at and lower than twig level */
-+ znode_get_level(parent) > REISER4_MIN_TREE_HEIGHT &&
-+ node_num_items(parent) == 1) {
-+ result = reiser4_kill_tree_root(coord.node);
-+ }
-+
-+ return result < 0 ? : 0;
-+}
-+
-+/* implements COP_CUT operation
-+
-+   Cuts part or whole content of node.
-+
-+   Depending on @op->u.cut_or_kill.is_cut, either the ->cut() or the
-+   ->cut_and_kill() method of the node plugin of @op->node is called. @doing
-+   is marked non-restartable afterwards.
-+
-+   Returns 0 on success and a negative error code on failure; a non-negative
-+   result of the node plugin method is reported as 0.
-+*/
-+static int carry_cut(carry_op * op /* operation to be performed */ ,
-+		     carry_level * doing /* current carry level */ ,
-+		     carry_level * todo /* next carry level */ )
-+{
-+	int result;
-+	carry_plugin_info info;
-+	node_plugin *nplug;
-+
-+	assert("nikita-896", op != NULL);
-+	assert("nikita-897", todo != NULL);
-+	assert("nikita-898", op->op == COP_CUT);
-+
-+	info.doing = doing;
-+	info.todo = todo;
-+
-+	nplug = node_plugin_by_node(reiser4_carry_real(op->node));
-+	if (op->u.cut_or_kill.is_cut)
-+		result = nplug->cut(op->u.cut_or_kill.u.cut, &info);
-+	else
-+		result = nplug->cut_and_kill(op->u.cut_or_kill.u.kill, &info);
-+
-+	doing->restartable = 0;
-+	/*
-+	 * Propagate the error code itself. The GNU omitted-operand form
-+	 * "result < 0 ? : 0" evaluates to the value of the comparison,
-+	 * i.e. 1, and so loses the errno.
-+	 */
-+	return result < 0 ? result : 0;
-+}
-+
-+/* helper function for carry_paste(): returns true if @op can be continued as
-+ paste
-+
-+ @icoord is the insertion coord (asserted not to be AT_UNIT), @key is the key
-+ of the data to be added and @data describes those data; only @data->iplug
-+ is inspected directly here, @key and @data are otherwise passed on to the
-+ can-contain-key checks.
-+
-+ Returns 1 when the data can be glued to an existing item and 0 when the
-+ node is empty or no suitable item was found. As a side effect @icoord may
-+ be modified: on success it can be re-pointed to the neighboring item that
-+ accepts the data (with ->between adjusted), and when coord_set_to_left() /
-+ coord_set_to_right() report failure (presumably: no item on that side) it
-+ is re-initialized with coord_init_before_item() / coord_init_after_item().
-+ */
-+static int
-+can_paste(coord_t * icoord, const reiser4_key * key,
-+ const reiser4_item_data * data)
-+{
-+ coord_t circa;
-+ item_plugin *new_iplug;
-+ item_plugin *old_iplug;
-+ int result = 0; /* to keep gcc shut */
-+
-+ assert("", icoord->between != AT_UNIT);
-+
-+ /* obviously, one cannot paste when node is empty---there is nothing
-+ to paste into. */
-+ if (node_is_empty(icoord->node))
-+ return 0;
-+ /* if insertion point is at the middle of the item, then paste */
-+ if (!coord_is_between_items(icoord))
-+ return 1;
-+ coord_dup(&circa, icoord);
-+ circa.between = AT_UNIT;
-+
-+ old_iplug = item_plugin_by_coord(&circa);
-+ new_iplug = data->iplug;
-+
-+ /* check whether we can paste to the item @icoord is "at" when we
-+ ignore ->between field */
-+ if (old_iplug == new_iplug && item_can_contain_key(&circa, key, data)) {
-+ result = 1;
-+ } else if (icoord->between == BEFORE_UNIT
-+ || icoord->between == BEFORE_ITEM) {
-+ /* otherwise, try to glue to the item at the left, if any */
-+ coord_dup(&circa, icoord);
-+ if (coord_set_to_left(&circa)) {
-+ result = 0;
-+ coord_init_before_item(icoord);
-+ } else {
-+ old_iplug = item_plugin_by_coord(&circa);
-+ result = (old_iplug == new_iplug)
-+ && item_can_contain_key(icoord, key, data); /* NOTE(review): checks @icoord, while the right-neighbor branch below checks &circa -- confirm this is intended */
-+ if (result) {
-+ coord_dup(icoord, &circa);
-+ icoord->between = AFTER_UNIT;
-+ }
-+ }
-+ } else if (icoord->between == AFTER_UNIT
-+ || icoord->between == AFTER_ITEM) {
-+ coord_dup(&circa, icoord);
-+ /* otherwise, try to glue to the item at the right, if any */
-+ if (coord_set_to_right(&circa)) {
-+ result = 0;
-+ coord_init_after_item(icoord);
-+ } else {
-+ int (*cck) (const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+
-+ old_iplug = item_plugin_by_coord(&circa);
-+
-+ cck = old_iplug->b.can_contain_key;
-+ if (cck == NULL)
-+ /* item doesn't define ->can_contain_key
-+ method? So it is not expandable. */
-+ result = 0;
-+ else {
-+ result = (old_iplug == new_iplug)
-+ && cck(&circa /*icoord */ , key, data);
-+ if (result) {
-+ coord_dup(icoord, &circa);
-+ icoord->between = BEFORE_UNIT;
-+ }
-+ }
-+ }
-+ } else
-+ impossible("nikita-2513", "Nothing works");
-+ if (result) { /* normalize item-relative positions to unit-relative ones */
-+ if (icoord->between == BEFORE_ITEM) {
-+ assert("vs-912", icoord->unit_pos == 0);
-+ icoord->between = BEFORE_UNIT;
-+ } else if (icoord->between == AFTER_ITEM) {
-+ coord_init_after_item_end(icoord);
-+ }
-+ }
-+ return result;
-+}
-+
-+/* implements COP_PASTE operation
-+
-+ Paste data into existing item. This is complicated by the fact that after
-+ we shifted something to the left or right neighbors trying to free some
-+ space, item we were supposed to paste into can be in different node than
-+ insertion coord. If so, we are no longer doing paste, but insert. See
-+ comments in insert_paste_common().
-+
-+ Returns the result of insert_paste_common() when it is non-zero, the
-+ result of the COP_INSERT handler when the operation is restarted as an
-+ insert, and otherwise the result of the item plugin's ->b.paste() method.
-+
-+*/
-+static int carry_paste(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing UNUSED_ARG /* current carry
-+ * level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ znode *node;
-+ carry_insert_data cdata;
-+ coord_t dcoord;
-+ reiser4_item_data data;
-+ int result;
-+ int real_size;
-+ item_plugin *iplug;
-+ carry_plugin_info info;
-+ coord_t *coord;
-+
-+ assert("nikita-982", op != NULL);
-+ assert("nikita-983", todo != NULL);
-+ assert("nikita-984", op->op == COP_PASTE);
-+
-+ coord_init_zero(&dcoord);
-+
-+ result = insert_paste_common(op, doing, todo, &cdata, &dcoord, &data);
-+ if (result != 0)
-+ return result;
-+
-+ coord = op->u.insert.d->coord;
-+
-+ /* handle case when op -> u.insert.coord doesn't point to the item
-+ of required type. restart as insert. */
-+ if (!can_paste(coord, op->u.insert.d->key, op->u.insert.d->data)) {
-+ op->op = COP_INSERT;
-+ op->u.insert.type = COPT_PASTE_RESTARTED;
-+ result = op_dispatch_table[COP_INSERT].handler(op, doing, todo);
-+
-+ return result;
-+ }
-+
-+ node = coord->node;
-+ iplug = item_plugin_by_coord(coord);
-+ assert("nikita-992", iplug != NULL);
-+
-+ assert("nikita-985", node != NULL);
-+ assert("nikita-986", node_plugin_by_node(node) != NULL);
-+
-+ assert("nikita-987",
-+ space_needed_for_op(node, op) <= znode_free_space(node));
-+
-+ assert("nikita-1286", coord_is_existing_item(coord));
-+
-+ /*
-+ * if item is expanded as a result of this operation, we should first
-+ * change item size, then call ->b.paste item method. If item is
-+ * shrunk, it should be done other way around: first call ->b.paste
-+ * method, then reduce item size.
-+ */
-+
-+ real_size = space_needed_for_op(node, op);
-+ if (real_size > 0)
-+ node->nplug->change_item_size(coord, real_size);
-+
-+ doing->restartable = 0;
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ result = iplug->b.paste(coord, op->u.insert.d->data, &info);
-+
-+ if (real_size < 0)
-+ node->nplug->change_item_size(coord, real_size);
-+
-+ /* if we pasted at the beginning of the item, update item's key. */
-+ if (coord->unit_pos == 0 && coord->between != AFTER_UNIT)
-+ node->nplug->update_item_key(coord, op->u.insert.d->key, &info);
-+
-+ znode_make_dirty(node);
-+ return result;
-+}
-+
-+/* handle carry COP_EXTENT operation.
-+
-+ Runs insert_paste_common() for @op and then posts follow-up operations
-+ through node_post_carry(): a COP_DELETE (flagged DELETE_RETAIN_EMPTY) for
-+ the node of the insertion coord when that node is empty, and a COP_INSERT
-+ of type COPT_KEY that carries the extent described by @op->u.extent.d. If
-+ @doing tracks a lock handle, tracking is transferred to @todo.
-+
-+ Returns 0 on success, the non-zero result of insert_paste_common(), or
-+ PTR_ERR() of a failed node_post_carry(). */
-+static int carry_extent(carry_op * op /* operation to perform */ ,
-+ carry_level * doing /* queue of operations @op
-+ * is part of */ ,
-+ carry_level * todo /* queue where new operations
-+ * are accumulated */ )
-+{
-+ znode *node;
-+ carry_insert_data cdata;
-+ coord_t coord;
-+ reiser4_item_data data;
-+ carry_op *delete_dummy;
-+ carry_op *insert_extent;
-+ int result;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1751", op != NULL);
-+ assert("nikita-1752", todo != NULL);
-+ assert("nikita-1753", op->op == COP_EXTENT);
-+
-+ /* extent insertion overview:
-+
-+ extents live on the TWIG LEVEL, which is level one above the leaf
-+ one. This complicates extent insertion logic somewhat: it may
-+ happen (and going to happen all the time) that in logical key
-+ ordering extent has to be placed between items I1 and I2, located
-+ at the leaf level, but I1 and I2 are in the same formatted leaf
-+ node N1. To insert extent one has to
-+
-+ (1) reach node N1 and shift data between N1, its neighbors and
-+ possibly newly allocated nodes until I1 and I2 fall into different
-+ nodes. Since I1 and I2 are still neighboring items in logical key
-+ order, they will be necessary utmost items in their respective
-+ nodes.
-+
-+ (2) After this new extent item is inserted into node on the twig
-+ level.
-+
-+ Fortunately this process can reuse almost all code from standard
-+ insertion procedure (viz. make_space() and insert_paste_common()),
-+ due to the following observation: make_space() only shifts data up
-+ to and excluding or including insertion point. It never
-+ "over-moves" through insertion point. Thus, one can use
-+ make_space() to perform step (1). All required for this is just to
-+ instruct free_space_shortage() to keep make_space() shifting data
-+ until insertion point is at the node border.
-+
-+ */
-+
-+ /* perform common functionality of insert and paste. */
-+ result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
-+ if (result != 0)
-+ return result;
-+
-+ node = op->u.extent.d->coord->node;
-+ assert("nikita-1754", node != NULL);
-+ assert("nikita-1755", node_plugin_by_node(node) != NULL);
-+ assert("nikita-1700", coord_wrt(op->u.extent.d->coord) != COORD_INSIDE);
-+
-+ /* NOTE-NIKITA add some checks here. Not assertions, -EIO. Check that
-+ extent fits between items. */
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ /* there is another complication due to placement of extents on the
-+ twig level: extents are "rigid" in the sense that key-range
-+ occupied by extent cannot grow indefinitely to the right as it is
-+ for the formatted leaf nodes. Because of this when search finds two
-+ adjacent extents on the twig level, it has to "drill" to the leaf
-+ level, creating new node. Here we are removing this node.
-+ */
-+ if (node_is_empty(node)) {
-+ delete_dummy = node_post_carry(&info, COP_DELETE, node, 1);
-+ if (IS_ERR(delete_dummy))
-+ return PTR_ERR(delete_dummy);
-+ delete_dummy->u.delete.child = NULL;
-+ delete_dummy->u.delete.flags = DELETE_RETAIN_EMPTY;
-+ ZF_SET(node, JNODE_HEARD_BANSHEE);
-+ }
-+
-+ /* proceed with inserting extent item into parent. We are definitely
-+ inserting rather than pasting if we get that far. */
-+ insert_extent = node_post_carry(&info, COP_INSERT, node, 1);
-+ if (IS_ERR(insert_extent))
-+ /* @delete_dummy will be automatically destroyed on the level
-+ exiting */
-+ return PTR_ERR(insert_extent);
-+ /* NOTE-NIKITA insertion by key is simplest option here. Another
-+ possibility is to insert on the left or right of already existing
-+ item.
-+ */
-+ insert_extent->u.insert.type = COPT_KEY;
-+ insert_extent->u.insert.d = op->u.extent.d;
-+ assert("nikita-1719", op->u.extent.d->key != NULL);
-+ insert_extent->u.insert.d->data->arg = op->u.extent.d->coord;
-+ insert_extent->u.insert.flags =
-+ znode_get_tree(node)->carry.new_extent_flags;
-+
-+ /*
-+ * if carry was asked to track lock handle we should actually track
-+ * lock handle on the twig node rather than on the leaf where
-+ * operation was started from. Transfer tracked lock handle.
-+ */
-+ if (doing->track_type) {
-+ assert("nikita-3242", doing->tracked != NULL);
-+ assert("nikita-3244", todo->tracked == NULL);
-+ todo->tracked = doing->tracked;
-+ todo->track_type = CARRY_TRACK_NODE;
-+ doing->tracked = NULL;
-+ doing->track_type = 0;
-+ }
-+
-+ return 0;
-+}
-+
-+/* update key in @parent between pointers to @left and @right.
-+
-+ Find coords of @left and @right and update delimiting key between them.
-+ This is helper function called by carry_update(). Finds position of
-+ internal item involved. Updates item key. Updates delimiting keys of child
-+ nodes involved.
-+
-+ @left may be NULL; its position is looked up only when it is non-NULL and
-+ the pointer to @right is not the leftmost unit of @parent.
-+
-+ Returns 0 on success, with *@error_msg set to NULL. When a child pointer
-+ cannot be found the find_child_ptr() result is returned, and under
-+ REISER4_DEBUG the sanity checks may return -EIO; in both cases *@error_msg
-+ points to a description of the problem.
-+*/
-+static int update_delimiting_key(znode * parent /* node key is updated
-+ * in */ ,
-+ znode * left /* child of @parent */ ,
-+ znode * right /* child of @parent */ ,
-+ carry_level * doing /* current carry
-+ * level */ ,
-+ carry_level * todo /* parent carry
-+ * level */ ,
-+ const char **error_msg /* place to
-+ * store error
-+ * message */ )
-+{
-+ coord_t left_pos;
-+ coord_t right_pos;
-+ int result;
-+ reiser4_key ldkey;
-+ carry_plugin_info info;
-+
-+ assert("nikita-1177", right != NULL);
-+ /* find position of the right child in a parent */
-+ result = find_child_ptr(parent, right, &right_pos);
-+ if (result != NS_FOUND) {
-+ *error_msg = "Cannot find position of right child";
-+ return result;
-+ }
-+
-+ if ((left != NULL) && !coord_is_leftmost_unit(&right_pos)) {
-+ /* find position of the left child in a parent */
-+ result = find_child_ptr(parent, left, &left_pos);
-+ if (result != NS_FOUND) {
-+ *error_msg = "Cannot find position of left child";
-+ return result;
-+ }
-+ assert("nikita-1355", left_pos.node != NULL);
-+ } else
-+ left_pos.node = NULL; /* marks "no left position" for the checks below */
-+
-+ /* check that they are separated by exactly one key and are basically
-+ sane */
-+ if (REISER4_DEBUG) {
-+ if ((left_pos.node != NULL)
-+ && !coord_is_existing_unit(&left_pos)) {
-+ *error_msg = "Left child is bastard";
-+ return RETERR(-EIO);
-+ }
-+ if (!coord_is_existing_unit(&right_pos)) {
-+ *error_msg = "Right child is bastard";
-+ return RETERR(-EIO);
-+ }
-+ if (left_pos.node != NULL &&
-+ !coord_are_neighbors(&left_pos, &right_pos)) {
-+ *error_msg = "Children are not direct siblings";
-+ return RETERR(-EIO);
-+ }
-+ }
-+ *error_msg = NULL;
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ /*
-+ * If child node is not empty, new key of internal item is a key of
-+ * leftmost item in the child node. If the child is empty, take its
-+ * right delimiting key as a new key of the internal item. Precise key
-+ * in the latter case is not important per se, because the child (and
-+ * the internal item) are going to be killed shortly anyway, but we
-+ * have to preserve correct order of keys in the parent node.
-+ */
-+
-+ if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE))
-+ leftmost_key_in_node(right, &ldkey);
-+ else {
-+ read_lock_dk(znode_get_tree(parent));
-+ ldkey = *znode_get_rd_key(right);
-+ read_unlock_dk(znode_get_tree(parent));
-+ }
-+ node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info);
-+ doing->restartable = 0;
-+ znode_make_dirty(parent);
-+ return 0;
-+}
-+
-+/* implements COP_UPDATE operation
-+
-+ Update delimiting keys.
-+
-+ Looks up the parent of @op->node->node under the tree read lock and calls
-+ update_delimiting_key() on it. Returns the result of that call, or -EIO
-+ when the node has no parent; any non-zero result is also reported through
-+ warning().
-+
-+*/
-+static int carry_update(carry_op * op /* operation to be performed */ ,
-+ carry_level * doing /* current carry level */ ,
-+ carry_level * todo /* next carry level */ )
-+{
-+ int result;
-+ carry_node *missing UNUSED_ARG;
-+ znode *left;
-+ znode *right;
-+ carry_node *lchild;
-+ carry_node *rchild;
-+ const char *error_msg;
-+ reiser4_tree *tree;
-+
-+ /*
-+ * This operation is called to update key of internal item. This is
-+ * necessary when carry shifted or cut data on the child
-+ * level. Arguments of this operation are:
-+ *
-+ * @right --- child node. Operation should update key of internal
-+ * item pointing to @right.
-+ *
-+ * @left --- left neighbor of @right. This parameter is optional.
-+ *
-+ * Note on locals: the variable named "right" below actually holds the
-+ * parent of the right child (@op->node); "left" is assigned but never
-+ * read afterwards, and "missing" is never referenced.
-+ */
-+
-+ assert("nikita-902", op != NULL);
-+ assert("nikita-903", todo != NULL);
-+ assert("nikita-904", op->op == COP_UPDATE);
-+
-+ lchild = op->u.update.left;
-+ rchild = op->node;
-+
-+ if (lchild != NULL) {
-+ assert("nikita-1001", lchild->parent);
-+ assert("nikita-1003", !lchild->left);
-+ left = reiser4_carry_real(lchild);
-+ } else
-+ left = NULL;
-+
-+ tree = znode_get_tree(rchild->node);
-+ read_lock_tree(tree);
-+ right = znode_parent(rchild->node);
-+ read_unlock_tree(tree);
-+
-+ if (right != NULL) {
-+ result = update_delimiting_key(right,
-+ lchild ? lchild->node : NULL,
-+ rchild->node,
-+ doing, todo, &error_msg);
-+ } else {
-+ error_msg = "Cannot find node to update key in";
-+ result = RETERR(-EIO);
-+ }
-+ /* operation will be reposted to the next level by the
-+ ->update_item_key() method of node plugin, if necessary. */
-+
-+ if (result != 0) {
-+ warning("nikita-999", "Error updating delimiting key: %s (%i)",
-+ error_msg ? : "", result);
-+ }
-+ return result;
-+}
-+
-+/* move items from @node during carry
-+
-+ Calls the ->shift() method of the node plugin of @node with @insert_coord,
-+ shifting left when @side is LEFT_SIDE and right otherwise. When ->shift()
-+ reports a positive result, both @insert_coord->node and @node are marked
-+ dirty and @doing becomes non-restartable.
-+
-+ Always returns 0: a negative ->shift() result is only checked by the
-+ assertion and is not passed back to the caller.
-+
-+ NOTE(review): the local "source" is @insert_coord->node, which suggests
-+ that data move from the node of @insert_coord into @node, while the
-+ parameter comment describes @node as the node data are moved from --
-+ confirm against the ->shift() contract. */
-+static int carry_shift_data(sideof side /* in what direction to move data */ ,
-+ coord_t * insert_coord /* coord where new item
-+ * is to be inserted */ ,
-+ znode * node /* node which data are moved from */ ,
-+ carry_level * doing /* active carry queue */ ,
-+ carry_level * todo /* carry queue where new
-+ * operations are to be put
-+ * in */ ,
-+ unsigned int including_insert_coord_p /* true if
-+ * @insertion_coord
-+ * can be moved */ )
-+{
-+ int result;
-+ znode *source;
-+ carry_plugin_info info;
-+ node_plugin *nplug;
-+
-+ source = insert_coord->node;
-+
-+ info.doing = doing;
-+ info.todo = todo;
-+
-+ nplug = node_plugin_by_node(node);
-+ result = nplug->shift(insert_coord, node,
-+ (side == LEFT_SIDE) ? SHIFT_LEFT : SHIFT_RIGHT, 0,
-+ (int)including_insert_coord_p, &info);
-+ /* the only error ->shift() method of node plugin can return is
-+ -ENOMEM due to carry node/operation allocation. */
-+ assert("nikita-915", result >= 0 || result == -ENOMEM);
-+ if (result > 0) {
-+ /*
-+ * if some number of bytes was actually shifted, mark nodes
-+ * dirty, and carry level as non-restartable.
-+ */
-+ doing->restartable = 0;
-+ znode_make_dirty(source);
-+ znode_make_dirty(node);
-+ }
-+
-+ assert("nikita-2077", coord_check(insert_coord));
-+ return 0; /* NOTE(review): -ENOMEM from ->shift() is dropped here; confirm callers do not need it */
-+}
-+
-+/* operation moving from one carry node to an adjacent one on a level list */
-+typedef carry_node *(*carry_iterator) (carry_node * node);
-+static carry_node *find_dir_carry(carry_node * node, carry_level * level,
-+				  carry_iterator iterator);
-+
-+/* carry_iterator stepping to the previous entry of the level list */
-+static carry_node *pool_level_list_prev(carry_node *node)
-+{
-+	struct list_head *prev_link = node->header.level_linkage.prev;
-+
-+	return list_entry(prev_link, carry_node, header.level_linkage);
-+}
-+
-+/* Return the nearest carry node preceding @node in the queue of @level that
-+   refers to a different znode, or NULL when the list head is reached.
-+
-+   This is used by find_left_neighbor(), but I am not sure that this
-+   really gives any advantage. More statistics required.
-+
-+*/
-+carry_node *find_left_carry(carry_node * node /* node to find left neighbor
-+						 * of */ ,
-+			    carry_level * level /* level to scan */ )
-+{
-+	carry_iterator step_back = pool_level_list_prev;
-+
-+	return find_dir_carry(node, level, step_back);
-+}
-+
-+/* carry_iterator stepping to the next entry of the level list */
-+static carry_node *pool_level_list_next(carry_node *node)
-+{
-+	struct list_head *next_link = node->header.level_linkage.next;
-+
-+	return list_entry(next_link, carry_node, header.level_linkage);
-+}
-+
-+/* Return the nearest carry node following @node in the queue of @level that
-+   refers to a different znode, or NULL when the list head is reached.
-+
-+   This is used by find_right_neighbor(), but I am not sure that this
-+   really gives any advantage. More statistics required.
-+
-+*/
-+carry_node *find_right_carry(carry_node * node /* node to find right neighbor
-+						  * of */ ,
-+			     carry_level * level /* level to scan */ )
-+{
-+	carry_iterator step_forward = pool_level_list_next;
-+
-+	return find_dir_carry(node, level, step_forward);
-+}
-+
-+/* look for the left or right neighbor of given carry node in a carry
-+   queue.
-+
-+   Helper function used by find_{left|right}_carry(). Walks the list of
-+   @level starting after @node, in the direction given by @iterator, and
-+   returns the first carry node whose real znode differs from that of @node,
-+   or NULL when the list head is reached.
-+*/
-+static carry_node *find_dir_carry(carry_node * node /* node to start scanning
-+						       * from */ ,
-+				  carry_level * level /* level to scan */ ,
-+				  carry_iterator iterator /* operation to
-+							   * move to the next
-+							   * node */ )
-+{
-+	carry_node *scan;
-+
-+	assert("nikita-1059", node != NULL);
-+	assert("nikita-1060", level != NULL);
-+
-+	/* skip all carry nodes referencing the same znode as @node */
-+	for (scan = iterator(node); !carry_node_end(level, scan);
-+	     scan = iterator(scan)) {
-+		if (reiser4_carry_real(scan) != reiser4_carry_real(node))
-+			return scan;
-+	}
-+	/* list head is reached */
-+	return NULL;
-+}
-+
-+/*
-+ * Memory reservation estimation.
-+ *
-+ * Carry process proceeds through tree levels upwards. Carry assumes that it
-+ * takes tree in consistent state (e.g., that search tree invariants hold),
-+ * and leaves tree consistent after it finishes. This means that when some
-+ * error occurs carry cannot simply return if there are pending carry
-+ * operations. Generic solution for this problem is carry-undo either as
-+ * transaction manager feature (requiring checkpoints and isolation), or
-+ * through some carry specific mechanism.
-+ *
-+ * Our current approach is to panic if carry hits an error while tree is
-+ * inconsistent. Unfortunately -ENOMEM can easily be triggered. To work around
-+ * this "memory reservation" mechanism was added.
-+ *
-+ * Memory reservation is implemented by perthread-pages.diff patch from
-+ * core-patches. Its API is defined in <linux/gfp.h>
-+ *
-+ * int perthread_pages_reserve(int nrpages, gfp_t gfp);
-+ * void perthread_pages_release(int nrpages);
-+ * int perthread_pages_count(void);
-+ *
-+ * carry estimates its worst case memory requirements at the entry, reserves
-+ * enough memory, and releases unused pages before returning.
-+ *
-+ * Code below estimates worst case memory requirements for a given carry
-+ * queue. This is done by summing worst case memory requirements for each
-+ * operation in the queue.
-+ *
-+ */
-+
-+/*
-+ * Memory requirements of many operations depend on the tree
-+ * height. For example, item insertion requires new node to be inserted at
-+ * each tree level in the worst case. What tree height should be used for
-+ * estimation? Current tree height is wrong, because tree height can change
-+ * between the time when estimation was done and the time when operation is
-+ * actually performed. Maximal possible tree height (REISER4_MAX_ZTREE_HEIGHT)
-+ * is also not desirable, because it would lead to the huge over-estimation
-+ * all the time. Plausible solution is "capped tree height": if current tree
-+ * height is less than some TREE_HEIGHT_CAP constant, capped tree height is
-+ * TREE_HEIGHT_CAP, otherwise it's current tree height. Idea behind this is
-+ * that if tree height is TREE_HEIGHT_CAP or larger, it's extremely unlikely
-+ * to be increased even more during short interval of time.
-+ */
-+#define TREE_HEIGHT_CAP (5)
-+
-+/* return capped tree height for the @tree: the larger of the tree's current
-+   height and TREE_HEIGHT_CAP. See comment above. */
-+static int cap_tree_height(reiser4_tree * tree)
-+{
-+	int height = (int)tree->height;
-+
-+	if (height < TREE_HEIGHT_CAP)
-+		return TREE_HEIGHT_CAP;
-+	return height;
-+}
-+
-+/* capped tree height (see cap_tree_height()) of current_tree */
-+static int capped_height(void)
-+{
-+	int height = cap_tree_height(current_tree);
-+
-+	return height;
-+}
-+
-+/* number of pages needed to hold @bytes bytes, rounded up to a whole page */
-+static int bytes_to_pages(int bytes)
-+{
-+	return (bytes + (PAGE_CACHE_SIZE - 1)) >> PAGE_CACHE_SHIFT;
-+}
-+
-+/* how many pages are required to allocate znodes during item insertion. */
-+static int carry_estimate_znodes(void)
-+{
-+	/*
-+	 * Note that there is a problem here: there is no way to reserve
-+	 * pages specifically for the given slab. This means that these
-+	 * pages can be hijacked for some other end.
-+	 */
-+
-+	/* in the worst case we need 3 new znodes on each tree level */
-+	return bytes_to_pages(capped_height() * sizeof(znode) * 3);
-+}
-+
-+/*
-+ * how many pages are required to load bitmaps. One bitmap per level.
-+ */
-+static int carry_estimate_bitmaps(void)
-+{
-+	int bytes;
-+
-+	/* bitmaps were pre-loaded during mount */
-+	if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_LOAD_BITMAP))
-+		return 0;
-+
-+	/*
-+	 * working and commit jnodes. bnode should be added too, but it is
-+	 * private to bitmap.c, skip for now.
-+	 */
-+	bytes = capped_height() * (0 + 2 * sizeof(jnode));
-+	return bytes_to_pages(bytes) + 2;	/* and their contents */
-+}
-+
-+/* worst case item insertion memory requirements, in pages */
-+static int carry_estimate_insert(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps();
-+	pages += carry_estimate_znodes();
-+	pages += 1;			/* new atom */
-+	pages += capped_height();	/* new block on each level */
-+	pages += 1;	/* and possibly extra new block at the leaf level */
-+	pages += 3;			/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case item deletion memory requirements, in pages */
-+static int carry_estimate_delete(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps();
-+	pages += carry_estimate_znodes();
-+	pages += 1;	/* new atom */
-+	pages += 3;	/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case tree cut memory requirements, in pages */
-+static int carry_estimate_cut(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps();
-+	pages += carry_estimate_znodes();
-+	pages += 1;	/* new atom */
-+	pages += 3;	/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirements of pasting into item, in pages */
-+static int carry_estimate_paste(carry_op * op, carry_level * level)
-+{
-+	int pages;
-+
-+	pages = carry_estimate_bitmaps();
-+	pages += carry_estimate_znodes();
-+	pages += 1;			/* new atom */
-+	pages += capped_height();	/* new block on each level */
-+	pages += 1;	/* and possibly extra new block at the leaf level */
-+	pages += 3;			/* loading of leaves into memory */
-+	return pages;
-+}
-+
-+/* worst case memory requirements of extent insertion: one item insertion
-+   plus one deletion */
-+static int carry_estimate_extent(carry_op * op, carry_level * level)
-+{
-+	int insert_pages = carry_estimate_insert(op, level);	/* insert extent */
-+	int delete_pages = carry_estimate_delete(op, level);	/* kill leaf */
-+
-+	return insert_pages + delete_pages;
-+}
-+
-+/* worst case memory requirements of key update: nothing is reserved */
-+static int carry_estimate_update(carry_op * op, carry_level * level)
-+{
-+	const int pages = 0;
-+
-+	return pages;
-+}
-+
-+/* worst case memory requirements of flow insertion
-+
-+   The flow is roughly estimated as a sequence of insertions: one per page of
-+   flow data, but no more than CARRY_FLOW_NEW_NODES_LIMIT of them. */
-+static int carry_estimate_insert_flow(carry_op * op, carry_level * level)
-+{
-+	int newnodes;
-+	int per_node;
-+
-+	newnodes = min(bytes_to_pages(op->u.insert_flow.flow->length),
-+		       CARRY_FLOW_NEW_NODES_LIMIT);
-+	per_node = carry_estimate_insert(op, level);
-+	return newnodes * per_node;
-+}
-+
-+/* This is dispatch table for carry operations, indexed by COP_* operation
-+   code. It can be trivially abstracted into useful plugin: tunable balancing
-+   policy is a good thing. */
-+carry_op_handler op_dispatch_table[COP_LAST_OP] = {
-+	[COP_INSERT] = {
-+		.handler = carry_insert,
-+		.estimate = carry_estimate_insert,
-+	},
-+	[COP_DELETE] = {
-+		.handler = carry_delete,
-+		.estimate = carry_estimate_delete,
-+	},
-+	[COP_CUT] = {
-+		.handler = carry_cut,
-+		.estimate = carry_estimate_cut,
-+	},
-+	[COP_PASTE] = {
-+		.handler = carry_paste,
-+		.estimate = carry_estimate_paste,
-+	},
-+	[COP_EXTENT] = {
-+		.handler = carry_extent,
-+		.estimate = carry_estimate_extent,
-+	},
-+	[COP_UPDATE] = {
-+		.handler = carry_update,
-+		.estimate = carry_estimate_update,
-+	},
-+	[COP_INSERT_FLOW] = {
-+		.handler = carry_insert_flow,
-+		.estimate = carry_estimate_insert_flow,
-+	},
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/carry_ops.h linux-2.6.24/fs/reiser4/carry_ops.h
---- linux-2.6.24.orig/fs/reiser4/carry_ops.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/carry_ops.h 2008-01-25 11:39:06.904199446 +0300
-@@ -0,0 +1,42 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* implementation of carry operations. See carry_ops.c for details. */
-+
-+#if !defined( __CARRY_OPS_H__ )
-+#define __CARRY_OPS_H__
-+
-+#include "forward.h"
-+#include "znode.h"
-+#include "carry.h"
-+
-+/* carry operation handlers: element type of op_dispatch_table[] */
-+typedef struct carry_op_handler {
-+ /* perform operation @op; @doing is the current carry level, @todo the next one */
-+ int (*handler) (carry_op * op, carry_level * doing, carry_level * todo);
-+ /* estimate worst case memory requirements for @op */
-+ int (*estimate) (carry_op * op, carry_level * level);
-+} carry_op_handler;
-+
-+/* This is dispatch table for carry operations. It can be trivially
-+ abstracted into useful plugin: tunable balancing policy is a good
-+ thing. */
-+extern carry_op_handler op_dispatch_table[COP_LAST_OP];
-+
-+unsigned int space_needed(const znode * node, const coord_t * coord,
-+ const reiser4_item_data * data, int inserting);
-+extern carry_node *find_left_carry(carry_node * node, carry_level * level);
-+extern carry_node *find_right_carry(carry_node * node, carry_level * level);
-+
-+/* __CARRY_OPS_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/context.c linux-2.6.24/fs/reiser4/context.c
---- linux-2.6.24.orig/fs/reiser4/context.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/context.c 2008-01-25 11:39:06.904199446 +0300
-@@ -0,0 +1,288 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Manipulation of reiser4_context */
-+
-+/*
-+ * global context used during system call. Variable of this type is allocated
-+ * on the stack at the beginning of the reiser4 part of the system call and
-+ * pointer to it is stored in the current->fs_context. This allows us to avoid
-+ * passing pointer to current transaction and current lockstack (both in
-+ * one-to-one mapping with threads) all over the call chain.
-+ *
-+ * It's kind of like those global variables the prof used to tell you not to
-+ * use in CS1, except thread specific.;-) Nikita, this was a good idea.
-+ *
-+ * In some situations it is desirable to have ability to enter reiser4_context
-+ * more than once for the same thread (nested contexts). For example, there
-+ * are some functions that can be called either directly from VFS/VM or from
-+ * already active reiser4 context (->writepage, for example).
-+ *
-+ * In such situations "child" context acts like dummy: all activity is
-+ * actually performed in the top level context, and get_current_context()
-+ * always returns top level context.
-+ * Of course, reiser4_init_context()/reiser4_done_context() have to be properly
-+ * nested any way.
-+ *
-+ * Note that there is an important difference between the way reiser4 uses
-+ * ->fs_context and the way other file systems use it. Other file systems
-+ * (ext3 and reiserfs) use ->fs_context only for the duration of _transaction_
-+ * (this is why ->fs_context was initially called ->journal_info). This means,
-+ * that when ext3 or reiserfs finds that ->fs_context is not NULL on the entry
-+ * to the file system, they assume that some transaction is already underway,
-+ * and usually bail out, because starting nested transaction would most likely
-+ * lead to the deadlock. This gives false positives with reiser4, because we
-+ * set ->fs_context before starting transaction.
-+ */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "context.h"
-+
-+#include <linux/writeback.h> /* balance_dirty_pages() */
-+#include <linux/hardirq.h>
-+
-+static void _reiser4_init_context(reiser4_context * context,
-+ struct super_block *super)
-+{
-+ memset(context, 0, sizeof(*context));
-+
-+ context->super = super;
-+ context->magic = context_magic;
-+ context->outer = current->journal_info;
-+ current->journal_info = (void *)context;
-+ context->nr_children = 0;
-+ context->gfp_mask = GFP_KERNEL;
-+
-+ init_lock_stack(&context->stack);
-+
-+ reiser4_txn_begin(context);
-+
-+ /* initialize head of tap list */
-+ INIT_LIST_HEAD(&context->taps);
-+#if REISER4_DEBUG
-+ context->task = current;
-+#endif
-+ grab_space_enable();
-+}
-+
-+/* initialize context and bind it to the current thread
-+
-+ This function should be called at the beginning of reiser4 part of
-+ syscall.
-+*/
-+reiser4_context * reiser4_init_context(struct super_block * super)
-+{
-+ reiser4_context *context;
-+
-+ assert("nikita-2662", !in_interrupt() && !in_irq());
-+ assert("nikita-3357", super != NULL);
-+ assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
-+
-+ context = get_current_context_check();
-+ if (context && context->super == super) {
-+ context = (reiser4_context *) current->journal_info;
-+ context->nr_children++;
-+ return context;
-+ }
-+
-+ context = kmalloc(sizeof(*context), GFP_KERNEL);
-+ if (context == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ _reiser4_init_context(context, super);
-+ return context;
-+}
-+
-+/* this is used in scan_mgr which is called with spinlock held and in
-+ reiser4_fill_super magic */
-+void init_stack_context(reiser4_context *context, struct super_block *super)
-+{
-+ assert("nikita-2662", !in_interrupt() && !in_irq());
-+ assert("nikita-3357", super != NULL);
-+ assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
-+ assert("vs-12", !is_in_reiser4_context());
-+
-+ _reiser4_init_context(context, super);
-+ context->on_stack = 1;
-+ return;
-+}
-+
-+/* cast lock stack embedded into reiser4 context up to its container */
-+reiser4_context *get_context_by_lock_stack(lock_stack * owner)
-+{
-+ return container_of(owner, reiser4_context, stack);
-+}
-+
-+/* true if there is already _any_ reiser4 context for the current thread */
-+int is_in_reiser4_context(void)
-+{
-+ reiser4_context *ctx;
-+
-+ ctx = current->journal_info;
-+ return ctx != NULL && ((unsigned long)ctx->magic) == context_magic;
-+}
-+
-+/*
-+ * call balance dirty pages for the current context.
-+ *
-+ * File system is expected to call balance_dirty_pages_ratelimited() whenever
-+ * it dirties a page. reiser4 does this for unformatted nodes (that is, during
-+ * write---this covers vast majority of all dirty traffic), but we cannot do
-+ * this immediately when formatted node is dirtied, because long term lock is
-+ * usually held at that time. To work around this, dirtying of formatted node
-+ * simply increases ->nr_marked_dirty counter in the current reiser4
-+ * context. When we are about to leave this context,
-+ * balance_dirty_pages_ratelimited() is called, if necessary.
-+ *
-+ * This introduces another problem: sometimes we do not want to run
-+ * balance_dirty_pages_ratelimited() when leaving a context, for example
-+ * because some important lock (like ->i_mutex on the parent directory) is
-+ * held. To achieve this, ->nobalance flag can be set in the current context.
-+ */
-+static void balance_dirty_pages_at(reiser4_context *context)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(context->super);
-+
-+ /*
-+ * call balance_dirty_pages_ratelimited() to process formatted nodes
-+ * dirtied during this system call. Do that only if we are not in mount
-+ * and there were nodes dirtied in this context and we are not in
-+ * writepage (to avoid deadlock) and not in pdflush
-+ */
-+ if (sbinfo != NULL && sbinfo->fake != NULL &&
-+ context->nr_marked_dirty != 0 &&
-+ !(current->flags & PF_MEMALLOC) &&
-+ !current_is_pdflush())
-+ balance_dirty_pages_ratelimited(sbinfo->fake->i_mapping);
-+}
-+
-+/* release resources associated with context.
-+
-+ This function should be called at the end of "session" with reiser4,
-+ typically just before leaving reiser4 driver back to VFS.
-+
-+ This is good place to put some degugging consistency checks, like that
-+ thread released all locks and closed transcrash etc.
-+
-+*/
-+static void reiser4_done_context(reiser4_context * context /* context being released */ )
-+{
-+ assert("nikita-860", context != NULL);
-+ assert("nikita-859", context->magic == context_magic);
-+ assert("vs-646", (reiser4_context *) current->journal_info == context);
-+ assert("zam-686", !in_interrupt() && !in_irq());
-+
-+ /* only do anything when leaving top-level reiser4 context. All nested
-+ * contexts are just dummies. */
-+ if (context->nr_children == 0) {
-+ assert("jmacd-673", context->trans == NULL);
-+ assert("jmacd-1002", lock_stack_isclean(&context->stack));
-+ assert("nikita-1936", reiser4_no_counters_are_held());
-+ assert("nikita-2626", list_empty_careful(reiser4_taps_list()));
-+ assert("zam-1004", ergo(get_super_private(context->super),
-+ get_super_private(context->super)->delete_mutex_owner !=
-+ current));
-+
-+ /* release all grabbed but as yet unused blocks */
-+ if (context->grabbed_blocks != 0)
-+ all_grabbed2free();
-+
-+ /*
-+ * synchronize against longterm_unlock_znode():
-+ * wake_up_requestor() wakes up requestors without holding
-+ * zlock (otherwise they will immediately bump into that lock
-+ * after wake up on another CPU). To work around (rare)
-+ * situation where requestor has been woken up asynchronously
-+ * and managed to run until completion (and destroy its
-+ * context and lock stack) before wake_up_requestor() called
-+ * wake_up() on it, wake_up_requestor() synchronize on lock
-+ * stack spin lock. It has actually been observed that spin
-+ * lock _was_ locked at this point, because
-+ * wake_up_requestor() took interrupt.
-+ */
-+ spin_lock_stack(&context->stack);
-+ spin_unlock_stack(&context->stack);
-+
-+ assert("zam-684", context->nr_children == 0);
-+ /* restore original ->fs_context value */
-+ current->journal_info = context->outer;
-+ if (context->on_stack == 0)
-+ kfree(context);
-+ } else {
-+ context->nr_children--;
-+#if REISER4_DEBUG
-+ assert("zam-685", context->nr_children >= 0);
-+#endif
-+ }
-+}
-+
-+/*
-+ * exit reiser4 context. Call balance_dirty_pages_at() if necessary. Close
-+ * transaction. Call done_context() to do context related book-keeping.
-+ */
-+void reiser4_exit_context(reiser4_context * context)
-+{
-+ assert("nikita-3021", reiser4_schedulable());
-+
-+ if (context->nr_children == 0) {
-+ if (!context->nobalance) {
-+ reiser4_txn_restart(context);
-+ balance_dirty_pages_at(context);
-+ }
-+
-+ /* if filesystem is mounted with -o sync or -o dirsync - commit
-+ transaction. FIXME: TXNH_DONT_COMMIT is used to avoid
-+ commiting on exit_context when inode semaphore is held and
-+ to have ktxnmgrd to do commit instead to get better
-+ concurrent filesystem accesses. But, when one mounts with -o
-+ sync, he cares more about reliability than about
-+ performance. So, for now we have this simple mount -o sync
-+ support. */
-+ if (context->super->s_flags & (MS_SYNCHRONOUS | MS_DIRSYNC)) {
-+ txn_atom *atom;
-+
-+ atom = get_current_atom_locked_nocheck();
-+ if (atom) {
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+ context->trans->flags &= ~TXNH_DONT_COMMIT;
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+ reiser4_txn_end(context);
-+ }
-+ reiser4_done_context(context);
-+}
-+
-+void reiser4_ctx_gfp_mask_set(void)
-+{
-+ reiser4_context *ctx;
-+
-+ ctx = get_current_context();
-+ if (ctx->entd == 0 &&
-+ list_empty(&ctx->stack.locks) &&
-+ ctx->trans->atom == NULL)
-+ ctx->gfp_mask = GFP_KERNEL;
-+ else
-+ ctx->gfp_mask = GFP_NOFS;
-+}
-+
-+void reiser4_ctx_gfp_mask_force (gfp_t mask)
-+{
-+ reiser4_context *ctx;
-+ ctx = get_current_context();
-+
-+ assert("edward-1454", ctx != NULL);
-+
-+ ctx->gfp_mask = mask;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/context.h linux-2.6.24/fs/reiser4/context.h
---- linux-2.6.24.orig/fs/reiser4/context.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/context.h 2008-01-25 11:39:06.904199446 +0300
-@@ -0,0 +1,228 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Reiser4 context. See context.c for details. */
-+
-+#if !defined( __REISER4_CONTEXT_H__ )
-+#define __REISER4_CONTEXT_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tap.h"
-+#include "lock.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* reiser4 per-thread context */
-+struct reiser4_context {
-+ /* magic constant. For identification of reiser4 contexts. */
-+ __u32 magic;
-+
-+ /* current lock stack. See lock.[ch]. This is where list of all
-+ locks taken by current thread is kept. This is also used in
-+ deadlock detection. */
-+ lock_stack stack;
-+
-+ /* current transcrash. */
-+ txn_handle *trans;
-+ /* transaction handle embedded into reiser4_context. ->trans points
-+ * here by default. */
-+ txn_handle trans_in_ctx;
-+
-+ /* super block we are working with. To get the current tree
-+ use &get_super_private (reiser4_get_current_sb ())->tree. */
-+ struct super_block *super;
-+
-+ /* parent fs activation */
-+ struct fs_activation *outer;
-+
-+ /* per-thread grabbed (for further allocation) blocks counter */
-+ reiser4_block_nr grabbed_blocks;
-+
-+ /* list of taps currently monitored. See tap.c */
-+ struct list_head taps;
-+
-+ /* grabbing space is enabled */
-+ unsigned int grab_enabled:1;
-+ /* should be set when we are write dirty nodes to disk in jnode_flush or
-+ * reiser4_write_logs() */
-+ unsigned int writeout_mode:1;
-+ /* true, if current thread is an ent thread */
-+ unsigned int entd:1;
-+ /* true, if balance_dirty_pages() should not be run when leaving this
-+ * context. This is used to avoid lengthly balance_dirty_pages()
-+ * operation when holding some important resource, like directory
-+ * ->i_mutex */
-+ unsigned int nobalance:1;
-+
-+ /* this bit is used on reiser4_done_context to decide whether context is
-+ kmalloc-ed and has to be kfree-ed */
-+ unsigned int on_stack:1;
-+
-+ /* count non-trivial jnode_set_dirty() calls */
-+ unsigned long nr_marked_dirty;
-+
-+ /* reiser4_sync_inodes calls (via generic_sync_sb_inodes)
-+ * reiser4_writepages for each of dirty inodes. Reiser4_writepages
-+ * captures pages. When number of pages captured in one
-+ * reiser4_sync_inodes reaches some threshold - some atoms get
-+ * flushed */
-+ int nr_captured;
-+ int nr_children; /* number of child contexts */
-+#if REISER4_DEBUG
-+ /* debugging information about reiser4 locks held by the current
-+ * thread */
-+ reiser4_lock_cnt_info locks;
-+ struct task_struct *task; /* so we can easily find owner of the stack */
-+
-+ /*
-+ * disk space grabbing debugging support
-+ */
-+ /* how many disk blocks were grabbed by the first call to
-+ * reiser4_grab_space() in this context */
-+ reiser4_block_nr grabbed_initially;
-+
-+ /* list of all threads doing flush currently */
-+ struct list_head flushers_link;
-+ /* information about last error encountered by reiser4 */
-+ err_site err;
-+#endif
-+ void *vp;
-+ gfp_t gfp_mask;
-+};
-+
-+extern reiser4_context *get_context_by_lock_stack(lock_stack *);
-+
-+/* Debugging helps. */
-+#if REISER4_DEBUG
-+extern void print_contexts(void);
-+#endif
-+
-+#define current_tree (&(get_super_private(reiser4_get_current_sb())->tree))
-+#define current_blocksize reiser4_get_current_sb()->s_blocksize
-+#define current_blocksize_bits reiser4_get_current_sb()->s_blocksize_bits
-+
-+extern reiser4_context *reiser4_init_context(struct super_block *);
-+extern void init_stack_context(reiser4_context *, struct super_block *);
-+extern void reiser4_exit_context(reiser4_context *);
-+
-+/* magic constant we store in reiser4_context allocated at the stack. Used to
-+ catch accesses to staled or uninitialized contexts. */
-+#define context_magic ((__u32) 0x4b1b5d0b)
-+
-+extern int is_in_reiser4_context(void);
-+
-+/*
-+ * return reiser4_context for the thread @tsk
-+ */
-+static inline reiser4_context *get_context(const struct task_struct *tsk)
-+{
-+ assert("vs-1682",
-+ ((reiser4_context *) tsk->journal_info)->magic == context_magic);
-+ return (reiser4_context *) tsk->journal_info;
-+}
-+
-+/*
-+ * return reiser4 context of the current thread, or NULL if there is none.
-+ */
-+static inline reiser4_context *get_current_context_check(void)
-+{
-+ if (is_in_reiser4_context())
-+ return get_context(current);
-+ else
-+ return NULL;
-+}
-+
-+static inline reiser4_context *get_current_context(void); /* __attribute__((const)); */
-+
-+/* return context associated with current thread */
-+static inline reiser4_context *get_current_context(void)
-+{
-+ return get_context(current);
-+}
-+
-+static inline gfp_t reiser4_ctx_gfp_mask_get(void)
-+{
-+ reiser4_context *ctx;
-+
-+ ctx = get_current_context_check();
-+ return (ctx == NULL) ? GFP_KERNEL : ctx->gfp_mask;
-+}
-+
-+void reiser4_ctx_gfp_mask_set(void);
-+void reiser4_ctx_gfp_mask_force (gfp_t mask);
-+
-+/*
-+ * true if current thread is in the write-out mode. Thread enters write-out
-+ * mode during jnode_flush and reiser4_write_logs().
-+ */
-+static inline int is_writeout_mode(void)
-+{
-+ return get_current_context()->writeout_mode;
-+}
-+
-+/*
-+ * enter write-out mode
-+ */
-+static inline void writeout_mode_enable(void)
-+{
-+ assert("zam-941", !get_current_context()->writeout_mode);
-+ get_current_context()->writeout_mode = 1;
-+}
-+
-+/*
-+ * leave write-out mode
-+ */
-+static inline void writeout_mode_disable(void)
-+{
-+ assert("zam-942", get_current_context()->writeout_mode);
-+ get_current_context()->writeout_mode = 0;
-+}
-+
-+static inline void grab_space_enable(void)
-+{
-+ get_current_context()->grab_enabled = 1;
-+}
-+
-+static inline void grab_space_disable(void)
-+{
-+ get_current_context()->grab_enabled = 0;
-+}
-+
-+static inline void grab_space_set_enabled(int enabled)
-+{
-+ get_current_context()->grab_enabled = enabled;
-+}
-+
-+static inline int is_grab_enabled(reiser4_context * ctx)
-+{
-+ return ctx->grab_enabled;
-+}
-+
-+/* mark transaction handle in @ctx as TXNH_DONT_COMMIT, so that no commit or
-+ * flush would be performed when it is closed. This is necessary when handle
-+ * has to be closed under some coarse semaphore, like i_mutex of
-+ * directory. Commit will be performed by ktxnmgrd. */
-+static inline void context_set_commit_async(reiser4_context * context)
-+{
-+ context->nobalance = 1;
-+ context->trans->flags |= TXNH_DONT_COMMIT;
-+}
-+
-+/* __REISER4_CONTEXT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/coord.c linux-2.6.24/fs/reiser4/coord.c
---- linux-2.6.24.orig/fs/reiser4/coord.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/coord.c 2008-01-25 11:39:06.904199446 +0300
-@@ -0,0 +1,935 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tree.h"
-+#include "plugin/item/item.h"
-+#include "znode.h"
-+#include "coord.h"
-+
-+/* Internal constructor. */
-+static inline void
-+coord_init_values(coord_t * coord, const znode * node, pos_in_node_t item_pos,
-+ pos_in_node_t unit_pos, between_enum between)
-+{
-+ coord->node = (znode *) node;
-+ coord_set_item_pos(coord, item_pos);
-+ coord->unit_pos = unit_pos;
-+ coord->between = between;
-+ ON_DEBUG(coord->plug_v = 0);
-+ ON_DEBUG(coord->body_v = 0);
-+
-+ /*ON_TRACE (TRACE_COORDS, "init coord %p node %p: %u %u %s\n", coord, node, item_pos, unit_pos, coord_tween_tostring (between)); */
-+}
-+
-+/* after shifting of node content, coord previously set properly may become
-+ invalid, try to "normalize" it. */
-+void coord_normalize(coord_t * coord)
-+{
-+ znode *node;
-+
-+ node = coord->node;
-+ assert("vs-683", node);
-+
-+ coord_clear_iplug(coord);
-+
-+ if (node_is_empty(node)) {
-+ coord_init_first_unit(coord, node);
-+ } else if ((coord->between == AFTER_ITEM)
-+ || (coord->between == AFTER_UNIT)) {
-+ return;
-+ } else if (coord->item_pos == coord_num_items(coord)
-+ && coord->between == BEFORE_ITEM) {
-+ coord_dec_item_pos(coord);
-+ coord->between = AFTER_ITEM;
-+ } else if (coord->unit_pos == coord_num_units(coord)
-+ && coord->between == BEFORE_UNIT) {
-+ coord->unit_pos--;
-+ coord->between = AFTER_UNIT;
-+ } else if (coord->item_pos == coord_num_items(coord)
-+ && coord->unit_pos == 0 && coord->between == BEFORE_UNIT) {
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+ }
-+}
-+
-+/* Copy a coordinate. */
-+void coord_dup(coord_t * coord, const coord_t * old_coord)
-+{
-+ assert("jmacd-9800", coord_check(old_coord));
-+ coord_dup_nocheck(coord, old_coord);
-+}
-+
-+/* Copy a coordinate without check. Useful when old_coord->node is not
-+ loaded. As in cbk_tree_lookup -> connect_znode -> connect_one_side */
-+void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord)
-+{
-+ coord->node = old_coord->node;
-+ coord_set_item_pos(coord, old_coord->item_pos);
-+ coord->unit_pos = old_coord->unit_pos;
-+ coord->between = old_coord->between;
-+ coord->iplugid = old_coord->iplugid;
-+ ON_DEBUG(coord->plug_v = old_coord->plug_v);
-+ ON_DEBUG(coord->body_v = old_coord->body_v);
-+}
-+
-+/* Initialize an invalid coordinate. */
-+void coord_init_invalid(coord_t * coord, const znode * node)
-+{
-+ coord_init_values(coord, node, 0, 0, INVALID_COORD);
-+}
-+
-+void coord_init_first_unit_nocheck(coord_t * coord, const znode * node)
-+{
-+ coord_init_values(coord, node, 0, 0, AT_UNIT);
-+}
-+
-+/* Initialize a coordinate to point at the first unit of the first item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+void coord_init_first_unit(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node, 0, 0, (is_empty ? EMPTY_NODE : AT_UNIT));
-+
-+ assert("jmacd-9801", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to point at the last unit of the last item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+void coord_init_last_unit(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node,
-+ (is_empty ? 0 : node_num_items(node) - 1), 0,
-+ (is_empty ? EMPTY_NODE : AT_UNIT));
-+ if (!is_empty)
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ assert("jmacd-9802", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to before the first item. If the node is empty, it is
-+ positioned at the EMPTY_NODE. */
-+void coord_init_before_first_item(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node, 0, 0,
-+ (is_empty ? EMPTY_NODE : BEFORE_UNIT));
-+
-+ assert("jmacd-9803", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to after the last item. If the node is empty, it is positioned
-+ at the EMPTY_NODE. */
-+void coord_init_after_last_item(coord_t * coord, const znode * node)
-+{
-+ int is_empty = node_is_empty(node);
-+
-+ coord_init_values(coord, node,
-+ (is_empty ? 0 : node_num_items(node) - 1), 0,
-+ (is_empty ? EMPTY_NODE : AFTER_ITEM));
-+
-+ assert("jmacd-9804", coord_check(coord));
-+}
-+
-+/* Initialize a coordinate to after last unit in the item. Coord must be set
-+ already to existing item */
-+void coord_init_after_item_end(coord_t * coord)
-+{
-+ coord->between = AFTER_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+}
-+
-+/* Initialize a coordinate to before the item. Coord must be set already to existing item */
-+void coord_init_before_item(coord_t * coord)
-+{
-+ coord->unit_pos = 0;
-+ coord->between = BEFORE_ITEM;
-+}
-+
-+/* Initialize a coordinate to after the item. Coord must be set already to existing item */
-+void coord_init_after_item(coord_t * coord)
-+{
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+}
-+
-+/* Initialize a coordinate by 0s. Used in places where init_coord was used and
-+ it was not clear how actually */
-+void coord_init_zero(coord_t * coord)
-+{
-+ memset(coord, 0, sizeof(*coord));
-+}
-+
-+/* Return the number of units at the present item. Asserts coord_is_existing_item(). */
-+unsigned coord_num_units(const coord_t * coord)
-+{
-+ assert("jmacd-9806", coord_is_existing_item(coord));
-+
-+ return item_plugin_by_coord(coord)->b.nr_units(coord);
-+}
-+
-+/* Returns true if the coord was initializewd by coord_init_invalid (). */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_invalid(const coord_t * coord)
-+{
-+ return coord->between == INVALID_COORD;
-+}
-+
-+/* Returns true if the coordinate is positioned at an existing item, not before or after
-+ an item. It may be placed at, before, or after any unit within the item, whether
-+ existing or not. */
-+int coord_is_existing_item(const coord_t * coord)
-+{
-+ switch (coord->between) {
-+ case EMPTY_NODE:
-+ case BEFORE_ITEM:
-+ case AFTER_ITEM:
-+ case INVALID_COORD:
-+ return 0;
-+
-+ case BEFORE_UNIT:
-+ case AT_UNIT:
-+ case AFTER_UNIT:
-+ return coord->item_pos < coord_num_items(coord);
-+ }
-+
-+ impossible("jmacd-9900", "unreachable coord: %p", coord);
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned at an existing unit, not before or after a
-+ unit. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_existing_unit(const coord_t * coord)
-+{
-+ switch (coord->between) {
-+ case EMPTY_NODE:
-+ case BEFORE_UNIT:
-+ case AFTER_UNIT:
-+ case BEFORE_ITEM:
-+ case AFTER_ITEM:
-+ case INVALID_COORD:
-+ return 0;
-+
-+ case AT_UNIT:
-+ return (coord->item_pos < coord_num_items(coord)
-+ && coord->unit_pos < coord_num_units(coord));
-+ }
-+
-+ impossible("jmacd-9902", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned at the first unit of the first item. Not
-+ true for empty nodes nor coordinates positioned before the first item. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_leftmost_unit(const coord_t * coord)
-+{
-+ return (coord->between == AT_UNIT && coord->item_pos == 0
-+ && coord->unit_pos == 0);
-+}
-+
-+#if REISER4_DEBUG
-+/* For assertions only, checks for a valid coordinate. */
-+int coord_check(const coord_t * coord)
-+{
-+ if (coord->node == NULL) {
-+ return 0;
-+ }
-+ if (znode_above_root(coord->node))
-+ return 1;
-+
-+ switch (coord->between) {
-+ default:
-+ case INVALID_COORD:
-+ return 0;
-+ case EMPTY_NODE:
-+ if (!node_is_empty(coord->node)) {
-+ return 0;
-+ }
-+ return coord->item_pos == 0 && coord->unit_pos == 0;
-+
-+ case BEFORE_UNIT:
-+ case AFTER_UNIT:
-+ if (node_is_empty(coord->node) && (coord->item_pos == 0)
-+ && (coord->unit_pos == 0))
-+ return 1;
-+ case AT_UNIT:
-+ break;
-+ case AFTER_ITEM:
-+ case BEFORE_ITEM:
-+ /* before/after item should not set unit_pos. */
-+ if (coord->unit_pos != 0) {
-+ return 0;
-+ }
-+ break;
-+ }
-+
-+ if (coord->item_pos >= node_num_items(coord->node)) {
-+ return 0;
-+ }
-+
-+ /* FIXME-VS: we are going to check unit_pos. This makes no sense when
-+ between is set either AFTER_ITEM or BEFORE_ITEM */
-+ if (coord->between == AFTER_ITEM || coord->between == BEFORE_ITEM)
-+ return 1;
-+
-+ if (coord_is_iplug_set(coord) &&
-+ coord->unit_pos >
-+ item_plugin_by_coord(coord)->b.nr_units(coord) - 1) {
-+ return 0;
-+ }
-+ return 1;
-+}
-+#endif
-+
-+/* Adjust coordinate boundaries based on the number of items prior to coord_next/prev.
-+ Returns 1 if the new position is does not exist. */
-+static int coord_adjust_items(coord_t * coord, unsigned items, int is_next)
-+{
-+ /* If the node is invalid, leave it. */
-+ if (coord->between == INVALID_COORD) {
-+ return 1;
-+ }
-+
-+ /* If the node is empty, set it appropriately. */
-+ if (items == 0) {
-+ coord->between = EMPTY_NODE;
-+ coord_set_item_pos(coord, 0);
-+ coord->unit_pos = 0;
-+ return 1;
-+ }
-+
-+ /* If it was empty and it no longer is, set to BEFORE/AFTER_ITEM. */
-+ if (coord->between == EMPTY_NODE) {
-+ coord->between = (is_next ? BEFORE_ITEM : AFTER_ITEM);
-+ coord_set_item_pos(coord, 0);
-+ coord->unit_pos = 0;
-+ return 0;
-+ }
-+
-+ /* If the item_pos is out-of-range, set it appropriatly. */
-+ if (coord->item_pos >= items) {
-+ coord->between = AFTER_ITEM;
-+ coord_set_item_pos(coord, items - 1);
-+ coord->unit_pos = 0;
-+ /* If is_next, return 1 (can't go any further). */
-+ return is_next;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one unit to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is an
-+ existing unit. */
-+int coord_next_unit(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case BEFORE_UNIT:
-+ /* Now it is positioned at the same unit. */
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ case AT_UNIT:
-+ /* If it was at or after a unit and there are more units in this item,
-+ advance to the next one. */
-+ if (coord->unit_pos < coord_last_unit_pos(coord)) {
-+ coord->unit_pos += 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ /* Otherwise, it is crossing an item boundary and treated as if it was
-+ after the current item. */
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ /* FALLTHROUGH */
-+
-+ case AFTER_ITEM:
-+ /* Check for end-of-node. */
-+ if (coord->item_pos == items - 1) {
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ /* The adjust_items checks ensure that we are valid here. */
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ /* Handled in coord_adjust_items(). */
-+ break;
-+ }
-+
-+ impossible("jmacd-9902", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one item to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing item. */
-+int coord_next_item(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AFTER_UNIT:
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ case AFTER_ITEM:
-+ /* Check for end-of-node. */
-+ if (coord->item_pos == items - 1) {
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ coord_clear_iplug(coord);
-+ return 1;
-+ }
-+
-+ /* Anywhere in an item, go to the next one. */
-+ coord->between = AT_UNIT;
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ /* The out-of-range check ensures that we are valid here. */
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ /* Handled in coord_adjust_items(). */
-+ break;
-+ }
-+
-+ impossible("jmacd-9903", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one unit to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing unit. */
-+int coord_prev_unit(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ if (coord->unit_pos > 0) {
-+ coord->unit_pos -= 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ /* What if unit_pos is out-of-range? */
-+ assert("jmacd-5442",
-+ coord->unit_pos <= coord_last_unit_pos(coord));
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ if (coord->item_pos == 0) {
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ /* FALLTHROUGH */
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ break;
-+ }
-+
-+ impossible("jmacd-9904", "unreachable");
-+ return 0;
-+}
-+
-+/* Advances the coordinate by one item to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing item. */
-+int coord_prev_item(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ case AFTER_UNIT:
-+ case BEFORE_UNIT:
-+ case BEFORE_ITEM:
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ coord->unit_pos = 0;
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = 0;
-+ return 0;
-+
-+ case INVALID_COORD:
-+ case EMPTY_NODE:
-+ break;
-+ }
-+
-+ impossible("jmacd-9905", "unreachable");
-+ return 0;
-+}
-+
-+/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */
-+void coord_init_sideof_unit(coord_t * coord, const znode * node, sideof dir)
-+{
-+ assert("jmacd-9821", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ coord_init_first_unit(coord, node);
-+ } else {
-+ coord_init_last_unit(coord, node);
-+ }
-+}
-+
-+/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof
-+ argument. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_after_sideof_unit(coord_t * coord, sideof dir)
-+{
-+ assert("jmacd-9822", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ return coord_is_before_leftmost(coord);
-+ } else {
-+ return coord_is_after_rightmost(coord);
-+ }
-+}
-+
-+/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. */
-+/* Audited by: green(2002.06.15) */
-+int coord_sideof_unit(coord_t * coord, sideof dir)
-+{
-+ assert("jmacd-9823", dir == LEFT_SIDE || dir == RIGHT_SIDE);
-+ if (dir == LEFT_SIDE) {
-+ return coord_prev_unit(coord);
-+ } else {
-+ return coord_next_unit(coord);
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+int coords_equal(const coord_t * c1, const coord_t * c2)
-+{
-+ assert("nikita-2840", c1 != NULL);
-+ assert("nikita-2841", c2 != NULL);
-+
-+ return
-+ c1->node == c2->node &&
-+ c1->item_pos == c2->item_pos &&
-+ c1->unit_pos == c2->unit_pos && c1->between == c2->between;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost
-+ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */
-+/* Audited by: green(2002.06.15) */
-+coord_wrt_node coord_wrt(const coord_t * coord)
-+{
-+ if (coord_is_before_leftmost(coord)) {
-+ return COORD_ON_THE_LEFT;
-+ }
-+
-+ if (coord_is_after_rightmost(coord)) {
-+ return COORD_ON_THE_RIGHT;
-+ }
-+
-+ return COORD_INSIDE;
-+}
-+
-+/* Returns true if the coordinate is positioned after the last item or after the last unit
-+ of the last item or it is an empty node. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_after_rightmost(const coord_t * coord)
-+{
-+ assert("jmacd-7313", coord_check(coord));
-+
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ case BEFORE_UNIT:
-+ case BEFORE_ITEM:
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case AFTER_ITEM:
-+ return (coord->item_pos == node_num_items(coord->node) - 1);
-+
-+ case AFTER_UNIT:
-+ return ((coord->item_pos == node_num_items(coord->node) - 1) &&
-+ coord->unit_pos == coord_last_unit_pos(coord));
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned before the first item or it is an empty
-+ node. */
-+int coord_is_before_leftmost(const coord_t * coord)
-+{
-+ /* FIXME-VS: coord_check requires node to be loaded whereas it is not
-+ necessary to check if coord is set before leftmost
-+ assert ("jmacd-7313", coord_check (coord)); */
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ case AFTER_ITEM:
-+ case AFTER_UNIT:
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case BEFORE_ITEM:
-+ case BEFORE_UNIT:
-+ return (coord->item_pos == 0) && (coord->unit_pos == 0);
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+/* Returns true if the coordinate is positioned after a item, before a item, after the
-+ last unit of an item, before the first unit of an item, or at an empty node. */
-+/* Audited by: green(2002.06.15) */
-+int coord_is_between_items(const coord_t * coord)
-+{
-+ assert("jmacd-7313", coord_check(coord));
-+
-+ switch (coord->between) {
-+ case INVALID_COORD:
-+ case AT_UNIT:
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ case BEFORE_ITEM:
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case BEFORE_UNIT:
-+ return coord->unit_pos == 0;
-+
-+ case AFTER_UNIT:
-+ return coord->unit_pos == coord_last_unit_pos(coord);
-+ }
-+
-+ impossible("jmacd-9908", "unreachable");
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* Returns true if the coordinates are positioned at adjacent units, regardless of
-+ before-after or item boundaries. */
-+int coord_are_neighbors(coord_t * c1, coord_t * c2)
-+{
-+ coord_t *left;
-+ coord_t *right;
-+
-+ assert("nikita-1241", c1 != NULL);
-+ assert("nikita-1242", c2 != NULL);
-+ assert("nikita-1243", c1->node == c2->node);
-+ assert("nikita-1244", coord_is_existing_unit(c1));
-+ assert("nikita-1245", coord_is_existing_unit(c2));
-+
-+ left = right = NULL;
-+ switch (coord_compare(c1, c2)) {
-+ case COORD_CMP_ON_LEFT:
-+ left = c1;
-+ right = c2;
-+ break;
-+ case COORD_CMP_ON_RIGHT:
-+ left = c2;
-+ right = c1;
-+ break;
-+ case COORD_CMP_SAME:
-+ return 0;
-+ default:
-+ wrong_return_value("nikita-1246", "compare_coords()");
-+ }
-+ assert("vs-731", left && right);
-+ if (left->item_pos == right->item_pos) {
-+ return left->unit_pos + 1 == right->unit_pos;
-+ } else if (left->item_pos + 1 == right->item_pos) {
-+ return (left->unit_pos == coord_last_unit_pos(left))
-+ && (right->unit_pos == 0);
-+ } else {
-+ return 0;
-+ }
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* Assuming two coordinates are positioned in the same node, return COORD_CMP_ON_RIGHT,
-+ COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's position relative to c2. */
-+/* Audited by: green(2002.06.15) */
-+coord_cmp coord_compare(coord_t * c1, coord_t * c2)
-+{
-+ assert("vs-209", c1->node == c2->node);
-+ assert("vs-194", coord_is_existing_unit(c1)
-+ && coord_is_existing_unit(c2));
-+
-+ if (c1->item_pos > c2->item_pos)
-+ return COORD_CMP_ON_RIGHT;
-+ if (c1->item_pos < c2->item_pos)
-+ return COORD_CMP_ON_LEFT;
-+ if (c1->unit_pos > c2->unit_pos)
-+ return COORD_CMP_ON_RIGHT;
-+ if (c1->unit_pos < c2->unit_pos)
-+ return COORD_CMP_ON_LEFT;
-+ return COORD_CMP_SAME;
-+}
-+
-+/* If the coordinate is between items, shifts it to the right. Returns 0 on success and
-+ non-zero if there is no position to the right. */
-+int coord_set_to_right(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 1) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ return 0;
-+
-+ case BEFORE_ITEM:
-+ case BEFORE_UNIT:
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ if (coord->unit_pos < coord_last_unit_pos(coord)) {
-+ coord->unit_pos += 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ } else {
-+
-+ coord->unit_pos = 0;
-+
-+ if (coord->item_pos == items - 1) {
-+ coord->between = AFTER_ITEM;
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ case AFTER_ITEM:
-+ if (coord->item_pos == items - 1) {
-+ return 1;
-+ }
-+
-+ coord_inc_item_pos(coord);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case INVALID_COORD:
-+ break;
-+ }
-+
-+ impossible("jmacd-9920", "unreachable");
-+ return 0;
-+}
-+
-+/* If the coordinate is between items, shifts it to the left. Returns 0 on success and
-+ non-zero if there is no position to the left. */
-+int coord_set_to_left(coord_t * coord)
-+{
-+ unsigned items = coord_num_items(coord);
-+
-+ if (coord_adjust_items(coord, items, 0) == 1) {
-+ return 1;
-+ }
-+
-+ switch (coord->between) {
-+ case AT_UNIT:
-+ return 0;
-+
-+ case AFTER_UNIT:
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case AFTER_ITEM:
-+ coord->between = AT_UNIT;
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ return 0;
-+
-+ case BEFORE_UNIT:
-+ if (coord->unit_pos > 0) {
-+ coord->unit_pos -= 1;
-+ coord->between = AT_UNIT;
-+ return 0;
-+ } else {
-+
-+ if (coord->item_pos == 0) {
-+ coord->between = BEFORE_ITEM;
-+ return 1;
-+ }
-+
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord_dec_item_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+ }
-+
-+ case BEFORE_ITEM:
-+ if (coord->item_pos == 0) {
-+ return 1;
-+ }
-+
-+ coord_dec_item_pos(coord);
-+ coord->unit_pos = coord_last_unit_pos(coord);
-+ coord->between = AT_UNIT;
-+ return 0;
-+
-+ case EMPTY_NODE:
-+ return 1;
-+
-+ case INVALID_COORD:
-+ break;
-+ }
-+
-+ impossible("jmacd-9920", "unreachable");
-+ return 0;
-+}
-+
-+static const char *coord_tween_tostring(between_enum n)
-+{
-+ switch (n) {
-+ case BEFORE_UNIT:
-+ return "before unit";
-+ case BEFORE_ITEM:
-+ return "before item";
-+ case AT_UNIT:
-+ return "at unit";
-+ case AFTER_UNIT:
-+ return "after unit";
-+ case AFTER_ITEM:
-+ return "after item";
-+ case EMPTY_NODE:
-+ return "empty node";
-+ case INVALID_COORD:
-+ return "invalid";
-+ default:
-+ {
-+ static char buf[30];
-+
-+ sprintf(buf, "unknown: %i", n);
-+ return buf;
-+ }
-+ }
-+}
-+
-+void print_coord(const char *mes, const coord_t * coord, int node)
-+{
-+ if (coord == NULL) {
-+ printk("%s: null\n", mes);
-+ return;
-+ }
-+ printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n",
-+ mes, coord->item_pos, coord->unit_pos,
-+ coord_tween_tostring(coord->between), coord->iplugid);
-+}
-+
-+int
-+item_utmost_child_real_block(const coord_t * coord, sideof side,
-+ reiser4_block_nr * blk)
-+{
-+ return item_plugin_by_coord(coord)->f.utmost_child_real_block(coord,
-+ side,
-+ blk);
-+}
-+
-+int item_utmost_child(const coord_t * coord, sideof side, jnode ** child)
-+{
-+ return item_plugin_by_coord(coord)->f.utmost_child(coord, side, child);
-+}
-+
-+/* @count bytes of flow @f got written, update correspondingly f->length,
-+ f->data and f->key */
-+void move_flow_forward(flow_t * f, unsigned count)
-+{
-+ if (f->data)
-+ f->data += count;
-+ f->length -= count;
-+ set_key_offset(&f->key, get_key_offset(&f->key) + count);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/coord.h linux-2.6.24/fs/reiser4/coord.h
---- linux-2.6.24.orig/fs/reiser4/coord.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/coord.h 2008-01-25 11:39:06.908200476 +0300
-@@ -0,0 +1,389 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Coords */
-+
-+#if !defined( __REISER4_COORD_H__ )
-+#define __REISER4_COORD_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+
-+/* insertions happen between coords in the tree, so we need some means
-+ of specifying the sense of betweenness. */
-+typedef enum {
-+ BEFORE_UNIT, /* Note: we/init_coord depends on this value being zero. */
-+ AT_UNIT,
-+ AFTER_UNIT,
-+ BEFORE_ITEM,
-+ AFTER_ITEM,
-+ INVALID_COORD,
-+ EMPTY_NODE,
-+} between_enum;
-+
-+/* location of coord w.r.t. its node */
-+typedef enum {
-+ COORD_ON_THE_LEFT = -1,
-+ COORD_ON_THE_RIGHT = +1,
-+ COORD_INSIDE = 0
-+} coord_wrt_node;
-+
-+typedef enum {
-+ COORD_CMP_SAME = 0, COORD_CMP_ON_LEFT = -1, COORD_CMP_ON_RIGHT = +1
-+} coord_cmp;
-+
-+struct coord {
-+ /* node in a tree */
-+ /* 0 */ znode *node;
-+
-+ /* position of item within node */
-+ /* 4 */ pos_in_node_t item_pos;
-+ /* position of unit within item */
-+ /* 6 */ pos_in_node_t unit_pos;
-+ /* optimization: plugin of item is stored in coord_t. Until this was
-+ implemented, item_plugin_by_coord() was major CPU consumer. ->iplugid
-+ is invalidated (set to 0xff) on each modification of ->item_pos,
-+ and all such modifications are funneled through coord_*_item_pos()
-+ functions below.
-+ */
-+ /* 8 */ char iplugid;
-+ /* position of coord w.r.t. to neighboring items and/or units.
-+ Values are taken from &between_enum above.
-+ */
-+ /* 9 */ char between;
-+ /* padding. It will be added by the compiler anyway to conform to the
-+ * C language alignment requirements. We keep it here to be on the
-+ * safe side and to have a clear picture of the memory layout of this
-+ * structure. */
-+ /* 10 */ __u16 pad;
-+ /* 12 */ int offset;
-+#if REISER4_DEBUG
-+ unsigned long plug_v;
-+ unsigned long body_v;
-+#endif
-+};
-+
-+#define INVALID_PLUGID ((char)((1 << 8) - 1))
-+#define INVALID_OFFSET -1
-+
-+static inline void coord_clear_iplug(coord_t * coord)
-+{
-+ assert("nikita-2835", coord != NULL);
-+ coord->iplugid = INVALID_PLUGID;
-+ coord->offset = INVALID_OFFSET;
-+}
-+
-+static inline int coord_is_iplug_set(const coord_t * coord)
-+{
-+ assert("nikita-2836", coord != NULL);
-+ return coord->iplugid != INVALID_PLUGID;
-+}
-+
-+static inline void coord_set_item_pos(coord_t * coord, pos_in_node_t pos)
-+{
-+ assert("nikita-2478", coord != NULL);
-+ coord->item_pos = pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_dec_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2480", coord != NULL);
-+ --coord->item_pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_inc_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2481", coord != NULL);
-+ ++coord->item_pos;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_add_item_pos(coord_t * coord, int delta)
-+{
-+ assert("nikita-2482", coord != NULL);
-+ coord->item_pos += delta;
-+ coord_clear_iplug(coord);
-+}
-+
-+static inline void coord_invalid_item_pos(coord_t * coord)
-+{
-+ assert("nikita-2832", coord != NULL);
-+ coord->item_pos = (unsigned short)~0;
-+ coord_clear_iplug(coord);
-+}
-+
-+/* Reverse a direction. */
-+static inline sideof sideof_reverse(sideof side)
-+{
-+ return side == LEFT_SIDE ? RIGHT_SIDE : LEFT_SIDE;
-+}
-+
-+/* NOTE: There is a somewhat odd mixture of the following opposed terms:
-+
-+ "first" and "last"
-+ "next" and "prev"
-+ "before" and "after"
-+ "leftmost" and "rightmost"
-+
-+ But I think the chosen names are decent the way they are.
-+*/
-+
-+/* COORD INITIALIZERS */
-+
-+/* Initialize an invalid coordinate. */
-+extern void coord_init_invalid(coord_t * coord, const znode * node);
-+
-+extern void coord_init_first_unit_nocheck(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to point at the first unit of the first item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+extern void coord_init_first_unit(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to point at the last unit of the last item. If the node is
-+ empty, it is positioned at the EMPTY_NODE. */
-+extern void coord_init_last_unit(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to before the first item. If the node is empty, it is
-+ positioned at the EMPTY_NODE. */
-+extern void coord_init_before_first_item(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to after the last item. If the node is empty, it is positioned
-+ at the EMPTY_NODE. */
-+extern void coord_init_after_last_item(coord_t * coord, const znode * node);
-+
-+/* Initialize a coordinate to after last unit in the item. Coord must be set
-+ already to existing item */
-+void coord_init_after_item_end(coord_t * coord);
-+
-+/* Initialize a coordinate to before the item. Coord must be set already to existing item */
-+void coord_init_before_item(coord_t *);
-+/* Initialize a coordinate to after the item. Coord must be set already to existing item */
-+void coord_init_after_item(coord_t *);
-+
-+/* Calls either coord_init_first_unit or coord_init_last_unit depending on sideof argument. */
-+extern void coord_init_sideof_unit(coord_t * coord, const znode * node,
-+ sideof dir);
-+
-+/* Initialize a coordinate by 0s. Used in places where init_coord was used and
-+ it was not clear how actually
-+ FIXME-VS: added by vs (2002, june, 8) */
-+extern void coord_init_zero(coord_t * coord);
-+
-+/* COORD METHODS */
-+
-+/* after shifting of node content, coord previously set properly may become
-+ invalid, try to "normalize" it. */
-+void coord_normalize(coord_t * coord);
-+
-+/* Copy a coordinate. */
-+extern void coord_dup(coord_t * coord, const coord_t * old_coord);
-+
-+/* Copy a coordinate without check. */
-+void coord_dup_nocheck(coord_t * coord, const coord_t * old_coord);
-+
-+unsigned coord_num_units(const coord_t * coord);
-+
-+/* Return the last valid unit number at the present item (i.e.,
-+ coord_num_units() - 1). */
-+static inline unsigned coord_last_unit_pos(const coord_t * coord)
-+{
-+ return coord_num_units(coord) - 1;
-+}
-+
-+#if REISER4_DEBUG
-+/* For assertions only, checks for a valid coordinate. */
-+extern int coord_check(const coord_t * coord);
-+
-+extern unsigned long znode_times_locked(const znode * z);
-+
-+static inline void coord_update_v(coord_t * coord)
-+{
-+ coord->plug_v = coord->body_v = znode_times_locked(coord->node);
-+}
-+#endif
-+
-+extern int coords_equal(const coord_t * c1, const coord_t * c2);
-+
-+extern void print_coord(const char *mes, const coord_t * coord, int print_node);
-+
-+/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if coord_is_after_leftmost
-+ return NCOORD_ON_THE_LEFT, otherwise return NCOORD_INSIDE. */
-+extern coord_wrt_node coord_wrt(const coord_t * coord);
-+
-+/* Returns true if the coordinates are positioned at adjacent units, regardless of
-+ before-after or item boundaries. */
-+extern int coord_are_neighbors(coord_t * c1, coord_t * c2);
-+
-+/* Assuming two coordinates are positioned in the same node, return NCOORD_CMP_ON_RIGHT,
-+ NCOORD_CMP_ON_LEFT, or NCOORD_CMP_SAME depending on c1's position relative to c2. */
-+extern coord_cmp coord_compare(coord_t * c1, coord_t * c2);
-+
-+/* COORD PREDICATES */
-+
-+/* Returns true if the coord was initializewd by coord_init_invalid (). */
-+extern int coord_is_invalid(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an existing item, not before or after
-+ an item. It may be placed at, before, or after any unit within the item, whether
-+ existing or not. If this is true you can call methods of the item plugin. */
-+extern int coord_is_existing_item(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned after a item, before a item, after the
-+ last unit of an item, before the first unit of an item, or at an empty node. */
-+extern int coord_is_between_items(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an existing unit, not before or after a
-+ unit. */
-+extern int coord_is_existing_unit(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at an empty node. */
-+extern int coord_is_empty(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned at the first unit of the first item. Not
-+ true for empty nodes nor coordinates positioned before the first item. */
-+extern int coord_is_leftmost_unit(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned after the last item or after the last unit
-+ of the last item or it is an empty node. */
-+extern int coord_is_after_rightmost(const coord_t * coord);
-+
-+/* Returns true if the coordinate is positioned before the first item or it is an empty
-+ node. */
-+extern int coord_is_before_leftmost(const coord_t * coord);
-+
-+/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending on sideof
-+ argument. */
-+extern int coord_is_after_sideof_unit(coord_t * coord, sideof dir);
-+
-+/* COORD MODIFIERS */
-+
-+/* Advances the coordinate by one unit to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing unit. */
-+extern int coord_next_unit(coord_t * coord);
-+
-+/* Advances the coordinate by one item to the right. If empty, no change. If
-+ coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new position is
-+ an existing item. */
-+extern int coord_next_item(coord_t * coord);
-+
-+/* Advances the coordinate by one unit to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing unit. */
-+extern int coord_prev_unit(coord_t * coord);
-+
-+/* Advances the coordinate by one item to the left. If empty, no change. If
-+ coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new position
-+ is an existing item. */
-+extern int coord_prev_item(coord_t * coord);
-+
-+/* If the coordinate is between items, shifts it to the right. Returns 0 on success and
-+ non-zero if there is no position to the right. */
-+extern int coord_set_to_right(coord_t * coord);
-+
-+/* If the coordinate is between items, shifts it to the left. Returns 0 on success and
-+ non-zero if there is no position to the left. */
-+extern int coord_set_to_left(coord_t * coord);
-+
-+/* If the coordinate is at an existing unit, set to after that unit. Returns 0 on success
-+ and non-zero if the unit did not exist. */
-+extern int coord_set_after_unit(coord_t * coord);
-+
-+/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument. */
-+extern int coord_sideof_unit(coord_t * coord, sideof dir);
-+
-+/* iterate over all units in @node */
-+#define for_all_units( coord, node ) \
-+ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \
-+ coord_next_unit( coord ) == 0 ; )
-+
-+/* iterate over all items in @node */
-+#define for_all_items( coord, node ) \
-+ for( coord_init_before_first_item( ( coord ), ( node ) ) ; \
-+ coord_next_item( coord ) == 0 ; )
-+
-+/* COORD/ITEM METHODS */
-+
-+extern int item_utmost_child_real_block(const coord_t * coord, sideof side,
-+ reiser4_block_nr * blk);
-+extern int item_utmost_child(const coord_t * coord, sideof side,
-+ jnode ** child);
-+
-+/* a flow is a sequence of bytes being written to or read from the tree. The
-+ tree will slice the flow into items while storing it into nodes, but all of
-+ that is hidden from anything outside the tree. */
-+
-+struct flow {
-+ reiser4_key key; /* key of start of flow's sequence of bytes */
-+ loff_t length; /* length of flow's sequence of bytes */
-+ char *data; /* start of flow's sequence of bytes */
-+ int user; /* if 1 data is user space, 0 - kernel space */
-+ rw_op op; /* NIKITA-FIXME-HANS: comment is where? */
-+};
-+
-+void move_flow_forward(flow_t * f, unsigned count);
-+
-+/* &reiser4_item_data - description of data to be inserted or pasted
-+
-+ Q: articulate the reasons for the difference between this and flow.
-+
-+ A: Becides flow we insert into tree other things: stat data, directory
-+ entry, etc. To insert them into tree one has to provide this structure. If
-+ one is going to insert flow - he can use insert_flow, where this structure
-+ does not have to be created
-+*/
-+struct reiser4_item_data {
-+ /* actual data to be inserted. If NULL, ->create_item() will not
-+ do xmemcpy itself, leaving this up to the caller. This can
-+ save some amount of unnecessary memory copying, for example,
-+ during insertion of stat data.
-+
-+ */
-+ char *data;
-+ /* 1 if 'char * data' contains pointer to user space and 0 if it is
-+ kernel space */
-+ int user;
-+ /* amount of data we are going to insert or paste */
-+ int length;
-+ /* "Arg" is opaque data that is passed down to the
-+ ->create_item() method of node layout, which in turn
-+ hands it to the ->create_hook() of item being created. This
-+ arg is currently used by:
-+
-+ . ->create_hook() of internal item
-+ (fs/reiser4/plugin/item/internal.c:internal_create_hook()),
-+ . ->paste() method of directory item.
-+ . ->create_hook() of extent item
-+
-+ For internal item, this is left "brother" of new node being
-+ inserted and it is used to add new node into sibling list
-+ after parent to it was just inserted into parent.
-+
-+ While ->arg does look somewhat of unnecessary compication,
-+ it actually saves a lot of headache in many places, because
-+ all data necessary to insert or paste new data into tree are
-+ collected in one place, and this eliminates a lot of extra
-+ argument passing and storing everywhere.
-+
-+ */
-+ void *arg;
-+ /* plugin of item we are inserting */
-+ item_plugin *iplug;
-+};
-+
-+/* __REISER4_COORD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/debug.c linux-2.6.24/fs/reiser4/debug.c
---- linux-2.6.24.orig/fs/reiser4/debug.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/debug.c 2008-01-25 11:39:06.908200476 +0300
-@@ -0,0 +1,308 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Debugging facilities. */
-+
-+/*
-+ * This file contains generic debugging functions used by reiser4. Roughly
-+ * following:
-+ *
-+ * panicking: reiser4_do_panic(), reiser4_print_prefix().
-+ *
-+ * locking:
-+ * reiser4_schedulable(), reiser4_lock_counters(), print_lock_counters(),
-+ * reiser4_no_counters_are_held(), reiser4_commit_check_locks()
-+ *
-+ * error code monitoring (see comment before RETERR macro):
-+ * reiser4_return_err(), reiser4_report_err().
-+ *
-+ * stack back-tracing: fill_backtrace()
-+ *
-+ * miscellaneous: reiser4_preempt_point(), call_on_each_assert(),
-+ * reiser4_debugtrap().
-+ *
-+ */
-+
-+#include "reiser4.h"
-+#include "context.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+
-+#include <linux/sysfs.h>
-+#include <linux/slab.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/spinlock.h>
-+#include <linux/kallsyms.h>
-+#include <linux/vmalloc.h>
-+#include <linux/ctype.h>
-+#include <linux/sysctl.h>
-+#include <linux/hardirq.h>
-+
-+#if 0
-+#if REISER4_DEBUG
-+static void reiser4_report_err(void);
-+#else
-+#define reiser4_report_err() noop
-+#endif
-+#endif /* 0 */
-+
-+/*
-+ * global buffer where message given to reiser4_panic is formatted.
-+ */
-+static char panic_buf[REISER4_PANIC_MSG_BUFFER_SIZE];
-+
-+/*
-+ * lock protecting consistency of panic_buf under concurrent panics
-+ */
-+static DEFINE_SPINLOCK(panic_guard);
-+
-+/* Your best friend. Call it on each occasion. This is called by
-+ fs/reiser4/debug.h:reiser4_panic(). */
-+void reiser4_do_panic(const char *format /* format string */ , ... /* rest */ )
-+{
-+ static int in_panic = 0;
-+ va_list args;
-+
-+ /*
-+ * check for recursive panic.
-+ */
-+ if (in_panic == 0) {
-+ in_panic = 1;
-+
-+ spin_lock(&panic_guard);
-+ va_start(args, format);
-+ vsnprintf(panic_buf, sizeof(panic_buf), format, args);
-+ va_end(args);
-+ printk(KERN_EMERG "reiser4 panicked cowardly: %s", panic_buf);
-+ spin_unlock(&panic_guard);
-+
-+ /*
-+ * if kernel debugger is configured---drop in. Early dropping
-+ * into kgdb is not always convenient, because panic message
-+ * is not yet printed most of the times. But:
-+ *
-+ * (1) message can be extracted from printk_buf[]
-+ * (declared static inside of printk()), and
-+ *
-+ * (2) sometimes serial/kgdb combo dies while printing
-+ * long panic message, so it's more prudent to break into
-+ * debugger earlier.
-+ *
-+ */
-+ DEBUGON(1);
-+ }
-+ /* to make gcc happy about noreturn attribute */
-+ panic("%s", panic_buf);
-+}
-+
-+#if 0
-+void
-+reiser4_print_prefix(const char *level, int reperr, const char *mid,
-+ const char *function, const char *file, int lineno)
-+{
-+ const char *comm;
-+ int pid;
-+
-+ if (unlikely(in_interrupt() || in_irq())) {
-+ comm = "interrupt";
-+ pid = 0;
-+ } else {
-+ comm = current->comm;
-+ pid = current->pid;
-+ }
-+ printk("%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n",
-+ level, comm, pid, function, file, lineno, mid);
-+ if (reperr)
-+ reiser4_report_err();
-+}
-+#endif /* 0 */
-+
-+/* Preemption point: this should be called periodically during long running
-+ operations (carry, allocate, and squeeze are best examples) */
-+int reiser4_preempt_point(void)
-+{
-+ assert("nikita-3008", reiser4_schedulable());
-+ cond_resched();
-+ return signal_pending(current);
-+}
-+
-+#if REISER4_DEBUG
-+/* Debugging aid: return struct where information about locks taken by current
-+ thread is accumulated. This can be used to formulate lock ordering
-+ constraints and various assertions.
-+
-+*/
-+reiser4_lock_cnt_info *reiser4_lock_counters(void)
-+{
-+ reiser4_context *ctx = get_current_context();
-+ assert("jmacd-1123", ctx != NULL);
-+ return &ctx->locks;
-+}
-+
-+/*
-+ * print human readable information about locks held by the reiser4 context.
-+ */
-+static void print_lock_counters(const char *prefix,
-+ const reiser4_lock_cnt_info * info)
-+{
-+ printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n"
-+ "jload: %i, "
-+ "txnh: %i, atom: %i, stack: %i, txnmgr: %i, "
-+ "ktxnmgrd: %i, fq: %i\n"
-+ "inode: %i, "
-+ "cbk_cache: %i (r:%i,w%i), "
-+ "eflush: %i, "
-+ "zlock: %i,\n"
-+ "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n"
-+ "d: %i, x: %i, t: %i\n", prefix,
-+ info->spin_locked_jnode,
-+ info->rw_locked_tree, info->read_locked_tree,
-+ info->write_locked_tree,
-+ info->rw_locked_dk, info->read_locked_dk, info->write_locked_dk,
-+ info->spin_locked_jload,
-+ info->spin_locked_txnh,
-+ info->spin_locked_atom, info->spin_locked_stack,
-+ info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd,
-+ info->spin_locked_fq,
-+ info->spin_locked_inode,
-+ info->rw_locked_cbk_cache,
-+ info->read_locked_cbk_cache,
-+ info->write_locked_cbk_cache,
-+ info->spin_locked_super_eflush,
-+ info->spin_locked_zlock,
-+ info->spin_locked,
-+ info->long_term_locked_znode,
-+ info->inode_sem_r, info->inode_sem_w,
-+ info->d_refs, info->x_refs, info->t_refs);
-+}
-+
-+/* check that no spinlocks are held */
-+int reiser4_schedulable(void)
-+{
-+ if (get_current_context_check() != NULL) {
-+ if (!LOCK_CNT_NIL(spin_locked)) {
-+ print_lock_counters("in atomic", reiser4_lock_counters());
-+ return 0;
-+ }
-+ }
-+ might_sleep();
-+ return 1;
-+}
-+/*
-+ * return true, iff no locks are held.
-+ */
-+int reiser4_no_counters_are_held(void)
-+{
-+ reiser4_lock_cnt_info *counters;
-+
-+ counters = reiser4_lock_counters();
-+ return
-+ (counters->spin_locked_zlock == 0) &&
-+ (counters->spin_locked_jnode == 0) &&
-+ (counters->rw_locked_tree == 0) &&
-+ (counters->read_locked_tree == 0) &&
-+ (counters->write_locked_tree == 0) &&
-+ (counters->rw_locked_dk == 0) &&
-+ (counters->read_locked_dk == 0) &&
-+ (counters->write_locked_dk == 0) &&
-+ (counters->spin_locked_txnh == 0) &&
-+ (counters->spin_locked_atom == 0) &&
-+ (counters->spin_locked_stack == 0) &&
-+ (counters->spin_locked_txnmgr == 0) &&
-+ (counters->spin_locked_inode == 0) &&
-+ (counters->spin_locked == 0) &&
-+ (counters->long_term_locked_znode == 0) &&
-+ (counters->inode_sem_r == 0) &&
-+ (counters->inode_sem_w == 0) && (counters->d_refs == 0);
-+}
-+
-+/*
-+ * return true, iff transaction commit can be done under locks held by the
-+ * current thread.
-+ */
-+int reiser4_commit_check_locks(void)
-+{
-+ reiser4_lock_cnt_info *counters;
-+ int inode_sem_r;
-+ int inode_sem_w;
-+ int result;
-+
-+ /*
-+ * inode's read/write semaphore is the only reiser4 lock that can be
-+ * held during commit.
-+ */
-+
-+ counters = reiser4_lock_counters();
-+ inode_sem_r = counters->inode_sem_r;
-+ inode_sem_w = counters->inode_sem_w;
-+
-+ counters->inode_sem_r = counters->inode_sem_w = 0;
-+ result = reiser4_no_counters_are_held();
-+ counters->inode_sem_r = inode_sem_r;
-+ counters->inode_sem_w = inode_sem_w;
-+ return result;
-+}
-+
-+/*
-+ * fill "error site" in the current reiser4 context. See comment before RETERR
-+ * macro for more details.
-+ */
-+void reiser4_return_err(int code, const char *file, int line)
-+{
-+ if (code < 0 && is_in_reiser4_context()) {
-+ reiser4_context *ctx = get_current_context();
-+
-+ if (ctx != NULL) {
-+ ctx->err.code = code;
-+ ctx->err.file = file;
-+ ctx->err.line = line;
-+ }
-+ }
-+}
-+
-+#if 0
-+/*
-+ * report error information recorder by reiser4_return_err().
-+ */
-+static void reiser4_report_err(void)
-+{
-+ reiser4_context *ctx = get_current_context_check();
-+
-+ if (ctx != NULL) {
-+ if (ctx->err.code != 0) {
-+ printk("code: %i at %s:%i\n",
-+ ctx->err.code, ctx->err.file, ctx->err.line);
-+ }
-+ }
-+}
-+#endif /* 0 */
-+
-+#endif /* REISER4_DEBUG */
-+
-+#if KERNEL_DEBUGGER
-+
-+/*
-+ * this functions just drops into kernel debugger. It is a convenient place to
-+ * put breakpoint in.
-+ */
-+void reiser4_debugtrap(void)
-+{
-+ /* do nothing. Put break point here. */
-+#if defined(CONFIG_KGDB) && !defined(CONFIG_REISER4_FS_MODULE)
-+ extern void breakpoint(void);
-+ breakpoint();
-+#endif
-+}
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/debug.h linux-2.6.24/fs/reiser4/debug.h
---- linux-2.6.24.orig/fs/reiser4/debug.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/debug.h 2008-01-25 11:39:06.908200476 +0300
-@@ -0,0 +1,350 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declarations of debug macros. */
-+
-+#if !defined( __FS_REISER4_DEBUG_H__ )
-+#define __FS_REISER4_DEBUG_H__
-+
-+#include "forward.h"
-+#include "reiser4.h"
-+
-+/* generic function to produce formatted output, decorating it with
-+ whatever standard prefixes/postfixes we want. "Fun" is a function
-+ that will be actually called, can be printk, panic etc.
-+ This is for use by other debugging macros, not by users. */
-+#define DCALL(lev, fun, reperr, label, format, ...) \
-+({ \
-+ fun(lev "reiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \
-+ current->comm, current->pid, __FUNCTION__, \
-+ __FILE__, __LINE__, label, ## __VA_ARGS__); \
-+})
-+
-+/*
-+ * cause kernel to crash
-+ */
-+#define reiser4_panic(mid, format, ...) \
-+ DCALL("", reiser4_do_panic, 1, mid, format , ## __VA_ARGS__)
-+
-+/* print message with indication of current process, file, line and
-+ function */
-+#define reiser4_log(label, format, ...) \
-+ DCALL(KERN_DEBUG, printk, 0, label, format , ## __VA_ARGS__)
-+
-+/* Assertion checked during compilation.
-+ If "cond" is false (0) we get duplicate case label in switch.
-+ Use this to check something like famous
-+ cassert (sizeof(struct reiserfs_journal_commit) == 4096) ;
-+ in 3.x journal.c. If cassertion fails you get compiler error,
-+ so no "maintainer-id".
-+*/
-+#define cassert(cond) ({ switch(-1) { case (cond): case 0: break; } })
-+
-+#define noop do {;} while(0)
-+
-+#if REISER4_DEBUG
-+/* version of info that only actually prints anything when _d_ebugging
-+ is on */
-+#define dinfo(format, ...) printk(format , ## __VA_ARGS__)
-+/* macro to catch logical errors. Put it into `default' clause of
-+ switch() statement. */
-+#define impossible(label, format, ...) \
-+ reiser4_panic(label, "impossible: " format , ## __VA_ARGS__)
-+/* assert assures that @cond is true. If it is not, reiser4_panic() is
-+ called. Use this for checking logical consistency and _never_ call
-+ this to check correctness of external data: disk blocks and user-input . */
-+#define assert(label, cond) \
-+({ \
-+ /* call_on_each_assert(); */ \
-+ if (cond) { \
-+ /* put negated check to avoid using !(cond) that would lose \
-+ * warnings for things like assert(a = b); */ \
-+ ; \
-+ } else { \
-+ DEBUGON(1); \
-+ reiser4_panic(label, "assertion failed: %s", #cond); \
-+ } \
-+})
-+
-+/* like assertion, but @expr is evaluated even if REISER4_DEBUG is off. */
-+#define check_me( label, expr ) assert( label, ( expr ) )
-+
-+#define ON_DEBUG( exp ) exp
-+
-+extern int reiser4_schedulable(void);
-+extern void call_on_each_assert(void);
-+
-+#else
-+
-+#define dinfo( format, args... ) noop
-+#define impossible( label, format, args... ) noop
-+#define assert( label, cond ) noop
-+#define check_me( label, expr ) ( ( void ) ( expr ) )
-+#define ON_DEBUG( exp )
-+#define reiser4_schedulable() might_sleep()
-+
-+/* REISER4_DEBUG */
-+#endif
-+
-+#if REISER4_DEBUG
-+/* per-thread information about lock acquired by this thread. Used by lock
-+ * ordering checking in spin_macros.h */
-+typedef struct reiser4_lock_cnt_info {
-+ int rw_locked_tree;
-+ int read_locked_tree;
-+ int write_locked_tree;
-+
-+ int rw_locked_dk;
-+ int read_locked_dk;
-+ int write_locked_dk;
-+
-+ int rw_locked_cbk_cache;
-+ int read_locked_cbk_cache;
-+ int write_locked_cbk_cache;
-+
-+ int spin_locked_zlock;
-+ int spin_locked_jnode;
-+ int spin_locked_jload;
-+ int spin_locked_txnh;
-+ int spin_locked_atom;
-+ int spin_locked_stack;
-+ int spin_locked_txnmgr;
-+ int spin_locked_ktxnmgrd;
-+ int spin_locked_fq;
-+ int spin_locked_inode;
-+ int spin_locked_super_eflush;
-+ int spin_locked;
-+ int long_term_locked_znode;
-+
-+ int inode_sem_r;
-+ int inode_sem_w;
-+
-+ int d_refs;
-+ int x_refs;
-+ int t_refs;
-+} reiser4_lock_cnt_info;
-+
-+extern struct reiser4_lock_cnt_info *reiser4_lock_counters(void);
-+#define IN_CONTEXT(a, b) (is_in_reiser4_context() ? (a) : (b))
-+
-+/* increment lock-counter @counter, if present */
-+#define LOCK_CNT_INC(counter) \
-+ IN_CONTEXT(++(reiser4_lock_counters()->counter), 0)
-+
-+/* decrement lock-counter @counter, if present */
-+#define LOCK_CNT_DEC(counter) \
-+ IN_CONTEXT(--(reiser4_lock_counters()->counter), 0)
-+
-+/* check that lock-counter is zero. This is for use in assertions */
-+#define LOCK_CNT_NIL(counter) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter == 0, 1)
-+
-+/* check that lock-counter is greater than zero (GTZ) or less than @n (LT).
-+ * Both are for use in assertions and evaluate to 1 outside reiser4 context */
-+#define LOCK_CNT_GTZ(counter) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter > 0, 1)
-+#define LOCK_CNT_LT(counter,n) \
-+ IN_CONTEXT(reiser4_lock_counters()->counter < (n), 1)
-+
-+#else /* REISER4_DEBUG */
-+
-+/* no-op versions on the above */
-+
-+typedef struct reiser4_lock_cnt_info {
-+} reiser4_lock_cnt_info;
-+
-+#define reiser4_lock_counters() ((reiser4_lock_cnt_info *)NULL)
-+#define LOCK_CNT_INC(counter) noop
-+#define LOCK_CNT_DEC(counter) noop
-+#define LOCK_CNT_NIL(counter) (1)
-+#define LOCK_CNT_GTZ(counter) (1)
-+#define LOCK_CNT_LT(counter,n) (1)
-+
-+#endif /* REISER4_DEBUG */
-+
-+#define assert_spin_not_locked(lock) BUG_ON(0)
-+#define assert_rw_write_locked(lock) BUG_ON(0)
-+#define assert_rw_read_locked(lock) BUG_ON(0)
-+#define assert_rw_locked(lock) BUG_ON(0)
-+#define assert_rw_not_write_locked(lock) BUG_ON(0)
-+#define assert_rw_not_read_locked(lock) BUG_ON(0)
-+#define assert_rw_not_locked(lock) BUG_ON(0)
-+
-+/* flags controlling debugging behavior. Are set through debug_flags=N mount
-+ option. */
-+typedef enum {
-+ /* print a lot of information during panic. When this is on all jnodes
-+ * are listed. This can be *very* large output. Usually you don't want
-+ * this. Especially over serial line. */
-+ REISER4_VERBOSE_PANIC = 0x00000001,
-+ /* print a lot of information during umount */
-+ REISER4_VERBOSE_UMOUNT = 0x00000002,
-+ /* print gathered statistics on umount */
-+ REISER4_STATS_ON_UMOUNT = 0x00000004,
-+ /* check node consistency */
-+ REISER4_CHECK_NODE = 0x00000008
-+} reiser4_debug_flags;
-+
-+extern int is_in_reiser4_context(void);
-+
-+/*
-+ * evaluate expression @e only if with reiser4 context
-+ */
-+#define ON_CONTEXT(e) do { \
-+ if(is_in_reiser4_context()) { \
-+ e; \
-+ } } while(0)
-+
-+/*
-+ * evaluate expression @e only when within reiser4_context and debugging is
-+ * on.
-+ */
-+#define ON_DEBUG_CONTEXT( e ) ON_DEBUG( ON_CONTEXT( e ) )
-+
-+/*
-+ * complain about unexpected function result and crash. Used in "default"
-+ * branches of switch statements and alike to assert that invalid results are
-+ * not silently ignored.
-+ */
-+#define wrong_return_value( label, function ) \
-+ impossible( label, "wrong return value from " function )
-+
-+/* Issue different types of reiser4 messages to the console */
-+#define warning( label, format, ... ) \
-+ DCALL( KERN_WARNING, \
-+ printk, 1, label, "WARNING: " format , ## __VA_ARGS__ )
-+#define notice( label, format, ... ) \
-+ DCALL( KERN_NOTICE, \
-+ printk, 1, label, "NOTICE: " format , ## __VA_ARGS__ )
-+
-+/* mark not yet implemented functionality */
-+#define not_yet( label, format, ... ) \
-+ reiser4_panic( label, "NOT YET IMPLEMENTED: " format , ## __VA_ARGS__ )
-+
-+extern void reiser4_do_panic(const char *format, ...)
-+ __attribute__ ((noreturn, format(printf, 1, 2)));
-+
-+extern int reiser4_preempt_point(void);
-+extern void reiser4_print_stats(void);
-+
-+#if REISER4_DEBUG
-+extern int reiser4_no_counters_are_held(void);
-+extern int reiser4_commit_check_locks(void);
-+#else
-+#define reiser4_no_counters_are_held() (1)
-+#define reiser4_commit_check_locks() (1)
-+#endif
-+
-+/* true if @i is power-of-two (also true for 0); @i is evaluated once. Useful for rate-limited warnings, etc. */
-+#define IS_POW(i) \
-+({ \
-+ typeof(i) __i; \
-+ \
-+ __i = (i); \
-+ !(__i & (__i - 1)); \
-+})
-+
-+#define KERNEL_DEBUGGER (1)
-+
-+#if KERNEL_DEBUGGER
-+
-+extern void reiser4_debugtrap(void);
-+
-+/*
-+ * Check condition @cond and drop into kernel debugger (kgdb) if it's true. If
-+ * kgdb is not compiled in, do nothing.
-+ */
-+#define DEBUGON(cond) \
-+({ \
-+ if (unlikely(cond)) \
-+ reiser4_debugtrap(); \
-+})
-+#else
-+#define DEBUGON(cond) noop
-+#endif
-+
-+/*
-+ * Error code tracing facility. (Idea is borrowed from XFS code.)
-+ *
-+ * Suppose some strange and/or unexpected code is returned from some function
-+ * (for example, write(2) returns -EEXIST). It is possible to place a
-+ * breakpoint in the reiser4_write(), but it is too late here. How to find out
-+ * in what particular place -EEXIST was generated first?
-+ *
-+ * In reiser4 all places where actual error codes are produced (that is,
-+ * statements of the form
-+ *
-+ * return -EFOO; // (1), or
-+ *
-+ * result = -EFOO; // (2)
-+ *
-+ * are replaced with
-+ *
-+ * return RETERR(-EFOO); // (1a), and
-+ *
-+ * result = RETERR(-EFOO); // (2a) respectively
-+ *
-+ * RETERR() macro fills a backtrace in reiser4_context. This back-trace is
-+ * printed in error and warning messages. Moreover, it's possible to put a
-+ * conditional breakpoint in reiser4_return_err (low-level function called
-+ * by RETERR() to do the actual work) to break into debugger immediately
-+ * when particular error happens.
-+ *
-+ */
-+
-+#if REISER4_DEBUG
-+
-+/*
-+ * data-type to store information about where error happened ("error site").
-+ */
-+typedef struct err_site {
-+ int code; /* error code */
-+ const char *file; /* source file, filled by __FILE__ */
-+ int line; /* source file line, filled by __LINE__ */
-+} err_site;
-+
-+extern void reiser4_return_err(int code, const char *file, int line);
-+
-+/*
-+ * fill &get_current_context()->err_site with error information.
-+ */
-+#define RETERR(code) \
-+({ \
-+ typeof(code) __code; \
-+ \
-+ __code = (code); \
-+ reiser4_return_err(__code, __FILE__, __LINE__); \
-+ __code; \
-+})
-+
-+#else
-+
-+/*
-+ * no-op versions of the above; RETERR() just yields its (parenthesized) argument
-+ */
-+
-+typedef struct err_site {
-+} err_site;
-+#define RETERR(code) (code)
-+#endif
-+
-+#if REISER4_LARGE_KEY
-+/*
-+ * conditionally compile arguments only if REISER4_LARGE_KEY is on.
-+ */
-+#define ON_LARGE_KEY(...) __VA_ARGS__
-+#else
-+#define ON_LARGE_KEY(...)
-+#endif
-+
-+/* __FS_REISER4_DEBUG_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/dformat.h linux-2.6.24/fs/reiser4/dformat.h
---- linux-2.6.24.orig/fs/reiser4/dformat.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/dformat.h 2008-01-25 11:39:06.908200476 +0300
-@@ -0,0 +1,70 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Formats of on-disk data and conversion functions. */
-+
-+/* put all item formats in the files describing the particular items,
-+ our model is, everything you need to do to add an item to reiser4,
-+ (excepting the changes to the plugin that uses the item which go
-+ into the file defining that plugin), you put into one file. */
-+/* Data on disk are stored in little-endian format.
-+ To declare fields of on-disk structures, use d8, d16, d32 and d64.
-+ d??tocpu() and cputod??() to convert. */
-+
-+#if !defined( __FS_REISER4_DFORMAT_H__ )
-+#define __FS_REISER4_DFORMAT_H__
-+
-+#include <asm/byteorder.h>
-+#include <asm/unaligned.h>
-+#include <linux/types.h>
-+
-+typedef __u8 d8;
-+typedef __le16 d16;
-+typedef __le32 d32;
-+typedef __le64 d64;
-+
-+#define PACKED __attribute__((packed))
-+
-+/* data-type for block number */
-+typedef __u64 reiser4_block_nr;
-+
-+/* data-type for block number on disk, disk format */
-+typedef __le64 reiser4_dblock_nr;
-+
-+/**
-+ * disk_addr_eq - compare disk addresses
-+ * @b1: pointer to the first block number to compare
-+ * @b2: pointer to the second block number to compare
-+ *
-+ * Returns true if both pointers refer to equal block numbers
-+ */
-+static inline int disk_addr_eq(const reiser4_block_nr *b1,
-+			       const reiser4_block_nr *b2)
-+{
-+	assert("nikita-1033", b1 != NULL);
-+	assert("nikita-1266", b2 != NULL);
-+
-+	return memcmp(b1, b2, sizeof(*b1)) == 0;
-+}
-+
-+/* structure of master reiser4 super block */
-+typedef struct reiser4_master_sb {
-+ char magic[16]; /* "ReIsEr4" */
-+ __le16 disk_plugin_id; /* id of disk layout plugin */
-+ __le16 blocksize;
-+ char uuid[16]; /* unique id */
-+ char label[16]; /* filesystem label */
-+ __le64 diskmap; /* location of the diskmap. 0 if not present */
-+} reiser4_master_sb;
-+
-+/* __FS_REISER4_DFORMAT_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/dscale.c linux-2.6.24/fs/reiser4/dscale.c
---- linux-2.6.24.orig/fs/reiser4/dscale.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/dscale.c 2008-01-25 11:55:43.884539336 +0300
-@@ -0,0 +1,192 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Scalable on-disk integers */
-+
-+/*
-+ * Various on-disk structures contain integer-like structures. Stat-data
-+ * contain [yes, "data" is plural, check the dictionary] file size, link
-+ * count; extent unit contains extent width etc. To accommodate for general
-+ * case enough space is reserved to keep largest possible value. 64 bits in
-+ * all cases above. But in overwhelming majority of cases numbers actually
-+ * stored in these fields will be comparatively small and reserving 8 bytes is
-+ * a waste of precious disk bandwidth.
-+ *
-+ * Scalable integers are one way to solve this problem. dscale_write()
-+ * function stores __u64 value in the given area consuming from 1 to 9 bytes,
-+ * depending on the magnitude of the value supplied. dscale_read() reads value
-+ * previously stored by dscale_write().
-+ *
-+ * dscale_write() produces format not completely unlike of UTF: two highest
-+ * bits of the first byte are used to store "tag". One of 4 possible tag
-+ * values is chosen depending on the number being encoded:
-+ *
-+ * 0 ... 0x3f => 0 [table 1]
-+ * 0x40 ... 0x3fff => 1
-+ * 0x4000 ... 0x3fffffff => 2
-+ * 0x40000000 ... 0xffffffffffffffff => 3
-+ *
-+ * (see dscale_range() function)
-+ *
-+ * Values in the range 0x40000000 ... 0xffffffffffffffff require 8 full bytes
-+ * to be stored, so in this case there is no place in the first byte to store
-+ * tag. For such values tag is stored in an extra 9th byte.
-+ *
-+ * As _highest_ bits are used for the test (which is natural) scaled integers
-+ * are stored in BIG-ENDIAN format in contrast with the rest of reiser4 which
-+ * uses LITTLE-ENDIAN.
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "dscale.h"
-+
-+/* extract the two-bit tag of the scaled integer stored at @address */
-+static int gettag(const unsigned char *address)
-+{
-+	/* the tag occupies the two most significant bits of the first byte */
-+	return address[0] >> 6;
-+}
-+
-+/* clear the tag bits that dscale_read() left embedded in @value (it calls this for tags 0..2). */
-+static void cleartag(__u64 * value, int tag)
-+{
-+ /*
-+ * W-w-what ?!
-+ *
-+ * Actually, this is rather simple: @value passed here was read by
-+ * dscale_read(), converted from BIG-ENDIAN, and padded to __u64 by
-+ * zeroes. Tag is still stored in the highest (arithmetically)
-+ * non-zero bits of @value, but relative position of tag within __u64
-+ * depends on @tag.
-+ *
-+ * For example if @tag is 0, it's stored in 2 highest bits of lowest
-+ * byte, and its offset (counting from lowest bit) is 8 - 2 == 6 bits.
-+ *
-+ * If tag is 1, it's stored in two highest bits of 2nd lowest byte,
-+ * and its offset is (2 * 8) - 2 == 14 bits.
-+ *
-+ * See table 1 above for details.
-+ *
-+ * All these cases are captured by the formula:
-+ */
-+ *value &= ~((__u64)3 << (((1 << tag) << 3) - 2));
-+ /*
-+ * That is, clear two (3 == 0b11) bits at the offset 8 * (2 ^ tag) - 2,
-+ * i.e. two highest bits of (2 ^ tag)-th byte of @value.
-+ *
-+ * The mask is built as __u64: shifting a plain int 3 by 30 bits (tag 2)
-+ * would overflow a signed int.
-+ */
-+}
-+
-+/* map @value to its tag (0..3) according to table 1 above */
-+static int dscale_range(__u64 value)
-+{
-+	if (value <= 0x3f)
-+		return 0;
-+	if (value <= 0x3fff)
-+		return 1;
-+	if (value <= 0x3fffffff)
-+		return 2;
-+	return 3;
-+}
-+
-+/* decode the scaled integer written at @address by dscale_write() into
-+ * *@value and return the number of bytes it occupied */
-+int dscale_read(unsigned char *address, __u64 * value)
-+{
-+	int tag;
-+
-+	/* the tag selects the width of the encoding */
-+	tag = gettag(address);
-+	switch (tag) {
-+	case 0:
-+		*value = get_unaligned(address);
-+		break;
-+	case 1:
-+		*value = __be16_to_cpu(get_unaligned((__be16 *)address));
-+		break;
-+	case 2:
-+		*value = __be32_to_cpu(get_unaligned((__be32 *)address));
-+		break;
-+	case 3:
-+		/* Here the tag lives in an extra leading byte: skip it and
-+		 * decode the value from the following 8 bytes. */
-+		*value = __be64_to_cpu(get_unaligned((__be64 *)(address + 1)));
-+		/* worst case: one tag byte plus 8 bytes for the value
-+		 * itself. */
-+		return 9;
-+	default:
-+		return RETERR(-EIO);
-+	}
-+	/* strip the tag bits that are still embedded in @value */
-+	cleartag(value, tag);
-+	/* (2 ^ tag) bytes were consumed---see table 1. */
-+	return 1 << tag;
-+}
-+
-+/* number of bytes occupied by the scaled integer stored at @address */
-+int dscale_bytes_to_read(unsigned char *address)
-+{
-+	int tag;
-+
-+	tag = gettag(address);
-+	switch (tag) {
-+	case 3:
-+		return 9;
-+	case 2:
-+	case 1:
-+	case 0:
-+		return 1 << tag;
-+	default:
-+		return RETERR(-EIO);
-+	}
-+}
-+
-+/* store @value at @address (big-endian, tagged) and return number of bytes consumed */
-+int dscale_write(unsigned char *address, __u64 value)
-+{
-+ int tag, shift;
-+ __be64 v;
-+ unsigned char *valarr;
-+
-+ tag = dscale_range(value);
-+ v = __cpu_to_be64(value);
-+ valarr = (unsigned char *)&v;
-+ shift = (tag == 3) ? 1 : 0;
-+ memcpy(address + shift, valarr + sizeof v - (1 << tag), 1 << tag);
-+ /* tag 3 has a separate tag byte that memcpy() did not touch: overwrite it */
-+ *address = (shift ? 0 : *address) | (tag << 6);
-+ return shift + (1 << tag);
-+}
-+
-+/* how many bytes dscale_write() needs for @value: 2^tag, or 9 for tag 3 */
-+int dscale_bytes_to_write(__u64 value)
-+{
-+	int tag;
-+
-+	tag = dscale_range(value);
-+	if (tag == 3)
-+		return 9;
-+	return 1 << tag;
-+}
-+
-+/* returns true if @value and @other require the same number of bytes to be
-+ * stored. Used to detect when data structure (like stat-data) has to be
-+ * expanded or contracted. */
-+int dscale_fit(__u64 value, __u64 other)
-+{
-+ return dscale_range(value) == dscale_range(other);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/dscale.h linux-2.6.24/fs/reiser4/dscale.h
---- linux-2.6.24.orig/fs/reiser4/dscale.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/dscale.h 2008-01-25 11:55:43.884539336 +0300
-@@ -0,0 +1,28 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Scalable on-disk integers. See dscale.c for details. */
-+
-+#if !defined( __FS_REISER4_DSCALE_H__ )
-+#define __FS_REISER4_DSCALE_H__
-+
-+#include "dformat.h"
-+
-+extern int dscale_read(unsigned char *address, __u64 * value);
-+extern int dscale_write(unsigned char *address, __u64 value);
-+extern int dscale_bytes_to_read(unsigned char *address);
-+extern int dscale_bytes_to_write(__u64 value);
-+extern int dscale_fit(__u64 value, __u64 other);
-+
-+/* __FS_REISER4_DSCALE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/entd.c linux-2.6.24/fs/reiser4/entd.c
---- linux-2.6.24.orig/fs/reiser4/entd.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/entd.c 2008-01-25 11:39:06.912201506 +0300
-@@ -0,0 +1,335 @@
-+/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Ent daemon. */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "tree.h"
-+#include "entd.h"
-+#include "super.h"
-+#include "context.h"
-+#include "reiser4.h"
-+#include "vfs_ops.h"
-+#include "page_cache.h"
-+#include "inode.h"
-+
-+#include <linux/sched.h> /* struct task_struct */
-+#include <linux/suspend.h>
-+#include <linux/kernel.h>
-+#include <linux/writeback.h>
-+#include <linux/time.h> /* INITIAL_JIFFIES */
-+#include <linux/backing-dev.h> /* bdi_write_congested */
-+#include <linux/wait.h>
-+#include <linux/kthread.h>
-+#include <linux/freezer.h>
-+
-+#define DEF_PRIORITY 12
-+#define MAX_ENTD_ITERS 10
-+
-+static void entd_flush(struct super_block *, struct wbq *);
-+static int entd(void *arg);
-+
-+/*
-+ * set ->comm field of ent thread to make its state visible to the user level
-+ */
-+#define entd_set_comm(state) \
-+ snprintf(current->comm, sizeof(current->comm), \
-+ "ent:%s%s", super->s_id, (state))
-+
-+/**
-+ * reiser4_init_entd - initialize entd context and start kernel daemon
-+ * @super: super block to start ent thread for
-+ *
-+ * Creates entd contexts, starts kernel thread and waits until it
-+ * initializes.
-+ */
-+int reiser4_init_entd(struct super_block *super)
-+{
-+ entd_context *ctx;
-+
-+ assert("nikita-3104", super != NULL);
-+
-+ ctx = get_entd_context(super);
-+
-+ memset(ctx, 0, sizeof *ctx);
-+ spin_lock_init(&ctx->guard);
-+ init_waitqueue_head(&ctx->wait);
-+#if REISER4_DEBUG
-+ INIT_LIST_HEAD(&ctx->flushers_list);
-+#endif
-+ /* lists of writepage requests */
-+ INIT_LIST_HEAD(&ctx->todo_list);
-+ INIT_LIST_HEAD(&ctx->done_list);
-+ /* start entd */
-+ ctx->tsk = kthread_run(entd, super, "ent:%s", super->s_id);
-+ if (IS_ERR(ctx->tsk))
-+ return PTR_ERR(ctx->tsk);
-+ return 0;
-+}
-+
-+static void put_wbq(struct wbq *rq)
-+{
-+ iput(rq->mapping->host);
-+ complete(&rq->completion);
-+}
-+
-+/* pop the head of @ent's todo list, or NULL if empty. ent should be locked */
-+static struct wbq *__get_wbq(entd_context * ent)
-+{
-+	struct wbq *head = NULL;
-+
-+	if (!list_empty(&ent->todo_list)) {
-+		head = list_entry(ent->todo_list.next, struct wbq, link);
-+		ent->nr_todo_reqs --;
-+		list_del_init(&head->link);
-+	}
-+
-+	return head;
-+}
-+
-+/* ent thread function */
-+static int entd(void *arg)
-+{
-+ struct super_block *super;
-+ entd_context *ent;
-+ int done = 0;
-+
-+ super = arg;
-+ /* do_fork() just copies task_struct into the new
-+ thread. ->fs_context shouldn't be copied of course. This shouldn't
-+ be a problem for the rest of the code though.
-+ */
-+ current->journal_info = NULL;
-+
-+ ent = get_entd_context(super);
-+
-+ while (!done) {
-+ try_to_freeze();
-+
-+ spin_lock(&ent->guard);
-+ while (ent->nr_todo_reqs != 0) {
-+ struct wbq *rq;
-+
-+ assert("", list_empty(&ent->done_list));
-+
-+ /* take request from the queue head */
-+ rq = __get_wbq(ent);
-+ assert("", rq != NULL);
-+ ent->cur_request = rq;
-+ spin_unlock(&ent->guard);
-+
-+ entd_set_comm("!");
-+ entd_flush(super, rq);
-+
-+ put_wbq(rq);
-+
-+ /*
-+ * wakeup all requestors and iput their inodes
-+ */
-+ spin_lock(&ent->guard);
-+ while (!list_empty(&ent->done_list)) {
-+ rq = list_entry(ent->done_list.next, struct wbq, link);
-+ list_del_init(&rq->link);
-+ ent->nr_done_reqs --;
-+ spin_unlock(&ent->guard);
-+ assert("", rq->written == 1);
-+ put_wbq(rq);
-+ spin_lock(&ent->guard);
-+ }
-+ }
-+ spin_unlock(&ent->guard);
-+
-+ entd_set_comm(".");
-+
-+ {
-+ DEFINE_WAIT(__wait);
-+
-+ do {
-+ prepare_to_wait(&ent->wait, &__wait, TASK_INTERRUPTIBLE);
-+ if (kthread_should_stop()) {
-+ done = 1;
-+ break;
-+ }
-+ if (ent->nr_todo_reqs != 0)
-+ break;
-+ schedule();
-+ } while (0);
-+ finish_wait(&ent->wait, &__wait);
-+ }
-+ }
-+ BUG_ON(ent->nr_todo_reqs != 0);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_entd - stop entd kernel thread
-+ * @super: super block to stop ent thread for
-+ *
-+ * It is called on umount. Sends stop signal to entd and wait until it handles
-+ * it.
-+ */
-+void reiser4_done_entd(struct super_block *super)
-+{
-+ entd_context *ent;
-+
-+ assert("nikita-3103", super != NULL);
-+
-+ ent = get_entd_context(super);
-+ assert("zam-1055", ent->tsk != NULL);
-+ kthread_stop(ent->tsk);
-+}
-+
-+/* called at the beginning of jnode_flush to register flusher thread with ent
-+ * daemon */
-+void reiser4_enter_flush(struct super_block *super)
-+{
-+ entd_context *ent;
-+
-+ assert("zam-1029", super != NULL);
-+ ent = get_entd_context(super);
-+
-+ assert("zam-1030", ent != NULL);
-+
-+ spin_lock(&ent->guard);
-+ ent->flushers++;
-+#if REISER4_DEBUG
-+ list_add(&get_current_context()->flushers_link, &ent->flushers_list);
-+#endif
-+ spin_unlock(&ent->guard);
-+}
-+
-+/* called at the end of jnode_flush */
-+void reiser4_leave_flush(struct super_block *super)
-+{
-+ entd_context *ent;
-+ int wake_up_ent;
-+
-+ assert("zam-1027", super != NULL);
-+ ent = get_entd_context(super);
-+
-+ assert("zam-1028", ent != NULL);
-+
-+ spin_lock(&ent->guard);
-+ ent->flushers--;
-+ wake_up_ent = (ent->flushers == 0 && ent->nr_todo_reqs != 0);
-+#if REISER4_DEBUG
-+ list_del_init(&get_current_context()->flushers_link);
-+#endif
-+ spin_unlock(&ent->guard);
-+ if (wake_up_ent)
-+ wake_up(&ent->wait);
-+}
-+
-+#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX
-+
-+/* service one write-back request @rq of @super inside a temporary reiser4 context */
-+static void entd_flush(struct super_block *super, struct wbq *rq)
-+{
-+ reiser4_context ctx;
-+
-+ init_stack_context(&ctx, super);
-+ ctx.entd = 1;
-+ ctx.gfp_mask = GFP_NOFS;
-+
-+ rq->wbc->range_start = page_offset(rq->page);
-+ rq->wbc->range_end = rq->wbc->range_start +
-+ (ENTD_CAPTURE_APAGE_BURST << PAGE_CACHE_SHIFT);
-+ /* ask the mapping to write the range that starts at the requested page */
-+ rq->mapping->a_ops->writepages(rq->mapping, rq->wbc);
-+
-+ if (rq->wbc->nr_to_write > 0) {
-+ rq->wbc->range_start = 0;
-+ rq->wbc->range_end = LLONG_MAX;
-+ generic_sync_sb_inodes(super, rq->wbc);
-+ }
-+ rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST;
-+ reiser4_writeout(super, rq->wbc);
-+
-+ context_set_commit_async(&ctx);
-+ reiser4_exit_context(&ctx);
-+}
-+
-+/**
-+ * write_page_by_ent - ask entd thread to flush this page as part of slum
-+ * @page: page to be written
-+ * @wbc: writeback control passed to reiser4_writepage
-+ *
-+ * Creates a request, puts it on entd list of requests, wakes entd up if
-+ * necessary, waits until entd completes the request. Always returns 0.
-+ */
-+int write_page_by_ent(struct page *page, struct writeback_control *wbc)
-+{
-+ struct super_block *sb;
-+ struct inode *inode;
-+ entd_context *ent;
-+ struct wbq rq;
-+
-+ assert("", PageLocked(page));
-+ assert("", page->mapping != NULL);
-+
-+ sb = page->mapping->host->i_sb;
-+ ent = get_entd_context(sb);
-+ assert("", ent && ent->done == 0);
-+
-+ /*
-+ * we are going to unlock page and ask ent thread to write the
-+ * page. Re-dirty page before unlocking so that if ent thread fails to
-+ * write it - it will remain dirty
-+ */
-+ reiser4_set_page_dirty_internal(page);
-+
-+ /*
-+ * pin inode in memory, unlock page, entd_flush will iput. We can not
-+ * iput here because we can not allow delete_inode to be called here
-+ */
-+ inode = igrab(page->mapping->host);
-+ unlock_page(page);
-+ if (inode == NULL)
-+ /* inode is getting freed */
-+ return 0;
-+
-+ /* init wbq */
-+ INIT_LIST_HEAD(&rq.link);
-+ rq.magic = WBQ_MAGIC;
-+ rq.wbc = wbc;
-+ rq.page = page;
-+ rq.mapping = inode->i_mapping;
-+ rq.node = NULL;
-+ rq.written = 0;
-+ init_completion(&rq.completion);
-+
-+ /* add request to entd's list of writepage requests */
-+ spin_lock(&ent->guard);
-+ ent->nr_todo_reqs++;
-+ list_add_tail(&rq.link, &ent->todo_list);
-+ if (ent->nr_todo_reqs == 1)
-+ wake_up(&ent->wait);
-+
-+ spin_unlock(&ent->guard);
-+
-+ /* wait until entd finishes */
-+ wait_for_completion(&rq.completion);
-+
-+ /* Report success whether or not entd wrote the page (rq.written):
-+ * the page was re-dirtied above, so it stays dirty if it was not
-+ * written. */
-+ return 0;
-+}
-+
-+int wbq_available(void)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+ entd_context *ent = get_entd_context(sb);
-+ return ent->nr_todo_reqs;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/entd.h linux-2.6.24/fs/reiser4/entd.h
---- linux-2.6.24.orig/fs/reiser4/entd.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/entd.h 2008-01-25 11:39:06.912201506 +0300
-@@ -0,0 +1,90 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Ent daemon. */
-+
-+#ifndef __ENTD_H__
-+#define __ENTD_H__
-+
-+#include "context.h"
-+
-+#include <linux/fs.h>
-+#include <linux/completion.h>
-+#include <linux/wait.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+#define WBQ_MAGIC 0x7876dc76
-+
-+/* write-back request. */
-+struct wbq {
-+ int magic;
-+ struct list_head link; /* list head of this list is in entd context */
-+ struct writeback_control *wbc;
-+ struct page *page;
-+ struct address_space *mapping;
-+ struct completion completion;
-+ jnode *node; /* set if ent thread captured requested page */
-+ int written; /* set if ent thread wrote requested page */
-+};
-+
-+/* ent-thread context. This is used to synchronize starting/stopping ent
-+ * threads. */
-+typedef struct entd_context {
-+ /* wait queue that ent thread waits on for more work. It's
-+ * signaled by write_page_by_ent(). */
-+ wait_queue_head_t wait;
-+ /* spinlock protecting other fields */
-+ spinlock_t guard;
-+ /* ent thread */
-+ struct task_struct *tsk;
-+ /* set to indicate that ent thread should leave. */
-+ int done;
-+ /* counter of active flushers */
-+ int flushers;
-+ /*
-+ * when reiser4_writepage asks entd to write a page - it adds struct
-+ * wbq to this list
-+ */
-+ struct list_head todo_list;
-+ /* number of elements on the above list */
-+ int nr_todo_reqs;
-+
-+ struct wbq *cur_request;
-+ /*
-+ * when entd writes a page it moves write-back request from todo_list
-+ * to done_list. This list is used at the end of entd iteration to
-+ * wakeup requestors and iput inodes.
-+ */
-+ struct list_head done_list;
-+ /* number of elements on the above list */
-+ int nr_done_reqs;
-+
-+#if REISER4_DEBUG
-+ /* list of all active flushers */
-+ struct list_head flushers_list;
-+#endif
-+} entd_context;
-+
-+extern int reiser4_init_entd(struct super_block *);
-+extern void reiser4_done_entd(struct super_block *);
-+
-+extern void reiser4_enter_flush(struct super_block *);
-+extern void reiser4_leave_flush(struct super_block *);
-+
-+extern int write_page_by_ent(struct page *, struct writeback_control *);
-+extern int wbq_available(void);
-+extern void ent_writes_page(struct super_block *, struct page *);
-+
-+extern jnode *get_jnode_by_wbq(struct super_block *, struct wbq *);
-+/* __ENTD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/eottl.c linux-2.6.24/fs/reiser4/eottl.c
---- linux-2.6.24.orig/fs/reiser4/eottl.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/eottl.c 2008-01-25 11:39:06.912201506 +0300
-@@ -0,0 +1,509 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree_mod.h"
-+#include "carry.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/*
-+ * Extents on the twig level (EOTTL) handling.
-+ *
-+ * EOTTL poses some problems to the tree traversal, that are better explained
-+ * by example.
-+ *
-+ * Suppose we have block B1 on the twig level with the following items:
-+ *
-+ * 0. internal item I0 with key (0:0:0:0) (locality, key-type, object-id,
-+ * offset)
-+ * 1. extent item E1 with key (1:4:100:0), having 10 blocks of 4k each
-+ * 2. internal item I2 with key (10:0:0:0)
-+ *
-+ * We are trying to insert item with key (5:0:0:0). Lookup finds node B1, and
-+ * then intra-node lookup is done. This lookup finished on the E1, because the
-+ * key we are looking for is larger than the key of E1 and is smaller than the
-+ * key of I2.
-+ *
-+ * Here search is stuck.
-+ *
-+ * After some thought it is clear what is wrong here: extents on the twig level
-+ * break some basic property of the *search* tree (on the pretext, that they
-+ * restore property of balanced tree).
-+ *
-+ * Said property is the following: if in the internal node of the search tree
-+ * we have [ ... Key1 Pointer Key2 ... ] then, all data that are or will be
-+ * keyed in the tree with the Key such that Key1 <= Key < Key2 are accessible
-+ * through the Pointer.
-+ *
-+ * This is not true, when Pointer is Extent-Pointer, simply because extent
-+ * cannot expand indefinitely to the right to include any item with
-+ *
-+ * Key1 <= Key <= Key2.
-+ *
-+ * For example, our E1 extent is only responsible for the data with keys
-+ *
-+ * (1:4:100:0) <= key <= (1:4:100:0xffffffffffffffff), and
-+ *
-+ * so, key range
-+ *
-+ * ( (1:4:100:0xffffffffffffffff), (10:0:0:0) )
-+ *
-+ * is orphaned: there is no way to get there from the tree root.
-+ *
-+ * In other words, extent pointers are different than normal child pointers as
-+ * far as search tree is concerned, and this creates such problems.
-+ *
-+ * Possible solution for this problem is to insert our item into node pointed
-+ * to by I2. There are some problems though:
-+ *
-+ * (1) I2 can be in a different node.
-+ * (2) E1 can be immediately followed by another extent E2.
-+ *
-+ * (1) is solved by calling reiser4_get_right_neighbor() and accounting
-+ * for locks/coords as necessary.
-+ *
-+ * (2) is more complex. Solution here is to insert new empty leaf node and
-+ * insert internal item between E1 and E2 pointing to said leaf node. This is
-+ * further complicated by possibility that E2 is in a different node, etc.
-+ *
-+ * Problems:
-+ *
-+ * (1) if there was internal item I2 immediately on the right of an extent E1
-+ * and we decided to insert new item S1 into node N2 pointed to by I2, then
-+ * key of S1 will be less than smallest key in the N2. Normally, search key
-+ * checks that key we are looking for is in the range of keys covered by the
-+ * node key is being looked in. To work around this situation, while
-+ * preserving useful consistency check, new flag CBK_TRUST_DK was added to the
-+ * cbk flags bitmask. This flag is automatically set on entrance to the
-+ * coord_by_key() and is only cleared when we are about to enter situation
-+ * described above.
-+ *
-+ * (2) If extent E1 is immediately followed by another extent E2 and we are
-+ * searching for the key that is between E1 and E2 we only have to insert new
-+ * empty leaf node when coord_by_key was called for insertion, rather than just
-+ * for lookup. To distinguish these cases, new flag CBK_FOR_INSERT was added to
-+ * the cbk flags bitmask. This flag is automatically set by coord_by_key calls
-+ * performed by insert_by_key() and friends.
-+ *
-+ * (3) Insertion of new empty leaf node (possibly) requires balancing. In any
-+ * case it requires modification of node content which is only possible under
-+ * write lock. It may well happen that we only have read lock on the node where
-+ * new internal pointer is to be inserted (common case: lookup of non-existent
-+ * stat-data that falls between two extents). If only read lock is held, tree
-+ * traversal is restarted with lock_level modified so that next time we hit
-+ * this problem, write lock will be held. Once we have write lock, balancing
-+ * will be performed.
-+ */
-+
-+/**
-+ * is_next_item_internal - check whether next item is internal
-+ * @coord: coordinate of extent item in twig node
-+ * @key: search key
-+ * @lh: twig node lock handle
-+ *
-+ * Looks at the unit next to @coord. Returns 1 if it is an internal one: @coord
-+ * is set to that unit and, if the unit is in the right neighbor, @lh is moved
-+ * to that node. Returns 0 (@coord and @lh unchanged) if the next item is not
-+ * internal or does not exist. Returns 2 if the search has to be restarted, or
-+ * a negative error code if locking or loading the right neighbor fails.
-+ */
-+static int
-+is_next_item_internal(coord_t *coord, const reiser4_key *key,
-+ lock_handle *lh)
-+{
-+ coord_t next;
-+ lock_handle rn;
-+ int result;
-+
-+ coord_dup(&next, coord);
-+ if (coord_next_unit(&next) == 0) {
-+ /* next unit is in this node */
-+ if (item_is_internal(&next)) {
-+ coord_dup(coord, &next);
-+ return 1;
-+ }
-+ assert("vs-3", item_is_extent(&next));
-+ return 0;
-+ }
-+
-+ /*
-+ * next unit either does not exist or is in right neighbor. If it is in
-+ * right neighbor we have to check right delimiting key because
-+ * concurrent thread could get there first and insert item with a key
-+ * smaller than @key
-+ */
-+ read_lock_dk(current_tree);
-+ result = keycmp(key, znode_get_rd_key(coord->node));
-+ read_unlock_dk(current_tree);
-+ assert("vs-6", result != EQUAL_TO);
-+ if (result == GREATER_THAN)
-+ return 2;
-+
-+ /* lock right neighbor */
-+ init_lh(&rn);
-+ result = reiser4_get_right_neighbor(&rn, coord->node,
-+ znode_is_wlocked(coord->node) ?
-+ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == -E_NO_NEIGHBOR) {
-+ /* we are on the rightmost edge of the tree */
-+ done_lh(&rn);
-+ return 0;
-+ }
-+
-+ if (result) {
-+ assert("vs-4", result < 0);
-+ done_lh(&rn);
-+ return result;
-+ }
-+
-+ /*
-+ * check whether concurrent thread managed to insert item with a key
-+ * smaller than @key
-+ */
-+ read_lock_dk(current_tree);
-+ result = keycmp(key, znode_get_ld_key(rn.node));
-+ read_unlock_dk(current_tree);
-+ assert("vs-6", result != EQUAL_TO);
-+ if (result == GREATER_THAN) {
-+ done_lh(&rn);
-+ return 2;
-+ }
-+
-+ result = zload(rn.node);
-+ if (result) {
-+ assert("vs-5", result < 0);
-+ done_lh(&rn);
-+ return result;
-+ }
-+
-+ coord_init_first_unit(&next, rn.node);
-+ if (item_is_internal(&next)) {
-+ /*
-+ * next unit is in right neighbor and it is a unit of internal
-+ * item. Unlock coord->node. Move @lh to right neighbor. @coord
-+ * is set to the first unit of right neighbor.
-+ */
-+ coord_dup(coord, &next);
-+ zrelse(rn.node);
-+ done_lh(lh);
-+ move_lh(lh, &rn);
-+ return 1;
-+ }
-+
-+ /*
-+ * next unit is unit of extent item. Return without changing @lh and
-+ * @coord.
-+ */
-+ assert("vs-6", item_is_extent(&next));
-+ zrelse(rn.node);
-+ done_lh(&rn);
-+ return 0;
-+}
-+
-+/**
-+ * rd_key - calculate key of an item next to the given one
-+ * @coord: position in a node
-+ * @key: storage for result key
-+ *
-+ * @coord is set between items or after the last item in a node. Stores key of
-+ * the right item (or the node's right delimiting key) in @key; returns @key.
-+ */
-+static reiser4_key *rd_key(const coord_t *coord, reiser4_key *key)
-+{
-+ coord_t dup;
-+
-+ assert("nikita-2281", coord_is_between_items(coord));
-+ coord_dup(&dup, coord);
-+
-+ if (coord_set_to_right(&dup) == 0)
-+ /* next item is in this node. Return its key. */
-+ unit_key_by_coord(&dup, key);
-+ else {
-+ /*
-+ * next item either does not exist or is in right
-+ * neighbor. Copy znode's right delimiting key under dk lock.
-+ */
-+ read_lock_dk(current_tree);
-+ *key = *znode_get_rd_key(coord->node);
-+ read_unlock_dk(current_tree);
-+ }
-+ return key;
-+}
-+
-+/**
-+ * add_empty_leaf - insert empty leaf between two extents
-+ * @insert_coord: position in twig node between two extents
-+ * @lh: twig node lock handle
-+ * @key: left delimiting key of new node
-+ * @rdkey: right delimiting key of new node
-+ *
-+ * Inserts empty leaf node between two extent items. It is necessary when we
-+ * have to insert an item on leaf level between two extents (items on the twig
-+ * level). Returns 0 on success (@lh then locks the new leaf), else error code.
-+ */
-+static int
-+add_empty_leaf(coord_t *insert_coord, lock_handle *lh,
-+ const reiser4_key *key, const reiser4_key *rdkey)
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *todo;
-+ reiser4_item_data *item;
-+ carry_insert_data *cdata;
-+ carry_op *op;
-+ znode *node;
-+ reiser4_tree *tree;
-+
-+ assert("vs-49827", znode_contains_key_lock(insert_coord->node, key));
-+ tree = znode_get_tree(insert_coord->node);
-+ node = reiser4_new_node(insert_coord->node, LEAF_LEVEL);
-+ if (IS_ERR(node))
-+ return PTR_ERR(node);
-+
-+ /* setup delimiting keys for node being inserted */
-+ write_lock_dk(tree);
-+ znode_set_ld_key(node, key);
-+ znode_set_rd_key(node, rdkey);
-+ ON_DEBUG(node->creator = current);
-+ ON_DEBUG(node->first_key = *key);
-+ write_unlock_dk(tree);
-+
-+ ZF_SET(node, JNODE_ORPHAN);
-+
-+ /* allocate carry_pool, 3 carry_level-s, reiser4_item_data and
-+ carry_insert_data */
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
-+ sizeof(*item) + sizeof(*cdata));
-+ if (IS_ERR(pool)) {
-+ zput(node); /* as on every other exit path below */
-+ return PTR_ERR(pool);
-+ }
-+ todo = (carry_level *) (pool + 1);
-+ init_carry_level(todo, pool);
-+
-+ item = (reiser4_item_data *) (todo + 3);
-+ cdata = (carry_insert_data *) (item + 1);
-+
-+ op = reiser4_post_carry(todo, COP_INSERT, insert_coord->node, 0);
-+ if (!IS_ERR(op)) {
-+ cdata->coord = insert_coord;
-+ cdata->key = key;
-+ cdata->data = item;
-+ op->u.insert.d = cdata;
-+ op->u.insert.type = COPT_ITEM_DATA;
-+ build_child_ptr_data(node, item);
-+ item->arg = NULL;
-+ /* have @insert_coord to be set at inserted item after
-+ insertion is done */
-+ todo->track_type = CARRY_TRACK_CHANGE;
-+ todo->tracked = lh;
-+
-+ result = reiser4_carry(todo, NULL);
-+ if (result == 0) {
-+ /*
-+ * pin node in memory. This is necessary for
-+ * znode_make_dirty() below.
-+ */
-+ result = zload(node);
-+ if (result == 0) {
-+ lock_handle local_lh;
-+
-+ /*
-+ * if we inserted new child into tree we have
-+ * to mark it dirty so that flush will be able
-+ * to process it.
-+ */
-+ init_lh(&local_lh);
-+ result = longterm_lock_znode(&local_lh, node,
-+ ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (result == 0) {
-+ znode_make_dirty(node);
-+
-+ /*
-+ * when internal item pointing to @node
-+ * was inserted into twig node
-+ * create_hook_internal did not connect
-+ * it properly because its right
-+ * neighbor was not known. Do it
-+ * here
-+ */
-+ write_lock_tree(tree);
-+ assert("nikita-3312",
-+ znode_is_right_connected(node));
-+ assert("nikita-2984",
-+ node->right == NULL);
-+ ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+ write_unlock_tree(tree);
-+ result =
-+ connect_znode(insert_coord, node);
-+ ON_DEBUG(if (result == 0) check_dkeys(node););
-+
-+ done_lh(lh);
-+ move_lh(lh, &local_lh);
-+ assert("vs-1676", node_is_empty(node));
-+ coord_init_first_unit(insert_coord,
-+ node);
-+ } else {
-+ warning("nikita-3136",
-+ "Cannot lock child");
-+ }
-+ done_lh(&local_lh);
-+ zrelse(node);
-+ }
-+ }
-+ } else
-+ result = PTR_ERR(op);
-+ zput(node);
-+ done_carry_pool(pool);
-+ return result;
-+}
-+
-+/**
-+ * handle_eottl - handle extent-on-the-twig-level cases in tree traversal
-+ * @h: search handle
-+ * @outcome: flag saying whether search has to restart or is done
-+ *
-+ * Handles search on twig level. Returns 0 if search has to go one level down.
-+ * Returns 1 if traversal stops here: @outcome is then LOOKUP_DONE (result or
-+ * error code saved in @h->result) or LOOKUP_REST (search must be restarted,
-+ * e.g. to retake the twig node with a write lock).
-+ */
-+int handle_eottl(cbk_handle *h, int *outcome)
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t *coord;
-+
-+ coord = h->coord;
-+
-+ if (h->level != TWIG_LEVEL ||
-+ (coord_is_existing_item(coord) && item_is_internal(coord))) {
-+ /* Continue to traverse tree downward. */
-+ return 0;
-+ }
-+
-+ /*
-+ * make sure that @h->coord is set to twig node and that it is either
-+ * set to extent item or after extent item
-+ */
-+ assert("vs-356", h->level == TWIG_LEVEL);
-+ assert("vs-357", ( {
-+ coord_t lcoord;
-+ coord_dup(&lcoord, coord);
-+ check_me("vs-733", coord_set_to_left(&lcoord) == 0);
-+ item_is_extent(&lcoord);
-+ }
-+ ));
-+
-+ if (*outcome == NS_FOUND) {
-+ /* we have found desired key on twig level in extent item */
-+ h->result = CBK_COORD_FOUND;
-+ *outcome = LOOKUP_DONE;
-+ return 1;
-+ }
-+
-+ if (!(h->flags & CBK_FOR_INSERT)) {
-+ /* tree traversal is not for insertion. Just return
-+ CBK_COORD_NOTFOUND. */
-+ h->result = CBK_COORD_NOTFOUND;
-+ *outcome = LOOKUP_DONE;
-+ return 1;
-+ }
-+
-+ /* take a look at the item to the right of h -> coord */
-+ result = is_next_item_internal(coord, h->key, h->active_lh);
-+ if (unlikely(result < 0)) {
-+ h->error = "get_right_neighbor failed";
-+ h->result = result;
-+ *outcome = LOOKUP_DONE;
-+ return 1;
-+ }
-+ if (result == 0) {
-+ /*
-+ * item to the right is also an extent one. Allocate a new node
-+ * and insert pointer to it after item h -> coord.
-+ *
-+ * This is a result of extents being located at the twig
-+ * level. For explanation, see the EOTTL comment at the top of
-+ * this file.
-+ */
-+ znode *loaded;
-+
-+ if (cbk_lock_mode(h->level, h) != ZNODE_WRITE_LOCK) {
-+ /*
-+ * we got node read locked, restart coord_by_key to
-+ * have write lock on twig level
-+ */
-+ h->lock_level = TWIG_LEVEL;
-+ h->lock_mode = ZNODE_WRITE_LOCK;
-+ *outcome = LOOKUP_REST;
-+ return 1;
-+ }
-+
-+ loaded = coord->node;
-+ result =
-+ add_empty_leaf(coord, h->active_lh, h->key,
-+ rd_key(coord, &key));
-+ if (result) {
-+ h->error = "could not add empty leaf";
-+ h->result = result;
-+ *outcome = LOOKUP_DONE;
-+ return 1;
-+ }
-+ /* added empty leaf is locked (h->active_lh), its parent node
-+ is unlocked, h->coord is set as EMPTY */
-+ assert("vs-13", coord->between == EMPTY_NODE);
-+ assert("vs-14", znode_is_write_locked(coord->node));
-+ assert("vs-15",
-+ WITH_DATA(coord->node, node_is_empty(coord->node)));
-+ assert("vs-16", jnode_is_leaf(ZJNODE(coord->node)));
-+ assert("vs-17", coord->node == h->active_lh->node);
-+ *outcome = LOOKUP_DONE;
-+ h->result = CBK_COORD_NOTFOUND;
-+ return 1;
-+ } else if (result == 1) {
-+ /*
-+ * this is special case mentioned in the comment on
-+ * tree.h:cbk_flags. We have found internal item immediately on
-+ * the right of extent, and we are going to insert new item
-+ * there. Key of item we are going to insert is smaller than
-+ * leftmost key in the node pointed to by said internal item
-+ * (otherwise search wouldn't come to the extent in the first
-+ * place).
-+ *
-+ * This is a result of extents being located at the twig
-+ * level. For explanation, see the EOTTL comment at the top of
-+ * this file.
-+ */
-+ h->flags &= ~CBK_TRUST_DK;
-+ } else {
-+ assert("vs-8", result == 2);
-+ *outcome = LOOKUP_REST;
-+ return 1;
-+ }
-+ assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord)));
-+ return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/estimate.c linux-2.6.24/fs/reiser4/estimate.c
---- linux-2.6.24.orig/fs/reiser4/estimate.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/estimate.c 2008-01-25 11:39:06.912201506 +0300
-@@ -0,0 +1,120 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "tree.h"
-+#include "carry.h"
-+#include "inode.h"
-+#include "plugin/cluster.h"
-+#include "plugin/item/ctail.h"
-+
-+/* this returns how many nodes might get dirty or be added if @childen nodes are dirtied
-+
-+ Amount of internals which will get dirty or get allocated we estimate as about 10% of the children (103/1024) + 1 balancing. 1 balancing
-+ is 2 neighbours, 2 new blocks and the current block on the leaf level, 2 neighbour nodes + the current (or 1
-+ neighbour and 1 new and the current) on twig level, 2 neighbour nodes on upper levels and 1 for a new root. So 5 for
-+ leaf level, 3 for twig level, 2 on upper + 1 for root.
-+
-+ Do not calculate the current node of the lowest level here - this is overhead only.
-+
-+ @childen is almost always 1 here. Exception is flow insertion
-+*/
-+static reiser4_block_nr
-+max_balance_overhead(reiser4_block_nr childen, tree_level tree_height)
-+{
-+ reiser4_block_nr ten_percent;
-+
-+ ten_percent = ((103 * childen) >> 10);
-+
-+ /* If we have too many balancings at the time, tree height can rise by more
-+ than 1. Heights below 5 are counted as 5; otherwise it is assumed to rise by 1 only. */
-+ return ((tree_height < 5 ? 5 : tree_height) * 2 + (4 + ten_percent));
-+}
-+
-+/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to
-+ perform insertion of one item into a tree of height @height */
-+/* it is only called when the tree height changes or is initialized */
-+reiser4_block_nr calc_estimate_one_insert(tree_level height)
-+{
-+ return 1 + max_balance_overhead(1, height);
-+}
-+
-+reiser4_block_nr estimate_one_insert_item(reiser4_tree * tree)
-+{
-+ return tree->estimate_one_insert; /* estimate stored in @tree */
-+}
-+
-+/* this returns maximal possible number of nodes which can be modified plus number of new nodes which can be required to
-+ perform insertion of one unit into an item in the tree */
-+reiser4_block_nr estimate_one_insert_into_item(reiser4_tree * tree)
-+{
-+ /* insertion into an item is estimated just like insertion of a whole item */
-+ return tree->estimate_one_insert;
-+}
-+
-+reiser4_block_nr estimate_one_item_removal(reiser4_tree * tree)
-+{
-+ /* on item removal reiser4 does not try to pack nodes more compactly, so only one node may be dirtied on leaf
-+ level; the (larger) one-insert estimate stored in @tree is returned nevertheless */
-+ return tree->estimate_one_insert;
-+}
-+
-+/* on leaf level insert_flow may add CARRY_FLOW_NEW_NODES_LIMIT new nodes and dirty 3 existing nodes (insert point and
-+ both its neighbors). max_balance_overhead() estimates the number of blocks which may change/get added on internal
-+ levels for a tree of height @height */
-+reiser4_block_nr estimate_insert_flow(tree_level height)
-+{
-+ return 3 + CARRY_FLOW_NEW_NODES_LIMIT + max_balance_overhead(3 +
-+ CARRY_FLOW_NEW_NODES_LIMIT,
-+ height);
-+}
-+
-+/* returns max number of nodes that can be occupied by a disk cluster (1 page is assumed if @unprepped is set) */
-+static reiser4_block_nr estimate_cluster(struct inode * inode, int unprepped)
-+{
-+ int per_cluster;
-+ per_cluster = (unprepped ? 1 : cluster_nrpages(inode));
-+ return 3 + per_cluster +
-+ max_balance_overhead(3 + per_cluster,
-+ REISER4_MAX_ZTREE_HEIGHT);
-+}
-+
-+/* how many nodes might get dirty and added
-+ during insertion of a disk cluster (estimated as an unprepped one) */
-+reiser4_block_nr estimate_insert_cluster(struct inode * inode)
-+{
-+ return estimate_cluster(inode, 1); /* 24 */
-+}
-+
-+/* how many nodes might get dirty and added during update of a (prepped
-+ or unprepped) disk cluster; all cluster_nrpages() pages are counted */
-+reiser4_block_nr estimate_update_cluster(struct inode * inode)
-+{
-+ return estimate_cluster(inode, 0); /* 44, for 64K-cluster */
-+}
-+
-+/* How many nodes occupied by a disk cluster might get dirty.
-+ Note that this estimation is not precise (i.e. disk cluster
-+ can occupy more nodes).
-+ Q: Why don't we use a precise estimation?
-+ A: 1.Because precise estimation is fairly bad: 65536 nodes
-+ for 64K logical cluster, it means 256M of dead space on
-+ a partition
-+ 2.It is a very rare case when disk cluster occupies more
-+ nodes than this estimation returns.
-+*/
-+reiser4_block_nr estimate_dirty_cluster(struct inode * inode)
-+{
-+ return cluster_nrpages(inode) + 4;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/export_ops.c linux-2.6.24/fs/reiser4/export_ops.c
---- linux-2.6.24.orig/fs/reiser4/export_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/export_ops.c 2008-01-25 12:03:29.960445090 +0300
-@@ -0,0 +1,319 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "inode.h"
-+#include "plugin/plugin.h"
-+
-+/*
-+ * Supported file-handle types (values returned by reiser4_encode_fh())
-+ */
-+typedef enum {
-+ FH_WITH_PARENT = 0x10, /* file handle with parent */
-+ FH_WITHOUT_PARENT = 0x11 /* file handle without parent */
-+} reiser4_fhtype;
-+
-+#define NFSERROR (255) /* encode_fh result: handle can not be stored */
-+
-+/* initialize place-holder for object: no plugin is attached to @o yet */
-+static void object_on_wire_init(reiser4_object_on_wire *o)
-+{
-+ o->plugin = NULL;
-+}
-+
-+/* finish with @o: call its plugin's wire.done() method, if a plugin is set */
-+static void object_on_wire_done(reiser4_object_on_wire *o)
-+{
-+ if (o->plugin != NULL)
-+ o->plugin->wire.done(o);
-+}
-+
-+/*
-+ * read serialized object identity from @addr and store it in @obj (dual to
-+ * encode_inode()). Returns plugin's wire.read() result or an ERR_PTR().
-+ */
-+static char *decode_inode(struct super_block *s, char *addr,
-+ reiser4_object_on_wire * obj)
-+{
-+ file_plugin *fplug;
-+
-+ /* identifier of object plugin is stored in the first two bytes,
-+ * followed by... */
-+ fplug = file_plugin_by_disk_id(reiser4_get_tree(s), (d16 *) addr);
-+ if (fplug != NULL) {
-+ addr += sizeof(d16);
-+ obj->plugin = fplug;
-+ assert("nikita-3520", fplug->wire.read != NULL);
-+ /* plugin specific encoding of object identity. */
-+ addr = fplug->wire.read(addr, obj);
-+ } else
-+ addr = ERR_PTR(RETERR(-EINVAL)); /* unknown plugin id */
-+ return addr;
-+}
-+
-+static struct dentry *reiser4_get_dentry(struct super_block *super,
-+ void *data);
-+/**
-+ * reiser4_decode_fh: decode on-wire object - helper function
-+ * for fh_to_dentry, fh_to_parent export operations;
-+ * @super: super block;
-+ * @addr: onwire object to be decoded;
-+ *
-+ * Returns dentry of the decoded object (marked stateless) or an error pointer.
-+ */
-+static struct dentry *reiser4_decode_fh(struct super_block * super,
-+ char * addr)
-+{
-+ reiser4_object_on_wire object;
-+
-+ object_on_wire_init(&object);
-+
-+ addr = decode_inode(super, addr, &object);
-+ if (!IS_ERR(addr)) {
-+ struct dentry *d;
-+ d = reiser4_get_dentry(super, &object);
-+ if (d != NULL && !IS_ERR(d))
-+ /* FIXME check for -ENOMEM */
-+ reiser4_get_dentry_fsdata(d)->stateless = 1;
-+ addr = (char *)d; /* @addr is reused to carry the result */
-+ }
-+ object_on_wire_done(&object);
-+ return (void *)addr;
-+}
-+
-+static struct dentry *reiser4_fh_to_dentry(struct super_block *sb,
-+ struct fid *fid,
-+ int fh_len, int fh_type) /* NOTE(review): fh_len is never checked */
-+{
-+ reiser4_context *ctx;
-+ struct dentry *d;
-+
-+ assert("edward-1536",
-+ fh_type == FH_WITH_PARENT || fh_type == FH_WITHOUT_PARENT);
-+
-+ ctx = reiser4_init_context(sb);
-+ if (IS_ERR(ctx))
-+ return (struct dentry *)ctx;
-+
-+ d = reiser4_decode_fh(sb, (char *)fid->raw); /* object comes first in handle */
-+
-+ reiser4_exit_context(ctx);
-+ return d;
-+}
-+
-+static struct dentry *reiser4_fh_to_parent(struct super_block *sb,
-+ struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ char * addr;
-+ struct dentry * d;
-+ reiser4_context *ctx;
-+ file_plugin *fplug;
-+
-+ if (fh_type == FH_WITHOUT_PARENT)
-+ return NULL;
-+ assert("edward-1537", fh_type == FH_WITH_PARENT);
-+
-+ ctx = reiser4_init_context(sb);
-+ if (IS_ERR(ctx))
-+ return (struct dentry *)ctx;
-+ addr = (char *)fid->raw;
-+ /* extract 2-bytes file plugin id of the (child) object */
-+ fplug = file_plugin_by_disk_id(reiser4_get_tree(sb), (d16 *)addr);
-+ if (fplug == NULL) {
-+ d = ERR_PTR(RETERR(-EINVAL));
-+ goto exit;
-+ }
-+ addr += sizeof(d16);
-+ /* skip previously encoded object */
-+ addr = fplug->wire.read(addr, NULL /* skip */);
-+ if (IS_ERR(addr)) {
-+ d = (struct dentry *)addr;
-+ goto exit;
-+ }
-+ /* extract and decode parent object, which follows the child's encoding */
-+ d = reiser4_decode_fh(sb, addr);
-+ exit:
-+ reiser4_exit_context(ctx);
-+ return d;
-+}
-+
-+/*
-+ * Object serialization support.
-+ *
-+ * To support knfsd, a file system provides export_operations that are used to
-+ * construct and interpret NFS file handles. As a generalization of this,
-+ * reiser4 object plugins have serialization support: it provides methods to
-+ * create on-wire representation of identity of reiser4 object, and
-+ * re-create/locate object given its on-wire identity.
-+ *
-+ */
-+
-+/*
-+ * return number of bytes that on-wire representation of @inode's identity
-+ * consumes: plugin's wire.size() plus sizeof(d16) for the plugin identifier.
-+ */
-+static int encode_inode_size(struct inode *inode)
-+{
-+ assert("nikita-3514", inode != NULL);
-+ assert("nikita-3515", inode_file_plugin(inode) != NULL);
-+ assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL);
-+
-+ return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16);
-+}
-+
-+/*
-+ * store on-wire representation of @inode's identity at the area beginning at
-+ * @start. Returns the result of the plugin's wire.write() method.
-+ */
-+static char *encode_inode(struct inode *inode, char *start)
-+{
-+ assert("nikita-3517", inode != NULL);
-+ assert("nikita-3518", inode_file_plugin(inode) != NULL);
-+ assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL);
-+
-+ /*
-+ * first, store two-byte identifier of object plugin, then
-+ */
-+ save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)),
-+ (d16 *) start);
-+ start += sizeof(d16);
-+ /*
-+ * call plugin to serialize object's identity
-+ */
-+ return inode_file_plugin(inode)->wire.write(inode, start);
-+}
-+
-+/* this returns file handle type and stores number of 32 bit words used in
-+ * @lenp. 255 (NFSERROR) is returned if file handle can not be stored */
-+/**
-+ * reiser4_encode_fh - encode_fh of export operations
-+ * @dentry: dentry of the object to be serialized
-+ * @fh: buffer the file handle is written to
-+ * @lenp: in: size of @fh in 32bit words; out: number of words used
-+ * @need_parent: if non-zero, identity of the parent is encoded too
-+ *
-+ */
-+static int
-+reiser4_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp,
-+ int need_parent)
-+{
-+ struct inode *inode;
-+ struct inode *parent;
-+ char *addr;
-+ int need;
-+ int delta;
-+ int result;
-+ reiser4_context *ctx;
-+
-+ /*
-+ * knfsd asks us to serialize object in @dentry, and, optionally its
-+ * parent (if need_parent != 0).
-+ *
-+ * encode_inode() and encode_inode_size() is used to build
-+ * representation of object and its parent. All hard work is done by
-+ * object plugins.
-+ */
-+ inode = dentry->d_inode;
-+ parent = dentry->d_parent->d_inode;
-+
-+ addr = (char *)fh;
-+
-+ need = encode_inode_size(inode);
-+ if (need < 0)
-+ return NFSERROR;
-+ if (need_parent) {
-+ delta = encode_inode_size(parent);
-+ if (delta < 0)
-+ return NFSERROR;
-+ need += delta;
-+ }
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return NFSERROR; /* not an errno: return value is a handle type */
-+
-+ if (need <= sizeof(__u32) * (*lenp)) {
-+ addr = encode_inode(inode, addr);
-+ if (need_parent)
-+ addr = encode_inode(parent, addr);
-+
-+ /* store in lenp number of 32bit words required for file
-+ * handle. */
-+ *lenp = (need + sizeof(__u32) - 1) >> 2;
-+ result = need_parent ? FH_WITH_PARENT : FH_WITHOUT_PARENT;
-+ } else
-+ /* not enough space in file handle */
-+ result = NFSERROR;
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_get_dentry_parent - get_parent of export operations
-+ * @child: dentry whose inode's dir plugin is asked for the parent
-+ *
-+ */
-+static struct dentry *reiser4_get_dentry_parent(struct dentry *child)
-+{
-+ struct inode *dir;
-+ dir_plugin *dplug;
-+
-+ assert("nikita-3527", child != NULL);
-+ /* see comment in reiser4_get_dentry() about following assertion */
-+ assert("nikita-3528", is_in_reiser4_context());
-+
-+ dir = child->d_inode;
-+ assert("nikita-3529", dir != NULL);
-+ dplug = inode_dir_plugin(dir);
-+ assert("nikita-3531", ergo(dplug != NULL, dplug->get_parent != NULL));
-+ if (dplug != NULL)
-+ return dplug->get_parent(dir);
-+ else
-+ return ERR_PTR(RETERR(-ENOTDIR)); /* inode has no dir plugin */
-+}
-+
-+/**
-+ * reiser4_get_dentry - get_dentry of export operations
-+ * @super: super block passed on to the plugin
-+ * @data: decoded on-wire object (reiser4_object_on_wire)
-+ *
-+ * Returns what the object plugin's wire.get() method returns.
-+ */
-+static struct dentry *reiser4_get_dentry(struct super_block *super, void *data)
-+{
-+ reiser4_object_on_wire *o;
-+
-+ assert("nikita-3522", super != NULL);
-+ assert("nikita-3523", data != NULL);
-+ /*
-+ * this is only supposed to be called by
-+ *
-+ * reiser4_fh_to_dentry/reiser4_fh_to_parent->reiser4_decode_fh
-+ *
-+ * so, reiser4_context should be here already.
-+ */
-+ assert("nikita-3526", is_in_reiser4_context());
-+
-+ o = (reiser4_object_on_wire *)data;
-+ assert("nikita-3524", o->plugin != NULL);
-+ assert("nikita-3525", o->plugin->wire.get != NULL);
-+
-+ return o->plugin->wire.get(super, o);
-+}
-+
-+struct export_operations reiser4_export_operations = {
-+ .encode_fh = reiser4_encode_fh, /* object (+ parent) -> file handle */
-+ .fh_to_dentry = reiser4_fh_to_dentry, /* file handle -> dentry */
-+ .fh_to_parent = reiser4_fh_to_parent, /* file handle -> parent's dentry */
-+ .get_parent = reiser4_get_dentry_parent, /* via dir plugin's get_parent() */
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/flush.c linux-2.6.24/fs/reiser4/flush.c
---- linux-2.6.24.orig/fs/reiser4/flush.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/flush.c 2008-01-25 11:39:06.000000000 +0300
-@@ -0,0 +1,3625 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* The design document for this file is at http://www.namesys.com/v4/v4.html. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/plugin.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "carry.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "super.h"
-+#include "entd.h"
-+#include "reiser4.h"
-+#include "flush.h"
-+#include "writeout.h"
-+
-+#include <asm/atomic.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/bio.h> /* for struct bio */
-+#include <linux/pagemap.h>
-+#include <linux/blkdev.h>
-+
-+/* IMPLEMENTATION NOTES */
-+
-+/* PARENT-FIRST: Some terminology: A parent-first traversal is a way of assigning a total
-+ order to the nodes of the tree in which the parent is placed before its children, which
-+ are ordered (recursively) in left-to-right order. When we speak of a "parent-first preceder", it
-+ describes the node that "came before in forward parent-first order". When we speak of a
-+ "parent-first follower", it describes the node that "comes next in parent-first
-+ order" (alternatively the node that "came before in reverse parent-first order").
-+
-+ The following pseudo-code prints the nodes of a tree in forward parent-first order:
-+
-+ void parent_first (node)
-+ {
-+ print_node (node);
-+ if (node->level > leaf) {
-+ for (i = 0; i < num_children; i += 1) {
-+ parent_first (node->child[i]);
-+ }
-+ }
-+ }
-+*/
-+
-+/* JUST WHAT ARE WE TRYING TO OPTIMIZE, HERE? The idea is to optimize block allocation so
-+ that a left-to-right scan of the tree's data (i.e., the leaves in left-to-right order)
-+ can be accomplished with sequential reads, which results in reading nodes in their
-+ parent-first order. This is a read-optimization aspect of the flush algorithm, and
-+ there is also a write-optimization aspect, which is that we wish to make large
-+ sequential writes to the disk by allocating or reallocating blocks so that they can be
-+ written in sequence. Sometimes the read-optimization and write-optimization goals
-+ conflict with each other, as we discuss in more detail below.
-+*/
-+
-+/* STATE BITS: The flush code revolves around the state of the jnodes it covers. Here are
-+ the relevant jnode->state bits and their relevance to flush:
-+
-+ JNODE_DIRTY: If a node is dirty, it must be flushed. But in order to be written it
-+ must be allocated first. In order to be considered allocated, the jnode must have
-+ exactly one of { JNODE_OVRWR, JNODE_RELOC } set. These two bits are exclusive, and
-+ all dirtied jnodes eventually have one of these bits set during each transaction.
-+
-+ JNODE_CREATED: The node was freshly created in its transaction and has no previous
-+ block address, so it is unconditionally assigned to be relocated, although this is
-+ mainly for code-convenience. It is not being 'relocated' from anything, but in
-+ almost every regard it is treated as part of the relocate set. The JNODE_CREATED bit
-+ remains set even after JNODE_RELOC is set, so the actual relocate can be
-+ distinguished from the created-and-allocated set easily: relocate-set members
-+ (belonging to the preserve-set) have (JNODE_RELOC) set and created-set members which
-+ have no previous location to preserve have (JNODE_RELOC | JNODE_CREATED) set.
-+
-+ JNODE_OVRWR: The node belongs to atom's overwrite set. The flush algorithm made the
-+ decision to maintain the pre-existing location for this node and it will be written
-+ to the wandered-log.
-+
-+ JNODE_RELOC: The flush algorithm made the decision to relocate this block (if it was
-+ not created, see note above). A block with JNODE_RELOC set is eligible for
-+ early-flushing and may be submitted during flush_empty_queues. When the JNODE_RELOC
-+ bit is set on a znode, the parent node's internal item is modified and the znode is
-+ rehashed.
-+
-+ JNODE_SQUEEZABLE: Before shifting everything left, the flush algorithm scans the node
-+ and calls plugin->f.squeeze() method for its items. By this technology we update disk
-+ clusters of cryptcompress objects. Also if leftmost point that was found by flush scan
-+ has this flag (races with write(), rare case) the flush algorithm makes the decision
-+ to pass it to squalloc() in spite of its flushprepped status for squeezing, not for
-+ repeated allocation.
-+
-+ JNODE_FLUSH_QUEUED: This bit is set when a call to flush enters the jnode into its
-+ flush queue. This means the jnode is not on any clean or dirty list; instead it is
-+ moved to the private list of one of the flush queue objects (see flush_queue.h). This
-+ prevents multiple concurrent flushes from attempting to start flushing from the
-+ same node.
-+
-+ (DEAD STATE BIT) JNODE_FLUSH_BUSY: This bit was set during the bottom-up
-+ squeeze-and-allocate on a node while its children are actively being squeezed and
-+ allocated. This flag was created to avoid submitting a write request for a node
-+ while its children are still being allocated and squeezed. Then the flush queue was
-+ re-implemented to allow an unlimited number of nodes to be queued. This flag support was
-+ commented out in source code because we decided that there was no reason to submit
-+ queued nodes before jnode_flush() finishes. However, current code calls fq_write()
-+ during a slum traversal and may submit "busy nodes" to disk. Probably we can
-+ re-enable the JNODE_FLUSH_BUSY bit support in future.
-+
-+ With these state bits, we describe a test used frequently in the code below,
-+ jnode_is_flushprepped() (and the spin-lock-taking jnode_check_flushprepped()). The
-+ test for "flushprepped" returns true if any of the following are true:
-+
-+ - The node is not dirty
-+ - The node has JNODE_RELOC set
-+ - The node has JNODE_OVRWR set
-+
-+ If either the node is not dirty or it has already been processed by flush (and assigned
-+ JNODE_OVRWR or JNODE_RELOC), then it is prepped. If jnode_is_flushprepped() returns
-+ false then flush has work to do on that node.
-+*/
-+
-+/* FLUSH_PREP_ONCE_PER_TRANSACTION: Within a single transaction a node is never
-+ flushprepped twice (unless an explicit call to flush_unprep is made as described in
-+ detail below). For example a node is dirtied, allocated, and then early-flushed to
-+ disk and set clean. Before the transaction commits, the page is dirtied again and, due
-+ to memory pressure, the node is flushed again. The flush algorithm will not relocate
-+ the node to a new disk location, it will simply write it to the same, previously
-+ relocated position again.
-+*/
-+
-+/* THE BOTTOM-UP VS. TOP-DOWN ISSUE: This code implements a bottom-up algorithm where we
-+ start at a leaf node and allocate in parent-first order by iterating to the right. At
-+ each step of the iteration, we check for the right neighbor. Before advancing to the
-+ right neighbor, we check if the current position and the right neighbor share the same
-+ parent. If they do not share the same parent, the parent is allocated before the right
-+ neighbor.
-+
-+ This process goes recursively up the tree and squeeze nodes level by level as long as
-+ the right neighbor and the current position have different parents, then it allocates
-+ the right-neighbors-with-different-parents on the way back down. This process is
-+ described in more detail in flush_squalloc_changed_ancestor and the recursive function
-+ squalloc_one_changed_ancestor. But the purpose here is not to discuss the
-+ specifics of the bottom-up approach as it is to contrast the bottom-up and top-down
-+ approaches.
-+
-+ The top-down algorithm was implemented earlier (April-May 2002). In the top-down
-+ approach, we find a starting point by scanning left along each level past dirty nodes,
-+ then going up and repeating the process until the left node and the parent node are
-+ clean. We then perform a parent-first traversal from the starting point, which makes
-+ allocating in parent-first order trivial. After one subtree has been allocated in this
-+ manner, we move to the right, try moving upward, then repeat the parent-first
-+ traversal.
-+
-+ Both approaches have problems that need to be addressed. Both are approximately the
-+ same amount of code, but the bottom-up approach has advantages in the order it acquires
-+ locks which, at the very least, make it the better approach. At first glance each one
-+ makes the other one look simpler, so it is important to remember a few of the problems
-+ with each one.
-+
-+ Main problem with the top-down approach: When you encounter a clean child during the
-+ parent-first traversal, what do you do? You would like to avoid searching through a
-+ large tree of nodes just to find a few dirty leaves at the bottom, and there is not an
-+ obvious solution. One of the advantages of the top-down approach is that during the
-+ parent-first traversal you check every child of a parent to see if it is dirty. In
-+ this way, the top-down approach easily handles the main problem of the bottom-up
-+ approach: unallocated children.
-+
-+ The unallocated children problem is that before writing a node to disk we must make
-+ sure that all of its children are allocated. Otherwise, writing the node means
-+ extra I/O because the node will have to be written again when the child is finally
-+ allocated.
-+
-+ WE HAVE NOT YET ELIMINATED THE UNALLOCATED CHILDREN PROBLEM. Except for bugs, this
-+ should not cause any file system corruption, it only degrades I/O performance because a
-+ node may be written when it is sure to be written at least one more time in the same
-+ transaction when the remaining children are allocated. What follows is a description
-+ of how we will solve the problem.
-+*/
-+
-+/* HANDLING UNALLOCATED CHILDREN: During flush we may allocate a parent node then,
-+ proceeding in parent first order, allocate some of its left-children, then encounter a
-+ clean child in the middle of the parent. We do not allocate the clean child, but there
-+ may remain unallocated (dirty) children to the right of the clean child. If we were to
-+ stop flushing at this moment and write everything to disk, the parent might still
-+ contain unallocated children.
-+
-+ We could try to allocate all the descendents of every node that we allocate, but this
-+ is not necessary. Doing so could result in allocating the entire tree: if the root
-+ node is allocated then every unallocated node would have to be allocated before
-+ flushing. Actually, we do not have to write a node just because we allocate it. It is
-+ possible to allocate but not write a node during flush, when it still has unallocated
-+ children. However, this approach is probably not optimal for the following reason.
-+
-+ The flush algorithm is designed to allocate nodes in parent-first order in an attempt
-+ to optimize reads that occur in the same order. Thus we are read-optimizing for a
-+ left-to-right scan through all the leaves in the system, and we are hoping to
-+ write-optimize at the same time because those nodes will be written together in batch.
-+ What happens, however, if we assign a block number to a node in its read-optimized
-+ order but then avoid writing it because it has unallocated children? In that
-+ situation, we lose out on the write-optimization aspect because a node will have to be
-+ written again to its location on the device, later, which likely means seeking back
-+ to that location.
-+
-+ So there are tradeoffs. We can choose either:
-+
-+ A. Allocate all unallocated children to preserve both write-optimization and
-+ read-optimization, but this is not always desirable because it may mean having to
-+ allocate and flush very many nodes at once.
-+
-+ B. Defer writing nodes with unallocated children, keep their read-optimized locations,
-+ but sacrifice write-optimization because those nodes will be written again.
-+
-+ C. Defer writing nodes with unallocated children, but do not keep their read-optimized
-+ locations. Instead, choose to write-optimize them later, when they are written. To
-+ facilitate this, we "undo" the read-optimized allocation that was given to the node so
-+ that later it can be write-optimized, thus "unpreparing" the flush decision. This is a
-+ case where we disturb the FLUSH_PREP_ONCE_PER_TRANSACTION rule described above. By a
-+ call to flush_unprep() we will: if the node was wandered, unset the JNODE_OVRWR bit;
-+ if the node was relocated, unset the JNODE_RELOC bit, non-deferred-deallocate its block
-+ location, and set the JNODE_CREATED bit, effectively setting the node back to an
-+ unallocated state.
-+
-+ We will take the following approach in v4.0: for twig nodes we will always finish
-+ allocating unallocated children (A). For nodes with (level > TWIG) we will defer
-+ writing and choose write-optimization (C).
-+
-+ To summarize, there are several parts to a solution that avoids the problem with
-+ unallocated children:
-+
-+ FIXME-ZAM: No approach has been implemented yet to eliminate the "UNALLOCATED CHILDREN"
-+ problem, because an experiment showed that we have only 1-2 nodes with unallocated
-+ children per thousands of written nodes. The experiment was simple, like
-+ copying / deletion of linux kernel sources. However the problem can arise in more
-+ complex tests. I think we have jnode_io_hook where we can insert a check for unallocated
-+ children and see what kind of problem we have.
-+
-+ 1. When flush reaches a stopping point (e.g., a clean node), it should continue calling
-+ squeeze-and-allocate on any remaining unallocated children. FIXME: Difficulty to
-+ implement: should be simple -- amounts to adding a while loop to jnode_flush, see
-+ comments in that function.
-+
-+ 2. When flush reaches flush_empty_queue(), some of the (level > TWIG) nodes may still
-+ have unallocated children. If the twig level has unallocated children it is an
-+ assertion failure. If a higher-level node has unallocated children, then it should be
-+ explicitly de-allocated by a call to flush_unprep(). FIXME: Difficulty to implement:
-+ should be simple.
-+
-+ 3. (CPU-Optimization) Checking whether a node has unallocated children may consume more
-+ CPU cycles than we would like, and it is possible (but medium complexity) to optimize
-+ this somewhat in the case where large sub-trees are flushed. The following observation
-+ helps: if both the left- and right-neighbor of a node are processed by the flush
-+ algorithm then the node itself is guaranteed to have all of its children allocated.
-+ However, the cost of this check may not be so expensive after all: it is not needed for
-+ leaves and flush can guarantee this property for twigs. That leaves only (level >
-+ TWIG) nodes that have to be checked, so this optimization only helps if at least three
-+ (level > TWIG) nodes are flushed in one pass, and the savings will be very small unless
-+ there are many more (level > TWIG) nodes. But if there are many (level > TWIG) nodes
-+ then the number of blocks being written will be very large, so the savings may be
-+ insignificant. That said, the idea is to maintain both the left and right edges of
-+ nodes that are processed in flush. When flush_empty_queue() is called, a relatively
-+ simple test will tell whether the (level > TWIG) node is on the edge. If it is on the
-+ edge, the slow check is necessary, but if it is in the interior then it can be assumed
-+ to have all of its children allocated. FIXME: medium complexity to implement, but
-+ simple to verify given that we must have a slow check anyway.
-+
-+ 4. (Optional) This part is optional, not for v4.0--flush should work independently of
-+ whether this option is used or not. Called RAPID_SCAN, the idea is to amend the
-+ left-scan operation to take unallocated children into account. Normally, the left-scan
-+ operation goes left as long as adjacent nodes are dirty up until some large maximum
-+ value (FLUSH_SCAN_MAXNODES) at which point it stops and begins flushing. But scan-left
-+ may stop at a position where there are unallocated children to the left with the same
-+ parent. When RAPID_SCAN is enabled, the ordinary scan-left operation stops after
-+ FLUSH_RELOCATE_THRESHOLD, which is much smaller than FLUSH_SCAN_MAXNODES, then proceeds
-+ with a rapid scan. The rapid scan skips all the interior children of a node--if the
-+ leftmost child of a twig is dirty, check its left neighbor (the rightmost child of the
-+ twig to the left). If the left neighbor of the leftmost child is also dirty, then
-+ continue the scan at the left twig and repeat. This option will cause flush to
-+ allocate more twigs in a single pass, but it also has the potential to write many more
-+ nodes than would otherwise be written without the RAPID_SCAN option. RAPID_SCAN
-+ was partially implemented, code removed August 12, 2002 by JMACD.
-+*/
-+
-+/* FLUSH CALLED ON NON-LEAF LEVEL. Most of our design considerations assume that the
-+ starting point for flush is a leaf node, but actually the flush code cares very little
-+ about whether or not this is true. It is possible that all the leaf nodes are flushed
-+ and dirty parent nodes still remain, in which case jnode_flush() is called on a
-+ non-leaf argument. Flush doesn't care--it treats the argument node as if it were a
-+ leaf, even when it is not. This is a simple approach, and there may be a more optimal
-+ policy but until a problem with this approach is discovered, simplest is probably best.
-+
-+ NOTE: In this case, the ordering produced by flush is parent-first only if you ignore
-+ the leaves. This is done as a matter of simplicity and there is only one (shaky)
-+ justification. When an atom commits, it flushes all leaf level nodes first, followed
-+ by twigs, and so on. With flushing done in this order, if flush is eventually called
-+ on a non-leaf node it means that (somehow) we reached a point where all leaves are
-+ clean and only internal nodes need to be flushed. If that is the case, then it means
-+ there were no leaves that were the parent-first preceder/follower of the parent. This
-+ is expected to be a rare case, which is why we do nothing special about it. However,
-+ memory pressure may pass an internal node to flush when there are still dirty leaf
-+ nodes that need to be flushed, which could prove our original assumptions
-+ "inoperative". If this needs to be fixed, then scan_left/right should have
-+ special checks for the non-leaf levels. For example, instead of passing from a node to
-+ the left neighbor, it should pass from the node to the left neighbor's rightmost
-+ descendent (if dirty).
-+
-+*/
-+
-+/* UNIMPLEMENTED AS YET: REPACKING AND RESIZING. We walk the tree in 4MB-16MB chunks, dirtying everything and putting
-+ it into a transaction. We tell the allocator to allocate the blocks as far as possible towards one end of the
-+ logical device--the left (starting) end of the device if we are walking from left to right, the right end of the
-+ device if we are walking from right to left. We then make passes in alternating directions, and as we do this the
-+ device becomes sorted such that tree order and block number order fully correlate.
-+
-+ Resizing is done by shifting everything either all the way to the left or all the way
-+ to the right, and then reporting the last block.
-+*/
-+
-+/* RELOCATE DECISIONS: The code makes a decision to relocate in several places. This
-+ describes the policy from the highest level:
-+
-+ The FLUSH_RELOCATE_THRESHOLD parameter: If we count this many consecutive nodes on the
-+ leaf level during flush-scan (right, left), then we unconditionally decide to relocate
-+ leaf nodes.
-+
-+ Otherwise, there are two contexts in which we make a decision to relocate:
-+
-+ 1. The REVERSE PARENT-FIRST context: Implemented in reverse_relocate_test().
-+ During the initial stages of flush, after scan-right completes, we want to ask the
-+ question: should we relocate this leaf node and thus dirty the parent node. Then if
-+ the node is a leftmost child its parent is its own parent-first preceder, thus we repeat
-+ the question at the next level up, and so on. In these cases we are moving in the
-+ reverse-parent first direction.
-+
-+ There is another case which is considered the reverse direction, which comes at the end
-+ of a twig in reverse_relocate_end_of_twig(). As we finish processing a twig we may
-+ reach a point where there is a clean twig to the right with a dirty leftmost child. In
-+ this case, we may wish to relocate the child by testing if it should be relocated
-+ relative to its parent.
-+
-+ 2. The FORWARD PARENT-FIRST context: Testing for forward relocation is done in
-+ allocate_znode. What distinguishes the forward parent-first case from the
-+ reverse-parent first case is that the preceder has already been allocated in the
-+ forward case, whereas in the reverse case we don't know what the preceder is until we
-+ finish "going in reverse". That simplifies the forward case considerably, and there we
-+ actually use the block allocator to determine whether, e.g., a block closer to the
-+ preceder is available.
-+*/
-+
-+/* SQUEEZE_LEFT_EDGE: Unimplemented idea for future consideration. The idea is, once we
-+ finish scan-left and find a starting point, if the parent's left neighbor is dirty then
-+ squeeze the parent's left neighbor and the parent. This may change the
-+ flush-starting-node's parent. Repeat until the child's parent is stable. If the child
-+ is a leftmost child, repeat this left-edge squeezing operation at the next level up.
-+ Note that we cannot allocate extents during this or they will be out of parent-first
-+ order. There are also some difficult coordinate maintenance issues. We can't do a tree
-+ search to find coordinates again (because we hold locks), we have to determine them
-+ from the two nodes being squeezed. Looks difficult, but has potential to increase
-+ space utilization. */
-+
-+/* Flush-scan helper functions. jnode_flush() calls scan_init() on each flush_scan
-+ before scanning and scan_done() when it is finished with that scan. */
-+static void scan_init(flush_scan * scan);
-+static void scan_done(flush_scan * scan);
-+
-+/* Flush-scan algorithm. jnode_flush() first calls scan_left() and then, only if the
-+ left scan counted fewer nodes than the relocate threshold, scan_right(). */
-+static int scan_left(flush_scan * scan, flush_scan * right, jnode * node,
-+ unsigned limit);
-+static int scan_right(flush_scan * scan, jnode * node, unsigned limit);
-+static int scan_common(flush_scan * scan, flush_scan * other);
-+static int scan_formatted(flush_scan * scan);
-+static int scan_unformatted(flush_scan * scan, flush_scan * other);
-+static int scan_by_coord(flush_scan * scan);
-+
-+/* Initial flush-point ancestor allocation. */
-+static int alloc_pos_and_ancestors(flush_pos_t * pos);
-+static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos);
-+static int set_preceder(const coord_t * coord_in, flush_pos_t * pos);
-+
-+/* Main flush algorithm. Note on abbreviation: "squeeze and allocate" == "squalloc". */
-+static int squalloc(flush_pos_t * pos);
-+
-+/* Flush squeeze implementation. */
-+static int squeeze_right_non_twig(znode * left, znode * right);
-+static int shift_one_internal_unit(znode * left, znode * right);
-+
-+/* Flush reverse parent-first relocation routines (see the "RELOCATE DECISIONS"
-+ comment in this file for the policy they implement). */
-+static int reverse_relocate_if_close_enough(const reiser4_block_nr * pblk,
-+ const reiser4_block_nr * nblk);
-+static int reverse_relocate_test(jnode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int reverse_relocate_check_dirty_parent(jnode * node,
-+ const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+
-+/* Flush allocate write-queueing functions: */
-+static int allocate_znode(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int allocate_znode_update(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos);
-+static int lock_parent_and_allocate_znode(znode *, flush_pos_t *);
-+
-+/* Flush helper functions: */
-+static int jnode_lock_parent_coord(jnode * node,
-+ coord_t * coord,
-+ lock_handle * parent_lh,
-+ load_count * parent_zh,
-+ znode_lock_mode mode, int try);
-+static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side,
-+ znode_lock_mode mode, int check_dirty, int expected);
-+static int znode_same_parents(znode * a, znode * b);
-+/* Thin wrapper: applies jnode_check_flushprepped() to the jnode obtained from
-+ @node via ZJNODE() and returns its result unchanged. */
-+static int znode_check_flushprepped(znode * node)
-+{
-+ return jnode_check_flushprepped(ZJNODE(node));
-+}
-+
-+/* Flush position functions: forward declarations of the static helpers that
-+ operate on a flush_pos_t. */
-+static void pos_init(flush_pos_t * pos);
-+static int pos_valid(flush_pos_t * pos);
-+static void pos_done(flush_pos_t * pos);
-+static int pos_stop(flush_pos_t * pos);
-+
-+/* checkchild(scan): assertion ("nikita-3435") on a flush_scan.
-+ * When the scan direction is LEFT_SIDE, scan->parent_coord is on a TWIG_LEVEL
-+ * node, scan->node is unformatted and the extent at scan->parent_coord is
-+ * unallocated, the macro asserts that extent_unit_index() of the parent coord
-+ * equals index_jnode(scan->node), i.e. that the scanned jnode is the first jnode
-+ * of its extent unit. Rationale given by the original author: all jnodes of an
-+ * unallocated extent are dirty and of the same atom.
-+ * NOTE(review): ergo(a, b) is presumably logical implication (!a || b) -- confirm. */
-+#define checkchild(scan) \
-+assert("nikita-3435", \
-+ ergo(scan->direction == LEFT_SIDE && \
-+ (scan->parent_coord.node->level == TWIG_LEVEL) && \
-+ jnode_is_unformatted(scan->node) && \
-+ extent_is_unallocated(&scan->parent_coord), \
-+ extent_unit_index(&scan->parent_coord) == index_jnode(scan->node)))
-+
-+/* This flush_cnt variable is used to track the number of concurrent flush operations,
-+ useful for debugging. It is initialized in txnmgr.c out of laziness (because flush has
-+ no static initializer function...). jnode_flush() increments it with atomic_inc()
-+ when REISER4_DEBUG is set. */
-+ON_DEBUG(atomic_t flush_cnt;
-+ )
-+
-+/* Report whether the backing device of the current reiser4 super block is
-+   write-congested. The device is reached through the i_mapping of the object
-+   returned by reiser4_get_super_fake(); the value of bdi_write_congested() is
-+   handed back to the caller unchanged (non-zero means "congested" to the
-+   caller in write_prepped_nodes()). */
-+static int check_write_congestion(void)
-+{
-+	struct super_block *super = reiser4_get_current_sb();
-+
-+	return bdi_write_congested(reiser4_get_super_fake(super)->i_mapping->
-+				   backing_dev_info);
-+}
-+
-+/* Conditionally write out the flush queue of @pos.
-+
-+   Nothing is written, and 0 is returned, when the JNODE_FLUSH_WRITE_BLOCKS flag
-+   is not set in @pos->flags or when check_write_congestion() reports congestion
-+   (the congestion check is only made when the flag is set). Otherwise the
-+   result of reiser4_write_fq() on @pos->fq is returned. */
-+static int write_prepped_nodes(flush_pos_t * pos)
-+{
-+	assert("zam-831", pos);
-+	assert("zam-832", pos->fq);
-+
-+	if ((pos->flags & JNODE_FLUSH_WRITE_BLOCKS) == 0 ||
-+	    check_write_congestion() != 0)
-+		return 0;
-+
-+	return reiser4_write_fq(pos->fq, pos->nr_written,
-+				WRITEOUT_SINGLE_STREAM |
-+				WRITEOUT_FOR_PAGE_RECLAIM);
-+}
-+
-+/* Properly release all resources held by the flush position, then move the
-+   flush position to a new locked node.
-+
-+   @pos: flush position to move.
-+   @new_lock: lock handle of the new node; taken over by @pos->lock.
-+   @new_load: load count of the same node; taken over by @pos->load.
-+   @new_coord: coordinate within the new node to copy into @pos->coord, or NULL
-+   to position @pos->coord at the first unit of the new node.
-+
-+   Any jnode reference kept in @pos->child is dropped. */
-+static void move_flush_pos(flush_pos_t * pos, lock_handle * new_lock,
-+			   load_count * new_load, const coord_t * new_coord)
-+{
-+	assert("zam-857", new_lock->node == new_load->node);
-+
-+	if (new_coord == NULL) {
-+		coord_init_first_unit(&pos->coord, new_lock->node);
-+	} else {
-+		assert("zam-858", new_coord->node == new_lock->node);
-+		coord_dup(&pos->coord, new_coord);
-+	}
-+
-+	/* forget the previously remembered child, if there is one */
-+	if (pos->child != NULL) {
-+		jput(pos->child);
-+		pos->child = NULL;
-+	}
-+
-+	/* take over the new load count first, then swap the old lock for the new one */
-+	move_load_count(&pos->load, new_load);
-+	done_lh(&pos->lock);
-+	move_lh(&pos->lock, new_lock);
-+}
-+
-+/* Delete an empty node whose link from the parent still exists.
-+
-+   @node must be non-NULL, empty and write-locked (all three are asserted).
-+   Returns whatever reiser4_delete_node() returns. */
-+static int delete_empty_node(znode * node)
-+{
-+	/* passed by address to reiser4_delete_node(); never read here */
-+	reiser4_key removed_key;
-+	int result;
-+
-+	assert("zam-1019", node != NULL);
-+	assert("zam-1020", node_is_empty(node));
-+	assert("zam-1023", znode_is_wlocked(node));
-+
-+	result = reiser4_delete_node(node, &removed_key, NULL, 1);
-+	return result;
-+}
-+
-+/* Prepare flush position for alloc_pos_and_ancestors() and squalloc().
-+
-+   @pos: flush position to set up; on success it takes over (via move_flush_pos())
-+   the long term lock and the load count acquired here.
-+   @org: jnode the flush is to start from.
-+
-+   If @org is a znode, @org itself is write-locked and loaded, and @pos->state
-+   becomes POS_ON_LEAF or POS_ON_INTERNAL depending on its level. Otherwise the
-+   parent of @org is write-locked via jnode_lock_parent_coord(), @pos->state
-+   becomes POS_ON_EPOINT and @pos->child holds a new reference to @org.
-+
-+   Returns 0 on success, -EAGAIN when flush has to be repeated (the parent item
-+   is not an extent, or @org is not the first child of its unallocated extent
-+   unit -- in the latter case @pos has already been moved to the parent), or
-+   the error returned by the locking/loading helpers.
-+
-+   Every exit goes through the "done" label, so a lock or load count that was
-+   taken but not handed over to @pos is always released. */
-+static int prepare_flush_pos(flush_pos_t * pos, jnode * org)
-+{
-+	int ret;
-+	load_count load;
-+	lock_handle lock;
-+
-+	init_lh(&lock);
-+	init_load_count(&load);
-+
-+	if (jnode_is_znode(org)) {
-+		ret = longterm_lock_znode(&lock, JZNODE(org),
-+					  ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
-+		if (ret)
-+			goto done;
-+
-+		ret = incr_load_count_znode(&load, JZNODE(org));
-+		if (ret)
-+			/* the long term lock is already held here: it has to be
-+			   dropped at "done" rather than leaked by a bare return */
-+			goto done;
-+
-+		pos->state =
-+		    (jnode_get_level(org) ==
-+		     LEAF_LEVEL) ? POS_ON_LEAF : POS_ON_INTERNAL;
-+		move_flush_pos(pos, &lock, &load, NULL);
-+	} else {
-+		coord_t parent_coord;
-+		ret = jnode_lock_parent_coord(org, &parent_coord, &lock,
-+					      &load, ZNODE_WRITE_LOCK, 0);
-+		if (ret)
-+			goto done;
-+		if (!item_is_extent(&parent_coord)) {
-+			/* file was converted to tail, org became HB, we found internal
-+			   item */
-+			ret = -EAGAIN;
-+			goto done;
-+		}
-+
-+		pos->state = POS_ON_EPOINT;
-+		move_flush_pos(pos, &lock, &load, &parent_coord);
-+		pos->child = jref(org);
-+		if (extent_is_unallocated(&parent_coord)
-+		    && extent_unit_index(&parent_coord) != index_jnode(org)) {
-+			/* @org is not first child of its parent unit. This may happen
-+			   because the long term lock of its parent node was released
-+			   between scan_left and scan_right. For now work around this
-+			   by having flush repeat */
-+			ret = -EAGAIN;
-+		}
-+	}
-+
-+      done:
-+	/* these are also reached after the handles were moved into @pos */
-+	done_load_count(&load);
-+	done_lh(&lock);
-+	return ret;
-+}
-+
-+/* TODO LIST (no particular order): */
-+/* I have labelled most of the legitimate FIXME comments in this file with letters to
-+ indicate which issue they relate to. There are a few miscellaneous FIXMEs with
-+ specific names mentioned instead that need to be inspected/resolved. */
-+/* B. There is an issue described in reverse_relocate_test having to do with an
-+ imprecise is_preceder? check having to do with partially-dirty extents. The code that
-+ sets preceder hints and computes the preceder is basically untested. Careful testing
-+ needs to be done that preceder calculations are done correctly, since if it doesn't
-+ affect correctness we will not catch this stuff during regular testing. */
-+/* C. EINVAL, E_DEADLOCK, E_NO_NEIGHBOR, ENOENT handling. It is unclear which of these are
-+ considered expected but unlikely conditions. Flush currently returns 0 (i.e., success
-+ but no progress, i.e., restart) whenever it receives any of these in jnode_flush().
-+ Many of the calls that may produce one of these return values (i.e.,
-+ longterm_lock_znode, reiser4_get_parent, reiser4_get_neighbor, ...) check some of these
-+ values themselves and, for instance, stop flushing instead of resulting in a restart.
-+ If any of these results are true error conditions then flush will go into a busy-loop,
-+ as we noticed during testing when a corrupt tree caused find_child_ptr to return
-+ ENOENT. It needs careful thought and testing of corner conditions.
-+*/
-+/* D. Atomicity of flush_prep against deletion and flush concurrency. Suppose a created
-+ block is assigned a block number then early-flushed to disk. It is dirtied again and
-+ flush is called again. Concurrently, that block is deleted, and the de-allocation of
-+ its block number does not need to be deferred, since it is not part of the preserve set
-+ (i.e., it didn't exist before the transaction). I think there may be a race condition
-+ where flush writes the dirty, created block after the non-deferred deallocated block
-+ number is re-allocated, making it possible to write deleted data on top of non-deleted
-+ data. It's just a theory, but it needs to be thought out. */
-+/* F. bio_alloc() failure is not handled gracefully. */
-+/* G. Unallocated children. */
-+/* H. Add a WANDERED_LIST to the atom to clarify the placement of wandered blocks. */
-+/* I. Rename flush-scan to scan-point, (flush-pos to flush-point?) */
-+
-+/* JNODE_FLUSH: MAIN ENTRY POINT */
-+/* This is the main entry point for flushing a jnode and its dirty neighborhood (dirty
-+ neighborhood is named "slum"). Jnode_flush() is called when reiser4 has to write dirty
-+ blocks to disk; this happens when the Linux VM decides to reduce the number of dirty pages
-+ or as a part of transaction commit.
-+
-+ Our objective here is to prep and flush the slum the jnode belongs to. We want to
-+ squish the slum together, and allocate the nodes in it as we squish because allocation
-+ of children affects squishing of parents.
-+
-+ The "argument" @node tells flush where to start. From there, flush finds the left edge
-+ of the slum, and calls squalloc (in which nodes are squeezed and allocated). To find a
-+ "better place" to start squalloc first we perform a flush_scan.
-+
-+ Flush-scanning may be performed in both left and right directions, but for different
-+ purposes. When scanning to the left, we are searching for a node that precedes a
-+ sequence of parent-first-ordered nodes which we will then flush in parent-first order.
-+ During flush-scanning, we also take the opportunity to count the number of consecutive
-+ leaf nodes. If this number is past some threshold (FLUSH_RELOCATE_THRESHOLD), then we
-+ make a decision to reallocate leaf nodes (thus favoring write-optimization).
-+
-+ Since the flush argument node can be anywhere in a sequence of dirty leaves, there may
-+ also be dirty nodes to the right of the argument. If the scan-left operation does not
-+ count at least FLUSH_RELOCATE_THRESHOLD nodes then we follow it with a right-scan
-+ operation to see whether there are, in fact, enough nodes to meet the relocate
-+ threshold. Each right- and left-scan operation uses a single flush_scan object.
-+
-+ After left-scan and possibly right-scan, we prepare a flush_position object with the
-+ starting flush point or parent coordinate, which was determined using scan-left.
-+
-+ Next we call the main flush routine, squalloc, which iterates along the
-+ leaf level, squeezing and allocating nodes (and placing them into the flush queue).
-+
-+ After squalloc returns we take extra steps to ensure that all the children
-+ of the final twig node are allocated--this involves repeating squalloc
-+ until we finish at a twig with no unallocated children.
-+
-+ Finally, we call flush_empty_queue to submit write-requests to disk. If we encounter
-+ any above-twig nodes during flush_empty_queue that still have unallocated children, we
-+ flush_unprep them.
-+
-+ Flush treats several "failure" cases as non-failures, essentially causing them to start
-+ over. E_DEADLOCK is one example. FIXME:(C) EINVAL, E_NO_NEIGHBOR, ENOENT: these should
-+ probably be handled properly rather than restarting, but there are a bunch of cases to
-+ audit.
-+*/
-+
-+static int
-+jnode_flush(jnode * node, long nr_to_write, long *nr_written,
-+ flush_queue_t * fq, int flags)
-+{
-+ long ret = 0;
-+ flush_scan *right_scan;
-+ flush_scan *left_scan;
-+ flush_pos_t *flush_pos;
-+ int todo;
-+ struct super_block *sb;
-+ reiser4_super_info_data *sbinfo;
-+ jnode *leftmost_in_slum = NULL;
-+
-+ assert("jmacd-76619", lock_stack_isclean(get_current_lock_stack()));
-+ assert("nikita-3022", reiser4_schedulable());
-+
-+ assert("nikita-3185",
-+ get_current_super_private()->delete_mutex_owner != current);
-+
-+ /* allocate right_scan, left_scan and flush_pos */
-+ right_scan =
-+ kmalloc(2 * sizeof(*right_scan) + sizeof(*flush_pos),
-+ reiser4_ctx_gfp_mask_get());
-+ if (right_scan == NULL)
-+ return RETERR(-ENOMEM);
-+ left_scan = right_scan + 1;
-+ flush_pos = (flush_pos_t *) (left_scan + 1);
-+
-+ sb = reiser4_get_current_sb();
-+ sbinfo = get_super_private(sb);
-+
-+ /* Flush-concurrency debug code */
-+#if REISER4_DEBUG
-+ atomic_inc(&flush_cnt);
-+#endif
-+
-+ reiser4_enter_flush(sb);
-+
-+ /* Initialize a flush position. */
-+ pos_init(flush_pos);
-+
-+ flush_pos->nr_written = nr_written;
-+ flush_pos->fq = fq;
-+ flush_pos->flags = flags;
-+ flush_pos->nr_to_write = nr_to_write;
-+
-+ scan_init(right_scan);
-+ scan_init(left_scan);
-+
-+ /* First scan left and remember the leftmost scan position. If the leftmost
-+ position is unformatted we remember its parent_coord. We scan until counting
-+ FLUSH_SCAN_MAXNODES.
-+
-+ If starting @node is unformatted, at the beginning of left scan its
-+ parent (twig level node, containing extent item) will be long term
-+ locked and lock handle will be stored in the
-+ @right_scan->parent_lock. This lock is used to start the rightward
-+ scan without redoing the tree traversal (necessary to find parent)
-+ and, hence, is kept during leftward scan. As a result, we have to
-+ use try-lock when taking long term locks during the leftward scan.
-+ */
-+ ret = scan_left(left_scan, right_scan,
-+ node, sbinfo->flush.scan_maxnodes);
-+ if (ret != 0)
-+ goto failed;
-+
-+ leftmost_in_slum = jref(left_scan->node);
-+ scan_done(left_scan);
-+
-+ /* Then possibly go right to decide if we will use a policy of relocating leaves.
-+ This is only done if we did not scan past (and count) enough nodes during the
-+ leftward scan. If we do scan right, we only care to go far enough to establish
-+ that at least FLUSH_RELOCATE_THRESHOLD number of nodes are being flushed. The
-+ scan limit is the difference between left_scan.count and the threshold. */
-+
-+ todo = sbinfo->flush.relocate_threshold - left_scan->count;
-+ /* scan right is inherently deadlock prone, because we are
-+ * (potentially) holding a lock on the twig node at this moment.
-+ * FIXME: this is incorrect comment: lock is not held */
-+ if (todo > 0) {
-+ ret = scan_right(right_scan, node, (unsigned)todo);
-+ if (ret != 0)
-+ goto failed;
-+ }
-+
-+ /* Only the right-scan count is needed, release any rightward locks right away. */
-+ scan_done(right_scan);
-+
-+ /* ... and the answer is: we should relocate leaf nodes if at least
-+ FLUSH_RELOCATE_THRESHOLD nodes were found. */
-+ flush_pos->leaf_relocate = JF_ISSET(node, JNODE_REPACK) ||
-+ (left_scan->count + right_scan->count >=
-+ sbinfo->flush.relocate_threshold);
-+
-+ /* Funny business here. We set the 'point' in the flush_position at prior to
-+ starting squalloc regardless of whether the first point is
-+ formatted or unformatted. Without this there would be an invariant, in the
-+ rest of the code, that if the flush_position is unformatted then
-+ flush_position->point is NULL and flush_position->parent_{lock,coord} is set,
-+ and if the flush_position is formatted then flush_position->point is non-NULL
-+ and no parent info is set.
-+
-+ This seems lazy, but it makes the initial calls to reverse_relocate_test
-+ (which ask "is it the pos->point the leftmost child of its parent") much easier
-+ because we know the first child already. Nothing is broken by this, but the
-+ reasoning is subtle. Holding an extra reference on a jnode during flush can
-+ cause us to see nodes with HEARD_BANSHEE during squalloc, because nodes are not
-+ removed from sibling lists until they have zero reference count. Flush would
-+ never observe a HEARD_BANSHEE node on the left-edge of flush, nodes are only
-+ deleted to the right. So if nothing is broken, why fix it?
-+
-+ NOTE-NIKITA actually, flush can meet HEARD_BANSHEE node at any
-+ point and in any moment, because of the concurrent file system
-+ activity (for example, truncate). */
-+
-+ /* Check jnode state after flush_scan completed. Having a lock on this
-+ node or its parent (in case of unformatted) helps us in case of
-+ concurrent flushing. */
-+ if (jnode_check_flushprepped(leftmost_in_slum)
-+ && !jnode_convertible(leftmost_in_slum)) {
-+ ret = 0;
-+ goto failed;
-+ }
-+
-+ /* Now setup flush_pos using scan_left's endpoint. */
-+ ret = prepare_flush_pos(flush_pos, leftmost_in_slum);
-+ if (ret)
-+ goto failed;
-+
-+ if (znode_get_level(flush_pos->coord.node) == LEAF_LEVEL
-+ && node_is_empty(flush_pos->coord.node)) {
-+ znode *empty = flush_pos->coord.node;
-+
-+ assert("zam-1022", !ZF_ISSET(empty, JNODE_HEARD_BANSHEE));
-+ ret = delete_empty_node(empty);
-+ goto failed;
-+ }
-+
-+ if (jnode_check_flushprepped(leftmost_in_slum)
-+ && !jnode_convertible(leftmost_in_slum)) {
-+ ret = 0;
-+ goto failed;
-+ }
-+
-+ /* Set pos->preceder and (re)allocate pos and its ancestors if it is needed */
-+ ret = alloc_pos_and_ancestors(flush_pos);
-+ if (ret)
-+ goto failed;
-+
-+ /* Do the main rightward-bottom-up squeeze and allocate loop. */
-+ ret = squalloc(flush_pos);
-+ pos_stop(flush_pos);
-+ if (ret)
-+ goto failed;
-+
-+ /* FIXME_NFQUCMPD: Here, handle the twig-special case for unallocated children.
-+ First, the pos_stop() and pos_valid() routines should be modified
-+ so that pos_stop() sets a flush_position->stop flag to 1 without
-+ releasing the current position immediately--instead release it in
-+ pos_done(). This is a better implementation than the current one anyway.
-+
-+ It is not clear that all fields of the flush_position should not be released,
-+ but at the very least the parent_lock, parent_coord, and parent_load should
-+ remain held because they hold the last twig when pos_stop() is
-+ called.
-+
-+ When we reach this point in the code, if the parent_coord is set to after the
-+ last item then we know that flush reached the end of a twig (and according to
-+ the new flush queueing design, we will return now). If parent_coord is not
-+ past the last item, we should check if the current twig has any unallocated
-+ children to the right (we are not concerned with unallocated children to the
-+ left--in that case the twig itself should not have been allocated). If the
-+ twig has unallocated children to the right, set the parent_coord to that
-+ position and then repeat the call to squalloc.
-+
-+ Testing for unallocated children may be defined in two ways: if any internal
-+ item has a fake block number, it is unallocated; if any extent item is
-+ unallocated then all of its children are unallocated. But there is a more
-+ aggressive approach: if there are any dirty children of the twig to the right
-+ of the current position, we may wish to relocate those nodes now. Checking for
-+ potential relocation is more expensive as it requires knowing whether there are
-+ any dirty children that are not unallocated. The extent_needs_allocation
-+ should be used after setting the correct preceder.
-+
-+ When we reach the end of a twig at this point in the code, if the flush can
-+ continue (when the queue is ready) it will need some information on the future
-+ starting point. That should be stored away in the flush_handle using a seal, I
-+ believe. Holding a jref() on the future starting point may break other code
-+ that deletes that node.
-+ */
-+
-+ /* FIXME_NFQUCMPD: Also, we don't want to do any flushing when flush is called
-+ above the twig level. If the VM calls flush above the twig level, do nothing
-+ and return (but figure out why this happens). The txnmgr should be modified to
-+ only flush its leaf-level dirty list. This will do all the necessary squeeze
-+ and allocate steps but leave unallocated branches and possibly unallocated
-+ twigs (when the twig's leftmost child is not dirty). After flushing the leaf
-+ level, the remaining unallocated nodes should be given write-optimized
-+ locations. (Possibly, the remaining unallocated twigs should be allocated just
-+ before their leftmost child.)
-+ */
-+
-+ /* Any failure reaches this point. */
-+ failed:
-+
-+ switch (ret) {
-+ case -E_REPEAT:
-+ case -EINVAL:
-+ case -E_DEADLOCK:
-+ case -E_NO_NEIGHBOR:
-+ case -ENOENT:
-+ /* FIXME(C): Except for E_DEADLOCK, these should probably be handled properly
-+ in each case. They already are handled in many cases. */
-+ /* Something bad happened, but difficult to avoid... Try again! */
-+ ret = 0;
-+ }
-+
-+ if (leftmost_in_slum)
-+ jput(leftmost_in_slum);
-+
-+ pos_done(flush_pos);
-+ scan_done(left_scan);
-+ scan_done(right_scan);
-+ kfree(right_scan);
-+
-+ ON_DEBUG(atomic_dec(&flush_cnt));
-+
-+ reiser4_leave_flush(sb);
-+
-+ return ret;
-+}
-+
-+/* The reiser4 flush subsystem can be switched into "rapid flush mode", which
-+ * means that the flusher should submit all prepped nodes immediately without
-+ * keeping them in flush queues for a long time. The reason for rapid flush
-+ * mode is to free memory as fast as possible. */
-+
-+#if REISER4_USE_RAPID_FLUSH
-+
-+/**
-+ * rapid_flush - submit the nodes prepped so far
-+ * @pos: flush position, handed on to write_prepped_nodes()
-+ *
-+ * Returns 0 without doing anything unless wbq_available() reports true;
-+ * otherwise returns whatever write_prepped_nodes() returns.
-+ *
-+ * NOTE(review): an earlier version of this comment said rapid flush mode is
-+ * turned off here. Nothing in this function does that directly - presumably
-+ * a callee does; confirm.
-+ */
-+static int rapid_flush(flush_pos_t * pos)
-+{
-+	return wbq_available() ? write_prepped_nodes(pos) : 0;
-+}
-+
-+#else
-+
-+#define rapid_flush(pos) (0)
-+
-+#endif /* REISER4_USE_RAPID_FLUSH */
-+
-+/*
-+ * Find the jnode that flush should start from.
-+ *
-+ * @start is tried first (it may be NULL); it is used only if it is not
-+ * flushprepped. Otherwise jnodes are taken one at a time from
-+ * find_first_dirty_jnode(@atom, @flags). Nodes that must not or need not be
-+ * flushed here are disposed of on the way: writeback nodes go to the tail of
-+ * the atom's writeback list, the above-root znode is handed to
-+ * jnode_make_wander_nolock(), and RELOC nodes are put on @fq (each one
-+ * increments *@nr_queued).
-+ *
-+ * Returns the first jnode that matches none of those cases, with its jnode
-+ * spin lock still held (the loop is left with "break" before the unlock), or
-+ * NULL once find_first_dirty_jnode() has nothing more to offer.
-+ */
-+static jnode *find_flush_start_jnode(jnode *start, txn_atom *atom,
-+ flush_queue_t *fq, int *nr_queued,
-+ int flags)
-+{
-+ jnode * node;
-+
-+ if (start != NULL) {
-+ spin_lock_jnode(start);
-+ if (!jnode_is_flushprepped(start)) {
-+ assert("zam-1056", start->atom == atom);
-+ node = start;
-+ /* jump into the loop body with @start locked, so it gets the
-+ same treatment as nodes taken from the dirty lists */
-+ goto enter;
-+ }
-+ spin_unlock_jnode(start);
-+ }
-+ /*
-+ * In this loop we process all already prepped (RELOC or OVRWR) and dirtied again
-+ * nodes. The atom spin lock is not released until all dirty nodes processed or
-+ * not prepped node found in the atom dirty lists.
-+ */
-+ while ((node = find_first_dirty_jnode(atom, flags))) {
-+ spin_lock_jnode(node);
-+ enter:
-+ assert("zam-881", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-898", !JF_ISSET(node, JNODE_OVRWR));
-+
-+ if (JF_ISSET(node, JNODE_WRITEBACK)) {
-+ /* move node to the end of atom's writeback list */
-+ list_move_tail(&node->capture_link, ATOM_WB_LIST(atom));
-+
-+ /*
-+ * jnode is not necessarily on dirty list: if it was dirtied when
-+ * it was on flush queue - it does not get moved to dirty list
-+ */
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node),
-+ WB_LIST, 1));
-+
-+ } else if (jnode_is_znode(node)
-+ && znode_above_root(JZNODE(node))) {
-+ /*
-+ * A special case for znode-above-root. The above-root (fake)
-+ * znode is captured and dirtied when the tree height changes or
-+ * when the root node is relocated. This causes atoms to fuse so
-+ * that changes at the root are serialized. However, this node is
-+ * never flushed. This special case used to be in lock.c to
-+ * prevent the above-root node from ever being captured, but now
-+ * that it is captured we simply prevent it from flushing. The
-+ * log-writer code relies on this to properly log superblock
-+ * modifications of the tree height.
-+ */
-+ jnode_make_wander_nolock(node);
-+ } else if (JF_ISSET(node, JNODE_RELOC)) {
-+ /* already relocated: queue it and count it for the caller */
-+ queue_jnode(fq, node);
-+ ++(*nr_queued);
-+ } else
-+ /* found a node to start flushing from; it stays locked */
-+ break;
-+
-+ spin_unlock_jnode(node);
-+ }
-+ return node;
-+}
-+
-+/* Flush some nodes of the current atom, usually one slum.
-+ *
-+ * @flags is handed to find_flush_start_jnode() and jnode_flush().
-+ * @nr_to_write is overwritten with LONG_MAX below, so the caller's value is
-+ * not used.
-+ * @nr_submitted is handed to jnode_flush() and reiser4_write_fq().
-+ * @atom must point to the current atom, spin-locked. *@atom is refreshed with
-+ * get_current_atom_locked() after reiser4_fq_by_atom() returns -E_REPEAT,
-+ * after waiting for other flushers, and after the flush queue is written.
-+ * @start is an optional jnode to start from (may be NULL).
-+ *
-+ * Returns 0, with the current atom still locked, if no start node was found
-+ * and nothing was queued. Otherwise the atom is unlocked before returning and
-+ * the result is -E_REPEAT if reiser4_write_fq() returned 0 (there may be more
-+ * nodes to flush) or the error returned by reiser4_write_fq(). An error from
-+ * reiser4_fq_by_atom() other than -E_REPEAT is returned as it is. */
-+int
-+flush_current_atom(int flags, long nr_to_write, long *nr_submitted,
-+ txn_atom ** atom, jnode *start)
-+{
-+ reiser4_super_info_data *sinfo = get_current_super_private();
-+ flush_queue_t *fq = NULL;
-+ jnode *node;
-+ int nr_queued;
-+ int ret;
-+
-+ assert("zam-889", atom != NULL && *atom != NULL);
-+ assert_spin_locked(&((*atom)->alock));
-+ assert("zam-892", get_current_context()->trans->atom == *atom);
-+
-+ /* NOTE(review): this discards the value passed in by the caller -
-+ confirm that ignoring the requested limit is intended. */
-+ nr_to_write = LONG_MAX;
-+ /* get a flush queue for the atom, retrying for as long as
-+ reiser4_fq_by_atom() asks for a repeat */
-+ while (1) {
-+ ret = reiser4_fq_by_atom(*atom, &fq);
-+ if (ret != -E_REPEAT)
-+ break;
-+ *atom = get_current_atom_locked();
-+ }
-+ if (ret)
-+ return ret;
-+
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ /* parallel flushers limit */
-+ if (sinfo->tmgr.atom_max_flushers != 0) {
-+ while ((*atom)->nr_flushers >= sinfo->tmgr.atom_max_flushers) {
-+ /* A reiser4_atom_send_event() call is inside
-+ reiser4_fq_put_nolock() which is called when flush is
-+ finished and nr_flushers is decremented. */
-+ reiser4_atom_wait_event(*atom);
-+ *atom = get_current_atom_locked();
-+ }
-+ }
-+
-+ /* count ourself as a flusher */
-+ (*atom)->nr_flushers++;
-+
-+ writeout_mode_enable();
-+
-+ nr_queued = 0;
-+ node = find_flush_start_jnode(start, *atom, fq, &nr_queued, flags);
-+
-+ if (node == NULL) {
-+ if (nr_queued == 0) {
-+ /* nothing to flush and nothing queued: undo the flusher
-+ accounting and return with the atom locked */
-+ (*atom)->nr_flushers--;
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(*atom);
-+ /* current atom remains locked */
-+ writeout_mode_disable();
-+ return 0;
-+ }
-+ /* only already-relocated nodes were queued; just write them below */
-+ spin_unlock_atom(*atom);
-+ } else {
-+ /* hold a reference across jnode_flush(); @node was returned with its
-+ jnode spin lock held, which is dropped after the atom lock */
-+ jref(node);
-+ BUG_ON((*atom)->super != node->tree->super);
-+ spin_unlock_atom(*atom);
-+ spin_unlock_jnode(node);
-+ BUG_ON(nr_to_write == 0);
-+ /* NOTE(review): this result is overwritten by reiser4_write_fq()
-+ just below, so an error from jnode_flush() never reaches the
-+ caller - confirm that this is intended. */
-+ ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags);
-+ jput(node);
-+ }
-+
-+ ret =
-+ reiser4_write_fq(fq, nr_submitted,
-+ WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
-+
-+ /* re-take the atom lock to drop our flusher count and release the queue */
-+ *atom = get_current_atom_locked();
-+ (*atom)->nr_flushers--;
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(*atom);
-+ spin_unlock_atom(*atom);
-+
-+ writeout_mode_disable();
-+
-+ /* success is reported as -E_REPEAT */
-+ if (ret == 0)
-+ ret = -E_REPEAT;
-+
-+ return ret;
-+}
-+
-+/* REVERSE PARENT-FIRST RELOCATION POLICIES */
-+
-+/* This implements the is-it-close-enough-to-its-preceder? test for relocation in the
-+ reverse parent-first relocate context. Here all we know is the preceder and the block
-+ number. Since we are going in reverse, the preceder may still be relocated as well, so
-+ we can't ask the block allocator "is there a closer block available to relocate?" here.
-+ In the _forward_ parent-first relocate context (not here) we actually call the block
-+ allocator to try and find a closer location. */
-+static int
-+reverse_relocate_if_close_enough(const reiser4_block_nr * pblk,
-+				 const reiser4_block_nr * nblk)
-+{
-+	reiser4_block_nr gap;
-+
-+	assert("jmacd-7710", *pblk != 0 && *nblk != 0);
-+	assert("jmacd-7711", !reiser4_blocknr_is_fake(pblk));
-+	assert("jmacd-7712", !reiser4_blocknr_is_fake(nblk));
-+
-+	/* Absolute distance between the preceder block and the node block. */
-+	if (*pblk > *nblk)
-+		gap = *pblk - *nblk;
-+	else
-+		gap = *nblk - *pblk;
-+
-+	/* Answer 1 (relocate) only if the node is more than the super block's
-+	   flush.relocate_distance blocks away from its preceder; a distance
-+	   equal to the limit still answers 0 (do not relocate). */
-+	return gap > get_current_super_private()->flush.relocate_distance;
-+}
-+
-+/* This function is a predicate that tests for relocation. Always called in the
-+ reverse-parent-first context, when we are asking whether the current node should be
-+ relocated in order to expand the flush by dirtying the parent level (and thus
-+ proceeding to flush that level). When traversing in the forward parent-first direction
-+ (not here), relocation decisions are handled in two places: allocate_znode() and
-+ extent_needs_allocation(). */
-+static int
-+reverse_relocate_test(jnode * node, const coord_t * parent_coord,
-+		      flush_pos_t * pos)
-+{
-+	reiser4_block_nr preceder_blk;
-+	reiser4_block_nr child_blk;
-+
-+	assert("jmacd-8989", !jnode_is_root(node));
-+
-+	/*
-+	 * The only caller is reverse_relocate_check_dirty_parent(), and it
-+	 * calls only when the parent node is clean. A clean parent has a real
-+	 * (not fake) block number, and so does the child - otherwise the
-+	 * parent would be dirty.
-+	 */
-+
-+	/* A newly created node counts as relocated ... */
-+	if (JF_ISSET(node, JNODE_CREATED))
-+		return 1;
-+	/* ... and so does a leaf when the leaf-relocate policy is on. */
-+	if (pos->leaf_relocate && jnode_get_level(node) == LEAF_LEVEL)
-+		return 1;
-+
-+	/* Pick the preceder: the parent's own block for a leftmost unit,
-+	   otherwise the preceder remembered in @pos.
-+	   FIXME(B): When the child is an unformatted, previously existing
-+	   node, the coord may be leftmost even though the child is not the
-+	   parent-first preceder of the parent. If the first dirty node appears
-+	   somewhere in the middle of the first extent unit, this preceder
-+	   calculation is wrong. Needs more logic in here. */
-+	preceder_blk = coord_is_leftmost_unit(parent_coord) ?
-+	    *znode_get_block(parent_coord->node) : pos->preceder.blk;
-+	check_preceder(preceder_blk);
-+
-+	/* A zero preceder block means it is unallocated or unknown: relocate. */
-+	if (preceder_blk == 0)
-+		return 1;
-+
-+	child_blk = *jnode_get_block(node);
-+
-+	/* child is unallocated, mark parent dirty */
-+	if (reiser4_blocknr_is_fake(&child_blk))
-+		return 1;
-+
-+	/* Both block numbers are real: decide by distance. */
-+	return reverse_relocate_if_close_enough(&preceder_blk, &child_blk);
-+}
-+
-+/* This function calls reverse_relocate_test to make a reverse-parent-first
-+ relocation decision and then, if yes, it marks the parent dirty. */
-+static int
-+reverse_relocate_check_dirty_parent(jnode * node, const coord_t * parent_coord,
-+				    flush_pos_t * pos)
-+{
-+	int verdict;
-+	int grabbed;
-+
-+	/* A parent that is already dirty needs nothing from us. */
-+	if (JF_ISSET(ZJNODE(parent_coord->node), JNODE_DIRTY))
-+		return 0;
-+
-+	verdict = reverse_relocate_test(node, parent_coord, pos);
-+	if (verdict < 0)
-+		return verdict;
-+	if (verdict != 1)
-+		return 0;
-+
-+	/* The child is to be relocated, so the parent gets dirtied.
-+	   FIXME-ZAM
-+	   if parent is already relocated - we do not want to grab space, right? */
-+	grabbed = get_current_context()->grabbed_blocks;
-+	if (reiser4_grab_space_force((__u64) 1, BA_RESERVED) != 0)
-+		reiser4_panic("umka-1250", "No space left during flush.");
-+
-+	assert("jmacd-18923", znode_is_write_locked(parent_coord->node));
-+	znode_make_dirty(parent_coord->node);
-+	/* pass the grabbed-block count sampled before the grab */
-+	grabbed2free_mark(grabbed);
-+
-+	return 0;
-+}
-+
-+/* INITIAL ALLOCATE ANCESTORS STEP (REVERSE PARENT-FIRST ALLOCATION BEFORE FORWARD
-+ PARENT-FIRST LOOP BEGINS) */
-+
-+/* Store in *@child the leftmost child of the unit at @coord, as reported by
-+   item_utmost_child(). Returns 0 on success, the non-zero result of
-+   item_utmost_child(), or the error encoded in *@child when it is an
-+   error pointer. */
-+static int get_leftmost_child_of_unit(const coord_t * coord, jnode ** child)
-+{
-+	int err = item_utmost_child(coord, LEFT_SIDE, child);
-+
-+	if (err != 0)
-+		return err;
-+
-+	return IS_ERR(*child) ? PTR_ERR(*child) : 0;
-+}
-+
-+/* This step occurs after the left- and right-scans are completed, before starting the
-+ forward parent-first traversal. Here we attempt to allocate ancestors of the starting
-+ flush point, which means continuing in the reverse parent-first direction to the
-+ parent, grandparent, and so on (as long as the child is a leftmost child). This
-+ routine calls a recursive process, alloc_one_ancestor, which does the real work,
-+ except there is special-case handling here for the first ancestor, which may be a twig.
-+ At each level (here and alloc_one_ancestor), we check for relocation and then, if
-+ the child is a leftmost child, repeat at the next level. On the way back down (the
-+ recursion), we allocate the ancestors in parent-first order. */
-+/* @pos: the flush position prepared by the caller. Returns 0 on success or
-+   when the node under pos->lock is already flushprepped; otherwise the first
-+   non-zero result of the helpers called below. */
-+static int alloc_pos_and_ancestors(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ lock_handle plock;
-+ load_count pload;
-+ coord_t pcoord;
-+
-+ /* nothing to do if the node under the position is already flushprepped */
-+ if (znode_check_flushprepped(pos->lock.node))
-+ return 0;
-+
-+ /* parent coord, lock and load count start out empty; they are only
-+ filled in on the formatted, non-root path below */
-+ coord_init_invalid(&pcoord, NULL);
-+ init_lh(&plock);
-+ init_load_count(&pload);
-+
-+ if (pos->state == POS_ON_EPOINT) {
-+ /* a special case for pos on twig level, where we already have
-+ a lock on parent node. */
-+ /* The parent may not be dirty, in which case we should decide
-+ whether to relocate the child now. If decision is made to
-+ relocate the child, the parent is marked dirty. */
-+ ret =
-+ reverse_relocate_check_dirty_parent(pos->child, &pos->coord,
-+ pos);
-+ if (ret)
-+ goto exit;
-+
-+ /* FIXME_NFQUCMPD: We only need to allocate the twig (if child
-+ is leftmost) and the leaf/child, so recursion is not needed.
-+ Levels above the twig will be allocated for
-+ write-optimization before the transaction commits. */
-+
-+ /* Do the recursive step, allocating zero or more of our
-+ * ancestors. */
-+ ret = alloc_one_ancestor(&pos->coord, pos);
-+
-+ } else {
-+ if (!znode_is_root(pos->lock.node)) {
-+ /* all formatted nodes except tree root */
-+ /* write-lock the parent, load it, and find the coord in it
-+ that points to our node */
-+ ret =
-+ reiser4_get_parent(&plock, pos->lock.node,
-+ ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto exit;
-+
-+ ret = incr_load_count_znode(&pload, plock.node);
-+ if (ret)
-+ goto exit;
-+
-+ ret =
-+ find_child_ptr(plock.node, pos->lock.node, &pcoord);
-+ if (ret)
-+ goto exit;
-+
-+ /* decide about relocation (this may dirty the parent), then
-+ deal with the ancestors before the node itself */
-+ ret =
-+ reverse_relocate_check_dirty_parent(ZJNODE
-+ (pos->lock.
-+ node), &pcoord,
-+ pos);
-+ if (ret)
-+ goto exit;
-+
-+ ret = alloc_one_ancestor(&pcoord, pos);
-+ if (ret)
-+ goto exit;
-+ }
-+
-+ /* for the tree root pcoord is still the invalid coord set up above */
-+ ret = allocate_znode(pos->lock.node, &pcoord, pos);
-+ }
-+ exit:
-+ /* reached on success and on failure: drop the parent load count and lock */
-+ done_load_count(&pload);
-+ done_lh(&plock);
-+ return ret;
-+}
-+
-+/* This is the recursive step described in alloc_pos_and_ancestors, above. Ignoring the
-+ call to set_preceder, which is the next function described, this checks if the
-+ child is a leftmost child and returns if it is not. If the child is a leftmost child
-+ it checks for relocation, possibly dirtying the parent. Then it performs the recursive
-+ step. */
-+/* @coord: coord in the node to be allocated at this level (coord->node).
-+   @pos: the flush position; its preceder may be set here.
-+   Returns 0 on success, including when it stops early because coord->node is
-+   already flushprepped or @coord is not a leftmost unit; otherwise the first
-+   non-zero result of the helpers called below. */
-+static int alloc_one_ancestor(const coord_t * coord, flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ lock_handle alock;
-+ load_count aload;
-+ coord_t acoord;
-+
-+ /* As we ascend at the left-edge of the region to flush, take this opportunity at
-+ the twig level to find our parent-first preceder unless we have already set
-+ it. */
-+ if (pos->preceder.blk == 0) {
-+ ret = set_preceder(coord, pos);
-+ if (ret != 0)
-+ return ret;
-+ }
-+
-+ /* If the ancestor is clean or already allocated, or if the child is not a
-+ leftmost child, stop going up, even leaving coord->node not flushprepped. */
-+ if (znode_check_flushprepped(coord->node)
-+ || !coord_is_leftmost_unit(coord))
-+ return 0;
-+
-+ /* ancestor lock, load count and coord start out empty; they stay that
-+ way when coord->node is the root */
-+ init_lh(&alock);
-+ init_load_count(&aload);
-+ coord_init_invalid(&acoord, NULL);
-+
-+ /* Only ascend to the next level if it is a leftmost child, but write-lock the
-+ parent in case we will relocate the child. */
-+ if (!znode_is_root(coord->node)) {
-+
-+ ret =
-+ jnode_lock_parent_coord(ZJNODE(coord->node), &acoord,
-+ &alock, &aload, ZNODE_WRITE_LOCK,
-+ 0);
-+ if (ret != 0) {
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ goto exit;
-+ }
-+
-+ /* decide about relocating coord->node; this may dirty its parent */
-+ ret =
-+ reverse_relocate_check_dirty_parent(ZJNODE(coord->node),
-+ &acoord, pos);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ /* Recursive call. */
-+ /* the parent is handled first, so it is allocated before coord->node */
-+ if (!znode_check_flushprepped(acoord.node)) {
-+ ret = alloc_one_ancestor(&acoord, pos);
-+ if (ret)
-+ goto exit;
-+ }
-+ }
-+
-+ /* Note: we call allocate with the parent write-locked (except at the root) in
-+ case we relocate the child, in which case it will modify the parent during this
-+ call. */
-+ ret = allocate_znode(coord->node, &acoord, pos);
-+
-+ exit:
-+ /* reached on success and on failure: drop the ancestor load count and lock */
-+ done_load_count(&aload);
-+ done_lh(&alock);
-+ return ret;
-+}
-+
-+/* During the reverse parent-first alloc_pos_and_ancestors process described above there is
-+ a call to this function at the twig level. During alloc_pos_and_ancestors we may ask:
-+ should this node be relocated (in reverse parent-first context)? We repeat this
-+ process as long as the child is the leftmost child, eventually reaching an ancestor of
-+ the flush point that is not a leftmost child. The preceder of that ancestor, which is
-+ not a leftmost child, is actually on the leaf level. The preceder of that block is the
-+ left-neighbor of the flush point. The preceder of that block is the rightmost child of
-+ the twig on the left. So, when alloc_pos_and_ancestors passes upward through the twig
-+ level, it stops momentarily to remember the block of the rightmost child of the twig on
-+ the left and stores it as the flush_position's preceder hint (pos->preceder.blk).
-+
-+ There is one other place where we may set the flush_position's preceder hint, which is
-+ during scan-left.
-+
-+ Returns 0 on success, and also when the left neighbor could not be obtained
-+ because of -E_REPEAT, -E_NO_NEIGHBOR, -ENOENT, -EINVAL or -E_DEADLOCK (in
-+ that case pos->preceder.blk is not written here). Other errors are returned.
-+*/
-+static int set_preceder(const coord_t * coord_in, flush_pos_t * pos)
-+{
-+ int ret;
-+ coord_t coord;
-+ lock_handle left_lock;
-+ load_count left_load;
-+
-+ /* work on a private copy, @coord_in itself is not modified */
-+ coord_dup(&coord, coord_in);
-+
-+ init_lh(&left_lock);
-+ init_load_count(&left_load);
-+
-+ /* FIXME(B): Same FIXME as in "Find the preceder" in reverse_relocate_test.
-+ coord_is_leftmost_unit is not the right test if the unformatted child is in the
-+ middle of the first extent unit. */
-+ if (!coord_is_leftmost_unit(&coord)) {
-+ /* the preceder is under the previous unit of the same node */
-+ coord_prev_unit(&coord);
-+ } else {
-+ /* leftmost unit: look at the last unit of the left neighbor instead */
-+ ret =
-+ reiser4_get_left_neighbor(&left_lock, coord.node,
-+ ZNODE_READ_LOCK, GN_SAME_ATOM);
-+ if (ret) {
-+ /* If we fail for any reason it doesn't matter because the
-+ preceder is only a hint. We are low-priority at this point, so
-+ this must be the case. */
-+ if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
-+ ret == -ENOENT || ret == -EINVAL
-+ || ret == -E_DEADLOCK) {
-+ ret = 0;
-+ }
-+ goto exit;
-+ }
-+
-+ ret = incr_load_count_znode(&left_load, left_lock.node);
-+ if (ret)
-+ goto exit;
-+
-+ coord_init_last_unit(&coord, left_lock.node);
-+ }
-+
-+ /* record the block of the rightmost child under the chosen unit */
-+ ret =
-+ item_utmost_child_real_block(&coord, RIGHT_SIDE,
-+ &pos->preceder.blk);
-+ exit:
-+ check_preceder(pos->preceder.blk);
-+ done_load_count(&left_load);
-+ done_lh(&left_lock);
-+ return ret;
-+}
-+
-+/* MAIN SQUEEZE AND ALLOCATE LOOP (THREE BIG FUNCTIONS) */
-+
-+/* This procedure implements the outer loop of the flush algorithm. To put this in
-+ context, here is the general list of steps taken by the flush routine as a whole:
-+
-+ 1. Scan-left
-+ 2. Scan-right (maybe)
-+ 3. Allocate initial flush position and its ancestors
-+ 4. <handle extents>
-+ 5. <squeeze and next position and its ancestors to-the-right,
-+ then update position to-the-right>
-+ 6. <repeat from #4 until flush is stopped>
-+
-+ This procedure implements the loop in steps 4 through 6 in the above listing.
-+
-+ Step 4: if the current flush position is an extent item (position on the twig level),
-+ it allocates the extent (allocate_extent_item_in_place) then shifts to the next
-+ coordinate. If the next coordinate's leftmost child needs flushprep, we will continue.
-+ If the next coordinate is an internal item, we descend back to the leaf level,
-+ otherwise we repeat a step #4 (labeled ALLOC_EXTENTS below). If the "next coordinate"
-+ brings us past the end of the twig level, then we call
-+ reverse_relocate_end_of_twig to possibly dirty the next (right) twig, prior to
-+ step #5 which moves to the right.
-+
-+ Step 5: calls squalloc_changed_ancestors, which initiates a recursive call up the
-+ tree to allocate any ancestors of the next-right flush position that are not also
-+ ancestors of the current position. Those ancestors (in top-down order) are the next in
-+ parent-first order. We squeeze adjacent nodes on the way up until the right node and
-+ current node share the same parent, then allocate on the way back down. Finally, this
-+ step sets the flush position to the next-right node. Then repeat steps 4 and 5.
-+*/
-+
-+/* SQUEEZE CODE */
-+
-+/* Helper for squeeze_right_twig(): cut the leading content of to->node, from
-+   its first unit up to coord @to (key @to_key), by calling
-+   cut_node_content(). Returns the result of cut_node_content().
-+   @left is not used here. */
-+static int squalloc_right_twig_cut(coord_t * to, reiser4_key * to_key,
-+				   znode * left)
-+{
-+	coord_t first;
-+	reiser4_key first_key;
-+
-+	/* the cut starts at the first unit of the node and at its key */
-+	coord_init_first_unit(&first, to->node);
-+	item_key_by_coord(&first, &first_key);
-+
-+	return cut_node_content(&first, to, &first_key, to_key, NULL);
-+}
-+
-+/* Copy as much of the leading extents from @right to @left, allocating
-+ unallocated extents as they are copied. Returns SQUEEZE_TARGET_FULL or
-+ SQUEEZE_SOURCE_EMPTY when no more can be shifted. If the next item is an
-+ internal item it calls shift_one_internal_unit and may then return
-+ SUBTREE_MOVED. */
-+/* @left: twig node that receives content; @right: its non-empty right
-+   neighbor; @pos: the flush position, passed to squalloc_extent() and
-+   write_prepped_nodes().
-+   Returns SQUEEZE_TARGET_FULL, SQUEEZE_SOURCE_EMPTY, SUBTREE_MOVED or a
-+   negative error (see the assertion at "out"). When the result would be
-+   SQUEEZE_TARGET_FULL the prepped nodes are written first, and a negative
-+   result of that write is returned instead. */
-+static int squeeze_right_twig(znode * left, znode * right, flush_pos_t * pos)
-+{
-+ int ret = SUBTREE_MOVED;
-+ coord_t coord; /* used to iterate over items */
-+ reiser4_key stop_key;
-+
-+ assert("jmacd-2008", !node_is_empty(right));
-+ coord_init_first_unit(&coord, right);
-+
-+ /* FIXME: can be optimized to cut once */
-+ /* handle leading extent items one at a time: copy what fits into @left,
-+ then cut the copied part out of the source node */
-+ while (!node_is_empty(coord.node) && item_is_extent(&coord)) {
-+ ON_DEBUG(void *vp);
-+
-+ assert("vs-1468", coord_is_leftmost_unit(&coord));
-+ ON_DEBUG(vp = shift_check_prepare(left, coord.node));
-+
-+ /* stop_key is used to find what was copied and what to cut */
-+ stop_key = *reiser4_min_key();
-+ ret = squalloc_extent(left, &coord, pos, &stop_key);
-+ if (ret != SQUEEZE_CONTINUE) {
-+ /* debug builds free the shift-check data before leaving the loop */
-+ ON_DEBUG(kfree(vp));
-+ break;
-+ }
-+ assert("vs-1465", !keyeq(&stop_key, reiser4_min_key()));
-+
-+ /* Helper function to do the cutting. */
-+ /* the cut is given stop_key with its offset decreased by one */
-+ set_key_offset(&stop_key, get_key_offset(&stop_key) - 1);
-+ check_me("vs-1466",
-+ squalloc_right_twig_cut(&coord, &stop_key, left) == 0);
-+
-+ ON_DEBUG(shift_check(vp, left, coord.node));
-+ }
-+
-+ if (node_is_empty(coord.node))
-+ ret = SQUEEZE_SOURCE_EMPTY;
-+
-+ if (ret == SQUEEZE_TARGET_FULL) {
-+ goto out;
-+ }
-+
-+ if (node_is_empty(right)) {
-+ /* The whole right node was copied into @left. */
-+ assert("vs-464", ret == SQUEEZE_SOURCE_EMPTY);
-+ goto out;
-+ }
-+
-+ /* look at whatever is now the first unit of @right */
-+ coord_init_first_unit(&coord, right);
-+
-+ if (!item_is_internal(&coord)) {
-+ /* we do not want to squeeze anything else to left neighbor because "slum"
-+ is over */
-+ ret = SQUEEZE_TARGET_FULL;
-+ goto out;
-+ }
-+ assert("jmacd-433", item_is_internal(&coord));
-+
-+ /* Shift an internal unit. The child must be allocated before shifting any more
-+ extents, so we stop here. */
-+ ret = shift_one_internal_unit(left, right);
-+
-+ out:
-+ assert("jmacd-8612", ret < 0 || ret == SQUEEZE_TARGET_FULL
-+ || ret == SUBTREE_MOVED || ret == SQUEEZE_SOURCE_EMPTY);
-+
-+ if (ret == SQUEEZE_TARGET_FULL) {
-+ /* We submit prepped nodes here and expect that this @left twig
-+ * will not be modified again during this jnode_flush() call. */
-+ int ret1;
-+
-+ /* NOTE: seems like io is done under long term locks. */
-+ /* only a negative result replaces the return value; any other
-+ result of the write is dropped */
-+ ret1 = write_prepped_nodes(pos);
-+ if (ret1 < 0)
-+ return ret1;
-+ }
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+/* Debug-only consistency check of the conversion state kept in @pos:
-+   pos->coord must be at an existing item; with chaining data present the
-+   recorded convert plugin must be the plugin of that item and must have a
-+   ->f.convert method; without chaining data pos->child must be NULL. */
-+static void item_convert_invariant(flush_pos_t * pos)
-+{
-+	item_plugin *plug;
-+
-+	assert("edward-1225", coord_is_existing_item(&pos->coord));
-+
-+	if (!chaining_data_present(pos)) {
-+		assert("edward-1226", pos->child == NULL);
-+		return;
-+	}
-+
-+	plug = item_convert_plug(pos);
-+	assert("edward-1000", plug == item_plugin_by_coord(&pos->coord));
-+	assert("edward-1001", plug->f.convert != NULL);
-+}
-+#else
-+
-+#define item_convert_invariant(pos) noop
-+
-+#endif
-+
-+/* Scan node items starting from the first one and apply for each
-+ item its flush ->convert() method (if any). This method may
-+ resize/kill the item so the tree will be changed.
-+*/
-+/* @pos: the flush position; pos->coord is used as the item cursor and
-+   pos->child must be NULL on entry. @node: write-locked, non-empty,
-+   convertible znode.
-+   Returns 0, or the non-zero result of an item's ->f.convert() method.
-+   Nodes that are not on the leaf level are left untouched (0 is returned and
-+   JNODE_CONVERTIBLE stays set). */
-+static int convert_node(flush_pos_t * pos, znode * node)
-+{
-+ int ret = 0;
-+ item_plugin *iplug;
-+
-+ assert("edward-304", pos != NULL);
-+ assert("edward-305", pos->child == NULL);
-+ assert("edward-475", znode_convertible(node));
-+ assert("edward-669", znode_is_wlocked(node));
-+ assert("edward-1210", !node_is_empty(node));
-+
-+ if (znode_get_level(node) != LEAF_LEVEL)
-+ /* unsupported */
-+ goto exit;
-+
-+ coord_init_first_unit(&pos->coord, node);
-+
-+ while (1) {
-+ ret = 0;
-+ coord_set_to_left(&pos->coord);
-+ item_convert_invariant(pos);
-+
-+ iplug = item_plugin_by_coord(&pos->coord);
-+ assert("edward-844", iplug != NULL);
-+
-+ /* items whose plugin has no ->convert() method are skipped */
-+ if (iplug->f.convert) {
-+ ret = iplug->f.convert(pos);
-+ if (ret)
-+ goto exit;
-+ }
-+ assert("edward-307", pos->child == NULL);
-+
-+ /* a non-zero result of coord_next_item() is taken to mean that
-+ there is no next item in this node */
-+ if (coord_next_item(&pos->coord)) {
-+ /* node is over */
-+
-+ if (!chaining_data_present(pos))
-+ /* finished this node */
-+ break;
-+ if (should_chain_next_node(pos)) {
-+ /* go to next node */
-+ move_chaining_data(pos, 0 /* to next node */ );
-+ break;
-+ }
-+ /* repeat this node */
-+ move_chaining_data(pos, 1 /* this node */ );
-+ continue;
-+ }
-+ /* Node is not over.
-+ Check if there is attached convert data.
-+ If so roll one item position back and repeat
-+ on this node
-+ */
-+ if (chaining_data_present(pos)) {
-+
-+ /* the next item belongs to a different plugin: reset the
-+ convert count */
-+ if (iplug != item_plugin_by_coord(&pos->coord))
-+ set_item_convert_count(pos, 0);
-+
-+ ret = coord_prev_item(&pos->coord);
-+ assert("edward-1003", !ret);
-+
-+ move_chaining_data(pos, 1 /* this node */ );
-+ }
-+ }
-+ /* the loop finished normally: the node no longer needs conversion, and
-+ it is marked dirty */
-+ JF_CLR(ZJNODE(node), JNODE_CONVERTIBLE);
-+ znode_make_dirty(node);
-+ exit:
-+ /* NOTE(review): this assertion also fires when ->f.convert() returned an
-+ error and jumped here, so in debug builds such an error trips the
-+ assertion instead of just being returned - confirm this is intended. */
-+ assert("edward-1004", !ret);
-+ return ret;
-+}
-+
-+/* Squeeze and allocate the right neighbor. This is called after @left and
-+ its current children have been squeezed and allocated already. This
-+ procedure's job is to squeeze items from @right to @left.
-+
-+ If at the leaf level, use the shift_everything_left memcpy-optimized
-+ version of shifting (squeeze_right_non_twig).
-+
-+ If at the twig level, extents are allocated as they are shifted from @right
-+ to @left (squeeze_right_twig).
-+
-+ At any other level, shift one internal item and return to the caller
-+ (squalloc_parent_first) so that the shifted-subtree can be processed in
-+ parent-first order.
-+
-+ When unit of internal item is moved, squeezing stops and SUBTREE_MOVED is
-+ returned. When all content of @right is squeezed, SQUEEZE_SOURCE_EMPTY is
-+ returned. If nothing can be moved into @left anymore, SQUEEZE_TARGET_FULL
-+ is returned.
-+*/
-+
-+static int squeeze_right_neighbor(flush_pos_t * pos, znode * left,
-+				  znode * right)
-+{
-+	int result;
-+
-+	/* FIXME it is possible to see empty hasn't-heard-banshee node in a
-+	 * tree owing to error (for example, ENOSPC) in write, which is why
-+	 * @left is not asserted to be non-empty: */
-+	/* assert("jmacd-9321", !node_is_empty(left)); */
-+	assert("jmacd-9322", !node_is_empty(right));
-+	assert("jmacd-9323", znode_get_level(left) == znode_get_level(right));
-+
-+	if (znode_get_level(left) == TWIG_LEVEL)
-+		/* Twig level: shift with extent allocation until an internal
-+		   item is met, everything is shifted, or @left has no free
-+		   space left. */
-+		result = squeeze_right_twig(left, right, pos);
-+	else
-+		/* Every other level can use shift_everything until per-item
-+		   flush plugins are implemented. */
-+		result = squeeze_right_non_twig(left, right);
-+
-+	assert("jmacd-2011", (result < 0 ||
-+			      result == SQUEEZE_SOURCE_EMPTY
-+			      || result == SQUEEZE_TARGET_FULL
-+			      || result == SUBTREE_MOVED));
-+	return result;
-+}
-+
-+static int squeeze_right_twig_and_advance_coord(flush_pos_t * pos,
-+ znode * right)
-+{
-+ int ret;
-+
-+ ret = squeeze_right_twig(pos->lock.node, right, pos);
-+ if (ret < 0)
-+ return ret;
-+ if (ret > 0) {
-+ coord_init_after_last_item(&pos->coord, pos->lock.node);
-+ return ret;
-+ }
-+
-+ coord_init_last_unit(&pos->coord, pos->lock.node);
-+ return 0;
-+}
-+
-+/* forward declaration */
-+static int squalloc_upper_levels(flush_pos_t *, znode *, znode *);
-+
-+/* do a fast check for "same parents" condition before calling
-+ * squalloc_upper_levels() */
-+static inline int check_parents_and_squalloc_upper_levels(flush_pos_t * pos,
-+ znode * left,
-+ znode * right)
-+{
-+ if (znode_same_parents(left, right))
-+ return 0;
-+
-+ return squalloc_upper_levels(pos, left, right);
-+}
-+
-+/* Check whether the parent of given @right node needs to be processes
-+ ((re)allocated) prior to processing of the child. If @left and @right do not
-+ share at least the parent of the @right is after the @left but before the
-+ @right in parent-first order, we have to (re)allocate it before the @right
-+ gets (re)allocated. */
-+static int squalloc_upper_levels(flush_pos_t * pos, znode * left, znode * right)
-+{
-+ int ret;
-+
-+ lock_handle left_parent_lock;
-+ lock_handle right_parent_lock;
-+
-+ load_count left_parent_load;
-+ load_count right_parent_load;
-+
-+ init_lh(&left_parent_lock);
-+ init_lh(&right_parent_lock);
-+
-+ init_load_count(&left_parent_load);
-+ init_load_count(&right_parent_load);
-+
-+ ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ /* Check for same parents */
-+ if (left_parent_lock.node == right_parent_lock.node)
-+ goto out;
-+
-+ if (znode_check_flushprepped(right_parent_lock.node)) {
-+ /* Keep parent-first order. In the order, the right parent node stands
-+ before the @right node. If it is already allocated, we set the
-+ preceder (next block search start point) to its block number, @right
-+ node should be allocated after it.
-+
-+ However, preceder is set only if the right parent is on twig level.
-+ The explanation is the following: new branch nodes are allocated over
-+ already allocated children while the tree grows, it is difficult to
-+ keep tree ordered, we assume that only leaves and twings are correctly
-+ allocated. So, only twigs are used as a preceder for allocating of the
-+ rest of the slum. */
-+ if (znode_get_level(right_parent_lock.node) == TWIG_LEVEL) {
-+ pos->preceder.blk =
-+ *znode_get_block(right_parent_lock.node);
-+ check_preceder(pos->preceder.blk);
-+ }
-+ goto out;
-+ }
-+
-+ ret = incr_load_count_znode(&left_parent_load, left_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&right_parent_load, right_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret =
-+ squeeze_right_neighbor(pos, left_parent_lock.node,
-+ right_parent_lock.node);
-+ /* We stop if error. We stop if some items/units were shifted (ret == 0)
-+ * and thus @right changed its parent. It means we have not process
-+ * right_parent node prior to processing of @right. Positive return
-+ * values say that shifting items was not happen because of "empty
-+ * source" or "target full" conditions. */
-+ if (ret <= 0)
-+ goto out;
-+
-+ /* parent(@left) and parent(@right) may have different parents also. We
-+ * do a recursive call for checking that. */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos, left_parent_lock.node,
-+ right_parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ /* allocate znode when going down */
-+ ret = lock_parent_and_allocate_znode(right_parent_lock.node, pos);
-+
-+ out:
-+ done_load_count(&left_parent_load);
-+ done_load_count(&right_parent_load);
-+
-+ done_lh(&left_parent_lock);
-+ done_lh(&right_parent_lock);
-+
-+ return ret;
-+}
-+
-+/* Check the leftmost child "flushprepped" status, also returns true if child
-+ * node was not found in cache. */
-+static int leftmost_child_of_unit_check_flushprepped(const coord_t * coord)
-+{
-+ int ret;
-+ int prepped;
-+
-+ jnode *child;
-+
-+ ret = get_leftmost_child_of_unit(coord, &child);
-+
-+ if (ret)
-+ return ret;
-+
-+ if (child) {
-+ prepped = jnode_check_flushprepped(child);
-+ jput(child);
-+ } else {
-+ /* We consider not existing child as a node which slum
-+ processing should not continue to. Not cached node is clean,
-+ so it is flushprepped. */
-+ prepped = 1;
-+ }
-+
-+ return prepped;
-+}
-+
-+/* (re)allocate znode with automated getting parent node */
-+static int lock_parent_and_allocate_znode(znode * node, flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle parent_lock;
-+ load_count parent_load;
-+ coord_t pcoord;
-+
-+ assert("zam-851", znode_is_write_locked(node));
-+
-+ init_lh(&parent_lock);
-+ init_load_count(&parent_load);
-+
-+ ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&parent_load, parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret = find_child_ptr(parent_lock.node, node, &pcoord);
-+ if (ret)
-+ goto out;
-+
-+ ret = allocate_znode(node, &pcoord, pos);
-+
-+ out:
-+ done_load_count(&parent_load);
-+ done_lh(&parent_lock);
-+ return ret;
-+}
-+
-+/* Process nodes on leaf level until unformatted node or rightmost node in the
-+ * slum reached. */
-+static int handle_pos_on_formatted(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle right_lock;
-+ load_count right_load;
-+
-+ init_lh(&right_lock);
-+ init_load_count(&right_load);
-+
-+ if (should_convert_node(pos, pos->lock.node)) {
-+ ret = convert_node(pos, pos->lock.node);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ while (1) {
-+ int expected;
-+ expected = should_convert_next_node(pos);
-+ ret = neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE,
-+ ZNODE_WRITE_LOCK, !expected, expected);
-+ if (ret) {
-+ if (expected)
-+ warning("edward-1495",
-+ "Expected neighbor not found (ret = %d). Fsck?",
-+ ret);
-+ break;
-+ }
-+
-+ /* we don't prep(allocate) nodes for flushing twice. This can be suboptimal, or it
-+ * can be optimal. For now we choose to live with the risk that it will
-+ * be suboptimal because it would be quite complex to code it to be
-+ * smarter. */
-+ if (znode_check_flushprepped(right_lock.node)
-+ && !znode_convertible(right_lock.node)) {
-+ assert("edward-1005", !should_convert_next_node(pos));
-+ pos_stop(pos);
-+ break;
-+ }
-+
-+ ret = incr_load_count_znode(&right_load, right_lock.node);
-+ if (ret)
-+ break;
-+ if (should_convert_node(pos, right_lock.node)) {
-+ ret = convert_node(pos, right_lock.node);
-+ if (ret)
-+ break;
-+ if (node_is_empty(right_lock.node)) {
-+ /* node became empty after converting, repeat */
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ continue;
-+ }
-+ }
-+
-+ /* squeeze _before_ going upward. */
-+ ret =
-+ squeeze_right_neighbor(pos, pos->lock.node,
-+ right_lock.node);
-+ if (ret < 0)
-+ break;
-+
-+ if (znode_check_flushprepped(right_lock.node)) {
-+ if (should_convert_next_node(pos)) {
-+ /* in spite of flushprepped status of the node,
-+ its right slum neighbor should be converted */
-+ assert("edward-953", convert_data(pos));
-+ assert("edward-954", item_convert_data(pos));
-+
-+ if (node_is_empty(right_lock.node)) {
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ } else
-+ move_flush_pos(pos, &right_lock,
-+ &right_load, NULL);
-+ continue;
-+ }
-+ pos_stop(pos);
-+ break;
-+ }
-+
-+ if (node_is_empty(right_lock.node)) {
-+ /* repeat if right node was squeezed completely */
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+ continue;
-+ }
-+
-+ /* parent(right_lock.node) has to be processed before
-+ * (right_lock.node) due to "parent-first" allocation order. */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos, pos->lock.node,
-+ right_lock.node);
-+ if (ret)
-+ break;
-+ /* (re)allocate _after_ going upward */
-+ ret = lock_parent_and_allocate_znode(right_lock.node, pos);
-+ if (ret)
-+ break;
-+ if (should_terminate_squalloc(pos)) {
-+ set_item_convert_count(pos, 0);
-+ break;
-+ }
-+
-+ /* advance the flush position to the right neighbor */
-+ move_flush_pos(pos, &right_lock, &right_load, NULL);
-+
-+ ret = rapid_flush(pos);
-+ if (ret)
-+ break;
-+ }
-+ check_convert_info(pos);
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+
-+ /* This function indicates via pos whether to stop or go to twig or continue on current
-+ * level. */
-+ return ret;
-+
-+}
-+
-+/* Process nodes on leaf level until unformatted node or rightmost node in the
-+ * slum reached. */
-+static int handle_pos_on_leaf(flush_pos_t * pos)
-+{
-+ int ret;
-+
-+ assert("zam-845", pos->state == POS_ON_LEAF);
-+
-+ ret = handle_pos_on_formatted(pos);
-+
-+ if (ret == -E_NO_NEIGHBOR) {
-+ /* cannot get right neighbor, go process extents. */
-+ pos->state = POS_TO_TWIG;
-+ return 0;
-+ }
-+
-+ return ret;
-+}
-+
-+/* Process slum on level > 1 */
-+static int handle_pos_on_internal(flush_pos_t * pos)
-+{
-+ assert("zam-850", pos->state == POS_ON_INTERNAL);
-+ return handle_pos_on_formatted(pos);
-+}
-+
-+/* check whether squalloc should stop before processing given extent */
-+static int squalloc_extent_should_stop(flush_pos_t * pos)
-+{
-+ assert("zam-869", item_is_extent(&pos->coord));
-+
-+ /* pos->child is a jnode handle_pos_on_extent() should start with in
-+ * stead of the first child of the first extent unit. */
-+ if (pos->child) {
-+ int prepped;
-+
-+ assert("vs-1383", jnode_is_unformatted(pos->child));
-+ prepped = jnode_check_flushprepped(pos->child);
-+ pos->pos_in_unit =
-+ jnode_get_index(pos->child) -
-+ extent_unit_index(&pos->coord);
-+ assert("vs-1470",
-+ pos->pos_in_unit < extent_unit_width(&pos->coord));
-+ assert("nikita-3434",
-+ ergo(extent_is_unallocated(&pos->coord),
-+ pos->pos_in_unit == 0));
-+ jput(pos->child);
-+ pos->child = NULL;
-+
-+ return prepped;
-+ }
-+
-+ pos->pos_in_unit = 0;
-+ if (extent_is_unallocated(&pos->coord))
-+ return 0;
-+
-+ return leftmost_child_of_unit_check_flushprepped(&pos->coord);
-+}
-+
-+/* Handle the case when regular reiser4 tree (znodes connected one to its
-+ * neighbors by sibling pointers) is interrupted on leaf level by one or more
-+ * unformatted nodes. By having a lock on twig level and use extent code
-+ * routines to process unformatted nodes we swim around an irregular part of
-+ * reiser4 tree. */
-+static int handle_pos_on_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+
-+ assert("zam-844", pos->state == POS_ON_EPOINT);
-+ assert("zam-843", item_is_extent(&pos->coord));
-+
-+ /* We decide should we continue slum processing with current extent
-+ unit: if leftmost child of current extent unit is flushprepped
-+ (i.e. clean or already processed by flush) we stop squalloc(). There
-+ is a fast check for unallocated extents which we assume contain all
-+ not flushprepped nodes. */
-+ /* FIXME: Here we implement simple check, we are only looking on the
-+ leftmost child. */
-+ ret = squalloc_extent_should_stop(pos);
-+ if (ret != 0) {
-+ pos_stop(pos);
-+ return ret;
-+ }
-+
-+ while (pos_valid(pos) && coord_is_existing_unit(&pos->coord)
-+ && item_is_extent(&pos->coord)) {
-+ ret = reiser4_alloc_extent(pos);
-+ if (ret) {
-+ break;
-+ }
-+ coord_next_unit(&pos->coord);
-+ }
-+
-+ if (coord_is_after_rightmost(&pos->coord)) {
-+ pos->state = POS_END_OF_TWIG;
-+ return 0;
-+ }
-+ if (item_is_internal(&pos->coord)) {
-+ pos->state = POS_TO_LEAF;
-+ return 0;
-+ }
-+
-+ assert("zam-860", item_is_extent(&pos->coord));
-+
-+ /* "slum" is over */
-+ pos->state = POS_INVALID;
-+ return 0;
-+}
-+
-+/* When we about to return flush position from twig to leaf level we can process
-+ * the right twig node or move position to the leaf. This processes right twig
-+ * if it is possible and jump to leaf level if not. */
-+static int handle_pos_end_of_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle right_lock;
-+ load_count right_load;
-+ coord_t at_right;
-+ jnode *child = NULL;
-+
-+ assert("zam-848", pos->state == POS_END_OF_TWIG);
-+ assert("zam-849", coord_is_after_rightmost(&pos->coord));
-+
-+ init_lh(&right_lock);
-+ init_load_count(&right_load);
-+
-+ /* We get a lock on the right twig node even it is not dirty because
-+ * slum continues or discontinues on leaf level not on next twig. This
-+ * lock on the right twig is needed for getting its leftmost child. */
-+ ret =
-+ reiser4_get_right_neighbor(&right_lock, pos->lock.node,
-+ ZNODE_WRITE_LOCK, GN_SAME_ATOM);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&right_load, right_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ /* right twig could be not dirty */
-+ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) {
-+ /* If right twig node is dirty we always attempt to squeeze it
-+ * content to the left... */
-+ became_dirty:
-+ ret =
-+ squeeze_right_twig_and_advance_coord(pos, right_lock.node);
-+ if (ret <= 0) {
-+ /* pos->coord is on internal item, go to leaf level, or
-+ * we have an error which will be caught in squalloc() */
-+ pos->state = POS_TO_LEAF;
-+ goto out;
-+ }
-+
-+ /* If right twig was squeezed completely we wave to re-lock
-+ * right twig. now it is done through the top-level squalloc
-+ * routine. */
-+ if (node_is_empty(right_lock.node))
-+ goto out;
-+
-+ /* ... and prep it if it is not yet prepped */
-+ if (!znode_check_flushprepped(right_lock.node)) {
-+ /* As usual, process parent before ... */
-+ ret =
-+ check_parents_and_squalloc_upper_levels(pos,
-+ pos->lock.
-+ node,
-+ right_lock.
-+ node);
-+ if (ret)
-+ goto out;
-+
-+ /* ... processing the child */
-+ ret =
-+ lock_parent_and_allocate_znode(right_lock.node,
-+ pos);
-+ if (ret)
-+ goto out;
-+ }
-+ } else {
-+ coord_init_first_unit(&at_right, right_lock.node);
-+
-+ /* check first child of next twig, should we continue there ? */
-+ ret = get_leftmost_child_of_unit(&at_right, &child);
-+ if (ret || child == NULL || jnode_check_flushprepped(child)) {
-+ pos_stop(pos);
-+ goto out;
-+ }
-+
-+ /* check clean twig for possible relocation */
-+ if (!znode_check_flushprepped(right_lock.node)) {
-+ ret =
-+ reverse_relocate_check_dirty_parent(child,
-+ &at_right, pos);
-+ if (ret)
-+ goto out;
-+ if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY))
-+ goto became_dirty;
-+ }
-+ }
-+
-+ assert("zam-875", znode_check_flushprepped(right_lock.node));
-+
-+ /* Update the preceder by a block number of just processed right twig
-+ * node. The code above could miss the preceder updating because
-+ * allocate_znode() could not be called for this node. */
-+ pos->preceder.blk = *znode_get_block(right_lock.node);
-+ check_preceder(pos->preceder.blk);
-+
-+ coord_init_first_unit(&at_right, right_lock.node);
-+ assert("zam-868", coord_is_existing_unit(&at_right));
-+
-+ pos->state = item_is_extent(&at_right) ? POS_ON_EPOINT : POS_TO_LEAF;
-+ move_flush_pos(pos, &right_lock, &right_load, &at_right);
-+
-+ out:
-+ done_load_count(&right_load);
-+ done_lh(&right_lock);
-+
-+ if (child)
-+ jput(child);
-+
-+ return ret;
-+}
-+
-+/* Move the pos->lock to leaf node pointed by pos->coord, check should we
-+ * continue there. */
-+static int handle_pos_to_leaf(flush_pos_t * pos)
-+{
-+ int ret;
-+ lock_handle child_lock;
-+ load_count child_load;
-+ jnode *child;
-+
-+ assert("zam-846", pos->state == POS_TO_LEAF);
-+ assert("zam-847", item_is_internal(&pos->coord));
-+
-+ init_lh(&child_lock);
-+ init_load_count(&child_load);
-+
-+ ret = get_leftmost_child_of_unit(&pos->coord, &child);
-+ if (ret)
-+ return ret;
-+ if (child == NULL) {
-+ pos_stop(pos);
-+ return 0;
-+ }
-+
-+ if (jnode_check_flushprepped(child)) {
-+ pos->state = POS_INVALID;
-+ goto out;
-+ }
-+
-+ ret =
-+ longterm_lock_znode(&child_lock, JZNODE(child), ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&child_load, JZNODE(child));
-+ if (ret)
-+ goto out;
-+
-+ ret = allocate_znode(JZNODE(child), &pos->coord, pos);
-+ if (ret)
-+ goto out;
-+
-+ /* move flush position to leaf level */
-+ pos->state = POS_ON_LEAF;
-+ move_flush_pos(pos, &child_lock, &child_load, NULL);
-+
-+ if (node_is_empty(JZNODE(child))) {
-+ ret = delete_empty_node(JZNODE(child));
-+ pos->state = POS_INVALID;
-+ }
-+ out:
-+ done_load_count(&child_load);
-+ done_lh(&child_lock);
-+ jput(child);
-+
-+ return ret;
-+}
-+
-+/* move pos from leaf to twig, and move lock from leaf to twig. */
-+/* Move pos->lock to upper (twig) level */
-+static int handle_pos_to_twig(flush_pos_t * pos)
-+{
-+ int ret;
-+
-+ lock_handle parent_lock;
-+ load_count parent_load;
-+ coord_t pcoord;
-+
-+ assert("zam-852", pos->state == POS_TO_TWIG);
-+
-+ init_lh(&parent_lock);
-+ init_load_count(&parent_load);
-+
-+ ret =
-+ reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ goto out;
-+
-+ ret = incr_load_count_znode(&parent_load, parent_lock.node);
-+ if (ret)
-+ goto out;
-+
-+ ret = find_child_ptr(parent_lock.node, pos->lock.node, &pcoord);
-+ if (ret)
-+ goto out;
-+
-+ assert("zam-870", item_is_internal(&pcoord));
-+ coord_next_item(&pcoord);
-+
-+ if (coord_is_after_rightmost(&pcoord))
-+ pos->state = POS_END_OF_TWIG;
-+ else if (item_is_extent(&pcoord))
-+ pos->state = POS_ON_EPOINT;
-+ else {
-+ /* Here we understand that getting -E_NO_NEIGHBOR in
-+ * handle_pos_on_leaf() was because of just a reaching edge of
-+ * slum */
-+ pos_stop(pos);
-+ goto out;
-+ }
-+
-+ move_flush_pos(pos, &parent_lock, &parent_load, &pcoord);
-+
-+ out:
-+ done_load_count(&parent_load);
-+ done_lh(&parent_lock);
-+
-+ return ret;
-+}
-+
-+typedef int (*pos_state_handle_t) (flush_pos_t *);
-+static pos_state_handle_t flush_pos_handlers[] = {
-+ /* process formatted nodes on leaf level, keep lock on a leaf node */
-+ [POS_ON_LEAF] = handle_pos_on_leaf,
-+ /* process unformatted nodes, keep lock on twig node, pos->coord points to extent currently
-+ * being processed */
-+ [POS_ON_EPOINT] = handle_pos_on_twig,
-+ /* move a lock from leaf node to its parent for further processing of unformatted nodes */
-+ [POS_TO_TWIG] = handle_pos_to_twig,
-+ /* move a lock from twig to leaf level when a processing of unformatted nodes finishes,
-+ * pos->coord points to the leaf node we jump to */
-+ [POS_TO_LEAF] = handle_pos_to_leaf,
-+ /* after processing last extent in the twig node, attempting to shift items from the twigs
-+ * right neighbor and process them while shifting */
-+ [POS_END_OF_TWIG] = handle_pos_end_of_twig,
-+ /* process formatted nodes on internal level, keep lock on an internal node */
-+ [POS_ON_INTERNAL] = handle_pos_on_internal
-+};
-+
-+/* Advance flush position horizontally, prepare for flushing ((re)allocate, squeeze,
-+ * encrypt) nodes and their ancestors in "parent-first" order */
-+static int squalloc(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+
-+ /* maybe needs to be made a case statement with handle_pos_on_leaf as first case, for
-+ * greater CPU efficiency? Measure and see.... -Hans */
-+ while (pos_valid(pos)) {
-+ ret = flush_pos_handlers[pos->state] (pos);
-+ if (ret < 0)
-+ break;
-+
-+ ret = rapid_flush(pos);
-+ if (ret)
-+ break;
-+ }
-+
-+ /* any positive value or -E_NO_NEIGHBOR are legal return codes for handle_pos*
-+ routines, -E_NO_NEIGHBOR means that slum edge was reached */
-+ if (ret > 0 || ret == -E_NO_NEIGHBOR)
-+ ret = 0;
-+
-+ return ret;
-+}
-+
-+static void update_ldkey(znode * node)
-+{
-+ reiser4_key ldkey;
-+
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ if (node_is_empty(node))
-+ return;
-+
-+ znode_set_ld_key(node, leftmost_key_in_node(node, &ldkey));
-+}
-+
-+/* this is to be called after calling of shift node's method to shift data from @right to
-+ @left. It sets left delimiting keys of @left and @right to keys of first items of @left
-+ and @right correspondingly and sets right delimiting key of @left to first key of @right */
-+static void update_znode_dkeys(znode * left, znode * right)
-+{
-+ assert_rw_write_locked(&(znode_get_tree(right)->dk_lock));
-+ assert("vs-1629", (znode_is_write_locked(left) &&
-+ znode_is_write_locked(right)));
-+
-+ /* we need to update left delimiting of left if it was empty before shift */
-+ update_ldkey(left);
-+ update_ldkey(right);
-+ if (node_is_empty(right))
-+ znode_set_rd_key(left, znode_get_rd_key(right));
-+ else
-+ znode_set_rd_key(left, znode_get_ld_key(right));
-+}
-+
-+/* try to shift everything from @right to @left. If everything was shifted -
-+ @right is removed from the tree. Result is the number of bytes shifted. */
-+static int
-+shift_everything_left(znode * right, znode * left, carry_level * todo)
-+{
-+ coord_t from;
-+ node_plugin *nplug;
-+ carry_plugin_info info;
-+
-+ coord_init_after_last_item(&from, right);
-+
-+ nplug = node_plugin_by_node(right);
-+ info.doing = NULL;
-+ info.todo = todo;
-+ return nplug->shift(&from, left, SHIFT_LEFT,
-+ 1 /* delete @right if it becomes empty */ ,
-+ 1
-+ /* move coord @from to node @left if everything will be shifted */
-+ ,
-+ &info);
-+}
-+
-+/* Shift as much as possible from @right to @left using the memcpy-optimized
-+ shift_everything_left. @left and @right are formatted neighboring nodes on
-+ leaf level. */
-+static int squeeze_right_non_twig(znode * left, znode * right)
-+{
-+ int ret;
-+ carry_pool *pool;
-+ carry_level *todo;
-+
-+ assert("nikita-2246", znode_get_level(left) == znode_get_level(right));
-+
-+ if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) ||
-+ !JF_ISSET(ZJNODE(right), JNODE_DIRTY))
-+ return SQUEEZE_TARGET_FULL;
-+
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ todo = (carry_level *) (pool + 1);
-+ init_carry_level(todo, pool);
-+
-+ ret = shift_everything_left(right, left, todo);
-+ if (ret > 0) {
-+ /* something was shifted */
-+ reiser4_tree *tree;
-+ __u64 grabbed;
-+
-+ znode_make_dirty(left);
-+ znode_make_dirty(right);
-+
-+ /* update delimiting keys of nodes which participated in
-+ shift. FIXME: it would be better to have this in shift
-+ node's operation. But it can not be done there. Nobody
-+ remembers why, though */
-+ tree = znode_get_tree(left);
-+ write_lock_dk(tree);
-+ update_znode_dkeys(left, right);
-+ write_unlock_dk(tree);
-+
-+ /* Carry is called to update delimiting key and, maybe, to remove empty
-+ node. */
-+ grabbed = get_current_context()->grabbed_blocks;
-+ ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
-+ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */
-+ ret = reiser4_carry(todo, NULL /* previous level */ );
-+ grabbed2free_mark(grabbed);
-+ } else {
-+ /* Shifting impossible, we return appropriate result code */
-+ ret =
-+ node_is_empty(right) ? SQUEEZE_SOURCE_EMPTY :
-+ SQUEEZE_TARGET_FULL;
-+ }
-+
-+ done_carry_pool(pool);
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+static int sibling_link_is_ok(const znode *left, const znode *right)
-+{
-+ int result;
-+
-+ read_lock_tree(znode_get_tree(left));
-+ result = (left->right == right && left == right->left);
-+ read_unlock_tree(znode_get_tree(left));
-+ return result;
-+}
-+#endif
-+
-+/* Shift first unit of first item if it is an internal one. Return
-+ SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return
-+ SUBTREE_MOVED. */
-+static int shift_one_internal_unit(znode * left, znode * right)
-+{
-+ int ret;
-+ carry_pool *pool;
-+ carry_level *todo;
-+ coord_t *coord;
-+ carry_plugin_info *info;
-+ int size, moved;
-+
-+ assert("nikita-2247", znode_get_level(left) == znode_get_level(right));
-+ assert("nikita-2435", znode_is_write_locked(left));
-+ assert("nikita-2436", znode_is_write_locked(right));
-+ assert("nikita-2434", sibling_link_is_ok(left, right));
-+
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
-+ sizeof(*coord) + sizeof(*info)
-+#if REISER4_DEBUG
-+ + sizeof(*coord) + 2 * sizeof(reiser4_key)
-+#endif
-+ );
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ todo = (carry_level *) (pool + 1);
-+ init_carry_level(todo, pool);
-+
-+ coord = (coord_t *) (todo + 3);
-+ coord_init_first_unit(coord, right);
-+ info = (carry_plugin_info *) (coord + 1);
-+
-+#if REISER4_DEBUG
-+ if (!node_is_empty(left)) {
-+ coord_t *last;
-+ reiser4_key *right_key;
-+ reiser4_key *left_key;
-+
-+ last = (coord_t *) (info + 1);
-+ right_key = (reiser4_key *) (last + 1);
-+ left_key = right_key + 1;
-+ coord_init_last_unit(last, left);
-+
-+ assert("nikita-2463",
-+ keyle(item_key_by_coord(last, left_key),
-+ item_key_by_coord(coord, right_key)));
-+ }
-+#endif
-+
-+ assert("jmacd-2007", item_is_internal(coord));
-+
-+ size = item_length_by_coord(coord);
-+ info->todo = todo;
-+ info->doing = NULL;
-+
-+ ret = node_plugin_by_node(left)->shift(coord, left, SHIFT_LEFT,
-+ 1
-+ /* delete @right if it becomes empty */
-+ ,
-+ 0
-+ /* do not move coord @coord to node @left */
-+ ,
-+ info);
-+
-+ /* If shift returns positive, then we shifted the item. */
-+ assert("vs-423", ret <= 0 || size == ret);
-+ moved = (ret > 0);
-+
-+ if (moved) {
-+ /* something was moved */
-+ reiser4_tree *tree;
-+ int grabbed;
-+
-+ znode_make_dirty(left);
-+ znode_make_dirty(right);
-+ tree = znode_get_tree(left);
-+ write_lock_dk(tree);
-+ update_znode_dkeys(left, right);
-+ write_unlock_dk(tree);
-+
-+ /* reserve space for delimiting keys after shifting */
-+ grabbed = get_current_context()->grabbed_blocks;
-+ ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
-+ assert("nikita-3003", ret == 0); /* reserved space is exhausted. Ask Hans. */
-+
-+ ret = reiser4_carry(todo, NULL /* previous level */ );
-+ grabbed2free_mark(grabbed);
-+ }
-+
-+ done_carry_pool(pool);
-+
-+ if (ret != 0) {
-+ /* Shift or carry operation failed. */
-+ assert("jmacd-7325", ret < 0);
-+ return ret;
-+ }
-+
-+ return moved ? SUBTREE_MOVED : SQUEEZE_TARGET_FULL;
-+}
-+
-+/* Make the final relocate/wander decision during forward parent-first squalloc for a
-+ znode. For unformatted nodes this is done in plugin/item/extent.c:extent_needs_allocation(). */
-+static int
-+allocate_znode_loaded(znode * node,
-+ const coord_t * parent_coord, flush_pos_t * pos)
-+{
-+ int ret;
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+ /* FIXME(D): We have the node write-locked and should have checked for !
-+ allocated() somewhere before reaching this point, but there can be a race, so
-+ this assertion is bogus. */
-+ assert("jmacd-7987", !jnode_check_flushprepped(ZJNODE(node)));
-+ assert("jmacd-7988", znode_is_write_locked(node));
-+ assert("jmacd-7989", coord_is_invalid(parent_coord)
-+ || znode_is_write_locked(parent_coord->node));
-+
-+ if (ZF_ISSET(node, JNODE_REPACK) || ZF_ISSET(node, JNODE_CREATED) ||
-+ znode_is_root(node) ||
-+ /* We have enough nodes to relocate no matter what. */
-+ (pos->leaf_relocate != 0 && znode_get_level(node) == LEAF_LEVEL)) {
-+ /* No need to decide with new nodes, they are treated the same as
-+ relocate. If the root node is dirty, relocate. */
-+ if (pos->preceder.blk == 0) {
-+ /* preceder is unknown and we have decided to relocate node --
-+ using of default value for search start is better than search
-+ from block #0. */
-+ get_blocknr_hint_default(&pos->preceder.blk);
-+ check_preceder(pos->preceder.blk);
-+ }
-+
-+ goto best_reloc;
-+
-+ } else if (pos->preceder.blk == 0) {
-+ /* If we don't know the preceder, leave it where it is. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+ /* Make a decision based on block distance. */
-+ reiser4_block_nr dist;
-+ reiser4_block_nr nblk = *znode_get_block(node);
-+
-+ assert("jmacd-6172", !reiser4_blocknr_is_fake(&nblk));
-+ assert("jmacd-6173", !reiser4_blocknr_is_fake(&pos->preceder.blk));
-+ assert("jmacd-6174", pos->preceder.blk != 0);
-+
-+ if (pos->preceder.blk == nblk - 1) {
-+ /* Ideal. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+
-+ dist =
-+ (nblk <
-+ pos->preceder.blk) ? (pos->preceder.blk -
-+ nblk) : (nblk -
-+ pos->preceder.blk);
-+
-+ /* See if we can find a closer block (forward direction only). */
-+ pos->preceder.max_dist =
-+ min((reiser4_block_nr) sbinfo->flush.
-+ relocate_distance, dist);
-+ pos->preceder.level = znode_get_level(node);
-+
-+ ret = allocate_znode_update(node, parent_coord, pos);
-+
-+ pos->preceder.max_dist = 0;
-+
-+ if (ret && (ret != -ENOSPC))
-+ return ret;
-+
-+ if (ret == 0) {
-+ /* Got a better allocation. */
-+ znode_make_reloc(node, pos->fq);
-+ } else if (dist < sbinfo->flush.relocate_distance) {
-+ /* The present allocation is good enough. */
-+ jnode_make_wander(ZJNODE(node));
-+ } else {
-+ /* Otherwise, try to relocate to the best position. */
-+ best_reloc:
-+ ret =
-+ allocate_znode_update(node, parent_coord,
-+ pos);
-+ if (ret != 0)
-+ return ret;
-+
-+ /* set JNODE_RELOC bit _after_ node gets allocated */
-+ znode_make_reloc(node, pos->fq);
-+ }
-+ }
-+ }
-+
-+ /* This is the new preceder. */
-+ pos->preceder.blk = *znode_get_block(node);
-+ check_preceder(pos->preceder.blk);
-+ pos->alloc_cnt += 1;
-+
-+ assert("jmacd-4277", !reiser4_blocknr_is_fake(&pos->preceder.blk));
-+
-+ return 0;
-+}
-+
-+static int
-+allocate_znode(znode * node, const coord_t * parent_coord, flush_pos_t * pos)
-+{
-+ /*
-+ * perform znode allocation with znode pinned in memory to avoid races
-+ * with asynchronous emergency flush (which plays with
-+ * JNODE_FLUSH_RESERVED bit).
-+ */
-+ return WITH_DATA(node, allocate_znode_loaded(node, parent_coord, pos));
-+}
-+
-+/* A subroutine of allocate_znode, this is called first to see if there is a close
-+ position to relocate to. It may return ENOSPC if there is no close position. If there
-+ is no close position it may not relocate. This takes care of updating the parent node
-+ with the relocated block address. */
-+static int
-+allocate_znode_update(znode * node, const coord_t * parent_coord,
-+ flush_pos_t * pos)
-+{
-+ int ret;
-+ reiser4_block_nr blk;
-+ lock_handle uber_lock;
-+ int flush_reserved_used = 0;
-+ int grabbed;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ init_lh(&uber_lock);
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ grabbed = ctx->grabbed_blocks;
-+
-+ /* discard e-flush allocation */
-+ ret = zload(node);
-+ if (ret)
-+ return ret;
-+
-+ if (ZF_ISSET(node, JNODE_CREATED)) {
-+ assert("zam-816", reiser4_blocknr_is_fake(znode_get_block(node)));
-+ pos->preceder.block_stage = BLOCK_UNALLOCATED;
-+ } else {
-+ pos->preceder.block_stage = BLOCK_GRABBED;
-+
-+ /* The disk space for relocating the @node is already reserved in "flush reserved"
-+ * counter if @node is leaf, otherwise we grab space using BA_RESERVED (means grab
-+ * space from whole disk not from only 95%). */
-+ if (znode_get_level(node) == LEAF_LEVEL) {
-+ /*
-+ * earlier (during do_jnode_make_dirty()) we decided
-+ * that @node can possibly go into overwrite set and
-+ * reserved block for its wandering location.
-+ */
-+ txn_atom *atom = get_current_atom_locked();
-+ assert("nikita-3449",
-+ ZF_ISSET(node, JNODE_FLUSH_RESERVED));
-+ flush_reserved2grabbed(atom, (__u64) 1);
-+ spin_unlock_atom(atom);
-+ /*
-+ * we are trying to move node into relocate
-+ * set. Allocation of relocated position "uses"
-+ * reserved block.
-+ */
-+ ZF_CLR(node, JNODE_FLUSH_RESERVED);
-+ flush_reserved_used = 1;
-+ } else {
-+ ret = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
-+ if (ret != 0)
-+ goto exit;
-+ }
-+ }
-+
-+ /* We may do not use 5% of reserved disk space here and flush will not pack tightly. */
-+ ret = reiser4_alloc_block(&pos->preceder, &blk,
-+ BA_FORMATTED | BA_PERMANENT);
-+ if (ret)
-+ goto exit;
-+
-+ if (!ZF_ISSET(node, JNODE_CREATED) &&
-+ (ret =
-+ reiser4_dealloc_block(znode_get_block(node), 0,
-+ BA_DEFER | BA_FORMATTED)))
-+ goto exit;
-+
-+ if (likely(!znode_is_root(node))) {
-+ item_plugin *iplug;
-+
-+ iplug = item_plugin_by_coord(parent_coord);
-+ assert("nikita-2954", iplug->f.update != NULL);
-+ iplug->f.update(parent_coord, &blk);
-+
-+ znode_make_dirty(parent_coord->node);
-+
-+ } else {
-+ reiser4_tree *tree = znode_get_tree(node);
-+ znode *uber;
-+
-+ /* We take a longterm lock on the fake node in order to change
-+ the root block number. This may cause atom fusion. */
-+ ret = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
-+ &uber_lock);
-+ /* The fake node cannot be deleted, and we must have priority
-+ here, and may not be confused with ENOSPC. */
-+ assert("jmacd-74412",
-+ ret != -EINVAL && ret != -E_DEADLOCK && ret != -ENOSPC);
-+
-+ if (ret)
-+ goto exit;
-+
-+ uber = uber_lock.node;
-+
-+ write_lock_tree(tree);
-+ tree->root_block = blk;
-+ write_unlock_tree(tree);
-+
-+ znode_make_dirty(uber);
-+ }
-+
-+ ret = znode_rehash(node, &blk);
-+ exit:
-+ if (ret) {
-+ /* Get flush reserved block back if something fails, because
-+ * callers assume that on error block wasn't relocated and its
-+ * flush reserved block wasn't used. */
-+ if (flush_reserved_used) {
-+ /*
-+ * ok, we failed to move node into relocate
-+ * set. Restore status quo.
-+ */
-+ grabbed2flush_reserved((__u64) 1);
-+ ZF_SET(node, JNODE_FLUSH_RESERVED);
-+ }
-+ }
-+ zrelse(node);
-+ done_lh(&uber_lock);
-+ grabbed2free_mark(grabbed);
-+ return ret;
-+}
-+
-+/* JNODE INTERFACE */
-+
-+/* Lock a node (if formatted) and then get its parent locked, set the child's
-+ coordinate in the parent. If the child is the root node, the above_root
-+ znode is returned but the coord is not set. This function may cause atom
-+ fusion, but it is only used for read locks (at this point) and therefore
-+ fusion only occurs when the parent is already dirty. */
-+/* Hans adds this note: remember to ask how expensive this operation is vs. storing parent
-+ pointer in jnodes. */
-+static int
-+jnode_lock_parent_coord(jnode * node,
-+ coord_t * coord,
-+ lock_handle * parent_lh,
-+ load_count * parent_zh,
-+ znode_lock_mode parent_mode, int try)
-+{
-+ int ret;
-+
-+ assert("edward-53", jnode_is_unformatted(node) || jnode_is_znode(node));
-+ assert("edward-54", jnode_is_unformatted(node)
-+ || znode_is_any_locked(JZNODE(node)));
-+
-+ if (!jnode_is_znode(node)) {
-+ reiser4_key key;
-+ tree_level stop_level = TWIG_LEVEL;
-+ lookup_bias bias = FIND_EXACT;
-+
-+ assert("edward-168", !(jnode_get_type(node) == JNODE_BITMAP));
-+
-+ /* The case when node is not znode, but can have parent coord
-+ (unformatted node, node which represents cluster page,
-+ etc..). Generate a key for the appropriate entry, search
-+ in the tree using coord_by_key, which handles locking for
-+ us. */
-+
-+ /*
-+ * nothing is locked at this moment, so, nothing prevents
-+ * concurrent truncate from removing jnode from inode. To
-+ * prevent this spin-lock jnode. jnode can be truncated just
-+ * after call to the jnode_build_key(), but this is ok,
-+ * because coord_by_key() will just fail to find appropriate
-+ * extent.
-+ */
-+ spin_lock_jnode(node);
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ jnode_build_key(node, &key);
-+ ret = 0;
-+ } else
-+ ret = RETERR(-ENOENT);
-+ spin_unlock_jnode(node);
-+
-+ if (ret != 0)
-+ return ret;
-+
-+ if (jnode_is_cluster_page(node))
-+ stop_level = LEAF_LEVEL;
-+
-+ assert("jmacd-1812", coord != NULL);
-+
-+ ret = coord_by_key(jnode_get_tree(node), &key, coord, parent_lh,
-+ parent_mode, bias, stop_level, stop_level,
-+ CBK_UNIQUE, NULL /*ra_info */ );
-+ switch (ret) {
-+ case CBK_COORD_NOTFOUND:
-+ assert("edward-1038",
-+ ergo(jnode_is_cluster_page(node),
-+ JF_ISSET(node, JNODE_HEARD_BANSHEE)));
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE))
-+ warning("nikita-3177", "Parent not found");
-+ return ret;
-+ case CBK_COORD_FOUND:
-+ if (coord->between != AT_UNIT) {
-+ /* FIXME: comment needed */
-+ done_lh(parent_lh);
-+ if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ warning("nikita-3178",
-+ "Found but not happy: %i",
-+ coord->between);
-+ }
-+ return RETERR(-ENOENT);
-+ }
-+ ret = incr_load_count_znode(parent_zh, parent_lh->node);
-+ if (ret != 0)
-+ return ret;
-+ /* if (jnode_is_cluster_page(node)) {
-+ races with write() are possible
-+ check_child_cluster (parent_lh->node);
-+ }
-+ */
-+ break;
-+ default:
-+ return ret;
-+ }
-+
-+ } else {
-+ int flags;
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ /* Formatted node case: */
-+ assert("jmacd-2061", !znode_is_root(z));
-+
-+ flags = GN_ALLOW_NOT_CONNECTED;
-+ if (try)
-+ flags |= GN_TRY_LOCK;
-+
-+ ret =
-+ reiser4_get_parent_flags(parent_lh, z, parent_mode, flags);
-+ if (ret != 0)
-+ /* -E_REPEAT is ok here, it is handled by the caller. */
-+ return ret;
-+
-+ /* Make the child's position "hint" up-to-date. (Unless above
-+ root, which caller must check.) */
-+ if (coord != NULL) {
-+
-+ ret = incr_load_count_znode(parent_zh, parent_lh->node);
-+ if (ret != 0) {
-+ warning("jmacd-976812386",
-+ "incr_load_count_znode failed: %d",
-+ ret);
-+ return ret;
-+ }
-+
-+ ret = find_child_ptr(parent_lh->node, z, coord);
-+ if (ret != 0) {
-+ warning("jmacd-976812",
-+ "find_child_ptr failed: %d", ret);
-+ return ret;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* Get the (locked) next neighbor of a znode which is dirty and a member of the same atom.
-+ If there is no next neighbor or the neighbor is not in memory or if there is a
-+ neighbor but it is not dirty or not in the same atom, -E_NO_NEIGHBOR is returned.
-+ In some cases the slum may include nodes which are not dirty, if so @check_dirty should be 0 */
-+static int neighbor_in_slum(znode * node, /* starting point */
-+ lock_handle * lock, /* lock on starting point */
-+ sideof side, /* left or right direction we seek the next node in */
-+ znode_lock_mode mode, /* kind of lock we want */
-+ int check_dirty, /* true if the neighbor should be dirty */
-+ int use_upper_levels /* get neighbor by going though
-+ upper levels */)
-+{
-+ int ret;
-+ int flags;
-+
-+ assert("jmacd-6334", znode_is_connected(node));
-+
-+ flags = GN_SAME_ATOM | (side == LEFT_SIDE ? GN_GO_LEFT : 0);
-+ if (use_upper_levels)
-+ flags |= GN_CAN_USE_UPPER_LEVELS;
-+
-+ ret = reiser4_get_neighbor(lock, node, mode, flags);
-+ if (ret) {
-+ /* May return -ENOENT or -E_NO_NEIGHBOR. */
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ if (ret == -ENOENT) {
-+ ret = RETERR(-E_NO_NEIGHBOR);
-+ }
-+ return ret;
-+ }
-+ if (!check_dirty)
-+ return 0;
-+ /* Check dirty bit of locked znode, no races here */
-+ if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY))
-+ return 0;
-+
-+ done_lh(lock);
-+ return RETERR(-E_NO_NEIGHBOR);
-+}
-+
-+/* Return true if two znodes have the same parent. This is called with both nodes
-+ write-locked (for squeezing) so no tree lock is needed. */
-+static int znode_same_parents(znode * a, znode * b)
-+{
-+ int result;
-+
-+ assert("jmacd-7011", znode_is_write_locked(a));
-+ assert("jmacd-7012", znode_is_write_locked(b));
-+
-+ /* We lock the whole tree for this check.... I really don't like whole tree
-+ * locks... -Hans */
-+ read_lock_tree(znode_get_tree(a));
-+ result = (znode_parent(a) == znode_parent(b));
-+ read_unlock_tree(znode_get_tree(a));
-+ return result;
-+}
-+
-+/* FLUSH SCAN */
-+
-+/* Initialize the flush_scan data structure. */
-+static void scan_init(flush_scan * scan)
-+{
-+ memset(scan, 0, sizeof(*scan));
-+ init_lh(&scan->node_lock);
-+ init_lh(&scan->parent_lock);
-+ init_load_count(&scan->parent_load);
-+ init_load_count(&scan->node_load);
-+ coord_init_invalid(&scan->parent_coord, NULL);
-+}
-+
-+/* Release any resources held by the flush scan, e.g., release locks, free memory, etc. */
-+static void scan_done(flush_scan * scan)
-+{
-+ done_load_count(&scan->node_load);
-+ if (scan->node != NULL) {
-+ jput(scan->node);
-+ scan->node = NULL;
-+ }
-+ done_load_count(&scan->parent_load);
-+ done_lh(&scan->parent_lock);
-+ done_lh(&scan->node_lock);
-+}
-+
-+/* Returns true if flush scanning is finished. */
-+int reiser4_scan_finished(flush_scan * scan)
-+{
-+ return scan->stop || (scan->direction == RIGHT_SIDE &&
-+ scan->count >= scan->max_count);
-+}
-+
-+/* Return true if the scan should continue to the @tonode. True if the node meets the
-+ same_slum_check condition. If not, deref the "left" node and stop the scan. */
-+int reiser4_scan_goto(flush_scan * scan, jnode * tonode)
-+{
-+ int go = same_slum_check(scan->node, tonode, 1, 0);
-+
-+ if (!go) {
-+ scan->stop = 1;
-+ jput(tonode);
-+ }
-+
-+ return go;
-+}
-+
-+/* Set the current scan->node, refcount it, increment count by the @add_count (number to
-+ count, e.g., skipped unallocated nodes), deref previous current, and copy the current
-+ parent coordinate. */
-+int
-+scan_set_current(flush_scan * scan, jnode * node, unsigned add_count,
-+ const coord_t * parent)
-+{
-+ /* Release the old references, take the new reference. */
-+ done_load_count(&scan->node_load);
-+
-+ if (scan->node != NULL) {
-+ jput(scan->node);
-+ }
-+ scan->node = node;
-+ scan->count += add_count;
-+
-+ /* This next stmt is somewhat inefficient. The reiser4_scan_extent() code could
-+ delay this update step until it finishes and update the parent_coord only once.
-+ It did that before, but there was a bug and this was the easiest way to make it
-+ correct. */
-+ if (parent != NULL) {
-+ coord_dup(&scan->parent_coord, parent);
-+ }
-+
-+ /* Failure may happen at the incr_load_count call, but the caller can assume the reference
-+ is safely taken. */
-+ return incr_load_count_jnode(&scan->node_load, node);
-+}
-+
-+/* Return true if scanning in the leftward direction. */
-+int reiser4_scanning_left(flush_scan * scan)
-+{
-+ return scan->direction == LEFT_SIDE;
-+}
-+
-+/* Performs leftward scanning starting from either kind of node. Counts the starting
-+ node. The right-scan object is passed in for the left-scan in order to copy the parent
-+ of an unformatted starting position. This way we avoid searching for the unformatted
-+ node's parent when scanning in each direction. If we search for the parent once it is
-+ set in both scan objects. The limit parameter tells flush-scan when to stop.
-+
-+ Rapid scanning is used only during scan_left, where we are interested in finding the
-+ 'leftpoint' where we begin flushing. We are interested in stopping at the left child
-+ of a twig that does not have a dirty left neighbor. THIS IS A SPECIAL CASE. The
-+ problem is finding a way to flush only those nodes without unallocated children, and it
-+ is difficult to solve in the bottom-up flushing algorithm we are currently using. The
-+ problem can be solved by scanning left at every level as we go upward, but this would
-+ basically bring us back to using a top-down allocation strategy, which we already tried
-+ (see BK history from May 2002), and has a different set of problems. The top-down
-+ strategy makes avoiding unallocated children easier, but makes it difficult to
-+ propertly flush dirty children with clean parents that would otherwise stop the
-+ top-down flush, only later to dirty the parent once the children are flushed. So we
-+ solve the problem in the bottom-up algorithm with a special case for twigs and leaves
-+ only.
-+
-+ The first step in solving the problem is this rapid leftward scan. After we determine
-+ that there are at least enough nodes counted to qualify for FLUSH_RELOCATE_THRESHOLD we
-+ are no longer interested in the exact count, we are only interested in finding a the
-+ best place to start the flush. We could choose one of two possibilities:
-+
-+ 1. Stop at the leftmost child (of a twig) that does not have a dirty left neighbor.
-+ This requires checking one leaf per rapid-scan twig
-+
-+ 2. Stop at the leftmost child (of a twig) where there are no dirty children of the twig
-+ to the left. This requires checking possibly all of the in-memory children of each
-+ twig during the rapid scan.
-+
-+ For now we implement the first policy.
-+*/
-+static int
-+scan_left(flush_scan * scan, flush_scan * right, jnode * node, unsigned limit)
-+{
-+ int ret = 0;
-+
-+ scan->max_count = limit;
-+ scan->direction = LEFT_SIDE;
-+
-+ ret = scan_set_current(scan, jref(node), 1, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ ret = scan_common(scan, right);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ /* Before rapid scanning, we need a lock on scan->node so that we can get its
-+ parent, only if formatted. */
-+ if (jnode_is_znode(scan->node)) {
-+ ret = longterm_lock_znode(&scan->node_lock, JZNODE(scan->node),
-+ ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
-+ }
-+
-+ /* Rapid_scan would go here (with limit set to FLUSH_RELOCATE_THRESHOLD). */
-+ return ret;
-+}
-+
-+/* Performs rightward scanning... Does not count the starting node. The limit parameter
-+ is described in scan_left. If the starting node is unformatted then the
-+ parent_coord was already set during scan_left. The rapid_after parameter is not used
-+ during right-scanning.
-+
-+ scan_right is only called if the scan_left operation does not count at least
-+ FLUSH_RELOCATE_THRESHOLD nodes for flushing. Otherwise, the limit parameter is set to
-+ the difference between scan-left's count and FLUSH_RELOCATE_THRESHOLD, meaning
-+ scan-right counts as high as FLUSH_RELOCATE_THRESHOLD and then stops. */
-+static int scan_right(flush_scan * scan, jnode * node, unsigned limit)
-+{
-+ int ret;
-+
-+ scan->max_count = limit;
-+ scan->direction = RIGHT_SIDE;
-+
-+ ret = scan_set_current(scan, jref(node), 0, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ return scan_common(scan, NULL);
-+}
-+
-+/* Common code to perform left or right scanning. */
-+static int scan_common(flush_scan * scan, flush_scan * other)
-+{
-+ int ret;
-+
-+ assert("nikita-2376", scan->node != NULL);
-+ assert("edward-54", jnode_is_unformatted(scan->node)
-+ || jnode_is_znode(scan->node));
-+
-+ /* Special case for starting at an unformatted node. Optimization: we only want
-+ to search for the parent (which requires a tree traversal) once. Obviously, we
-+ shouldn't have to call it once for the left scan and once for the right scan.
-+ For this reason, if we search for the parent during scan-left we then duplicate
-+ the coord/lock/load into the scan-right object. */
-+ if (jnode_is_unformatted(scan->node)) {
-+ ret = scan_unformatted(scan, other);
-+ if (ret != 0)
-+ return ret;
-+ }
-+ /* This loop expects to start at a formatted position and performs chaining of
-+ formatted regions */
-+ while (!reiser4_scan_finished(scan)) {
-+
-+ ret = scan_formatted(scan);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int scan_unformatted(flush_scan * scan, flush_scan * other)
-+{
-+ int ret = 0;
-+ int try = 0;
-+
-+ if (!coord_is_invalid(&scan->parent_coord))
-+ goto scan;
-+
-+ /* set parent coord from */
-+ if (!jnode_is_unformatted(scan->node)) {
-+ /* formatted position */
-+
-+ lock_handle lock;
-+ assert("edward-301", jnode_is_znode(scan->node));
-+ init_lh(&lock);
-+
-+ /*
-+ * when flush starts from unformatted node, first thing it
-+ * does is tree traversal to find formatted parent of starting
-+ * node. This parent is then kept lock across scans to the
-+ * left and to the right. This means that during scan to the
-+ * left we cannot take left-ward lock, because this is
-+ * dead-lock prone. So, if we are scanning to the left and
-+ * there is already lock held by this thread,
-+ * jnode_lock_parent_coord() should use try-lock.
-+ */
-+ try = reiser4_scanning_left(scan)
-+ && !lock_stack_isclean(get_current_lock_stack());
-+ /* Need the node locked to get the parent lock, We have to
-+ take write lock since there is at least one call path
-+ where this znode is already write-locked by us. */
-+ ret =
-+ longterm_lock_znode(&lock, JZNODE(scan->node),
-+ ZNODE_WRITE_LOCK,
-+ reiser4_scanning_left(scan) ?
-+ ZNODE_LOCK_LOPRI :
-+ ZNODE_LOCK_HIPRI);
-+ if (ret != 0)
-+ /* EINVAL or E_DEADLOCK here mean... try again! At this point we've
-+ scanned too far and can't back out, just start over. */
-+ return ret;
-+
-+ ret = jnode_lock_parent_coord(scan->node,
-+ &scan->parent_coord,
-+ &scan->parent_lock,
-+ &scan->parent_load,
-+ ZNODE_WRITE_LOCK, try);
-+
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ done_lh(&lock);
-+ if (ret == -E_REPEAT) {
-+ scan->stop = 1;
-+ return 0;
-+ }
-+ if (ret)
-+ return ret;
-+
-+ } else {
-+ /* unformatted position */
-+
-+ ret =
-+ jnode_lock_parent_coord(scan->node, &scan->parent_coord,
-+ &scan->parent_lock,
-+ &scan->parent_load,
-+ ZNODE_WRITE_LOCK, try);
-+
-+ if (IS_CBKERR(ret))
-+ return ret;
-+
-+ if (ret == CBK_COORD_NOTFOUND)
-+ /* FIXME(C): check EINVAL, E_DEADLOCK */
-+ return ret;
-+
-+ /* parent was found */
-+ assert("jmacd-8661", other != NULL);
-+ /* Duplicate the reference into the other flush_scan. */
-+ coord_dup(&other->parent_coord, &scan->parent_coord);
-+ copy_lh(&other->parent_lock, &scan->parent_lock);
-+ copy_load_count(&other->parent_load, &scan->parent_load);
-+ }
-+ scan:
-+ return scan_by_coord(scan);
-+}
-+
-+/* Performs left- or rightward scanning starting from a formatted node. Follow left
-+ pointers under tree lock as long as:
-+
-+ - node->left/right is non-NULL
-+ - node->left/right is connected, dirty
-+ - node->left/right belongs to the same atom
-+ - scan has not reached maximum count
-+*/
-+static int scan_formatted(flush_scan * scan)
-+{
-+ int ret;
-+ znode *neighbor = NULL;
-+
-+ assert("jmacd-1401", !reiser4_scan_finished(scan));
-+
-+ do {
-+ znode *node = JZNODE(scan->node);
-+
-+ /* Node should be connected, but if not stop the scan. */
-+ if (!znode_is_connected(node)) {
-+ scan->stop = 1;
-+ break;
-+ }
-+
-+ /* Lock the tree, check-for and reference the next sibling. */
-+ read_lock_tree(znode_get_tree(node));
-+
-+ /* It may be that a node is inserted or removed between a node and its
-+ left sibling while the tree lock is released, but the flush-scan count
-+ does not need to be precise. Thus, we release the tree lock as soon as
-+ we get the neighboring node. */
-+ neighbor =
-+ reiser4_scanning_left(scan) ? node->left : node->right;
-+ if (neighbor != NULL) {
-+ zref(neighbor);
-+ }
-+
-+ read_unlock_tree(znode_get_tree(node));
-+
-+ /* If neighbor is NULL at the leaf level, need to check for an unformatted
-+ sibling using the parent--break in any case. */
-+ if (neighbor == NULL) {
-+ break;
-+ }
-+
-+ /* Check the condition for going left, break if it is not met. This also
-+ releases (jputs) the neighbor if false. */
-+ if (!reiser4_scan_goto(scan, ZJNODE(neighbor))) {
-+ break;
-+ }
-+
-+ /* Advance the flush_scan state to the left, repeat. */
-+ ret = scan_set_current(scan, ZJNODE(neighbor), 1, NULL);
-+ if (ret != 0) {
-+ return ret;
-+ }
-+
-+ } while (!reiser4_scan_finished(scan));
-+
-+ /* If neighbor is NULL then we reached the end of a formatted region, or else the
-+ sibling is out of memory, now check for an extent to the left (as long as
-+ LEAF_LEVEL). */
-+ if (neighbor != NULL || jnode_get_level(scan->node) != LEAF_LEVEL
-+ || reiser4_scan_finished(scan)) {
-+ scan->stop = 1;
-+ return 0;
-+ }
-+ /* Otherwise, calls scan_by_coord for the right(left)most item of the
-+ left(right) neighbor on the parent level, then possibly continue. */
-+
-+ coord_init_invalid(&scan->parent_coord, NULL);
-+ return scan_unformatted(scan, NULL);
-+}
-+
-+/* NOTE-EDWARD:
-+ This scans adjacent items of the same type and calls scan flush plugin for each one.
-+ Performs left(right)ward scanning starting from a (possibly) unformatted node. If we start
-+ from unformatted node, then we continue only if the next neighbor is also unformatted.
-+ When called from scan_formatted, we skip first iteration (to make sure that
-+ right(left)most item of the left(right) neighbor on the parent level is of the same
-+ type and set appropriate coord). */
-+static int scan_by_coord(flush_scan * scan)
-+{
-+ int ret = 0;
-+ int scan_this_coord;
-+ lock_handle next_lock;
-+ load_count next_load;
-+ coord_t next_coord;
-+ jnode *child;
-+ item_plugin *iplug;
-+
-+ init_lh(&next_lock);
-+ init_load_count(&next_load);
-+ scan_this_coord = (jnode_is_unformatted(scan->node) ? 1 : 0);
-+
-+ /* set initial item id */
-+ iplug = item_plugin_by_coord(&scan->parent_coord);
-+
-+ for (; !reiser4_scan_finished(scan); scan_this_coord = 1) {
-+ if (scan_this_coord) {
-+ /* Here we expect that unit is scannable. it would not be so due
-+ * to race with extent->tail conversion. */
-+ if (iplug->f.scan == NULL) {
-+ scan->stop = 1;
-+ ret = -E_REPEAT;
-+ /* skip the check at the end. */
-+ goto race;
-+ }
-+
-+ ret = iplug->f.scan(scan);
-+ if (ret != 0)
-+ goto exit;
-+
-+ if (reiser4_scan_finished(scan)) {
-+ checkchild(scan);
-+ break;
-+ }
-+ } else {
-+ /* the same race against truncate as above is possible
-+ * here, it seems */
-+
-+ /* NOTE-JMACD: In this case, apply the same end-of-node logic but don't scan
-+ the first coordinate. */
-+ assert("jmacd-1231",
-+ item_is_internal(&scan->parent_coord));
-+ }
-+
-+ if (iplug->f.utmost_child == NULL
-+ || znode_get_level(scan->parent_coord.node) != TWIG_LEVEL) {
-+ /* stop this coord and continue on parrent level */
-+ ret =
-+ scan_set_current(scan,
-+ ZJNODE(zref
-+ (scan->parent_coord.node)),
-+ 1, NULL);
-+ if (ret != 0)
-+ goto exit;
-+ break;
-+ }
-+
-+ /* Either way, the invariant is that scan->parent_coord is set to the
-+ parent of scan->node. Now get the next unit. */
-+ coord_dup(&next_coord, &scan->parent_coord);
-+ coord_sideof_unit(&next_coord, scan->direction);
-+
-+ /* If off-the-end of the twig, try the next twig. */
-+ if (coord_is_after_sideof_unit(&next_coord, scan->direction)) {
-+ /* We take the write lock because we may start flushing from this
-+ * coordinate. */
-+ ret = neighbor_in_slum(next_coord.node,
-+ &next_lock,
-+ scan->direction,
-+ ZNODE_WRITE_LOCK,
-+ 1 /* check dirty */,
-+ 0 /* don't go though upper
-+ levels */);
-+ if (ret == -E_NO_NEIGHBOR) {
-+ scan->stop = 1;
-+ ret = 0;
-+ break;
-+ }
-+
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ ret = incr_load_count_znode(&next_load, next_lock.node);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ coord_init_sideof_unit(&next_coord, next_lock.node,
-+ sideof_reverse(scan->direction));
-+ }
-+
-+ iplug = item_plugin_by_coord(&next_coord);
-+
-+ /* Get the next child. */
-+ ret =
-+ iplug->f.utmost_child(&next_coord,
-+ sideof_reverse(scan->direction),
-+ &child);
-+ if (ret != 0)
-+ goto exit;
-+ /* If the next child is not in memory, or, item_utmost_child
-+ failed (due to race with unlink, most probably), stop
-+ here. */
-+ if (child == NULL || IS_ERR(child)) {
-+ scan->stop = 1;
-+ checkchild(scan);
-+ break;
-+ }
-+
-+ assert("nikita-2374", jnode_is_unformatted(child)
-+ || jnode_is_znode(child));
-+
-+ /* See if it is dirty, part of the same atom. */
-+ if (!reiser4_scan_goto(scan, child)) {
-+ checkchild(scan);
-+ break;
-+ }
-+
-+ /* If so, make this child current. */
-+ ret = scan_set_current(scan, child, 1, &next_coord);
-+ if (ret != 0)
-+ goto exit;
-+
-+ /* Now continue. If formatted we release the parent lock and return, then
-+ proceed. */
-+ if (jnode_is_znode(child))
-+ break;
-+
-+ /* Otherwise, repeat the above loop with next_coord. */
-+ if (next_load.node != NULL) {
-+ done_lh(&scan->parent_lock);
-+ move_lh(&scan->parent_lock, &next_lock);
-+ move_load_count(&scan->parent_load, &next_load);
-+ }
-+ }
-+
-+ assert("jmacd-6233",
-+ reiser4_scan_finished(scan) || jnode_is_znode(scan->node));
-+ exit:
-+ checkchild(scan);
-+ race: /* skip the above check */
-+ if (jnode_is_znode(scan->node)) {
-+ done_lh(&scan->parent_lock);
-+ done_load_count(&scan->parent_load);
-+ }
-+
-+ done_load_count(&next_load);
-+ done_lh(&next_lock);
-+ return ret;
-+}
-+
-+/* FLUSH POS HELPERS */
-+
-+/* Initialize the fields of a flush_position. */
-+static void pos_init(flush_pos_t * pos)
-+{
-+ memset(pos, 0, sizeof *pos);
-+
-+ pos->state = POS_INVALID;
-+ coord_init_invalid(&pos->coord, NULL);
-+ init_lh(&pos->lock);
-+ init_load_count(&pos->load);
-+
-+ reiser4_blocknr_hint_init(&pos->preceder);
-+}
-+
-+/* The flush loop inside squalloc periodically checks pos_valid to
-+ determine when "enough flushing" has been performed. This will return true until one
-+ of the following conditions is met:
-+
-+ 1. the number of flush-queued nodes has reached the kernel-supplied "int *nr_to_flush"
-+ parameter, meaning we have flushed as many blocks as the kernel requested. When
-+ flushing to commit, this parameter is NULL.
-+
-+ 2. pos_stop() is called because squalloc discovers that the "next" node in the
-+ flush order is either non-existant, not dirty, or not in the same atom.
-+*/
-+
-+static int pos_valid(flush_pos_t * pos)
-+{
-+ return pos->state != POS_INVALID;
-+}
-+
-+/* Release any resources of a flush_position. Called when jnode_flush finishes. */
-+static void pos_done(flush_pos_t * pos)
-+{
-+ pos_stop(pos);
-+ reiser4_blocknr_hint_done(&pos->preceder);
-+ if (convert_data(pos))
-+ free_convert_data(pos);
-+}
-+
-+/* Reset the point and parent. Called during flush subroutines to terminate the
-+ squalloc loop. */
-+static int pos_stop(flush_pos_t * pos)
-+{
-+ pos->state = POS_INVALID;
-+ done_lh(&pos->lock);
-+ done_load_count(&pos->load);
-+ coord_init_invalid(&pos->coord, NULL);
-+
-+ if (pos->child) {
-+ jput(pos->child);
-+ pos->child = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Return the flush_position's block allocator hint. */
-+reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos)
-+{
-+ return &pos->preceder;
-+}
-+
-+flush_queue_t * reiser4_pos_fq(flush_pos_t * pos)
-+{
-+ return pos->fq;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 90
-+ LocalWords: preceder
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/flush.h linux-2.6.24/fs/reiser4/flush.h
---- linux-2.6.24.orig/fs/reiser4/flush.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/flush.h 2008-01-25 11:39:06.924204598 +0300
-@@ -0,0 +1,290 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* DECLARATIONS: */
-+
-+#if !defined(__REISER4_FLUSH_H__)
-+#define __REISER4_FLUSH_H__
-+
-+#include "plugin/cluster.h"
-+
-+/* The flush_scan data structure maintains the state of an in-progress flush-scan on a
-+ single level of the tree. A flush-scan is used for counting the number of adjacent
-+ nodes to flush, which is used to determine whether we should relocate, and it is also
-+ used to find a starting point for flush. A flush-scan object can scan in both right
-+ and left directions via the scan_left() and scan_right() interfaces. The
-+ right- and left-variations are similar but perform different functions. When scanning
-+ left we (optionally perform rapid scanning and then) longterm-lock the endpoint node.
-+ When scanning right we are simply counting the number of adjacent, dirty nodes. */
-+struct flush_scan {
-+
-+ /* The current number of nodes scanned on this level. */
-+ unsigned count;
-+
-+ /* There may be a maximum number of nodes for a scan on any single level. When
-+ going leftward, max_count is determined by FLUSH_SCAN_MAXNODES (see reiser4.h) */
-+ unsigned max_count;
-+
-+ /* Direction: Set to one of the sideof enumeration: { LEFT_SIDE, RIGHT_SIDE }. */
-+ sideof direction;
-+
-+ /* Initially @stop is set to false then set true once some condition stops the
-+ search (e.g., we found a clean node before reaching max_count or we found a
-+ node belonging to another atom). */
-+ int stop;
-+
-+ /* The current scan position. If @node is non-NULL then its reference count has
-+ been incremented to reflect this reference. */
-+ jnode *node;
-+
-+ /* A handle for zload/zrelse of current scan position node. */
-+ load_count node_load;
-+
-+ /* During left-scan, if the final position (a.k.a. endpoint node) is formatted the
-+ node is locked using this lock handle. The endpoint needs to be locked for
-+ transfer to the flush_position object after scanning finishes. */
-+ lock_handle node_lock;
-+
-+ /* When the position is unformatted, its parent, coordinate, and parent
-+ zload/zrelse handle. */
-+ lock_handle parent_lock;
-+ coord_t parent_coord;
-+ load_count parent_load;
-+
-+ /* The block allocator preceder hint. Sometimes flush_scan determines what the
-+ preceder is and if so it sets it here, after which it is copied into the
-+ flush_position. Otherwise, the preceder is computed later. */
-+ reiser4_block_nr preceder_blk;
-+};
-+
-+struct convert_item_info {
-+ dc_item_stat d_cur; /* disk cluster state of the current item */
-+ dc_item_stat d_next; /* disk cluster state of the next slum item */
-+ int cluster_shift; /* disk cluster shift */
-+ flow_t flow; /* disk cluster data */
-+};
-+
-+struct convert_info {
-+ int count; /* for squalloc terminating */
-+ item_plugin *iplug; /* current item plugin */
-+ struct convert_item_info *itm; /* current item info */
-+ struct cluster_handle clust; /* transform cluster */
-+};
-+
-+typedef enum flush_position_state {
-+ POS_INVALID, /* Invalid or stopped pos, do not continue slum
-+ * processing */
-+ POS_ON_LEAF, /* pos points to already prepped, locked formatted node at
-+ * leaf level */
-+ POS_ON_EPOINT, /* pos keeps a lock on twig level, "coord" field is used
-+ * to traverse unformatted nodes */
-+ POS_TO_LEAF, /* pos is being moved to leaf level */
-+ POS_TO_TWIG, /* pos is being moved to twig level */
-+ POS_END_OF_TWIG, /* special case of POS_ON_TWIG, when coord is after
-+ * rightmost unit of the current twig */
-+ POS_ON_INTERNAL /* same as POS_ON_LEAF, but points to internal node */
-+} flushpos_state_t;
-+
-+/* An encapsulation of the current flush point and all the parameters that are passed
-+ through the entire squeeze-and-allocate stage of the flush routine. A single
-+ flush_position object is constructed after left- and right-scanning finishes. */
-+struct flush_position {
-+ flushpos_state_t state;
-+
-+ coord_t coord; /* coord to traverse unformatted nodes */
-+ lock_handle lock; /* current lock we hold */
-+ load_count load; /* load status for current locked formatted node */
-+
-+ jnode *child; /* for passing a reference to unformatted child
-+ * across pos state changes */
-+
-+ reiser4_blocknr_hint preceder; /* The flush 'hint' state. */
-+ int leaf_relocate; /* True if enough leaf-level nodes were
-+ * found to suggest a relocate policy. */
-+ int alloc_cnt; /* The number of nodes allocated during squeeze and allococate. */
-+ int prep_or_free_cnt; /* The number of nodes prepared for write (allocate) or squeezed and freed. */
-+ flush_queue_t *fq;
-+ long *nr_written; /* number of nodes submitted to disk */
-+ int flags; /* a copy of jnode_flush flags argument */
-+
-+ znode *prev_twig; /* previous parent pointer value, used to catch
-+ * processing of new twig node */
-+ struct convert_info *sq; /* convert info */
-+
-+ unsigned long pos_in_unit; /* for extents only. Position
-+ within an extent unit of first
-+ jnode of slum */
-+ long nr_to_write; /* number of unformatted nodes to handle on flush */
-+};
-+
-+static inline int item_convert_count(flush_pos_t * pos)
-+{
-+ return pos->sq->count;
-+}
-+static inline void inc_item_convert_count(flush_pos_t * pos)
-+{
-+ pos->sq->count++;
-+}
-+static inline void set_item_convert_count(flush_pos_t * pos, int count)
-+{
-+ pos->sq->count = count;
-+}
-+static inline item_plugin *item_convert_plug(flush_pos_t * pos)
-+{
-+ return pos->sq->iplug;
-+}
-+
-+static inline struct convert_info *convert_data(flush_pos_t * pos)
-+{ /* convert info attached to @pos; callers test the result against NULL */
-+ return pos->sq;
-+}
-+
-+static inline struct convert_item_info *item_convert_data(flush_pos_t * pos)
-+{ /* per-item part of the convert info; @pos must have convert info attached */
-+ assert("edward-955", convert_data(pos));
-+ return pos->sq->itm;
-+}
-+
-+static inline struct tfm_cluster * tfm_cluster_sq(flush_pos_t * pos)
-+{ /* address of the transform cluster embedded in the convert info */
-+ return &pos->sq->clust.tc;
-+}
-+
-+static inline struct tfm_stream * tfm_stream_sq(flush_pos_t * pos,
-+ tfm_stream_id id)
-+{ /* stream @id of that transform cluster, looked up via get_tfm_stream() */
-+ assert("edward-854", pos->sq != NULL);
-+ return get_tfm_stream(tfm_cluster_sq(pos), id);
-+}
-+
-+static inline int chaining_data_present(flush_pos_t * pos)
-+{ /* non-zero only when both the convert info and its item part exist */
-+ return convert_data(pos) != NULL && item_convert_data(pos) != NULL;
-+}
-+
-+/* Tells whether the next item of the disk cluster lives in the next node,
-+ in which case the item convert data has to travel to the right slum
-+ neighbor. */
-+static inline int should_chain_next_node(flush_pos_t * pos)
-+{
-+ assert("edward-1007", chaining_data_present(pos));
-+
-+ switch (item_convert_data(pos)->d_next) {
-+ case DC_CHAINED_ITEM:
-+ /* the next item is chained: report true */
-+ return 1;
-+ case DC_AFTER_CLUSTER:
-+ /* nothing to chain */
-+ return 0;
-+ default:
-+ impossible("edward-1009", "bad state of next slum item");
-+ /* only reached if impossible() returns */
-+ return 0;
-+ }
-+}
-+
-+/* update item state in a disk cluster to assign conversion mode */
-+static inline void
-+move_chaining_data(flush_pos_t * pos, int this_node /* where is next item */ )
-+{
-+ struct convert_item_info *info;
-+
-+ assert("edward-1010", chaining_data_present(pos));
-+ info = item_convert_data(pos);
-+
-+ if (this_node == 0) {
-+ /* next item is on the right neighbor */
-+ assert("edward-1011",
-+ info->d_cur == DC_FIRST_ITEM ||
-+ info->d_cur == DC_CHAINED_ITEM);
-+ assert("edward-1012", info->d_next == DC_CHAINED_ITEM);
-+ info->d_cur = DC_CHAINED_ITEM;
-+ } else {
-+ /* next item is on the same node */
-+ assert("edward-1013",
-+ info->d_cur == DC_FIRST_ITEM ||
-+ info->d_cur == DC_CHAINED_ITEM);
-+ assert("edward-1227",
-+ info->d_next == DC_AFTER_CLUSTER ||
-+ info->d_next == DC_INVALID_STATE);
-+ info->d_cur = DC_AFTER_CLUSTER;
-+ }
-+
-+ /* both branches leave the "next" state invalidated */
-+ info->d_next = DC_INVALID_STATE;
-+}
-+
-+static inline int should_convert_node(flush_pos_t * pos, znode * node)
-+{ /* decision rests on @node alone; @pos is accepted but not consulted */
-+ return znode_convertible(node);
-+}
-+
-+/* true if there is attached convert item info */
-+static inline int should_convert_next_node(flush_pos_t * pos)
-+{
-+ return chaining_data_present(pos);
-+}
-+
-+#define SQUALLOC_THRESHOLD 256
-+/* true when convert info exists, no item data is attached and the count reached the threshold */
-+static inline int should_terminate_squalloc(flush_pos_t * pos)
-+{
-+ if (!convert_data(pos) || item_convert_data(pos))
-+ return 0;
-+ return item_convert_count(pos) >= SQUALLOC_THRESHOLD;
-+}
-+
-+#if 1 /* diagnostic below is compiled in unconditionally */
-+#define check_convert_info(pos) \
-+do { \
-+ if (unlikely(should_convert_next_node(pos))){ \
-+ warning("edward-1006", "unprocessed chained data"); \
-+ printk("d_cur = %d, d_next = %d, flow.len = %llu\n", \
-+ item_convert_data(pos)->d_cur, \
-+ item_convert_data(pos)->d_next, \
-+ item_convert_data(pos)->flow.length); \
-+ } \
-+} while (0)
-+#else
-+#define check_convert_info(pos)
-+#endif /* 1 -- NOTE(review): old label said REISER4_DEBUG; confirm which guard is intended */
-+
-+void free_convert_data(flush_pos_t * pos);
-+/* used in extent.c */
-+int scan_set_current(flush_scan * scan, jnode * node, unsigned add_size,
-+ const coord_t * parent);
-+int reiser4_scan_finished(flush_scan * scan);
-+int reiser4_scanning_left(flush_scan * scan);
-+int reiser4_scan_goto(flush_scan * scan, jnode * tonode);
-+txn_atom *atom_locked_by_fq(flush_queue_t * fq);
-+int reiser4_alloc_extent(flush_pos_t *flush_pos);
-+squeeze_result squalloc_extent(znode *left, const coord_t *, flush_pos_t *,
-+ reiser4_key *stop_key);
-+extern int reiser4_init_fqs(void);
-+extern void reiser4_done_fqs(void);
-+
-+#if REISER4_DEBUG
-+
-+extern void reiser4_check_fq(const txn_atom *atom);
-+extern atomic_t flush_cnt;
-+
-+#define check_preceder(blk) /* debug check: blk must lie below the fs block count; caller supplies the ';' */ \
-+assert("nikita-2588", (blk) < reiser4_block_count(reiser4_get_current_sb()))
-+extern void check_pos(flush_pos_t * pos);
-+#else
-+#define check_preceder(b) noop
-+#define check_pos(pos) noop
-+#endif
-+
-+/* __REISER4_FLUSH_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 90
-+ LocalWords: preceder
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/flush_queue.c linux-2.6.24/fs/reiser4/flush_queue.c
---- linux-2.6.24.orig/fs/reiser4/flush_queue.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/flush_queue.c 2008-01-25 11:54:46.665843146 +0300
-@@ -0,0 +1,674 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "vfs_ops.h"
-+#include "writeout.h"
-+#include "flush.h"
-+
-+#include <linux/bio.h>
-+#include <linux/mm.h>
-+#include <linux/pagemap.h>
-+#include <linux/blkdev.h>
-+#include <linux/writeback.h>
-+
-+/* A flush queue object is an accumulator for keeping jnodes prepared
-+ by the jnode_flush() function for writing to disk. Those "queued" jnodes are
-+ kept on the flush queue until memory pressure or atom commit asks
-+ flush queues to write some or all of their jnodes. */
-+
-+/*
-+ LOCKING:
-+
-+ fq->guard spin lock protects fq->atom pointer and nothing else. fq->prepped
-+ list protected by atom spin lock. fq->prepped list uses the following
-+ locking:
-+
-+ two ways to protect fq->prepped list for read-only list traversal:
-+
-+ 1. atom is spin-locked.
-+ 2. fq is IN_USE, atom->nr_running_queues increased.
-+
-+ and one for list modification:
-+
-+ 1. atom is spin-locked and one condition is true: fq is IN_USE or
-+ atom->nr_running_queues == 0.
-+
-+ The deadlock-safe order for flush queues and atoms is: first lock atom, then
-+ lock flush queue, then lock jnode.
-+*/
-+
-+#define fq_in_use(fq) ((fq)->state & FQ_IN_USE) /* non-zero while FQ_IN_USE is set */
-+#define fq_ready(fq) (!fq_in_use(fq)) /* 1 when FQ_IN_USE is clear */
-+/* setters for the same flag; neither takes any lock itself */
-+#define mark_fq_in_use(fq) do { (fq)->state |= FQ_IN_USE; } while (0)
-+#define mark_fq_ready(fq) do { (fq)->state &= ~FQ_IN_USE; } while (0)
-+
-+/* Lock and return the atom @fq is attached to; caller holds fq->guard. Returns NULL, with no atom locked, if fq->atom is NULL. */
-+static txn_atom *atom_locked_by_fq_nolock(flush_queue_t * fq)
-+{
-+ /* This code is similar to jnode_get_atom(), look at it for the
-+ * explanation. */
-+ txn_atom *atom;
-+
-+ assert_spin_locked(&(fq->guard));
-+
-+ while (1) {
-+ atom = fq->atom;
-+ if (atom == NULL)
-+ break;
-+ /* fast path: atom lock is free, take it without dropping fq->guard */
-+ if (spin_trylock_atom(atom))
-+ break;
-+ /* slow path: pin the atom, drop fq->guard, then lock atom before fq */
-+ atomic_inc(&atom->refcount);
-+ spin_unlock(&(fq->guard));
-+ spin_lock_atom(atom);
-+ spin_lock(&(fq->guard));
-+ /* fq still points at the atom we locked: drop the pin and finish */
-+ if (fq->atom == atom) {
-+ atomic_dec(&atom->refcount);
-+ break;
-+ }
-+ /* fq->atom changed while fq->guard was released: let go and retry */
-+ spin_unlock(&(fq->guard));
-+ atom_dec_and_unlock(atom);
-+ spin_lock(&(fq->guard));
-+ }
-+
-+ return atom;
-+}
-+
-+txn_atom *atom_locked_by_fq(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+ /* fq->guard is held only for the lookup; on return just the atom (if any) stays locked */
-+ spin_lock(&(fq->guard));
-+ atom = atom_locked_by_fq_nolock(fq);
-+ spin_unlock(&(fq->guard));
-+ return atom;
-+}
-+
-+static void init_fq(flush_queue_t * fq)
-+{
-+ /* start from an all-zero object, then set up the embedded primitives */
-+ memset(fq, 0, sizeof(*fq));
-+
-+ spin_lock_init(&fq->guard);
-+ init_waitqueue_head(&fq->wait);
-+ INIT_LIST_HEAD(ATOM_FQ_LIST(fq));
-+
-+ atomic_set(&fq->nr_submitted, 0);
-+}
-+
-+/* slab for flush queues */
-+static struct kmem_cache *fq_slab;
-+
-+/**
-+ * reiser4_init_fqs - set up the slab cache that flush queues come from
-+ *
-+ * Called as part of reiser4 module initialization. Returns 0 when the "fq"
-+ * cache was created and -ENOMEM when kmem_cache_create() failed.
-+ */
-+int reiser4_init_fqs(void)
-+{
-+ fq_slab = kmem_cache_create("fq", sizeof(flush_queue_t), 0,
-+ SLAB_HWCACHE_ALIGN, NULL);
-+ if (fq_slab != NULL)
-+ return 0;
-+ /* creation of the cache itself failed */
-+ return RETERR(-ENOMEM);
-+}
-+
-+/**
-+ * reiser4_done_fqs - delete flush queue cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_fqs(void)
-+{
-+ destroy_reiser4_cache(&fq_slab);
-+}
-+
-+/* Allocate a flush queue from fq_slab and initialize it; NULL if allocation fails. */
-+static flush_queue_t *create_fq(gfp_t gfp)
-+{
-+ flush_queue_t *queue = kmem_cache_alloc(fq_slab, gfp);
-+
-+ if (queue == NULL)
-+ return NULL;
-+
-+ init_fq(queue);
-+ return queue;
-+}
-+
-+/* Debug-only accounting: a node entered @fq, bump the owning atom's num_queued. */
-+static void count_enqueued_node(flush_queue_t * fq)
-+{
-+ ON_DEBUG(fq->atom->num_queued++);
-+}
-+/* Debug-only accounting: a node left @fq, so num_queued must still be positive. */
-+static void count_dequeued_node(flush_queue_t * fq)
-+{
-+ assert("zam-993", fq->atom->num_queued > 0);
-+ ON_DEBUG(fq->atom->num_queued--);
-+}
-+
-+/* Link @fq into @atom's list of flush queues and record the owner; the atom lock must be held. */
-+static void attach_fq(txn_atom *atom, flush_queue_t *fq)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ list_add(&fq->alink, &atom->flush_queues);
-+ fq->atom = atom; /* NOTE(review): written without fq->guard, unlike detach_fq() -- confirm this is intended */
-+ ON_DEBUG(atom->nr_flush_queues++);
-+}
-+
-+static void detach_fq(flush_queue_t * fq)
-+{
-+ assert_spin_locked(&(fq->atom->alock));
-+ /* inverse of attach_fq(); fq->guard covers the fq->atom update */
-+ spin_lock(&(fq->guard));
-+ list_del_init(&fq->alink);
-+ assert("vs-1456", fq->atom->nr_flush_queues > 0);
-+ ON_DEBUG(fq->atom->nr_flush_queues--);
-+ fq->atom = NULL;
-+ spin_unlock(&(fq->guard));
-+}
-+
-+/* Return @fq to fq_slab; it must hold no prepped nodes and have no writes in flight. */
-+static void done_fq(flush_queue_t * fq)
-+{
-+ assert("zam-763", list_empty_careful(ATOM_FQ_LIST(fq)));
-+ assert("zam-766", atomic_read(&fq->nr_submitted) == 0);
-+
-+ kmem_cache_free(fq_slab, fq);
-+}
-+
-+/* Set JNODE_FLUSH_QUEUED on @node and account for it on @fq's atom. */
-+static void mark_jnode_queued(flush_queue_t * fq, jnode * node)
-+{
-+ JF_SET(node, JNODE_FLUSH_QUEUED);
-+ count_enqueued_node(fq);
-+}
-+
-+/* Put @node at the tail of @fq. The node and its atom must be spin-locked;
-+ @fq must be in use and attached to the same atom as @node. */
-+void queue_jnode(flush_queue_t * fq, jnode * node)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-713", node->atom != NULL);
-+ assert_spin_locked(&(node->atom->alock));
-+ assert("zam-716", fq->atom != NULL);
-+ assert("zam-717", fq->atom == node->atom);
-+ assert("zam-907", fq_in_use(fq));
-+ /* node state: JNODE_DIRTY and JNODE_RELOC set, not queued yet */
-+ assert("zam-714", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-826", JF_ISSET(node, JNODE_RELOC));
-+ assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("vs-1481", NODE_LIST(node) != FQ_LIST);
-+ /* flag the node, then move it from its current list to the fq tail */
-+ mark_jnode_queued(fq, node);
-+ list_move_tail(&node->capture_link, ATOM_FQ_LIST(fq));
-+ /* debug bookkeeping of which atom list the node is on */
-+ ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
-+ FQ_LIST, 1));
-+}
-+
-+/* One step of waiting for @fq's writes. Returns 0 with the atom still locked once nothing is in flight (fq->nr_errors is added to *nr_io_errors); otherwise unlocks the atom, waits, and returns -E_REPEAT. */
-+static int wait_io(flush_queue_t * fq, int *nr_io_errors)
-+{
-+ assert("zam-738", fq->atom != NULL);
-+ assert_spin_locked(&(fq->atom->alock));
-+ assert("zam-736", fq_in_use(fq));
-+ assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq)));
-+ /* writes still outstanding: we are about to sleep */
-+ if (atomic_read(&fq->nr_submitted) != 0) {
-+ struct super_block *super;
-+ /* the atom spin lock cannot be held across the wait below */
-+ spin_unlock_atom(fq->atom);
-+
-+ assert("nikita-3013", reiser4_schedulable());
-+
-+ super = reiser4_get_current_sb();
-+
-+ /* FIXME: this is instead of blk_run_queues() */
-+ blk_run_address_space(reiser4_get_super_fake(super)->i_mapping);
-+ /* on a read-only mount the wait is skipped; -E_REPEAT is returned all the same */
-+ if (!(super->s_flags & MS_RDONLY))
-+ wait_event(fq->wait, atomic_read(&fq->nr_submitted) == 0);
-+
-+ /* Ask the caller to re-acquire the locks and call this
-+ function again. Note: this technique is commonly used in
-+ the txnmgr code. */
-+ return -E_REPEAT;
-+ }
-+ /* nothing in flight: report the errors counted by the completion handler */
-+ *nr_io_errors += atomic_read(&fq->nr_errors);
-+ return 0;
-+}
-+
-+/* Wait for @fq's I/O; once it is complete, detach @fq from its atom, free it and signal the atom. Nothing is re-submitted here. A non-zero result comes straight from wait_io(). */
-+static int finish_fq(flush_queue_t * fq, int *nr_io_errors)
-+{
-+ int ret;
-+ txn_atom *atom = fq->atom;
-+
-+ assert("zam-801", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("zam-762", fq_in_use(fq));
-+
-+ ret = wait_io(fq, nr_io_errors);
-+ if (ret)
-+ return ret;
-+ /* I/O is complete and the atom is still locked: @fq can go away */
-+ detach_fq(fq);
-+ done_fq(fq);
-+ /* @atom was saved above because fq must not be touched after done_fq() */
-+ reiser4_atom_send_event(atom);
-+
-+ return 0;
-+}
-+
-+/* Do one step of waiting for all I/O of @atom: finish the first idle flush queue and return -E_REPEAT (atom unlocked) if more steps may be needed.
-+ Returns 0 when the atom has no flush queues and -EBUSY when every queue is in use; the atom stays locked in both of these cases. */
-+static int finish_all_fq(txn_atom * atom, int *nr_io_errors)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(atom->alock));
-+
-+ if (list_empty_careful(&atom->flush_queues))
-+ return 0;
-+
-+ list_for_each_entry(fq, &atom->flush_queues, alink) {
-+ if (fq_ready(fq)) {
-+ int ret;
-+ /* claim the queue for this thread before finishing it */
-+ mark_fq_in_use(fq);
-+ assert("vs-1247", fq->owner == NULL);
-+ ON_DEBUG(fq->owner = current);
-+ ret = finish_fq(fq, nr_io_errors);
-+
-+ if (*nr_io_errors)
-+ reiser4_handle_error();
-+ /* finish_fq() did not free fq in this case, so release our claim on it */
-+ if (ret) {
-+ reiser4_fq_put(fq);
-+ return ret;
-+ }
-+ /* fq was freed by finish_fq(); the list walk must not continue */
-+ spin_unlock_atom(atom);
-+
-+ return -E_REPEAT;
-+ }
-+ }
-+
-+ /* All flush queues are in use; atom remains locked */
-+ return -EBUSY;
-+}
-+
-+/* Wait for all I/O of the current atom's flush queues. Returns 0 on success, -EIO if any write error was counted, or another error passed up from finish_all_fq(). */
-+int current_atom_finish_all_fq(void)
-+{
-+ txn_atom *atom;
-+ int nr_io_errors = 0;
-+ int ret = 0;
-+ /* outer loop: repeat while a step asks for it; inner loop: sit out -EBUSY */
-+ do {
-+ while (1) {
-+ atom = get_current_atom_locked();
-+ ret = finish_all_fq(atom, &nr_io_errors);
-+ if (ret != -EBUSY)
-+ break;
-+ reiser4_atom_wait_event(atom);
-+ }
-+ } while (ret == -E_REPEAT);
-+
-+ /* we do not need locked atom after this function finishes, SUCCESS or
-+ -EBUSY are two return codes when atom remains locked after
-+ finish_all_fq */
-+ if (!ret)
-+ spin_unlock_atom(atom);
-+
-+ assert_spin_not_locked(&(atom->alock));
-+
-+ if (ret)
-+ return ret;
-+ /* every queue finished, but some write reported an error */
-+ if (nr_io_errors)
-+ return RETERR(-EIO);
-+
-+ return 0;
-+}
-+
-+/* Point every jnode linked on @list (through capture_link) at @atom. */
-+static void
-+scan_fq_and_update_atom_ref(struct list_head *list, txn_atom *atom)
-+{
-+ jnode *cur;
-+ /* each node's own spin lock is taken around the pointer update */
-+ list_for_each_entry(cur, list, capture_link) {
-+ spin_lock_jnode(cur);
-+ cur->atom = atom;
-+ spin_unlock_jnode(cur);
-+ }
-+}
-+
-+/* Atom fusion support: hand all flush queues of @from, and the jnodes on them, over to @to. Both atoms must be spin-locked. */
-+void reiser4_fuse_fq(txn_atom *to, txn_atom *from)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(to->alock));
-+ assert_spin_locked(&(from->alock));
-+ /* re-point the queued jnodes first, then the queue itself under fq->guard */
-+ list_for_each_entry(fq, &from->flush_queues, alink) {
-+ scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to);
-+ spin_lock(&(fq->guard));
-+ fq->atom = to;
-+ spin_unlock(&(fq->guard));
-+ }
-+ /* append the whole list to the tail of @to's list; @from's list ends up empty */
-+ list_splice_init(&from->flush_queues, to->flush_queues.prev);
-+
-+#if REISER4_DEBUG
-+ to->num_queued += from->num_queued;
-+ to->nr_flush_queues += from->nr_flush_queues;
-+ from->nr_flush_queues = 0;
-+#endif
-+}
-+
-+#if REISER4_DEBUG
-+int atom_fq_parts_are_clean(txn_atom * atom)
-+{ /* debug build only: non-zero when @atom has no flush queues attached */
-+ assert("zam-915", atom != NULL);
-+ return list_empty_careful(&atom->flush_queues);
-+}
-+#endif
-+/* Bio i/o completion routine for reiser4 write operations. */
-+static void
-+end_io_handler(struct bio *bio, int err)
-+{
-+ int i;
-+ int nr_errors = 0;
-+ flush_queue_t *fq;
-+
-+ assert("zam-958", bio->bi_rw & WRITE);
-+
-+ if (err == -EOPNOTSUPP)
-+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-+
-+ /* we expect that bio->bi_private is set to NULL or fq object which is
-+ * used for synchronization and error counting. */
-+ fq = bio->bi_private;
-+ /* Check all elements of io_vec for correct write completion. */
-+ for (i = 0; i < bio->bi_vcnt; i += 1) {
-+ struct page *pg = bio->bi_io_vec[i].bv_page;
-+ /* BIO_UPTODATE is a per-bio flag: when clear, every page of the bio is marked */
-+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+ SetPageError(pg);
-+ nr_errors++;
-+ }
-+
-+ {
-+ /* jnode WRITEBACK ("write is in progress bit") is
-+ * atomically cleared here. */
-+ jnode *node;
-+ /* NOTE(review): pg may already have been used by SetPageError() above */
-+ assert("zam-736", pg != NULL);
-+ assert("zam-736", PagePrivate(pg));
-+ node = jprivate(pg);
-+
-+ JF_CLR(node, JNODE_WRITEBACK);
-+ }
-+ /* this page is done: end its writeback and drop a page reference */
-+ end_page_writeback(pg);
-+ page_cache_release(pg);
-+ }
-+
-+ if (fq) {
-+ /* count i/o error in fq object */
-+ atomic_add(nr_errors, &fq->nr_errors);
-+
-+ /* If all write requests registered in this "fq" are done we up
-+ * the waiter. */
-+ if (atomic_sub_and_test(bio->bi_vcnt, &fq->nr_submitted))
-+ wake_up(&fq->wait);
-+ }
-+
-+ bio_put(bio);
-+}
-+
-+/* Install end_io_handler() as @bio's completion routine and, when @fq is not
-+ NULL, add the bio's bi_vcnt to @fq's count of submitted requests. */
-+void add_fq_to_bio(flush_queue_t * fq, struct bio *bio)
-+{
-+ bio->bi_private = fq;
-+ bio->bi_end_io = end_io_handler;
-+ /* end_io_handler() subtracts the same bi_vcnt when the bio completes */
-+ if (fq)
-+ atomic_add(bio->bi_vcnt, &fq->nr_submitted);
-+}
-+
-+/* Move all queued nodes out of @fq's list back to the atom's dirty or clean lists, then drop the atom's nr_running_queues by one. */
-+static void release_prepped_list(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+
-+ assert("zam-904", fq_in_use(fq));
-+ atom = atom_locked_by_fq(fq);
-+ /* NOTE(review): atom is used unchecked below; assumes fq is still attached */
-+ while (!list_empty(ATOM_FQ_LIST(fq))) {
-+ jnode *cur;
-+ /* always work on the head of the queue */
-+ cur = list_entry(ATOM_FQ_LIST(fq)->next, jnode, capture_link);
-+ list_del_init(&cur->capture_link);
-+
-+ count_dequeued_node(fq);
-+ spin_lock_jnode(cur);
-+ assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR));
-+ assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC));
-+ assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED));
-+ JF_CLR(cur, JNODE_FLUSH_QUEUED);
-+ /* still-dirty nodes return to the dirty list of their level, others to the clean list */
-+ if (JF_ISSET(cur, JNODE_DIRTY)) {
-+ list_add_tail(&cur->capture_link,
-+ ATOM_DIRTY_LIST(atom, jnode_get_level(cur)));
-+ ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
-+ DIRTY_LIST, 1));
-+ } else {
-+ list_add_tail(&cur->capture_link, ATOM_CLEAN_LIST(atom));
-+ ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
-+ CLEAN_LIST, 1));
-+ }
-+
-+ spin_unlock_jnode(cur);
-+ }
-+ /* undo the increment made in reiser4_write_fq(); signal the atom when the count reaches zero */
-+ if (--atom->nr_running_queues == 0)
-+ reiser4_atom_send_event(atom);
-+
-+ spin_unlock_atom(atom);
-+}
-+
-+/* Submit write requests for nodes on the already filled flush queue @fq.
-+ @fq: flush queue object which contains jnodes we can (and will) write.
-+ @nr_submitted, @flags: handed on to write_jnode_list() unchanged.
-+ @return: the result of write_jnode_list() -- NOTE(review): described before
-+ as number of submitted blocks (>=0) or an error code (<0); confirm. */
-+int reiser4_write_fq(flush_queue_t * fq, long *nr_submitted, int flags)
-+{
-+ int ret;
-+ txn_atom *atom;
-+
-+ while (1) {
-+ atom = atom_locked_by_fq(fq);
-+ assert("zam-924", atom);
-+ /* do not write fq in parallel. */
-+ if (atom->nr_running_queues == 0
-+ || !(flags & WRITEOUT_SINGLE_STREAM))
-+ break;
-+ reiser4_atom_wait_event(atom);
-+ }
-+ /* register as a running queue; release_prepped_list() decrements it again */
-+ atom->nr_running_queues++;
-+ spin_unlock_atom(atom);
-+ /* the write itself happens without the atom lock held */
-+ ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags);
-+ release_prepped_list(fq);
-+
-+ return ret;
-+}
-+
-+/* Hand out a flush queue of @atom for exclusive use by the calling thread.
-+
-+ The caller holds the atom lock; this function never takes it, because the
-+ way to reach the atom differs between callers. Return values:
-+ 0 - *new_fq is an in-use queue attached to @atom, atom still locked;
-+ -E_REPEAT - atom was unlocked and a spare queue was allocated into *new_fq,
-+ re-lock the atom and call again; -ENOMEM - that allocation failed. */
-+static int fq_by_atom_gfp(txn_atom *atom, flush_queue_t **new_fq, gfp_t gfp)
-+{
-+ flush_queue_t *fq;
-+
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* first choice: an already attached queue that nobody is using */
-+ list_for_each_entry(fq, &atom->flush_queues, alink) {
-+ spin_lock(&(fq->guard));
-+ if (!fq_ready(fq)) {
-+ spin_unlock(&(fq->guard));
-+ continue;
-+ }
-+
-+ mark_fq_in_use(fq);
-+ assert("vs-1246", fq->owner == NULL);
-+ ON_DEBUG(fq->owner = current);
-+ spin_unlock(&(fq->guard));
-+
-+ /* a spare allocated on an earlier round is not needed any more */
-+ if (*new_fq)
-+ done_fq(*new_fq);
-+ *new_fq = fq;
-+ return 0;
-+ }
-+
-+ /* no idle queue and no spare either: the allocation may sleep, so
-+ the atom lock is dropped first and the caller has to start over */
-+ if (*new_fq == NULL) {
-+ spin_unlock_atom(atom);
-+
-+ *new_fq = create_fq(gfp);
-+ if (*new_fq == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /* the spare comes back in through *new_fq on the retry */
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ /* Use previously allocated fq object: claim it for this thread and
-+ attach it to the atom, which is still locked at this point */
-+ mark_fq_in_use(*new_fq);
-+ assert("vs-1248", (*new_fq)->owner == 0);
-+ ON_DEBUG((*new_fq)->owner = current);
-+ attach_fq(atom, *new_fq);
-+
-+ return 0;
-+}
-+
-+int reiser4_fq_by_atom(txn_atom * atom, flush_queue_t ** new_fq)
-+{ /* fq_by_atom_gfp() with the gfp mask returned by reiser4_ctx_gfp_mask_get() */
-+ return fq_by_atom_gfp(atom, new_fq, reiser4_ctx_gfp_mask_get());
-+}
-+
-+/* A wrapper around reiser4_fq_by_atom for getting a flush queue object for
-+ current atom. On success fq->atom remains locked; on failure ERR_PTR(error). */
-+flush_queue_t *get_fq_for_current_atom(void)
-+{
-+ flush_queue_t *fq = NULL;
-+ txn_atom *atom;
-+ int ret;
-+ /* every -E_REPEAT round dropped the atom lock: re-take it, keeping the spare in fq */
-+ do {
-+ atom = get_current_atom_locked();
-+ ret = reiser4_fq_by_atom(atom, &fq);
-+ } while (ret == -E_REPEAT);
-+
-+ if (ret)
-+ return ERR_PTR(ret);
-+ return fq;
-+}
-+
-+/* Release @fq after exclusive use by clearing FQ_IN_USE. Takes no locks itself; @fq must be attached to an atom and hold no nodes. */
-+void reiser4_fq_put_nolock(flush_queue_t *fq)
-+{
-+ assert("zam-747", fq->atom != NULL);
-+ assert("zam-902", list_empty_careful(ATOM_FQ_LIST(fq)));
-+ mark_fq_ready(fq);
-+ assert("vs-1245", fq->owner == current);
-+ ON_DEBUG(fq->owner = NULL);
-+}
-+
-+void reiser4_fq_put(flush_queue_t * fq)
-+{
-+ txn_atom *atom;
-+ /* locking variant of reiser4_fq_put_nolock(): takes fq->guard and the atom lock itself */
-+ spin_lock(&(fq->guard));
-+ atom = atom_locked_by_fq_nolock(fq);
-+
-+ assert("zam-746", atom != NULL);
-+ /* mark the queue ready, then signal the atom */
-+ reiser4_fq_put_nolock(fq);
-+ reiser4_atom_send_event(atom);
-+ /* both locks are released before returning */
-+ spin_unlock(&(fq->guard));
-+ spin_unlock_atom(atom);
-+}
-+
-+/* A part of atom object initialization related to the embedded flush queue
-+ list head */
-+
-+void init_atom_fq_parts(txn_atom *atom)
-+{
-+ INIT_LIST_HEAD(&atom->flush_queues);
-+}
-+
-+#if REISER4_DEBUG
-+
-+void reiser4_check_fq(const txn_atom *atom)
-+{
-+ /* check number of nodes on all atom's flush queues */
-+ flush_queue_t *fq;
-+ int count;
-+ struct list_head *pos;
-+
-+ count = 0;
-+ list_for_each_entry(fq, &atom->flush_queues, alink) {
-+ spin_lock(&(fq->guard));
-+ /* calculate number of jnodes on fq's list of prepped jnodes */
-+ list_for_each(pos, ATOM_FQ_LIST(fq))
-+ count++;
-+ spin_unlock(&(fq->guard));
-+ }
-+ if (count != atom->fq)
-+ warning("", "fq counter %d, real %d\n", atom->fq, count);
-+ /* NOTE(review): the warning label is empty, unlike other warning() calls here */
-+}
-+
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/forward.h linux-2.6.24/fs/reiser4/forward.h
---- linux-2.6.24.orig/fs/reiser4/forward.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/forward.h 2008-01-25 11:39:06.928205628 +0300
-@@ -0,0 +1,252 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Forward declarations. Thank you Kernighan. */
-+
-+#if !defined( __REISER4_FORWARD_H__ )
-+#define __REISER4_FORWARD_H__
-+
-+#include <asm/errno.h>
-+#include <linux/types.h>
-+
-+typedef struct zlock zlock;
-+typedef struct lock_stack lock_stack;
-+typedef struct lock_handle lock_handle;
-+typedef struct znode znode;
-+typedef struct flow flow_t;
-+typedef struct coord coord_t;
-+typedef struct tree_access_pointer tap_t;
-+typedef struct reiser4_object_create_data reiser4_object_create_data;
-+typedef union reiser4_plugin reiser4_plugin;
-+typedef __u16 reiser4_plugin_id;
-+typedef __u64 reiser4_plugin_groups;
-+typedef struct item_plugin item_plugin;
-+typedef struct jnode_plugin jnode_plugin;
-+typedef struct reiser4_item_data reiser4_item_data;
-+typedef union reiser4_key reiser4_key;
-+typedef struct reiser4_tree reiser4_tree;
-+typedef struct carry_cut_data carry_cut_data;
-+typedef struct carry_kill_data carry_kill_data;
-+typedef struct carry_tree_op carry_tree_op;
-+typedef struct carry_tree_node carry_tree_node;
-+typedef struct carry_plugin_info carry_plugin_info;
-+typedef struct reiser4_journal reiser4_journal;
-+typedef struct txn_atom txn_atom;
-+typedef struct txn_handle txn_handle;
-+typedef struct txn_mgr txn_mgr;
-+typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc;
-+typedef struct reiser4_context reiser4_context;
-+typedef struct carry_level carry_level;
-+typedef struct blocknr_set_entry blocknr_set_entry;
-+/* super_block->s_fs_info points to this */
-+typedef struct reiser4_super_info_data reiser4_super_info_data;
-+/* next two objects are fields of reiser4_super_info_data */
-+typedef struct reiser4_oid_allocator reiser4_oid_allocator;
-+typedef struct reiser4_space_allocator reiser4_space_allocator;
-+
-+typedef struct flush_scan flush_scan;
-+typedef struct flush_position flush_pos_t;
-+
-+typedef unsigned short pos_in_node_t;
-+#define MAX_POS_IN_NODE 65535
-+
-+typedef struct jnode jnode;
-+typedef struct reiser4_blocknr_hint reiser4_blocknr_hint;
-+
-+typedef struct uf_coord uf_coord_t;
-+typedef struct hint hint_t;
-+
-+typedef struct ktxnmgrd_context ktxnmgrd_context;
-+
-+struct inode;
-+struct page;
-+struct file;
-+struct dentry;
-+struct super_block;
-+
-+/* return values of coord_by_key(). cbk == coord_by_key */
-+typedef enum {
-+ CBK_COORD_FOUND = 0,
-+ CBK_COORD_NOTFOUND = -ENOENT,
-+} lookup_result;
-+
-+/* results of lookup with directory file: 0 or a negated errno value */
-+typedef enum {
-+ FILE_NAME_FOUND = 0,
-+ FILE_NAME_NOTFOUND = -ENOENT,
-+ FILE_IO_ERROR = -EIO, /* FIXME: it seems silly to have special OOM, IO_ERROR return codes for each search. */
-+ FILE_OOM = -ENOMEM /* FIXME: same remark as for FILE_IO_ERROR */
-+} file_lookup_result;
-+
-+/* behaviors of lookup. If coord we are looking for is actually in a tree,
-+ both coincide. */
-+typedef enum {
-+ /* search exactly for the coord with key given */
-+ FIND_EXACT,
-+ /* search for coord with the maximal key not greater than one
-+ given */
-+ FIND_MAX_NOT_MORE_THAN /*LEFT_SLANT_BIAS */
-+} lookup_bias;
-+
-+typedef enum {
-+ /* number of leaf level of the tree
-+ The fake root has (tree_level=0). */
-+ LEAF_LEVEL = 1,
-+
-+ /* number of level one above leaf level of the tree.
-+
-+ It is supposed that internal tree used by reiser4 to store file
-+ system data and meta data will have height 2 initially (when
-+ created by mkfs).
-+ */
-+ TWIG_LEVEL = 2,
-+} tree_level;
-+
-+/* The "real" maximum ztree height is the 0-origin size of any per-level
-+ array, since the zero'th level is not used. */
-+#define REAL_MAX_ZTREE_HEIGHT (REISER4_MAX_ZTREE_HEIGHT-LEAF_LEVEL)
-+
-+/* enumeration of possible mutual position of item and coord. This enum is
-+ return type of ->is_in_item() item plugin method which see. */
-+typedef enum {
-+ /* coord is on the left of an item */
-+ IP_ON_THE_LEFT,
-+ /* coord is inside item */
-+ IP_INSIDE,
-+ /* coord is inside item, but to the right of the rightmost unit of
-+ this item */
-+ IP_RIGHT_EDGE,
-+ /* coord is on the right of an item */
-+ IP_ON_THE_RIGHT
-+} interposition;
-+
-+/* type of lock to acquire on znode before returning it to caller */
-+typedef enum {
-+ ZNODE_NO_LOCK = 0,
-+ ZNODE_READ_LOCK = 1,
-+ ZNODE_WRITE_LOCK = 2,
-+} znode_lock_mode;
-+
-+/* type of lock request */
-+typedef enum {
-+ ZNODE_LOCK_LOPRI = 0,
-+ ZNODE_LOCK_HIPRI = (1 << 0),
-+
-+ /* By setting the ZNODE_LOCK_NONBLOCK flag in a lock request the call to longterm_lock_znode will not sleep
-+ waiting for the lock to become available. If the lock is unavailable, reiser4_znode_lock will immediately
-+ return the value -E_REPEAT. */
-+ ZNODE_LOCK_NONBLOCK = (1 << 1),
-+ /* An option for longterm_lock_znode which prevents atom fusion */
-+ ZNODE_LOCK_DONT_FUSE = (1 << 2)
-+} znode_lock_request;
-+
-+typedef enum { READ_OP = 0, WRITE_OP = 1 } rw_op;
-+
-+/* used to specify direction of shift. These must be -1 and 1 */
-+typedef enum {
-+ SHIFT_LEFT = 1,
-+ SHIFT_RIGHT = -1
-+} shift_direction;
-+
-+typedef enum {
-+ LEFT_SIDE,
-+ RIGHT_SIDE
-+} sideof;
-+
-+#define round_up( value, order ) /* next multiple of order; the mask form is exact only when order is a power of two */ \
-+ ( ( typeof( value ) )( ( ( long ) ( value ) + ( order ) - 1U ) & \
-+ ~( ( order ) - 1 ) ) )
-+
-+/* values returned by squalloc_right_neighbor and its auxiliary functions */
-+typedef enum {
-+ /* unit of internal item is moved */
-+ SUBTREE_MOVED = 0,
-+ /* nothing else can be squeezed into left neighbor */
-+ SQUEEZE_TARGET_FULL = 1,
-+ /* all content of node is squeezed into its left neighbor */
-+ SQUEEZE_SOURCE_EMPTY = 2,
-+ /* one more item is copied (this is only returned by
-+ allocate_and_copy_extent to squalloc_twig)) */
-+ SQUEEZE_CONTINUE = 3
-+} squeeze_result;
-+
-+/* Do not change items ids. If you do - there will be format change */
-+typedef enum {
-+ STATIC_STAT_DATA_ID = 0x0,
-+ SIMPLE_DIR_ENTRY_ID = 0x1,
-+ COMPOUND_DIR_ID = 0x2,
-+ NODE_POINTER_ID = 0x3,
-+ EXTENT_POINTER_ID = 0x5,
-+ FORMATTING_ID = 0x6,
-+ CTAIL_ID = 0x7,
-+ BLACK_BOX_ID = 0x8,
-+ LAST_ITEM_ID = 0x9
-+} item_id;
-+
-+/* Flags passed to jnode_flush() to allow it to distinguish default settings based on
-+ whether commit() was called or VM memory pressure was applied. */
-+typedef enum {
-+ /* submit flush queue to disk at jnode_flush completion */
-+ JNODE_FLUSH_WRITE_BLOCKS = 1,
-+
-+ /* flush is called for commit */
-+ JNODE_FLUSH_COMMIT = 2,
-+ /* not implemented */
-+ JNODE_FLUSH_MEMORY_FORMATTED = 4,
-+
-+ /* not implemented */
-+ JNODE_FLUSH_MEMORY_UNFORMATTED = 8,
-+} jnode_flush_flags;
-+
-+/* Flags to insert/paste carry operations. Currently they are only used in
-+ flushing code, but in future, they can be used to optimize for repetitive
-+ accesses. */
-+typedef enum {
-+ /* carry is not allowed to shift data to the left when trying to find
-+ free space */
-+ COPI_DONT_SHIFT_LEFT = (1 << 0),
-+ /* carry is not allowed to shift data to the right when trying to find
-+ free space */
-+ COPI_DONT_SHIFT_RIGHT = (1 << 1),
-+ /* carry is not allowed to allocate new node(s) when trying to find
-+ free space */
-+ COPI_DONT_ALLOCATE = (1 << 2),
-+ /* try to load left neighbor if it is not in a cache */
-+ COPI_LOAD_LEFT = (1 << 3),
-+ /* try to load right neighbor if it is not in a cache */
-+ COPI_LOAD_RIGHT = (1 << 4),
-+ /* shift insertion point to the left neighbor */
-+ COPI_GO_LEFT = (1 << 5),
-+ /* shift insertion point to the right neighbor */
-+ COPI_GO_RIGHT = (1 << 6),
-+ /* try to step back into original node if insertion into new node
-+ fails after shifting data there. */
-+ COPI_STEP_BACK = (1 << 7)
-+} cop_insert_flag;
-+
-+typedef enum {
-+ SAFE_UNLINK, /* safe-link for unlink */
-+ SAFE_TRUNCATE /* safe-link for truncate */
-+} reiser4_safe_link_t;
-+
-+/* this is to show on which list of atom jnode is */
-+typedef enum {
-+ NOT_CAPTURED,
-+ DIRTY_LIST,
-+ CLEAN_LIST,
-+ FQ_LIST,
-+ WB_LIST,
-+ OVRWR_LIST
-+} atom_list;
-+
-+/* __REISER4_FORWARD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/fsdata.c linux-2.6.24/fs/reiser4/fsdata.c
---- linux-2.6.24.orig/fs/reiser4/fsdata.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/fsdata.c 2008-01-25 11:39:06.928205628 +0300
-@@ -0,0 +1,804 @@
-+/* Copyright 2001, 2002, 2003, 2004, 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "fsdata.h"
-+#include "inode.h"
-+
-+
-+/* cache of dir_cursors */
-+static struct kmem_cache *d_cursor_cache;
-+
-+/* list of unused cursors */
-+static LIST_HEAD(cursor_cache);
-+
-+/* number of cursors in list of unused cursors */
-+static unsigned long d_cursor_unused = 0;
-+
-+/* spinlock protecting manipulations with dir_cursor's hash table and lists */
-+DEFINE_SPINLOCK(d_lock);
-+
-+static reiser4_file_fsdata *create_fsdata(struct file *file);
-+static int file_is_stateless(struct file *file);
-+static void free_fsdata(reiser4_file_fsdata *fsdata);
-+static void kill_cursor(dir_cursor *);
-+
-+/**
-+ * d_cursor_shrink - shrink callback for cache of dir_cursor-s
-+ * @nr: number of objects to free; 0 means "only report the current count"
-+ * @mask: GFP mask (not used here)
-+ *
-+ * Walks the list of unused cursors from its head under d_lock, killing up to
-+ * @nr of them. Returns d_cursor_unused, i.e. how many could still be freed.
-+ */
-+static int d_cursor_shrink(int nr, gfp_t mask)
-+{
-+ if (nr != 0) {
-+ dir_cursor *scan;
-+
-+ spin_lock(&d_lock);
-+ while (!list_empty(&cursor_cache)) {
-+ /* always take the head: kill_cursor() is expected to unlink
-+ * it -- NOTE(review): confirm, otherwise this loop would spin */
-+ scan = list_entry(cursor_cache.next, dir_cursor, alist);
-+ assert("nikita-3567", scan->ref == 0);
-+ kill_cursor(scan);
-+ /* stop once the requested number has been released */
-+ --nr;
-+ if (nr == 0)
-+ break;
-+ }
-+ spin_unlock(&d_lock);
-+ }
-+ return d_cursor_unused;
-+}
-+
-+/*
-+ * actually, d_cursors are "priceless", because there is no way to
-+ * recover information stored in them. On the other hand, we don't
-+ * want to consume all kernel memory by them. As a compromise, just
-+ * assign higher "seeks" value to d_cursor cache, so that it will be
-+ * shrunk only if system is really tight on memory.
-+ */
-+static struct shrinker d_cursor_shrinker = {
-+ .shrink = d_cursor_shrink,
-+ .seeks = DEFAULT_SEEKS << 3,
-+};
-+
-+/**
-+ * reiser4_init_d_cursor - create d_cursor cache
-+ *
-+ * Creates the "d_cursor" slab cache and registers its shrinker; part of
-+ * reiser4 module initialization. Returns 0, or -ENOMEM without a cache.
-+ */
-+int reiser4_init_d_cursor(void)
-+{
-+ d_cursor_cache = kmem_cache_create("d_cursor", sizeof(dir_cursor), 0,
-+ SLAB_HWCACHE_ALIGN, NULL);
-+ if (d_cursor_cache != NULL) {
-+ register_shrinker(&d_cursor_shrinker);
-+ return 0;
-+ }
-+ return RETERR(-ENOMEM);
-+}
-+
-+/**
-+ * reiser4_done_d_cursor - delete d_cursor cache and d_cursor shrinker
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_d_cursor(void)
-+{
-+ unregister_shrinker(&d_cursor_shrinker);
-+
-+ destroy_reiser4_cache(&d_cursor_cache);
-+}
-+
-+#define D_CURSOR_TABLE_SIZE (256)
-+
-+static inline unsigned long
-+d_cursor_hash(d_cursor_hash_table *table, const struct d_cursor_key *key)
-+{
-+ assert("nikita-3555", IS_POW(D_CURSOR_TABLE_SIZE));
-+ return (key->oid + key->cid) & (D_CURSOR_TABLE_SIZE - 1);
-+}
-+
-+static inline int d_cursor_eq(const struct d_cursor_key *k1,
-+ const struct d_cursor_key *k2)
-+{
-+ return k1->cid == k2->cid && k1->oid == k2->oid;
-+}
-+
-+/*
-+ * define functions to manipulate reiser4 super block's hash table of
-+ * dir_cursors
-+ */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(d_cursor,
-+ dir_cursor,
-+ struct d_cursor_key,
-+ key, hash, d_cursor_hash, d_cursor_eq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/**
-+ * reiser4_init_super_d_info - initialize per-super-block d_cursor resources
-+ * @super: super block to initialize
-+ *
-+ * Initializes per-super-block d_cursor's hash table and radix tree. It is part
-+ * of mount.
-+ */
-+int reiser4_init_super_d_info(struct super_block *super)
-+{
-+ struct d_cursor_info *p;
-+
-+ p = &get_super_private(super)->d_info;
-+
-+ INIT_RADIX_TREE(&p->tree, reiser4_ctx_gfp_mask_get());
-+ return d_cursor_hash_init(&p->table, D_CURSOR_TABLE_SIZE);
-+}
-+
-+/**
-+ * reiser4_done_super_d_info - release per-super-block d_cursor resources
-+ * @super: super block being umounted
-+ *
-+ * It is called on umount. Kills all directory cursors attached to suoer block.
-+ */
-+void reiser4_done_super_d_info(struct super_block *super)
-+{
-+ struct d_cursor_info *d_info;
-+ dir_cursor *cursor, *next;
-+
-+ d_info = &get_super_private(super)->d_info;
-+ for_all_in_htable(&d_info->table, d_cursor, cursor, next)
-+ kill_cursor(cursor);
-+
-+ BUG_ON(d_info->tree.rnode != NULL);
-+ d_cursor_hash_done(&d_info->table);
-+}
-+
-+/**
-+ * kill_cursor - free dir_cursor and reiser4_file_fsdata attached to it
-+ * @cursor: cursor to free
-+ *
-+ * Removes reiser4_file_fsdata attached to @cursor from readdir list of
-+ * reiser4_inode, frees that reiser4_file_fsdata. Removes @cursor from from
-+ * indices, hash table, list of unused cursors and frees it.
-+ */
-+static void kill_cursor(dir_cursor *cursor)
-+{
-+ unsigned long index;
-+
-+ assert("nikita-3566", cursor->ref == 0);
-+ assert("nikita-3572", cursor->fsdata != NULL);
-+
-+ index = (unsigned long)cursor->key.oid;
-+ list_del_init(&cursor->fsdata->dir.linkage);
-+ free_fsdata(cursor->fsdata);
-+ cursor->fsdata = NULL;
-+
-+ if (list_empty_careful(&cursor->list))
-+ /* this is last cursor for a file. Kill radix-tree entry */
-+ radix_tree_delete(&cursor->info->tree, index);
-+ else {
-+ void **slot;
-+
-+ /*
-+ * there are other cursors for the same oid.
-+ */
-+
-+ /*
-+ * if radix tree point to the cursor being removed, re-target
-+ * radix tree slot to the next cursor in the (non-empty as was
-+ * checked above) element of the circular list of all cursors
-+ * for this oid.
-+ */
-+ slot = radix_tree_lookup_slot(&cursor->info->tree, index);
-+ assert("nikita-3571", *slot != NULL);
-+ if (*slot == cursor)
-+ *slot = list_entry(cursor->list.next, dir_cursor, list);
-+ /* remove cursor from circular list */
-+ list_del_init(&cursor->list);
-+ }
-+ /* remove cursor from the list of unused cursors */
-+ list_del_init(&cursor->alist);
-+ /* remove cursor from the hash table */
-+ d_cursor_hash_remove(&cursor->info->table, cursor);
-+ /* and free it */
-+ kmem_cache_free(d_cursor_cache, cursor);
-+ --d_cursor_unused;
-+}
-+
-+/* possible actions that can be performed on all cursors for the given file */
-+enum cursor_action {
-+ /*
-+ * load all detached state: this is called when stat-data is loaded
-+ * from the disk to recover information about all pending readdirs
-+ */
-+ CURSOR_LOAD,
-+ /*
-+ * detach all state from inode, leaving it in the cache. This is called
-+ * when inode is removed form the memory by memory pressure
-+ */
-+ CURSOR_DISPOSE,
-+ /*
-+ * detach cursors from the inode, and free them. This is called when
-+ * inode is destroyed
-+ */
-+ CURSOR_KILL
-+};
-+
-+/*
-+ * return d_cursor data for the file system @inode is in.
-+ */
-+static inline struct d_cursor_info *d_info(struct inode *inode)
-+{
-+ return &get_super_private(inode->i_sb)->d_info;
-+}
-+
-+/*
-+ * lookup d_cursor in the per-super-block radix tree.
-+ */
-+static inline dir_cursor *lookup(struct d_cursor_info * info,
-+ unsigned long index)
-+{
-+ return (dir_cursor *) radix_tree_lookup(&info->tree, index);
-+}
-+
-+/*
-+ * attach @cursor to the radix tree. There may be multiple cursors for the
-+ * same oid, they are chained into circular list.
-+ */
-+static void bind_cursor(dir_cursor * cursor, unsigned long index)
-+{
-+ dir_cursor *head;
-+
-+ head = lookup(cursor->info, index);
-+ if (head == NULL) {
-+ /* this is the first cursor for this index */
-+ INIT_LIST_HEAD(&cursor->list);
-+ radix_tree_insert(&cursor->info->tree, index, cursor);
-+ } else {
-+ /* some cursor already exists. Chain ours */
-+ list_add(&cursor->list, &head->list);
-+ }
-+}
-+
-+/*
-+ * detach fsdata (if detachable) from file descriptor, and put cursor on the
-+ * "unused" list. Called when file descriptor is not longer in active use.
-+ */
-+static void clean_fsdata(struct file *file)
-+{
-+ dir_cursor *cursor;
-+ reiser4_file_fsdata *fsdata;
-+
-+ assert("nikita-3570", file_is_stateless(file));
-+
-+ fsdata = (reiser4_file_fsdata *) file->private_data;
-+ if (fsdata != NULL) {
-+ cursor = fsdata->cursor;
-+ if (cursor != NULL) {
-+ spin_lock(&d_lock);
-+ --cursor->ref;
-+ if (cursor->ref == 0) {
-+ list_add_tail(&cursor->alist, &cursor_cache);
-+ ++d_cursor_unused;
-+ }
-+ spin_unlock(&d_lock);
-+ file->private_data = NULL;
-+ }
-+ }
-+}
-+
-+/*
-+ * global counter used to generate "client ids". These ids are encoded into
-+ * high bits of fpos.
-+ */
-+static __u32 cid_counter = 0;
-+#define CID_SHIFT (20)
-+#define CID_MASK (0xfffffull)
-+
-+static void free_file_fsdata_nolock(struct file *);
-+
-+/**
-+ * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table
-+ * @cursor:
-+ * @file:
-+ * @inode:
-+ *
-+ * Allocates reiser4_file_fsdata, attaches it to @cursor, inserts cursor to
-+ * reiser4 super block's hash table and radix tree.
-+ add detachable readdir
-+ * state to the @f
-+ */
-+static int insert_cursor(dir_cursor *cursor, struct file *file,
-+ struct inode *inode)
-+{
-+ int result;
-+ reiser4_file_fsdata *fsdata;
-+
-+ memset(cursor, 0, sizeof *cursor);
-+
-+ /* this is either first call to readdir, or rewind. Anyway, create new
-+ * cursor. */
-+ fsdata = create_fsdata(NULL);
-+ if (fsdata != NULL) {
-+ result = radix_tree_preload(reiser4_ctx_gfp_mask_get());
-+ if (result == 0) {
-+ struct d_cursor_info *info;
-+ oid_t oid;
-+
-+ info = d_info(inode);
-+ oid = get_inode_oid(inode);
-+ /* cid occupies higher 12 bits of f->f_pos. Don't
-+ * allow it to become negative: this confuses
-+ * nfsd_readdir() */
-+ cursor->key.cid = (++cid_counter) & 0x7ff;
-+ cursor->key.oid = oid;
-+ cursor->fsdata = fsdata;
-+ cursor->info = info;
-+ cursor->ref = 1;
-+
-+ spin_lock_inode(inode);
-+ /* install cursor as @f's private_data, discarding old
-+ * one if necessary */
-+#if REISER4_DEBUG
-+ if (file->private_data)
-+ warning("", "file has fsdata already");
-+#endif
-+ clean_fsdata(file);
-+ free_file_fsdata_nolock(file);
-+ file->private_data = fsdata;
-+ fsdata->cursor = cursor;
-+ spin_unlock_inode(inode);
-+ spin_lock(&d_lock);
-+ /* insert cursor into hash table */
-+ d_cursor_hash_insert(&info->table, cursor);
-+ /* and chain it into radix-tree */
-+ bind_cursor(cursor, (unsigned long)oid);
-+ spin_unlock(&d_lock);
-+ radix_tree_preload_end();
-+ file->f_pos = ((__u64) cursor->key.cid) << CID_SHIFT;
-+ }
-+ } else
-+ result = RETERR(-ENOMEM);
-+ return result;
-+}
-+
-+/**
-+ * process_cursors - do action on each cursor attached to inode
-+ * @inode:
-+ * @act: action to do
-+ *
-+ * Finds all cursors of @inode in reiser4's super block radix tree of cursors
-+ * and performs action specified by @act on each of cursors.
-+ */
-+static void process_cursors(struct inode *inode, enum cursor_action act)
-+{
-+ oid_t oid;
-+ dir_cursor *start;
-+ struct list_head *head;
-+ reiser4_context *ctx;
-+ struct d_cursor_info *info;
-+
-+ /* this can be called by
-+ *
-+ * kswapd->...->prune_icache->..reiser4_destroy_inode
-+ *
-+ * without reiser4_context
-+ */
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-23", "failed to init context");
-+ return;
-+ }
-+
-+ assert("nikita-3558", inode != NULL);
-+
-+ info = d_info(inode);
-+ oid = get_inode_oid(inode);
-+ spin_lock_inode(inode);
-+ head = get_readdir_list(inode);
-+ spin_lock(&d_lock);
-+ /* find any cursor for this oid: reference to it is hanging of radix
-+ * tree */
-+ start = lookup(info, (unsigned long)oid);
-+ if (start != NULL) {
-+ dir_cursor *scan;
-+ reiser4_file_fsdata *fsdata;
-+
-+ /* process circular list of cursors for this oid */
-+ scan = start;
-+ do {
-+ dir_cursor *next;
-+
-+ next = list_entry(scan->list.next, dir_cursor, list);
-+ fsdata = scan->fsdata;
-+ assert("nikita-3557", fsdata != NULL);
-+ if (scan->key.oid == oid) {
-+ switch (act) {
-+ case CURSOR_DISPOSE:
-+ list_del_init(&fsdata->dir.linkage);
-+ break;
-+ case CURSOR_LOAD:
-+ list_add(&fsdata->dir.linkage, head);
-+ break;
-+ case CURSOR_KILL:
-+ kill_cursor(scan);
-+ break;
-+ }
-+ }
-+ if (scan == next)
-+ /* last cursor was just killed */
-+ break;
-+ scan = next;
-+ } while (scan != start);
-+ }
-+ spin_unlock(&d_lock);
-+ /* check that we killed 'em all */
-+ assert("nikita-3568",
-+ ergo(act == CURSOR_KILL,
-+ list_empty_careful(get_readdir_list(inode))));
-+ assert("nikita-3569",
-+ ergo(act == CURSOR_KILL, lookup(info, oid) == NULL));
-+ spin_unlock_inode(inode);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_dispose_cursors - removes cursors from inode's list
-+ * @inode: inode to dispose cursors of
-+ *
-+ * For each of cursors corresponding to @inode - removes reiser4_file_fsdata
-+ * attached to cursor from inode's readdir list. This is called when inode is
-+ * removed from the memory by memory pressure.
-+ */
-+void reiser4_dispose_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_DISPOSE);
-+}
-+
-+/**
-+ * reiser4_load_cursors - attach cursors to inode
-+ * @inode: inode to load cursors to
-+ *
-+ * For each of cursors corresponding to @inode - attaches reiser4_file_fsdata
-+ * attached to cursor to inode's readdir list. This is done when inode is
-+ * loaded into memory.
-+ */
-+void reiser4_load_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_LOAD);
-+}
-+
-+/**
-+ * reiser4_kill_cursors - kill all inode cursors
-+ * @inode: inode to kill cursors of
-+ *
-+ * Frees all cursors for this inode. This is called when inode is destroyed.
-+ */
-+void reiser4_kill_cursors(struct inode *inode)
-+{
-+ process_cursors(inode, CURSOR_KILL);
-+}
-+
-+/**
-+ * file_is_stateless -
-+ * @file:
-+ *
-+ * true, if file descriptor @f is created by NFS server by "demand" to serve
-+ * one file system operation. This means that there may be "detached state"
-+ * for underlying inode.
-+ */
-+static int file_is_stateless(struct file *file)
-+{
-+ return reiser4_get_dentry_fsdata(file->f_dentry)->stateless;
-+}
-+
-+/**
-+ * reiser4_get_dir_fpos -
-+ * @dir:
-+ *
-+ * Calculates ->fpos from user-supplied cookie. Normally it is dir->f_pos, but
-+ * in the case of stateless directory operation (readdir-over-nfs), client id
-+ * was encoded in the high bits of cookie and should me masked off.
-+ */
-+loff_t reiser4_get_dir_fpos(struct file *dir)
-+{
-+ if (file_is_stateless(dir))
-+ return dir->f_pos & CID_MASK;
-+ else
-+ return dir->f_pos;
-+}
-+
-+/**
-+ * reiser4_attach_fsdata - try to attach fsdata
-+ * @file:
-+ * @inode:
-+ *
-+ * Finds or creates cursor for readdir-over-nfs.
-+ */
-+int reiser4_attach_fsdata(struct file *file, struct inode *inode)
-+{
-+ loff_t pos;
-+ int result;
-+ dir_cursor *cursor;
-+
-+ /*
-+ * we are serialized by inode->i_mutex
-+ */
-+ if (!file_is_stateless(file))
-+ return 0;
-+
-+ pos = file->f_pos;
-+ result = 0;
-+ if (pos == 0) {
-+ /*
-+ * first call to readdir (or rewind to the beginning of
-+ * directory)
-+ */
-+ cursor = kmem_cache_alloc(d_cursor_cache,
-+ reiser4_ctx_gfp_mask_get());
-+ if (cursor != NULL)
-+ result = insert_cursor(cursor, file, inode);
-+ else
-+ result = RETERR(-ENOMEM);
-+ } else {
-+ /* try to find existing cursor */
-+ struct d_cursor_key key;
-+
-+ key.cid = pos >> CID_SHIFT;
-+ key.oid = get_inode_oid(inode);
-+ spin_lock(&d_lock);
-+ cursor = d_cursor_hash_find(&d_info(inode)->table, &key);
-+ if (cursor != NULL) {
-+ /* cursor was found */
-+ if (cursor->ref == 0) {
-+ /* move it from unused list */
-+ list_del_init(&cursor->alist);
-+ --d_cursor_unused;
-+ }
-+ ++cursor->ref;
-+ }
-+ spin_unlock(&d_lock);
-+ if (cursor != NULL) {
-+ spin_lock_inode(inode);
-+ assert("nikita-3556", cursor->fsdata->back == NULL);
-+ clean_fsdata(file);
-+ free_file_fsdata_nolock(file);
-+ file->private_data = cursor->fsdata;
-+ spin_unlock_inode(inode);
-+ }
-+ }
-+ return result;
-+}
-+
-+/**
-+ * reiser4_detach_fsdata - ???
-+ * @file:
-+ *
-+ * detach fsdata, if necessary
-+ */
-+void reiser4_detach_fsdata(struct file *file)
-+{
-+ struct inode *inode;
-+
-+ if (!file_is_stateless(file))
-+ return;
-+
-+ inode = file->f_dentry->d_inode;
-+ spin_lock_inode(inode);
-+ clean_fsdata(file);
-+ spin_unlock_inode(inode);
-+}
-+
-+/* slab for reiser4_dentry_fsdata */
-+static struct kmem_cache *dentry_fsdata_cache;
-+
-+/**
-+ * reiser4_init_dentry_fsdata - create cache of dentry_fsdata
-+ *
-+ * Initializes slab cache of structures attached to denty->d_fsdata. It is
-+ * part of reiser4 module initialization.
-+ */
-+int reiser4_init_dentry_fsdata(void)
-+{
-+ dentry_fsdata_cache = kmem_cache_create("dentry_fsdata",
-+ sizeof(struct reiser4_dentry_fsdata),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT,
-+ NULL);
-+ if (dentry_fsdata_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_dentry_fsdata - delete cache of dentry_fsdata
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_dentry_fsdata(void)
-+{
-+ destroy_reiser4_cache(&dentry_fsdata_cache);
-+}
-+
-+/**
-+ * reiser4_get_dentry_fsdata - get fs-specific dentry data
-+ * @dentry: queried dentry
-+ *
-+ * Allocates if necessary and returns per-dentry data that we attach to each
-+ * dentry.
-+ */
-+struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *dentry)
-+{
-+ assert("nikita-1365", dentry != NULL);
-+
-+ if (dentry->d_fsdata == NULL) {
-+ dentry->d_fsdata = kmem_cache_alloc(dentry_fsdata_cache,
-+ reiser4_ctx_gfp_mask_get());
-+ if (dentry->d_fsdata == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ memset(dentry->d_fsdata, 0,
-+ sizeof(struct reiser4_dentry_fsdata));
-+ }
-+ return dentry->d_fsdata;
-+}
-+
-+/**
-+ * reiser4_free_dentry_fsdata - detach and free dentry_fsdata
-+ * @dentry: dentry to free fsdata of
-+ *
-+ * Detaches and frees fs-specific dentry data
-+ */
-+void reiser4_free_dentry_fsdata(struct dentry *dentry)
-+{
-+ if (dentry->d_fsdata != NULL) {
-+ kmem_cache_free(dentry_fsdata_cache, dentry->d_fsdata);
-+ dentry->d_fsdata = NULL;
-+ }
-+}
-+
-+/* slab for reiser4_file_fsdata */
-+static struct kmem_cache *file_fsdata_cache;
-+
-+/**
-+ * reiser4_init_file_fsdata - create cache of reiser4_file_fsdata
-+ *
-+ * Initializes slab cache of structures attached to file->private_data. It is
-+ * part of reiser4 module initialization.
-+ */
-+int reiser4_init_file_fsdata(void)
-+{
-+ file_fsdata_cache = kmem_cache_create("file_fsdata",
-+ sizeof(reiser4_file_fsdata),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL);
-+ if (file_fsdata_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_file_fsdata - delete cache of reiser4_file_fsdata
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void reiser4_done_file_fsdata(void)
-+{
-+ destroy_reiser4_cache(&file_fsdata_cache);
-+}
-+
-+/**
-+ * create_fsdata - allocate and initialize reiser4_file_fsdata
-+ * @file: what to create file_fsdata for, may be NULL
-+ *
-+ * Allocates and initializes reiser4_file_fsdata structure.
-+ */
-+static reiser4_file_fsdata *create_fsdata(struct file *file)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ fsdata = kmem_cache_alloc(file_fsdata_cache,
-+ reiser4_ctx_gfp_mask_get());
-+ if (fsdata != NULL) {
-+ memset(fsdata, 0, sizeof *fsdata);
-+ fsdata->ra1.max_window_size = VM_MAX_READAHEAD * 1024;
-+ fsdata->back = file;
-+ INIT_LIST_HEAD(&fsdata->dir.linkage);
-+ }
-+ return fsdata;
-+}
-+
-+/**
-+ * free_fsdata - free reiser4_file_fsdata
-+ * @fsdata: object to free
-+ *
-+ * Dual to create_fsdata(). Free reiser4_file_fsdata.
-+ */
-+static void free_fsdata(reiser4_file_fsdata *fsdata)
-+{
-+ BUG_ON(fsdata == NULL);
-+ kmem_cache_free(file_fsdata_cache, fsdata);
-+}
-+
-+/**
-+ * reiser4_get_file_fsdata - get fs-specific file data
-+ * @file: queried file
-+ *
-+ * Returns fs-specific data of @file. If it is NULL, allocates it and attaches
-+ * to @file.
-+ */
-+reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *file)
-+{
-+ assert("nikita-1603", file != NULL);
-+
-+ if (file->private_data == NULL) {
-+ reiser4_file_fsdata *fsdata;
-+ struct inode *inode;
-+
-+ fsdata = create_fsdata(file);
-+ if (fsdata == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ inode = file->f_dentry->d_inode;
-+ spin_lock_inode(inode);
-+ if (file->private_data == NULL) {
-+ file->private_data = fsdata;
-+ fsdata = NULL;
-+ }
-+ spin_unlock_inode(inode);
-+ if (fsdata != NULL)
-+ /* other thread initialized ->fsdata */
-+ kmem_cache_free(file_fsdata_cache, fsdata);
-+ }
-+ assert("nikita-2665", file->private_data != NULL);
-+ return file->private_data;
-+}
-+
-+/**
-+ * free_file_fsdata_nolock - detach and free reiser4_file_fsdata
-+ * @file:
-+ *
-+ * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from
-+ * readdir list, frees if it is not linked to d_cursor object.
-+ */
-+static void free_file_fsdata_nolock(struct file *file)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ assert("", spin_inode_is_locked(file->f_dentry->d_inode));
-+ fsdata = file->private_data;
-+ if (fsdata != NULL) {
-+ list_del_init(&fsdata->dir.linkage);
-+ if (fsdata->cursor == NULL)
-+ free_fsdata(fsdata);
-+ }
-+ file->private_data = NULL;
-+}
-+
-+/**
-+ * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata
-+ * @file:
-+ *
-+ * Spinlocks inode and calls free_file_fsdata_nolock to do the work.
-+ */
-+void reiser4_free_file_fsdata(struct file *file)
-+{
-+ spin_lock_inode(file->f_dentry->d_inode);
-+ free_file_fsdata_nolock(file);
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/fsdata.h linux-2.6.24/fs/reiser4/fsdata.h
---- linux-2.6.24.orig/fs/reiser4/fsdata.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/fsdata.h 2008-01-25 11:39:06.928205628 +0300
-@@ -0,0 +1,205 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#if !defined( __REISER4_FSDATA_H__ )
-+#define __REISER4_FSDATA_H__
-+
-+#include "debug.h"
-+#include "kassign.h"
-+#include "seal.h"
-+#include "type_safe_hash.h"
-+#include "plugin/file/file.h"
-+#include "readahead.h"
-+
-+/*
-+ * comment about reiser4_dentry_fsdata
-+ *
-+ *
-+ */
-+
-+/*
-+ * locking: fields of per file descriptor readdir_pos and ->f_pos are
-+ * protected by ->i_mutex on inode. Under this lock following invariant
-+ * holds:
-+ *
-+ * file descriptor is "looking" at the entry_no-th directory entry from
-+ * the beginning of directory. This entry has key dir_entry_key and is
-+ * pos-th entry with duplicate-key sequence.
-+ *
-+ */
-+
-+/* logical position within directory */
-+struct dir_pos {
-+ /* key of directory entry (actually, part of a key sufficient to
-+ identify directory entry) */
-+ de_id dir_entry_key;
-+ /* ordinal number of directory entry among all entries with the same
-+ key. (Starting from 0.) */
-+ unsigned pos;
-+};
-+
-+struct readdir_pos {
-+ /* f_pos corresponding to this readdir position */
-+ __u64 fpos;
-+ /* logical position within directory */
-+ struct dir_pos position;
-+ /* logical number of directory entry within
-+ directory */
-+ __u64 entry_no;
-+};
-+
-+/*
-+ * this is used to speed up lookups for directory entry: on initial call to
-+ * ->lookup() seal and coord of directory entry (if found, that is) are stored
-+ * in struct dentry and reused later to avoid tree traversals.
-+ */
-+struct de_location {
-+ /* seal covering directory entry */
-+ seal_t entry_seal;
-+ /* coord of directory entry */
-+ coord_t entry_coord;
-+ /* ordinal number of directory entry among all entries with the same
-+ key. (Starting from 0.) */
-+ int pos;
-+};
-+
-+/**
-+ * reiser4_dentry_fsdata - reiser4-specific data attached to dentries
-+ *
-+ * This is allocated dynamically and released in d_op->d_release()
-+ *
-+ * Currently it only contains cached location (hint) of directory entry, but
-+ * it is expected that other information will be accumulated here.
-+ */
-+struct reiser4_dentry_fsdata {
-+ /*
-+ * here will go fields filled by ->lookup() to speedup next
-+ * create/unlink, like blocknr of znode with stat-data, or key of
-+ * stat-data.
-+ */
-+ struct de_location dec;
-+ int stateless; /* created through reiser4_decode_fh, needs special
-+ * treatment in readdir. */
-+};
-+
-+extern int reiser4_init_dentry_fsdata(void);
-+extern void reiser4_done_dentry_fsdata(void);
-+extern struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *);
-+extern void reiser4_free_dentry_fsdata(struct dentry *dentry);
-+
-+/**
-+ * reiser4_file_fsdata - reiser4-specific data attached to file->private_data
-+ *
-+ * This is allocated dynamically and released in inode->i_fop->release
-+ */
-+typedef struct reiser4_file_fsdata {
-+ /*
-+ * pointer back to the struct file which this reiser4_file_fsdata is
-+ * part of
-+ */
-+ struct file *back;
-+ /* detached cursor for stateless readdir. */
-+ struct dir_cursor *cursor;
-+ /*
-+ * We need both directory and regular file parts here, because there
-+ * are file system objects that are files and directories.
-+ */
-+ struct {
-+ /*
-+ * position in directory. It is updated each time directory is
-+ * modified
-+ */
-+ struct readdir_pos readdir;
-+ /* head of this list is reiser4_inode->lists.readdir_list */
-+ struct list_head linkage;
-+ } dir;
-+ /* hints to speed up operations with regular files: read and write. */
-+ struct {
-+ hint_t hint;
-+ } reg;
-+ struct reiser4_file_ra_state ra1;
-+
-+} reiser4_file_fsdata;
-+
-+extern int reiser4_init_file_fsdata(void);
-+extern void reiser4_done_file_fsdata(void);
-+extern reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *);
-+extern void reiser4_free_file_fsdata(struct file *);
-+
-+/*
-+ * d_cursor is reiser4_file_fsdata not attached to struct file. d_cursors are
-+ * used to address problem reiser4 has with readdir accesses via NFS. See
-+ * plugin/file_ops_readdir.c for more details.
-+ */
-+struct d_cursor_key{
-+ __u16 cid;
-+ __u64 oid;
-+};
-+
-+/*
-+ * define structures d_cursor_hash_table d_cursor_hash_link which are used to
-+ * maintain hash table of dir_cursor-s in reiser4's super block
-+ */
-+typedef struct dir_cursor dir_cursor;
-+TYPE_SAFE_HASH_DECLARE(d_cursor, dir_cursor);
-+
-+struct dir_cursor {
-+ int ref;
-+ reiser4_file_fsdata *fsdata;
-+
-+ /* link to reiser4 super block hash table of cursors */
-+ d_cursor_hash_link hash;
-+
-+ /*
-+ * this is to link cursors to reiser4 super block's radix tree of
-+ * cursors if there are more than one cursor of the same objectid
-+ */
-+ struct list_head list;
-+ struct d_cursor_key key;
-+ struct d_cursor_info *info;
-+ /* list of unused cursors */
-+ struct list_head alist;
-+};
-+
-+extern int reiser4_init_d_cursor(void);
-+extern void reiser4_done_d_cursor(void);
-+
-+extern int reiser4_init_super_d_info(struct super_block *);
-+extern void reiser4_done_super_d_info(struct super_block *);
-+
-+extern loff_t reiser4_get_dir_fpos(struct file *);
-+extern int reiser4_attach_fsdata(struct file *, struct inode *);
-+extern void reiser4_detach_fsdata(struct file *);
-+
-+/* these are needed for "stateless" readdir. See plugin/file_ops_readdir.c for
-+ more details */
-+void reiser4_dispose_cursors(struct inode *inode);
-+void reiser4_load_cursors(struct inode *inode);
-+void reiser4_kill_cursors(struct inode *inode);
-+void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
-+ int offset, int adj);
-+
-+/*
-+ * this structure is embedded to reise4_super_info_data. It maintains d_cursors
-+ * (detached readdir state). See plugin/file_ops_readdir.c for more details.
-+ */
-+struct d_cursor_info {
-+ d_cursor_hash_table table;
-+ struct radix_tree_root tree;
-+};
-+
-+/* spinlock protecting readdir cursors */
-+extern spinlock_t d_lock;
-+
-+/* __REISER4_FSDATA_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/init_super.c linux-2.6.24/fs/reiser4/init_super.c
---- linux-2.6.24.orig/fs/reiser4/init_super.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/init_super.c 2008-01-25 11:39:06.932206658 +0300
-@@ -0,0 +1,751 @@
-+/* Copyright by Hans Reiser, 2003 */
-+
-+#include "super.h"
-+#include "inode.h"
-+#include "plugin/plugin_set.h"
-+
-+#include <linux/swap.h>
-+
-+/**
-+ * init_fs_info - allocate reiser4 specific super block
-+ * @super: super block of filesystem
-+ *
-+ * Allocates and initialize reiser4_super_info_data, attaches it to
-+ * super->s_fs_info, initializes structures maintaining d_cursor-s.
-+ */
-+int reiser4_init_fs_info(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = kzalloc(sizeof(reiser4_super_info_data),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!sbinfo)
-+ return RETERR(-ENOMEM);
-+
-+ super->s_fs_info = sbinfo;
-+ super->s_op = NULL;
-+
-+ ON_DEBUG(INIT_LIST_HEAD(&sbinfo->all_jnodes));
-+ ON_DEBUG(spin_lock_init(&sbinfo->all_guard));
-+
-+ mutex_init(&sbinfo->delete_mutex);
-+ spin_lock_init(&(sbinfo->guard));
-+
-+ /* initialize per-super-block d_cursor resources */
-+ reiser4_init_super_d_info(super);
-+
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_done_fs_info - free reiser4 specific super block
-+ * @super: super block of filesystem
-+ *
-+ * Performs some sanity checks, releases structures maintaining d_cursor-s,
-+ * frees reiser4_super_info_data.
-+ */
-+void reiser4_done_fs_info(struct super_block *super)
-+{
-+ assert("zam-990", super->s_fs_info != NULL);
-+
-+ /* release per-super-block d_cursor resources */
-+ reiser4_done_super_d_info(super);
-+
-+ /* make sure that there are not jnodes already */
-+ assert("", list_empty(&get_super_private(super)->all_jnodes));
-+ assert("", get_current_context()->trans->atom == NULL);
-+ reiser4_check_block_counters(super);
-+ kfree(super->s_fs_info);
-+ super->s_fs_info = NULL;
-+}
-+
-+/* type of option parseable by parse_option() */
-+typedef enum {
-+ /* value of option is arbitrary string */
-+ OPT_STRING,
-+
-+ /*
-+ * option specifies bit in a bitmask. When option is set - bit in
-+ * sbinfo->fs_flags is set. Examples are bsdgroups, 32bittimes, mtflush,
-+ * dont_load_bitmap, atomic_write.
-+ */
-+ OPT_BIT,
-+
-+ /*
-+ * value of option should conform to sprintf() format. Examples are
-+ * tmgr.atom_max_size=N, tmgr.atom_max_age=N
-+ */
-+ OPT_FORMAT,
-+
-+ /*
-+ * option can take one of predefined values. Example is onerror=panic or
-+ * onerror=remount-ro
-+ */
-+ OPT_ONEOF,
-+} opt_type_t;
-+
-+#if 0
-+struct opt_bitmask_bit {
-+ const char *bit_name;
-+ int bit_nr;
-+};
-+#endif
-+
-+/* description of option parseable by parse_option() */
-+struct opt_desc {
-+ /* option name.
-+
-+ parsed portion of string has a form "name=value".
-+ */
-+ const char *name;
-+ /* type of option */
-+ opt_type_t type;
-+ union {
-+ /* where to store value of string option (type == OPT_STRING) */
-+ char **string;
-+ /* description of bits for bit option (type == OPT_BIT) */
-+ struct {
-+ int nr;
-+ void *addr;
-+ } bit;
-+ /* description of format and targets for format option (type
-+ == OPT_FORMAT) */
-+ struct {
-+ const char *format;
-+ int nr_args;
-+ void *arg1;
-+ void *arg2;
-+ void *arg3;
-+ void *arg4;
-+ } f;
-+ struct {
-+ int *result;
-+ const char *list[10];
-+ } oneof;
-+ struct {
-+ void *addr;
-+ int nr_bits;
-+ //struct opt_bitmask_bit *bits;
-+ } bitmask;
-+ } u;
-+};
-+
-+/**
-+ * parse_option - parse one option
-+ * @opt_strin: starting point of parsing
-+ * @opt: option description
-+ *
-+ * foo=bar,
-+ * ^ ^ ^
-+ * | | +-- replaced to '\0'
-+ * | +-- val_start
-+ * +-- opt_string
-+ * Figures out option type and handles option correspondingly.
-+ */
-+static int parse_option(char *opt_string, struct opt_desc *opt)
-+{
-+ char *val_start;
-+ int result;
-+ const char *err_msg;
-+
-+ /* NOTE-NIKITA think about using lib/cmdline.c functions here. */
-+
-+ val_start = strchr(opt_string, '=');
-+ if (val_start != NULL) {
-+ *val_start = '\0';
-+ ++val_start;
-+ }
-+
-+ err_msg = NULL;
-+ result = 0;
-+ switch (opt->type) {
-+ case OPT_STRING:
-+ if (val_start == NULL) {
-+ err_msg = "String arg missing";
-+ result = RETERR(-EINVAL);
-+ } else
-+ *opt->u.string = val_start;
-+ break;
-+ case OPT_BIT:
-+ if (val_start != NULL)
-+ err_msg = "Value ignored";
-+ else
-+ set_bit(opt->u.bit.nr, opt->u.bit.addr);
-+ break;
-+ case OPT_FORMAT:
-+ if (val_start == NULL) {
-+ err_msg = "Formatted arg missing";
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ if (sscanf(val_start, opt->u.f.format,
-+ opt->u.f.arg1, opt->u.f.arg2, opt->u.f.arg3,
-+ opt->u.f.arg4) != opt->u.f.nr_args) {
-+ err_msg = "Wrong conversion";
-+ result = RETERR(-EINVAL);
-+ }
-+ break;
-+ case OPT_ONEOF:
-+ {
-+ int i = 0;
-+
-+ if (val_start == NULL) {
-+ err_msg = "Value is missing";
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ err_msg = "Wrong option value";
-+ result = RETERR(-EINVAL);
-+ while (opt->u.oneof.list[i]) {
-+ if (!strcmp(opt->u.oneof.list[i], val_start)) {
-+ result = 0;
-+ err_msg = NULL;
-+ *opt->u.oneof.result = i;
-+ break;
-+ }
-+ i++;
-+ }
-+ break;
-+ }
-+ default:
-+ wrong_return_value("nikita-2100", "opt -> type");
-+ break;
-+ }
-+ if (err_msg != NULL) {
-+ warning("nikita-2496", "%s when parsing option \"%s%s%s\"",
-+ err_msg, opt->name, val_start ? "=" : "",
-+ val_start ? : "");
-+ }
-+ return result;
-+}
-+
-+/**
-+ * parse_options - parse reiser4 mount options
-+ * @opt_string: starting point
-+ * @opts: array of option description
-+ * @nr_opts: number of elements in @opts
-+ *
-+ * Parses comma separated list of reiser4 mount options.
-+ */
-+static int parse_options(char *opt_string, struct opt_desc *opts, int nr_opts)
-+{
-+ int result;
-+
-+ result = 0;
-+ while ((result == 0) && opt_string && *opt_string) {
-+ int j;
-+ char *next;
-+
-+ next = strchr(opt_string, ',');
-+ if (next != NULL) {
-+ *next = '\0';
-+ ++next;
-+ }
-+ for (j = 0; j < nr_opts; ++j) {
-+ if (!strncmp(opt_string, opts[j].name,
-+ strlen(opts[j].name))) {
-+ result = parse_option(opt_string, &opts[j]);
-+ break;
-+ }
-+ }
-+ if (j == nr_opts) {
-+ warning("nikita-2307", "Unrecognized option: \"%s\"",
-+ opt_string);
-+ /* traditionally, -EINVAL is returned on wrong mount
-+ option */
-+ result = RETERR(-EINVAL);
-+ }
-+ opt_string = next;
-+ }
-+ return result;
-+}
-+
-+#define NUM_OPT( label, fmt, addr ) \
-+ { \
-+ .name = ( label ), \
-+ .type = OPT_FORMAT, \
-+ .u = { \
-+ .f = { \
-+ .format = ( fmt ), \
-+ .nr_args = 1, \
-+ .arg1 = ( addr ), \
-+ .arg2 = NULL, \
-+ .arg3 = NULL, \
-+ .arg4 = NULL \
-+ } \
-+ } \
-+ }
-+
-+#define SB_FIELD_OPT( field, fmt ) NUM_OPT( #field, fmt, &sbinfo -> field )
-+
-+#define BIT_OPT(label, bitnr) \
-+ { \
-+ .name = label, \
-+ .type = OPT_BIT, \
-+ .u = { \
-+ .bit = { \
-+ .nr = bitnr, \
-+ .addr = &sbinfo->fs_flags \
-+ } \
-+ } \
-+ }
-+
-+#define MAX_NR_OPTIONS (30)
-+
-+/**
-+ * reiser4_init_super_data - initialize reiser4 private super block
-+ * @super: super block to initialize
-+ * @opt_string: list of reiser4 mount options
-+ *
-+ * Sets various reiser4 parameters to default values. Parses mount options and
-+ * overwrites default settings.
-+ */
-+int reiser4_init_super_data(struct super_block *super, char *opt_string)
-+{
-+ int result;
-+ struct opt_desc *opts, *p;
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+
-+ /* initialize super, export, dentry operations */
-+ sbinfo->ops.super = reiser4_super_operations;
-+ sbinfo->ops.export = reiser4_export_operations;
-+ sbinfo->ops.dentry = reiser4_dentry_operations;
-+ super->s_op = &sbinfo->ops.super;
-+ super->s_export_op = &sbinfo->ops.export;
-+
-+ /* initialize transaction manager parameters to default values */
-+ sbinfo->tmgr.atom_max_size = totalram_pages / 4;
-+ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ;
-+ sbinfo->tmgr.atom_min_size = 256;
-+ sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS;
-+
-+ /* initialize cbk cache parameter */
-+ sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS;
-+
-+ /* initialize flush parameters */
-+ sbinfo->flush.relocate_threshold = FLUSH_RELOCATE_THRESHOLD;
-+ sbinfo->flush.relocate_distance = FLUSH_RELOCATE_DISTANCE;
-+ sbinfo->flush.written_threshold = FLUSH_WRITTEN_THRESHOLD;
-+ sbinfo->flush.scan_maxnodes = FLUSH_SCAN_MAXNODES;
-+
-+ sbinfo->optimal_io_size = REISER4_OPTIMAL_IO_SIZE;
-+
-+ /* preliminary tree initializations */
-+ sbinfo->tree.super = super;
-+ sbinfo->tree.carry.new_node_flags = REISER4_NEW_NODE_FLAGS;
-+ sbinfo->tree.carry.new_extent_flags = REISER4_NEW_EXTENT_FLAGS;
-+ sbinfo->tree.carry.paste_flags = REISER4_PASTE_FLAGS;
-+ sbinfo->tree.carry.insert_flags = REISER4_INSERT_FLAGS;
-+ rwlock_init(&(sbinfo->tree.tree_lock));
-+ spin_lock_init(&(sbinfo->tree.epoch_lock));
-+
-+ /* initialize default readahead params */
-+ sbinfo->ra_params.max = num_physpages / 4;
-+ sbinfo->ra_params.flags = 0;
-+
-+ /* allocate memory for structure describing reiser4 mount options */
-+ opts = kmalloc(sizeof(struct opt_desc) * MAX_NR_OPTIONS,
-+ reiser4_ctx_gfp_mask_get());
-+ if (opts == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /* initialize structure describing reiser4 mount options */
-+ p = opts;
-+
-+#if REISER4_DEBUG
-+# define OPT_ARRAY_CHECK if ((p) > (opts) + MAX_NR_OPTIONS) { \
-+ warning ("zam-1046", "opt array is overloaded"); break; \
-+ }
-+#else
-+# define OPT_ARRAY_CHECK noop
-+#endif
-+
-+#define PUSH_OPT(...) \
-+do { \
-+ struct opt_desc o = __VA_ARGS__; \
-+ OPT_ARRAY_CHECK; \
-+ *p ++ = o; \
-+} while (0)
-+
-+#define PUSH_SB_FIELD_OPT(field, format) PUSH_OPT(SB_FIELD_OPT(field, format))
-+#define PUSH_BIT_OPT(name, bit) PUSH_OPT(BIT_OPT(name, bit))
-+
-+ /*
-+ * tmgr.atom_max_size=N
-+ * Atoms containing more than N blocks will be forced to commit. N is
-+ * decimal.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_size, "%u");
-+ /*
-+ * tmgr.atom_max_age=N
-+ * Atoms older than N seconds will be forced to commit. N is decimal.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u");
-+ /*
-+ * tmgr.atom_min_size=N
-+ * In committing an atom to free dirty pages, force the atom less than
-+ * N in size to fuse with another one.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u");
-+ /*
-+ * tmgr.atom_max_flushers=N
-+ * limit of concurrent flushers for one atom. 0 means no limit.
-+ */
-+ PUSH_SB_FIELD_OPT(tmgr.atom_max_flushers, "%u");
-+ /*
-+ * tree.cbk_cache_slots=N
-+ * Number of slots in the cbk cache.
-+ */
-+ PUSH_SB_FIELD_OPT(tree.cbk_cache.nr_slots, "%u");
-+ /*
-+ * If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty
-+ * leaf-level blocks it will force them to be relocated.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.relocate_threshold, "%u");
-+ /*
-+ * If flush finds can find a block allocation closer than at most
-+ * FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that
-+ * position.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.relocate_distance, "%u");
-+ /*
-+ * If we have written this much or more blocks before encountering busy
-+ * jnode in flush list - abort flushing hoping that next time we get
-+ * called this jnode will be clean already, and we will save some
-+ * seeks.
-+ */
-+ PUSH_SB_FIELD_OPT(flush.written_threshold, "%u");
-+ /* The maximum number of nodes to scan left on a level during flush. */
-+ PUSH_SB_FIELD_OPT(flush.scan_maxnodes, "%u");
-+ /* preferred IO size */
-+ PUSH_SB_FIELD_OPT(optimal_io_size, "%u");
-+ /* carry flags used for insertion of new nodes */
-+ PUSH_SB_FIELD_OPT(tree.carry.new_node_flags, "%u");
-+ /* carry flags used for insertion of new extents */
-+ PUSH_SB_FIELD_OPT(tree.carry.new_extent_flags, "%u");
-+ /* carry flags used for paste operations */
-+ PUSH_SB_FIELD_OPT(tree.carry.paste_flags, "%u");
-+ /* carry flags used for insert operations */
-+ PUSH_SB_FIELD_OPT(tree.carry.insert_flags, "%u");
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ /*
-+ * Alternative master superblock location in case if it's original
-+ * location is not writeable/accessable. This is offset in BYTES.
-+ */
-+ PUSH_SB_FIELD_OPT(altsuper, "%lu");
-+#endif
-+
-+ /* turn on BSD-style gid assignment */
-+ PUSH_BIT_OPT("bsdgroups", REISER4_BSD_GID);
-+ /* turn on 32 bit times */
-+ PUSH_BIT_OPT("32bittimes", REISER4_32_BIT_TIMES);
-+ /*
-+ * Don't load all bitmap blocks at mount time, it is useful for
-+ * machines with tiny RAM and large disks.
-+ */
-+ PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP);
-+ /* disable transaction commits during write() */
-+ PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE);
-+ /* disable use of write barriers in the reiser4 log writer. */
-+ PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER);
-+
-+ PUSH_OPT(
-+ {
-+ /*
-+ * tree traversal readahead parameters:
-+ * -o readahead:MAXNUM:FLAGS
-+ * MAXNUM - max number fo nodes to request readahead for: -1UL
-+ * will set it to max_sane_readahead()
-+ * FLAGS - combination of bits: RA_ADJCENT_ONLY, RA_ALL_LEVELS,
-+ * CONTINUE_ON_PRESENT
-+ */
-+ .name = "readahead",
-+ .type = OPT_FORMAT,
-+ .u = {
-+ .f = {
-+ .format = "%u:%u",
-+ .nr_args = 2,
-+ .arg1 = &sbinfo->ra_params.max,
-+ .arg2 = &sbinfo->ra_params.flags,
-+ .arg3 = NULL,
-+ .arg4 = NULL
-+ }
-+ }
-+ }
-+ );
-+
-+ /* What to do in case of fs error */
-+ PUSH_OPT(
-+ {
-+ .name = "onerror",
-+ .type = OPT_ONEOF,
-+ .u = {
-+ .oneof = {
-+ .result = &sbinfo->onerror,
-+ .list = {
-+ "panic", "remount-ro", NULL
-+ },
-+ }
-+ }
-+ }
-+ );
-+
-+ /* modify default settings to values set by mount options */
-+ result = parse_options(opt_string, opts, p - opts);
-+ kfree(opts);
-+ if (result != 0)
-+ return result;
-+
-+ /* correct settings to sanity values */
-+ sbinfo->tmgr.atom_max_age *= HZ;
-+ if (sbinfo->tmgr.atom_max_age <= 0)
-+ /* overflow */
-+ sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE;
-+
-+ /* round optimal io size up to 512 bytes */
-+ sbinfo->optimal_io_size >>= VFS_BLKSIZE_BITS;
-+ sbinfo->optimal_io_size <<= VFS_BLKSIZE_BITS;
-+ if (sbinfo->optimal_io_size == 0) {
-+ warning("nikita-2497", "optimal_io_size is too small");
-+ return RETERR(-EINVAL);
-+ }
-+ return result;
-+}
-+
-+/**
-+ * reiser4_init_read_super - read reiser4 master super block
-+ * @super: super block to fill
-+ * @silent: if 0 - print warnings
-+ *
-+ * Reads reiser4 master super block either from predefined location or from
-+ * location specified by altsuper mount option, initializes disk format plugin.
-+ */
-+int reiser4_init_read_super(struct super_block *super, int silent)
-+{
-+ struct buffer_head *super_bh;
-+ struct reiser4_master_sb *master_sb;
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ unsigned long blocksize;
-+
-+ read_super_block:
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ if (sbinfo->altsuper)
-+ /*
-+ * read reiser4 master super block at position specified by
-+ * mount option
-+ */
-+ super_bh = sb_bread(super,
-+ (sector_t)(sbinfo->altsuper / super->s_blocksize));
-+ else
-+#endif
-+ /* read reiser4 master super block at 16-th 4096 block */
-+ super_bh = sb_bread(super,
-+ (sector_t)(REISER4_MAGIC_OFFSET / super->s_blocksize));
-+ if (!super_bh)
-+ return RETERR(-EIO);
-+
-+ master_sb = (struct reiser4_master_sb *)super_bh->b_data;
-+ /* check reiser4 magic string */
-+ if (!strncmp(master_sb->magic, REISER4_SUPER_MAGIC_STRING,
-+ sizeof(REISER4_SUPER_MAGIC_STRING))) {
-+ /* reiser4 master super block contains filesystem blocksize */
-+ blocksize = le16_to_cpu(get_unaligned(&master_sb->blocksize));
-+
-+ if (blocksize != PAGE_CACHE_SIZE) {
-+ /*
-+ * currenly reiser4's blocksize must be equal to
-+ * pagesize
-+ */
-+ if (!silent)
-+ warning("nikita-2609",
-+ "%s: wrong block size %ld\n", super->s_id,
-+ blocksize);
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+ }
-+ if (blocksize != super->s_blocksize) {
-+ /*
-+ * filesystem uses different blocksize. Reread master
-+ * super block with correct blocksize
-+ */
-+ brelse(super_bh);
-+ if (!sb_set_blocksize(super, (int)blocksize))
-+ return RETERR(-EINVAL);
-+ goto read_super_block;
-+ }
-+
-+ sbinfo->df_plug =
-+ disk_format_plugin_by_id(
-+ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
-+ if (sbinfo->df_plug == NULL) {
-+ if (!silent)
-+ warning("nikita-26091",
-+ "%s: unknown disk format plugin %d\n",
-+ super->s_id,
-+ le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+ }
-+ sbinfo->diskmap_block = le64_to_cpu(get_unaligned(&master_sb->diskmap));
-+ brelse(super_bh);
-+ return 0;
-+ }
-+
-+ /* there is no reiser4 on the device */
-+ if (!silent)
-+ warning("nikita-2608",
-+ "%s: wrong master super block magic", super->s_id);
-+ brelse(super_bh);
-+ return RETERR(-EINVAL);
-+}
-+
-+static struct {
-+ reiser4_plugin_type type;
-+ reiser4_plugin_id id;
-+} default_plugins[PSET_LAST] = {
-+ [PSET_FILE] = {
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID
-+ },
-+ [PSET_DIR] = {
-+ .type = REISER4_DIR_PLUGIN_TYPE,
-+ .id = HASHED_DIR_PLUGIN_ID
-+ },
-+ [PSET_HASH] = {
-+ .type = REISER4_HASH_PLUGIN_TYPE,
-+ .id = R5_HASH_ID
-+ },
-+ [PSET_FIBRATION] = {
-+ .type = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_DOT_O
-+ },
-+ [PSET_PERM] = {
-+ .type = REISER4_PERM_PLUGIN_TYPE,
-+ .id = NULL_PERM_ID
-+ },
-+ [PSET_FORMATTING] = {
-+ .type = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = SMALL_FILE_FORMATTING_ID
-+ },
-+ [PSET_SD] = {
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = STATIC_STAT_DATA_ID
-+ },
-+ [PSET_DIR_ITEM] = {
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = COMPOUND_DIR_ID
-+ },
-+ [PSET_CIPHER] = {
-+ .type = REISER4_CIPHER_PLUGIN_TYPE,
-+ .id = NONE_CIPHER_ID
-+ },
-+ [PSET_DIGEST] = {
-+ .type = REISER4_DIGEST_PLUGIN_TYPE,
-+ .id = SHA256_32_DIGEST_ID
-+ },
-+ [PSET_COMPRESSION] = {
-+ .type = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = LZO1_COMPRESSION_ID
-+ },
-+ [PSET_COMPRESSION_MODE] = {
-+ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = CONVX_COMPRESSION_MODE_ID
-+ },
-+ [PSET_CLUSTER] = {
-+ .type = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .id = CLUSTER_64K_ID
-+ },
-+ [PSET_CREATE] = {
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID
-+ }
-+};
-+
-+/* access to default plugin table */
-+reiser4_plugin *get_default_plugin(pset_member memb)
-+{
-+ return plugin_by_id(default_plugins[memb].type,
-+ default_plugins[memb].id);
-+}
-+
-+/**
-+ * reiser4_init_root_inode - obtain inode of root directory
-+ * @super: super block of filesystem
-+ *
-+ * Obtains inode of root directory (reading it from disk), initializes plugin
-+ * set it was not initialized.
-+ */
-+int reiser4_init_root_inode(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ struct inode *inode;
-+ int result = 0;
-+
-+ inode = reiser4_iget(super, sbinfo->df_plug->root_dir_key(super), 0);
-+ if (IS_ERR(inode))
-+ return RETERR(PTR_ERR(inode));
-+
-+ super->s_root = d_alloc_root(inode);
-+ if (!super->s_root) {
-+ iput(inode);
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ super->s_root->d_op = &sbinfo->ops.dentry;
-+
-+ if (!is_inode_loaded(inode)) {
-+ pset_member memb;
-+ plugin_set *pset;
-+
-+ pset = reiser4_inode_data(inode)->pset;
-+ for (memb = 0; memb < PSET_LAST; ++memb) {
-+
-+ if (aset_get(pset, memb) != NULL)
-+ continue;
-+
-+ result = grab_plugin_pset(inode, NULL, memb);
-+ if (result != 0)
-+ break;
-+
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+ }
-+
-+ if (result == 0) {
-+ if (REISER4_DEBUG) {
-+ for (memb = 0; memb < PSET_LAST; ++memb)
-+ assert("nikita-3500",
-+ aset_get(pset, memb) != NULL);
-+ }
-+ } else
-+ warning("nikita-3448", "Cannot set plugins of root: %i",
-+ result);
-+ reiser4_iget_complete(inode);
-+
-+ /* As the default pset kept in the root dir may has been changed
-+ (length is unknown), call update_sd. */
-+ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
-+ result = reiser4_grab_space(
-+ inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+
-+ if (result == 0)
-+ result = reiser4_update_sd(inode);
-+
-+ all_grabbed2free();
-+ }
-+ }
-+
-+ super->s_maxbytes = MAX_LFS_FILESIZE;
-+ return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/inode.c linux-2.6.24/fs/reiser4/inode.c
---- linux-2.6.24.orig/fs/reiser4/inode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/inode.c 2008-01-25 11:39:06.932206658 +0300
-@@ -0,0 +1,709 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Inode specific operations. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "kassign.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "dscale.h"
-+#include "plugin/item/item.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/plugin.h"
-+#include "plugin/object.h"
-+#include "znode.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+
-+/* return reiser4 internal tree which inode belongs to */
-+/* Audited by: green(2002.06.17) */
-+reiser4_tree *reiser4_tree_by_inode(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-256", inode != NULL);
-+ assert("nikita-257", inode->i_sb != NULL);
-+ return reiser4_get_tree(inode->i_sb);
-+}
-+
-+/* return reiser4-specific inode flags */
-+static inline unsigned long *inode_flags(const struct inode *const inode)
-+{
-+ assert("nikita-2842", inode != NULL);
-+ return &reiser4_inode_data(inode)->flags;
-+}
-+
-+/* set reiser4-specific flag @f in @inode */
-+void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2248", inode != NULL);
-+ set_bit((int)f, inode_flags(inode));
-+}
-+
-+/* clear reiser4-specific flag @f in @inode */
-+void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2250", inode != NULL);
-+ clear_bit((int)f, inode_flags(inode));
-+}
-+
-+/* true if reiser4-specific flag @f is set in @inode */
-+int reiser4_inode_get_flag(const struct inode *inode,
-+ reiser4_file_plugin_flags f)
-+{
-+ assert("nikita-2251", inode != NULL);
-+ return test_bit((int)f, inode_flags(inode));
-+}
-+
-+/* convert oid to inode number */
-+ino_t oid_to_ino(oid_t oid)
-+{
-+ return (ino_t) oid;
-+}
-+
-+/* convert oid to user visible inode number */
-+ino_t oid_to_uino(oid_t oid)
-+{
-+ /* reiser4 object is uniquely identified by oid which is 64 bit
-+ quantity. Kernel in-memory inode is indexed (in the hash table) by
-+ 32 bit i_ino field, but this is not a problem, because there is a
-+ way to further distinguish inodes with identical inode numbers
-+ (find_actor supplied to iget()).
-+
-+ But user space expects unique 32 bit inode number. Obviously this
-+ is impossible. Work-around is to somehow hash oid into user visible
-+ inode number.
-+ */
-+ oid_t max_ino = (ino_t) ~ 0;
-+
-+ if (REISER4_INO_IS_OID || (oid <= max_ino))
-+ return oid;
-+ else
-+ /* this is remotely similar to algorithm used to find next pid
-+ to use for process: after wrap-around start from some
-+ offset rather than from 0. Idea is that there are some long
-+ living objects with which we don't want to collide.
-+ */
-+ return REISER4_UINO_SHIFT + ((oid - max_ino) & (max_ino >> 1));
-+}
-+
-+/* check that "inode" is on reiser4 file-system */
-+int is_reiser4_inode(const struct inode *inode /* inode queried */ )
-+{
-+ return inode != NULL && is_reiser4_super(inode->i_sb);
-+}
-+
-+/* Maximal length of a name that can be stored in directory @inode.
-+
-+ This is used in check during file creation and lookup. */
-+int reiser4_max_filename_len(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-287", is_reiser4_inode(inode));
-+ assert("nikita-1710", inode_dir_item_plugin(inode));
-+ if (inode_dir_item_plugin(inode)->s.dir.max_name_len)
-+ return inode_dir_item_plugin(inode)->s.dir.max_name_len(inode);
-+ else
-+ return 255;
-+}
-+
-+#if REISER4_USE_COLLISION_LIMIT
-+/* Maximal number of hash collisions for this directory. */
-+int max_hash_collisions(const struct inode *dir /* inode queried */ )
-+{
-+ assert("nikita-1711", dir != NULL);
-+ return reiser4_inode_data(dir)->plugin.max_collisions;
-+}
-+#endif /* REISER4_USE_COLLISION_LIMIT */
-+
-+/* Install file, inode, and address_space operation on @inode, depending on
-+ its mode. */
-+int setup_inode_ops(struct inode *inode /* inode to intialize */ ,
-+ reiser4_object_create_data * data /* parameters to create
-+ * object */ )
-+{
-+ reiser4_super_info_data *sinfo;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ fplug = inode_file_plugin(inode);
-+ dplug = inode_dir_plugin(inode);
-+
-+ sinfo = get_super_private(inode->i_sb);
-+
-+ switch (inode->i_mode & S_IFMT) {
-+ case S_IFSOCK:
-+ case S_IFBLK:
-+ case S_IFCHR:
-+ case S_IFIFO:
-+ {
-+ dev_t rdev; /* to keep gcc happy */
-+
-+ assert("vs-46", fplug != NULL);
-+ /* ugly hack with rdev */
-+ if (data == NULL) {
-+ rdev = inode->i_rdev;
-+ inode->i_rdev = 0;
-+ } else
-+ rdev = data->rdev;
-+ inode->i_blocks = 0;
-+ assert("vs-42", fplug->h.id == SPECIAL_FILE_PLUGIN_ID);
-+ inode->i_op = file_plugins[fplug->h.id].inode_ops;
-+ /* initialize inode->i_fop and inode->i_rdev for block and char
-+ devices */
-+ init_special_inode(inode, inode->i_mode, rdev);
-+ /* all address space operations are null */
-+ inode->i_mapping->a_ops =
-+ file_plugins[fplug->h.id].as_ops;
-+ break;
-+ }
-+ case S_IFLNK:
-+ assert("vs-46", fplug != NULL);
-+ assert("vs-42", fplug->h.id == SYMLINK_FILE_PLUGIN_ID);
-+ inode->i_op = file_plugins[fplug->h.id].inode_ops;
-+ inode->i_fop = NULL;
-+ /* all address space operations are null */
-+ inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
-+ break;
-+ case S_IFDIR:
-+ assert("vs-46", dplug != NULL);
-+ assert("vs-43", (dplug->h.id == HASHED_DIR_PLUGIN_ID ||
-+ dplug->h.id == SEEKABLE_HASHED_DIR_PLUGIN_ID));
-+ inode->i_op = dir_plugins[dplug->h.id].inode_ops;
-+ inode->i_fop = dir_plugins[dplug->h.id].file_ops;
-+ inode->i_mapping->a_ops = dir_plugins[dplug->h.id].as_ops;
-+ break;
-+ case S_IFREG:
-+ assert("vs-46", fplug != NULL);
-+ assert("vs-43", (fplug->h.id == UNIX_FILE_PLUGIN_ID ||
-+ fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+ inode->i_op = file_plugins[fplug->h.id].inode_ops;
-+ inode->i_fop = file_plugins[fplug->h.id].file_ops;
-+ inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
-+ break;
-+ default:
-+ warning("nikita-291", "wrong file mode: %o for %llu",
-+ inode->i_mode,
-+ (unsigned long long)get_inode_oid(inode));
-+ reiser4_make_bad_inode(inode);
-+ return RETERR(-EINVAL);
-+ }
-+ return 0;
-+}
-+
-+/* Initialize inode from disk data. Called with inode locked.
-+ Return inode locked. */
-+static int init_inode(struct inode *inode /* inode to intialise */ ,
-+ coord_t * coord /* coord of stat data */ )
-+{
-+ int result;
-+ item_plugin *iplug;
-+ void *body;
-+ int length;
-+ reiser4_inode *state;
-+
-+ assert("nikita-292", coord != NULL);
-+ assert("nikita-293", inode != NULL);
-+
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (result)
-+ return result;
-+ iplug = item_plugin_by_coord(coord);
-+ body = item_body_by_coord(coord);
-+ length = item_length_by_coord(coord);
-+
-+ assert("nikita-295", iplug != NULL);
-+ assert("nikita-296", body != NULL);
-+ assert("nikita-297", length > 0);
-+
-+ /* inode is under I_LOCK now */
-+
-+ state = reiser4_inode_data(inode);
-+ /* call stat-data plugin method to load sd content into inode */
-+ result = iplug->s.sd.init_inode(inode, body, length);
-+ set_plugin(&state->pset, PSET_SD, item_plugin_to_plugin(iplug));
-+ if (result == 0) {
-+ result = setup_inode_ops(inode, NULL);
-+ if (result == 0 && inode->i_sb->s_root &&
-+ inode->i_sb->s_root->d_inode)
-+ result = finish_pset(inode);
-+ }
-+ zrelse(coord->node);
-+ return result;
-+}
-+
-+/* read `inode' from the disk. This is what was previously in
-+ reiserfs_read_inode2().
-+
-+ Must be called with inode locked. Return inode still locked.
-+*/
-+static int read_inode(struct inode *inode /* inode to read from disk */ ,
-+ const reiser4_key * key /* key of stat data */ ,
-+ int silent)
-+{
-+ int result;
-+ lock_handle lh;
-+ reiser4_inode *info;
-+ coord_t coord;
-+
-+ assert("nikita-298", inode != NULL);
-+ assert("nikita-1945", !is_inode_loaded(inode));
-+
-+ info = reiser4_inode_data(inode);
-+ assert("nikita-300", info->locality_id != 0);
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ /* locate stat-data in a tree and return znode locked */
-+ result = lookup_sd(inode, ZNODE_READ_LOCK, &coord, &lh, key, silent);
-+ assert("nikita-301", !is_inode_loaded(inode));
-+ if (result == 0) {
-+ /* use stat-data plugin to load sd into inode. */
-+ result = init_inode(inode, &coord);
-+ if (result == 0) {
-+ /* initialize stat-data seal */
-+ spin_lock_inode(inode);
-+ reiser4_seal_init(&info->sd_seal, &coord, key);
-+ info->sd_coord = coord;
-+ spin_unlock_inode(inode);
-+
-+ /* call file plugin's method to initialize plugin
-+ * specific part of inode */
-+ if (inode_file_plugin(inode)->init_inode_data)
-+ inode_file_plugin(inode)->init_inode_data(inode,
-+ NULL,
-+ 0);
-+ /* load detached directory cursors for stateless
-+ * directory readers (NFS). */
-+ reiser4_load_cursors(inode);
-+
-+ /* Check the opened inode for consistency. */
-+ result =
-+ get_super_private(inode->i_sb)->df_plug->
-+ check_open(inode);
-+ }
-+ }
-+ /* lookup_sd() doesn't release coord because we want znode
-+ stay read-locked while stat-data fields are accessed in
-+ init_inode() */
-+ done_lh(&lh);
-+
-+ if (result != 0)
-+ reiser4_make_bad_inode(inode);
-+ return result;
-+}
-+
-+/* initialise new reiser4 inode being inserted into hash table. */
-+static int init_locked_inode(struct inode *inode /* new inode */ ,
-+ void *opaque /* key of stat data passed to the
-+ * iget5_locked as cookie */ )
-+{
-+ reiser4_key *key;
-+
-+ assert("nikita-1995", inode != NULL);
-+ assert("nikita-1996", opaque != NULL);
-+ key = opaque;
-+ set_inode_oid(inode, get_key_objectid(key));
-+ reiser4_inode_data(inode)->locality_id = get_key_locality(key);
-+ return 0;
-+}
-+
-+/* reiser4_inode_find_actor() - "find actor" supplied by reiser4 to iget5_locked().
-+
-+ This function is called by iget5_locked() to distinguish reiser4 inodes
-+ having the same inode numbers. Such inodes can only exist due to some error
-+ condition. One of them should be bad. Inodes with identical inode numbers
-+ (objectids) are distinguished by their packing locality.
-+
-+*/
-+static int reiser4_inode_find_actor(struct inode *inode /* inode from hash table to
-+ * check */ ,
-+ void *opaque /* "cookie" passed to
-+ * iget5_locked(). This is stat data
-+ * key */ )
-+{
-+ reiser4_key *key;
-+
-+ key = opaque;
-+ return
-+ /* oid is unique, so first term is enough, actually. */
-+ get_inode_oid(inode) == get_key_objectid(key) &&
-+ /*
-+ * also, locality should be checked, but locality is stored in
-+ * the reiser4-specific part of the inode, and actor can be
-+ * called against arbitrary inode that happened to be in this
-+ * hash chain. Hence we first have to check that this is
-+ * reiser4 inode at least. is_reiser4_inode() is probably too
-+ * early to call, as inode may have ->i_op not yet
-+ * initialised.
-+ */
-+ is_reiser4_super(inode->i_sb) &&
-+ /*
-+ * usually objectid is unique, but pseudo files use counter to
-+ * generate objectid. All pseudo files are placed into special
-+ * (otherwise unused) locality.
-+ */
-+ reiser4_inode_data(inode)->locality_id == get_key_locality(key);
-+}
-+
-+/* hook for kmem_cache_create */
-+void loading_init_once(reiser4_inode * info)
-+{
-+ mutex_init(&info->loading);
-+}
-+
-+/* for reiser4_alloc_inode */
-+void loading_alloc(reiser4_inode * info)
-+{
-+ assert("vs-1717", !mutex_is_locked(&info->loading));
-+}
-+
-+/* for reiser4_destroy */
-+void loading_destroy(reiser4_inode * info)
-+{
-+ assert("vs-1717a", !mutex_is_locked(&info->loading));
-+}
-+
-+static void loading_begin(reiser4_inode * info)
-+{
-+ mutex_lock(&info->loading);
-+}
-+
-+static void loading_end(reiser4_inode * info)
-+{
-+ mutex_unlock(&info->loading);
-+}
-+
-+/**
-+ * reiser4_iget - obtain inode via iget5_locked, read from disk if necessary
-+ * @super: super block of filesystem
-+ * @key: key of inode's stat-data
-+ * @silent:
-+ *
-+ * This is our helper function a la iget(). This is be called by
-+ * lookup_common() and reiser4_read_super(). Return inode locked or error
-+ * encountered.
-+ */
-+struct inode *reiser4_iget(struct super_block *super, const reiser4_key *key,
-+ int silent)
-+{
-+ struct inode *inode;
-+ int result;
-+ reiser4_inode *info;
-+
-+ assert("nikita-302", super != NULL);
-+ assert("nikita-303", key != NULL);
-+
-+ result = 0;
-+
-+ /* call iget(). Our ->read_inode() is dummy, so this will either
-+ find inode in cache or return uninitialised inode */
-+ inode = iget5_locked(super,
-+ (unsigned long)get_key_objectid(key),
-+ reiser4_inode_find_actor,
-+ init_locked_inode, (reiser4_key *) key);
-+ if (inode == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ if (is_bad_inode(inode)) {
-+ warning("nikita-304", "Bad inode found");
-+ reiser4_print_key("key", key);
-+ iput(inode);
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ info = reiser4_inode_data(inode);
-+
-+ /* Reiser4 inode state bit REISER4_LOADED is used to distinguish fully
-+ loaded and initialized inode from just allocated inode. If
-+ REISER4_LOADED bit is not set, reiser4_iget() completes loading under
-+ info->loading. The place in reiser4 which uses not initialized inode
-+ is the reiser4 repacker, see repacker-related functions in
-+ plugin/item/extent.c */
-+ if (!is_inode_loaded(inode)) {
-+ loading_begin(info);
-+ if (!is_inode_loaded(inode)) {
-+ /* locking: iget5_locked returns locked inode */
-+ assert("nikita-1941", !is_inode_loaded(inode));
-+ assert("nikita-1949",
-+ reiser4_inode_find_actor(inode,
-+ (reiser4_key *) key));
-+ /* now, inode has objectid as ->i_ino and locality in
-+ reiser4-specific part. This is enough for
-+ read_inode() to read stat data from the disk */
-+ result = read_inode(inode, key, silent);
-+ } else
-+ loading_end(info);
-+ }
-+
-+ if (inode->i_state & I_NEW)
-+ unlock_new_inode(inode);
-+
-+ if (is_bad_inode(inode)) {
-+ assert("vs-1717", result != 0);
-+ loading_end(info);
-+ iput(inode);
-+ inode = ERR_PTR(result);
-+ } else if (REISER4_DEBUG) {
-+ reiser4_key found_key;
-+
-+ assert("vs-1717", result == 0);
-+ build_sd_key(inode, &found_key);
-+ if (!keyeq(&found_key, key)) {
-+ warning("nikita-305", "Wrong key in sd");
-+ reiser4_print_key("sought for", key);
-+ reiser4_print_key("found", &found_key);
-+ }
-+ if (inode->i_nlink == 0) {
-+ warning("nikita-3559", "Unlinked inode found: %llu\n",
-+ (unsigned long long)get_inode_oid(inode));
-+ }
-+ }
-+ return inode;
-+}
-+
-+/* reiser4_iget() may return not fully initialized inode, this function should
-+ * be called after one completes reiser4 inode initializing. */
-+void reiser4_iget_complete(struct inode *inode)
-+{
-+ assert("zam-988", is_reiser4_inode(inode));
-+
-+ if (!is_inode_loaded(inode)) {
-+ reiser4_inode_set_flag(inode, REISER4_LOADED);
-+ loading_end(reiser4_inode_data(inode));
-+ }
-+}
-+
-+void reiser4_make_bad_inode(struct inode *inode)
-+{
-+ assert("nikita-1934", inode != NULL);
-+
-+ /* clear LOADED bit */
-+ reiser4_inode_clr_flag(inode, REISER4_LOADED);
-+ make_bad_inode(inode);
-+ return;
-+}
-+
-+file_plugin *inode_file_plugin(const struct inode * inode)
-+{
-+ assert("nikita-1997", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->file;
-+}
-+
-+dir_plugin *inode_dir_plugin(const struct inode * inode)
-+{
-+ assert("nikita-1998", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->dir;
-+}
-+
-+formatting_plugin *inode_formatting_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2000", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->formatting;
-+}
-+
-+hash_plugin *inode_hash_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2001", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->hash;
-+}
-+
-+fibration_plugin *inode_fibration_plugin(const struct inode * inode)
-+{
-+ assert("nikita-2001", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->fibration;
-+}
-+
-+cipher_plugin *inode_cipher_plugin(const struct inode * inode)
-+{
-+ assert("edward-36", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->cipher;
-+}
-+
-+compression_plugin *inode_compression_plugin(const struct inode * inode)
-+{
-+ assert("edward-37", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->compression;
-+}
-+
-+compression_mode_plugin *inode_compression_mode_plugin(const struct inode *
-+ inode)
-+{
-+ assert("edward-1330", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->compression_mode;
-+}
-+
-+cluster_plugin *inode_cluster_plugin(const struct inode * inode)
-+{
-+ assert("edward-1328", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->cluster;
-+}
-+
-+file_plugin *inode_create_plugin(const struct inode * inode)
-+{
-+ assert("edward-1329", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->create;
-+}
-+
-+digest_plugin *inode_digest_plugin(const struct inode * inode)
-+{
-+ assert("edward-86", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->digest;
-+}
-+
-+item_plugin *inode_sd_plugin(const struct inode * inode)
-+{
-+ assert("vs-534", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->sd;
-+}
-+
-+item_plugin *inode_dir_item_plugin(const struct inode * inode)
-+{
-+ assert("vs-534", inode != NULL);
-+ return reiser4_inode_data(inode)->pset->dir_item;
-+}
-+
-+file_plugin *child_create_plugin(const struct inode * inode)
-+{
-+ assert("edward-1329", inode != NULL);
-+ return reiser4_inode_data(inode)->hset->create;
-+}
-+
-+void inode_set_extension(struct inode *inode, sd_ext_bits ext)
-+{
-+ reiser4_inode *state;
-+
-+ assert("nikita-2716", inode != NULL);
-+ assert("nikita-2717", ext < LAST_SD_EXTENSION);
-+ assert("nikita-3491", spin_inode_is_locked(inode));
-+
-+ state = reiser4_inode_data(inode);
-+ state->extmask |= 1 << ext;
-+ /* force re-calculation of stat-data length on next call to
-+ update_sd(). */
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+}
-+
-+void inode_clr_extension(struct inode *inode, sd_ext_bits ext)
-+{
-+ reiser4_inode *state;
-+
-+ assert("vpf-1926", inode != NULL);
-+ assert("vpf-1927", ext < LAST_SD_EXTENSION);
-+ assert("vpf-1928", spin_inode_is_locked(inode));
-+
-+ state = reiser4_inode_data(inode);
-+ state->extmask &= ~(1 << ext);
-+ /* force re-calculation of stat-data length on next call to
-+ update_sd(). */
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+}
-+
-+void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new)
-+{
-+ assert("edward-1287", inode != NULL);
-+ if (!dscale_fit(old, new))
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+ return;
-+}
-+
-+void inode_check_scale(struct inode *inode, __u64 old, __u64 new)
-+{
-+ assert("nikita-2875", inode != NULL);
-+ spin_lock_inode(inode);
-+ inode_check_scale_nolock(inode, old, new);
-+ spin_unlock_inode(inode);
-+}
-+
-+/*
-+ * initialize ->ordering field of inode. This field defines how file stat-data
-+ * and body is ordered within a tree with respect to other objects within the
-+ * same parent directory.
-+ */
-+void
-+init_inode_ordering(struct inode *inode,
-+ reiser4_object_create_data * crd, int create)
-+{
-+ reiser4_key key;
-+
-+ if (create) {
-+ struct inode *parent;
-+
-+ parent = crd->parent;
-+ assert("nikita-3224", inode_dir_plugin(parent) != NULL);
-+ inode_dir_plugin(parent)->build_entry_key(parent,
-+ &crd->dentry->d_name,
-+ &key);
-+ } else {
-+ coord_t *coord;
-+
-+ coord = &reiser4_inode_data(inode)->sd_coord;
-+ coord_clear_iplug(coord);
-+ /* safe to use ->sd_coord, because node is under long term
-+ * lock */
-+ WITH_DATA(coord->node, item_key_by_coord(coord, &key));
-+ }
-+
-+ set_inode_ordering(inode, get_key_ordering(&key));
-+}
-+
-+znode *inode_get_vroot(struct inode *inode)
-+{
-+ reiser4_block_nr blk;
-+ znode *result;
-+
-+ spin_lock_inode(inode);
-+ blk = reiser4_inode_data(inode)->vroot;
-+ spin_unlock_inode(inode);
-+ if (!disk_addr_eq(&UBER_TREE_ADDR, &blk))
-+ result = zlook(reiser4_tree_by_inode(inode), &blk);
-+ else
-+ result = NULL;
-+ return result;
-+}
-+
-+void inode_set_vroot(struct inode *inode, znode *vroot)
-+{
-+ spin_lock_inode(inode);
-+ reiser4_inode_data(inode)->vroot = *znode_get_block(vroot);
-+ spin_unlock_inode(inode);
-+}
-+
-+#if REISER4_DEBUG
-+
-+void reiser4_inode_invariant(const struct inode *inode)
-+{
-+ assert("nikita-3077", spin_inode_is_locked(inode));
-+}
-+
-+int inode_has_no_jnodes(reiser4_inode * r4_inode)
-+{
-+ return jnode_tree_by_reiser4_inode(r4_inode)->rnode == NULL &&
-+ r4_inode->nr_jnodes == 0;
-+}
-+
-+#endif
-+
-+/* true if directory is empty (only contains dot and dotdot) */
-+/* FIXME: shouldn't it be dir plugin method? */
-+int is_dir_empty(const struct inode *dir)
-+{
-+ assert("nikita-1976", dir != NULL);
-+
-+ /* rely on our method to maintain directory i_size being equal to the
-+ number of entries. */
-+ return dir->i_size <= 2 ? 0 : RETERR(-ENOTEMPTY);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/inode.h linux-2.6.24/fs/reiser4/inode.h
---- linux-2.6.24.orig/fs/reiser4/inode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/inode.h 2008-01-25 11:39:06.936207689 +0300
-@@ -0,0 +1,449 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Inode functions. */
-+
-+#if !defined( __REISER4_INODE_H__ )
-+#define __REISER4_INODE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "seal.h"
-+#include "plugin/plugin.h"
-+#include "plugin/file/cryptcompress.h"
-+#include "plugin/file/file.h"
-+#include "plugin/dir/dir.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/security/perm.h"
-+#include "vfs_ops.h"
-+#include "jnode.h"
-+#include "fsdata.h"
-+
-+#include <linux/types.h> /* for __u?? , ino_t */
-+#include <linux/fs.h> /* for struct super_block, struct
-+ * rw_semaphore, etc */
-+#include <linux/spinlock.h>
-+#include <asm/types.h>
-+
-+/* reiser4-specific inode flags. They are "transient" and are not
-+ supposed to be stored on disk. Used to trace "state" of
-+ inode
-+*/
-+typedef enum {
-+ /* this is light-weight inode, inheriting some state from its
-+ parent */
-+ REISER4_LIGHT_WEIGHT = 0,
-+ /* stat data wasn't yet created */
-+ REISER4_NO_SD = 1,
-+ /* internal immutable flag. Currently is only used
-+ to avoid race condition during file creation.
-+ See comment in create_object(). */
-+ REISER4_IMMUTABLE = 2,
-+ /* inode was read from storage */
-+ REISER4_LOADED = 3,
-+ /* this bit is set for symlinks. inode->i_private points to target
-+ name of symlink. */
-+ REISER4_GENERIC_PTR_USED = 4,
-+ /* set if size of stat-data item for this inode is known. If this is
-+ * set we can avoid recalculating size of stat-data on each update. */
-+ REISER4_SDLEN_KNOWN = 5,
-+ /* reiser4_inode->crypt points to the crypto stat */
-+ REISER4_CRYPTO_STAT_LOADED = 6,
-+ /* cryptcompress_inode_data points to the secret key */
-+ REISER4_SECRET_KEY_INSTALLED = 7,
-+ /* File (possibly) has pages corresponding to the tail items, that
-+ * were created by ->readpage. It is set by mmap_unix_file() and
-+ * sendfile_unix_file(). This bit is inspected by write_unix_file and
-+ * kill-hook of tail items. It is never cleared once set. This bit is
-+ * modified and inspected under i_mutex. */
-+ REISER4_HAS_MMAP = 8,
-+ REISER4_PART_MIXED = 9,
-+ REISER4_PART_IN_CONV = 10,
-+ /* This flag indicates that file plugin conversion is in progress */
-+ REISER4_FILE_CONV_IN_PROGRESS = 11
-+} reiser4_file_plugin_flags;
-+
-+/* state associated with each inode.
-+ reiser4 inode.
-+
-+ NOTE-NIKITA In 2.5 kernels it is not necessary that all file-system inodes
-+ be of the same size. File-system allocates inodes by itself through
-+ s_op->allocate_inode() method. So, it is possible to adjust size of inode
-+ at the time of its creation.
-+
-+ Invariants involving parts of this data-type:
-+
-+ [inode->eflushed]
-+
-+*/
-+
-+typedef struct reiser4_inode reiser4_inode;
-+/* return pointer to reiser4-specific part of inode */
-+static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
-+ /* inode queried */ );
-+
-+#if BITS_PER_LONG == 64
-+
-+#define REISER4_INO_IS_OID (1)
-+typedef struct {;
-+} oid_hi_t;
-+
-+/* BITS_PER_LONG == 64 */
-+#else
-+
-+#define REISER4_INO_IS_OID (0)
-+typedef __u32 oid_hi_t;
-+
-+/* BITS_PER_LONG == 64 */
-+#endif
-+
-+struct reiser4_inode {
-+ /* spin lock protecting fields of this structure. */
-+ spinlock_t guard;
-+ /* main plugin set that control the file
-+ (see comments in plugin/plugin_set.c) */
-+ plugin_set *pset;
-+ /* plugin set for inheritance
-+ (see comments in plugin/plugin_set.c) */
-+ plugin_set *hset;
-+ /* high 32 bits of object id */
-+ oid_hi_t oid_hi;
-+ /* seal for stat-data */
-+ seal_t sd_seal;
-+ /* locality id for this file */
-+ oid_t locality_id;
-+#if REISER4_LARGE_KEY
-+ __u64 ordering;
-+#endif
-+ /* coord of stat-data in sealed node */
-+ coord_t sd_coord;
-+ /* bit-mask of stat-data extentions used by this file */
-+ __u64 extmask;
-+ /* bitmask of non-default plugins for this inode */
-+ __u16 plugin_mask;
-+ /* bitmask of set heir plugins for this inode. */
-+ __u16 heir_mask;
-+ union {
-+ struct list_head readdir_list;
-+ struct list_head not_used;
-+ } lists;
-+ /* per-inode flags. Filled by values of reiser4_file_plugin_flags */
-+ unsigned long flags;
-+ union {
-+ /* fields specific to unix_file plugin */
-+ struct unix_file_info unix_file_info;
-+ /* fields specific to cryptcompress file plugin */
-+ struct cryptcompress_info cryptcompress_info;
-+ } file_plugin_data;
-+
-+ /* this semaphore is to serialize readers and writers of @pset->file
-+ * when file plugin conversion is enabled
-+ */
-+ struct rw_semaphore conv_sem;
-+
-+ /* tree of jnodes. Phantom jnodes (ones not attched to any atom) are
-+ tagged in that tree by EFLUSH_TAG_ANONYMOUS */
-+ struct radix_tree_root jnodes_tree;
-+#if REISER4_DEBUG
-+ /* number of unformatted node jnodes of this file in jnode hash table */
-+ unsigned long nr_jnodes;
-+#endif
-+
-+ /* block number of virtual root for this object. See comment above
-+ * fs/reiser4/search.c:handle_vroot() */
-+ reiser4_block_nr vroot;
-+ struct mutex loading;
-+};
-+
-+void loading_init_once(reiser4_inode *);
-+void loading_alloc(reiser4_inode *);
-+void loading_destroy(reiser4_inode *);
-+
-+struct reiser4_inode_object {
-+ /* private part */
-+ reiser4_inode p;
-+ /* generic fields not specific to reiser4, but used by VFS */
-+ struct inode vfs_inode;
-+};
-+
-+/* return pointer to the reiser4 specific portion of @inode */
-+static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
-+ /* inode queried */ )
-+{
-+ assert("nikita-254", inode != NULL);
-+ return &container_of(inode, struct reiser4_inode_object, vfs_inode)->p;
-+}
-+
-+static inline struct inode *inode_by_reiser4_inode(const reiser4_inode *
-+ r4_inode /* inode queried */
-+ )
-+{
-+ return &container_of(r4_inode, struct reiser4_inode_object, p)->vfs_inode;
-+}
-+
-+/*
-+ * reiser4 inodes are identified by 64bit object-id (oid_t), but in struct
-+ * inode ->i_ino field is of type ino_t (long) that can be either 32 or 64
-+ * bits.
-+ *
-+ * If ->i_ino is 32 bits we store remaining 32 bits in reiser4 specific part
-+ * of inode, otherwise whole oid is stored in i_ino.
-+ *
-+ * Wrappers below ([sg]et_inode_oid()) are used to hide this difference.
-+ */
-+
-+#define OID_HI_SHIFT (sizeof(ino_t) * 8)
-+
-+#if REISER4_INO_IS_OID
-+
-+static inline oid_t get_inode_oid(const struct inode *inode)
-+{
-+ return inode->i_ino;
-+}
-+
-+static inline void set_inode_oid(struct inode *inode, oid_t oid)
-+{
-+ inode->i_ino = oid;
-+}
-+
-+/* REISER4_INO_IS_OID */
-+#else
-+
-+static inline oid_t get_inode_oid(const struct inode *inode)
-+{
-+ return
-+ ((__u64) reiser4_inode_data(inode)->oid_hi << OID_HI_SHIFT) |
-+ inode->i_ino;
-+}
-+
-+static inline void set_inode_oid(struct inode *inode, oid_t oid)
-+{
-+ assert("nikita-2519", inode != NULL);
-+ inode->i_ino = (ino_t) (oid);
-+ reiser4_inode_data(inode)->oid_hi = (oid) >> OID_HI_SHIFT;
-+ assert("nikita-2521", get_inode_oid(inode) == (oid));
-+}
-+
-+/* REISER4_INO_IS_OID */
-+#endif
-+
-+static inline oid_t get_inode_locality(const struct inode *inode)
-+{
-+ return reiser4_inode_data(inode)->locality_id;
-+}
-+
-+#if REISER4_LARGE_KEY
-+static inline __u64 get_inode_ordering(const struct inode *inode)
-+{
-+ return reiser4_inode_data(inode)->ordering;
-+}
-+
-+static inline void set_inode_ordering(const struct inode *inode, __u64 ordering)
-+{
-+ reiser4_inode_data(inode)->ordering = ordering;
-+}
-+
-+#else
-+
-+#define get_inode_ordering(inode) (0)
-+#define set_inode_ordering(inode, val) noop
-+
-+#endif
-+
-+/* return inode in which @uf_info is embedded */
-+static inline struct inode *
-+unix_file_info_to_inode(const struct unix_file_info * uf_info)
-+{
-+ return &container_of(uf_info, struct reiser4_inode_object,
-+ p.file_plugin_data.unix_file_info)->vfs_inode;
-+}
-+
-+extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const));
-+extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const));
-+
-+extern reiser4_tree *reiser4_tree_by_inode(const struct inode *inode);
-+
-+#if REISER4_DEBUG
-+extern void reiser4_inode_invariant(const struct inode *inode);
-+extern int inode_has_no_jnodes(reiser4_inode *);
-+#else
-+#define reiser4_inode_invariant(inode) noop
-+#endif
-+
-+static inline int spin_inode_is_locked(const struct inode *inode)
-+{
-+ assert_spin_locked(&reiser4_inode_data(inode)->guard);
-+ return 1;
-+}
-+
-+/**
-+ * spin_lock_inode - lock reiser4_inode' embedded spinlock
-+ * @inode: inode to lock
-+ *
-+ * In debug mode it checks that lower priority locks are not held and
-+ * increments reiser4_context's lock counters on which lock ordering checking
-+ * is based.
-+ */
-+static inline void spin_lock_inode(struct inode *inode)
-+{
-+ assert("", LOCK_CNT_NIL(spin_locked));
-+ /* check lock ordering */
-+ assert_spin_not_locked(&d_lock);
-+
-+ spin_lock(&reiser4_inode_data(inode)->guard);
-+
-+ LOCK_CNT_INC(spin_locked_inode);
-+ LOCK_CNT_INC(spin_locked);
-+
-+ reiser4_inode_invariant(inode);
-+}
-+
-+/**
-+ * spin_unlock_inode - unlock reiser4_inode' embedded spinlock
-+ * @inode: inode to unlock
-+ *
-+ * In debug mode it checks that spinlock is held and decrements
-+ * reiser4_context's lock counters on which lock ordering checking is based.
-+ */
-+static inline void spin_unlock_inode(struct inode *inode)
-+{
-+ assert_spin_locked(&reiser4_inode_data(inode)->guard);
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ reiser4_inode_invariant(inode);
-+
-+ LOCK_CNT_DEC(spin_locked_inode);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&reiser4_inode_data(inode)->guard);
-+}
-+
-+extern znode *inode_get_vroot(struct inode *inode);
-+extern void inode_set_vroot(struct inode *inode, znode * vroot);
-+
-+extern int reiser4_max_filename_len(const struct inode *inode);
-+extern int max_hash_collisions(const struct inode *dir);
-+extern void reiser4_unlock_inode(struct inode *inode);
-+extern int is_reiser4_inode(const struct inode *inode);
-+extern int setup_inode_ops(struct inode *inode, reiser4_object_create_data *);
-+extern struct inode *reiser4_iget(struct super_block *super,
-+ const reiser4_key * key, int silent);
-+extern void reiser4_iget_complete(struct inode *inode);
-+extern void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f);
-+extern void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f);
-+extern int reiser4_inode_get_flag(const struct inode *inode,
-+ reiser4_file_plugin_flags f);
-+
-+/* has inode been initialized? */
-+static inline int
-+is_inode_loaded(const struct inode *inode /* inode queried */ )
-+{
-+ assert("nikita-1120", inode != NULL);
-+ return reiser4_inode_get_flag(inode, REISER4_LOADED);
-+}
-+
-+extern file_plugin *inode_file_plugin(const struct inode *inode);
-+extern dir_plugin *inode_dir_plugin(const struct inode *inode);
-+extern formatting_plugin *inode_formatting_plugin(const struct inode *inode);
-+extern hash_plugin *inode_hash_plugin(const struct inode *inode);
-+extern fibration_plugin *inode_fibration_plugin(const struct inode *inode);
-+extern cipher_plugin *inode_cipher_plugin(const struct inode *inode);
-+extern digest_plugin *inode_digest_plugin(const struct inode *inode);
-+extern compression_plugin *inode_compression_plugin(const struct inode *inode);
-+extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode
-+ *inode);
-+extern cluster_plugin *inode_cluster_plugin(const struct inode *inode);
-+extern file_plugin *inode_create_plugin(const struct inode *inode);
-+extern item_plugin *inode_sd_plugin(const struct inode *inode);
-+extern item_plugin *inode_dir_item_plugin(const struct inode *inode);
-+extern file_plugin *child_create_plugin(const struct inode *inode);
-+
-+extern void reiser4_make_bad_inode(struct inode *inode);
-+
-+extern void inode_set_extension(struct inode *inode, sd_ext_bits ext);
-+extern void inode_clr_extension(struct inode *inode, sd_ext_bits ext);
-+extern void inode_check_scale(struct inode *inode, __u64 old, __u64 new);
-+extern void inode_check_scale_nolock(struct inode * inode, __u64 old, __u64 new);
-+
-+#define INODE_SET_SIZE(i, value) \
-+({ \
-+ struct inode *__i; \
-+ typeof(value) __v; \
-+ \
-+ __i = (i); \
-+ __v = (value); \
-+ inode_check_scale(__i, __i->i_size, __v); \
-+ i_size_write(__i, __v); \
-+})
-+
-+/*
-+ * update field @field in inode @i to contain value @value.
-+ */
-+#define INODE_SET_FIELD(i, field, value) \
-+({ \
-+ struct inode *__i; \
-+ typeof(value) __v; \
-+ \
-+ __i = (i); \
-+ __v = (value); \
-+ inode_check_scale(__i, __i->field, __v); \
-+ __i->field = __v; \
-+})
-+
-+#define INODE_INC_FIELD(i, field) \
-+({ \
-+ struct inode *__i; \
-+ \
-+ __i = (i); \
-+ inode_check_scale(__i, __i->field, __i->field + 1); \
-+ ++ __i->field; \
-+})
-+
-+#define INODE_DEC_FIELD(i, field) \
-+({ \
-+ struct inode *__i; \
-+ \
-+ __i = (i); \
-+ inode_check_scale(__i, __i->field, __i->field - 1); \
-+ -- __i->field; \
-+})
-+
-+/* See comment before reiser4_readdir_common() for description. */
-+static inline struct list_head *get_readdir_list(const struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->lists.readdir_list;
-+}
-+
-+extern void init_inode_ordering(struct inode *inode,
-+ reiser4_object_create_data * crd, int create);
-+
-+static inline struct radix_tree_root *jnode_tree_by_inode(struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->jnodes_tree;
-+}
-+
-+static inline struct radix_tree_root *jnode_tree_by_reiser4_inode(reiser4_inode
-+ * r4_inode)
-+{
-+ return &r4_inode->jnodes_tree;
-+}
-+
-+#if REISER4_DEBUG
-+extern void print_inode(const char *prefix, const struct inode *i);
-+#endif
-+
-+int is_dir_empty(const struct inode *);
-+
-+/* __REISER4_INODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/ioctl.h linux-2.6.24/fs/reiser4/ioctl.h
---- linux-2.6.24.orig/fs/reiser4/ioctl.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/ioctl.h 2008-01-25 11:39:06.936207689 +0300
-@@ -0,0 +1,41 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#if !defined( __REISER4_IOCTL_H__ )
-+#define __REISER4_IOCTL_H__
-+
-+#include <linux/fs.h>
-+
-+/*
-+ * ioctl(2) command used to "unpack" reiser4 file, that is, convert it into
-+ * extents and fix in this state. This is used by applications that rely on
-+ *
-+ * . files being block aligned, and
-+ *
-+ * . files never migrating on disk
-+ *
-+ * for example, boot loaders (LILO) need this.
-+ *
-+ * This ioctl should be used as
-+ *
-+ * result = ioctl(fd, REISER4_IOC_UNPACK);
-+ *
-+ * File behind fd descriptor will be converted to the extents (if necessary),
-+ * and its stat-data will be updated so that it will never be converted back
-+ * into tails again.
-+ */
-+#define REISER4_IOC_UNPACK _IOW(0xCD,1,long)
-+
-+/* __REISER4_IOCTL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/jnode.c linux-2.6.24/fs/reiser4/jnode.c
---- linux-2.6.24.orig/fs/reiser4/jnode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/jnode.c 2008-01-25 11:39:06.940208719 +0300
-@@ -0,0 +1,1924 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Jnode manipulation functions. */
-+/* Jnode is entity used to track blocks with data and meta-data in reiser4.
-+
-+ In particular, jnodes are used to track transactional information
-+ associated with each block. Each znode contains jnode as ->zjnode field.
-+
-+ Jnode stands for either Josh or Journal node.
-+*/
-+
-+/*
-+ * Taxonomy.
-+ *
-+ * Jnode represents block containing data or meta-data. There are jnodes
-+ * for:
-+ *
-+ * unformatted blocks (jnodes proper). There are plans, however to
-+ * have a handle per extent unit rather than per each unformatted
-+ * block, because there are so many of them.
-+ *
-+ * For bitmaps. Each bitmap is actually represented by two jnodes--one
-+ * for working and another for "commit" data, together forming bnode.
-+ *
-+ * For io-heads. These are used by log writer.
-+ *
-+ * For formatted nodes (znode). See comment at the top of znode.c for
-+ * details specific to the formatted nodes (znodes).
-+ *
-+ * Node data.
-+ *
-+ * Jnode provides access to the data of node it represents. Data are
-+ * stored in a page. Page is kept in a page cache. This means, that jnodes
-+ * are highly interconnected with page cache and VM internals.
-+ *
-+ * jnode has a pointer to page (->pg) containing its data. Pointer to data
-+ * themselves is cached in ->data field to avoid frequent calls to
-+ * page_address().
-+ *
-+ * jnode and page are attached to each other by jnode_attach_page(). This
-+ * function places pointer to jnode in set_page_private(), sets PG_private
-+ * flag and increments page counter.
-+ *
-+ * Opposite operation is performed by page_clear_jnode().
-+ *
-+ * jnode->pg is protected by jnode spin lock, and page->private is
-+ * protected by page lock. See comment at the top of page_cache.c for
-+ * more.
-+ *
-+ * page can be detached from jnode for two reasons:
-+ *
-+ * . jnode is removed from a tree (file is truncated, of formatted
-+ * node is removed by balancing).
-+ *
-+ * . during memory pressure, VM calls ->releasepage() method
-+ * (reiser4_releasepage()) to evict page from memory.
-+ *
-+ * (there, of course, is also umount, but this is special case we are not
-+ * concerned with here).
-+ *
-+ * To protect jnode page from eviction, one calls jload() function that
-+ * "pins" page in memory (loading it if necessary), increments
-+ * jnode->d_count, and kmap()s page. Page is unpinned through call to
-+ * jrelse().
-+ *
-+ * Jnode life cycle.
-+ *
-+ * jnode is created, placed in hash table, and, optionally, in per-inode
-+ * radix tree. Page can be attached to jnode, pinned, released, etc.
-+ *
-+ * When jnode is captured into atom its reference counter is
-+ * increased. While being part of an atom, jnode can be "early
-+ * flushed". This means that as part of flush procedure, jnode is placed
-+ * into "relocate set", and its page is submitted to the disk. After io
-+ * completes, page can be detached, then loaded again, re-dirtied, etc.
-+ *
-+ * Thread acquired reference to jnode by calling jref() and releases it by
-+ * jput(). When last reference is removed, jnode is still retained in
-+ * memory (cached) if it has page attached, _unless_ it is scheduled for
-+ * destruction (has JNODE_HEARD_BANSHEE bit set).
-+ *
-+ * Tree read-write lock was used as "existential" lock for jnodes. That is,
-+ * jnode->x_count could be changed from 0 to 1 only under tree write lock,
-+ * that is, tree lock protected unreferenced jnodes stored in the hash
-+ * table, from recycling.
-+ *
-+ * This resulted in high contention on tree lock, because jref()/jput() is
-+ * frequent operation. To ameliorate this problem, RCU is used: when jput()
-+ * is just about to release last reference on jnode it sets JNODE_RIP bit
-+ * on it, and then proceed with jnode destruction (removing jnode from hash
-+ * table, cbk_cache, detaching page, etc.). All places that change jnode
-+ * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
-+ * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
-+ * jnode_rip_check() function), and pretend that nothing was found in hash
-+ * table if bit is set.
-+ *
-+ * jput defers actual return of jnode into slab cache to some later time
-+ * (by call_rcu()), this guarantees that other threads can safely continue
-+ * working with JNODE_RIP-ped jnode.
-+ *
-+ */
-+
-+#include "reiser4.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "jnode.h"
-+#include "plugin/plugin_header.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+/*#include "jnode.h"*/
-+#include "znode.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+
-+#include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
-+#include <linux/types.h>
-+#include <linux/slab.h>
-+#include <linux/pagemap.h>
-+#include <linux/swap.h>
-+#include <linux/fs.h> /* for struct address_space */
-+#include <linux/writeback.h> /* for inode_lock */
-+
-+static struct kmem_cache *_jnode_slab = NULL;
-+
-+static void jnode_set_type(jnode * node, jnode_type type);
-+static int jdelete(jnode * node);
-+static int jnode_try_drop(jnode * node);
-+
-+#if REISER4_DEBUG
-+static int jnode_invariant(const jnode * node, int tlocked, int jlocked);
-+#endif
-+
-+/* true if valid page is attached to jnode */
-+static inline int jnode_is_parsed(jnode * node)
-+{
-+ return JF_ISSET(node, JNODE_PARSED);
-+}
-+
-+/* hash table support */
-+
-+/* compare two jnode keys for equality. Used by hash-table macros */
-+static inline int jnode_key_eq(const struct jnode_key * k1,
-+ const struct jnode_key * k2)
-+{
-+ assert("nikita-2350", k1 != NULL);
-+ assert("nikita-2351", k2 != NULL);
-+
-+ return (k1->index == k2->index && k1->objectid == k2->objectid);
-+}
-+
-+/* Hash jnode by its key (inode plus offset). Used by hash-table macros */
-+static inline __u32 jnode_key_hashfn(j_hash_table * table,
-+ const struct jnode_key * key)
-+{
-+ assert("nikita-2352", key != NULL);
-+ assert("nikita-3346", IS_POW(table->_buckets));
-+
-+ /* yes, this is remarkable simply (where not stupid) hash function. */
-+ return (key->objectid + key->index) & (table->_buckets - 1);
-+}
-+
-+/* The hash table definition */
-+#define KMALLOC(size) reiser4_vmalloc(size)
-+#define KFREE(ptr, size) vfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(j, jnode, struct jnode_key, key.j, link.j,
-+ jnode_key_hashfn, jnode_key_eq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/* call this to initialise jnode hash table */
-+int jnodes_tree_init(reiser4_tree * tree /* tree to initialise jnodes for */ )
-+{
-+ assert("nikita-2359", tree != NULL);
-+ return j_hash_init(&tree->jhash_table, 16384);
-+}
-+
-+/* call this to destroy jnode hash table. This is called during umount. */
-+int jnodes_tree_done(reiser4_tree * tree /* tree to destroy jnodes for */ )
-+{
-+ j_hash_table *jtable;
-+ jnode *node;
-+ jnode *next;
-+
-+ assert("nikita-2360", tree != NULL);
-+
-+ /*
-+ * Scan hash table and free all jnodes.
-+ */
-+ jtable = &tree->jhash_table;
-+ if (jtable->_table) {
-+ for_all_in_htable(jtable, j, node, next) {
-+ assert("nikita-2361", !atomic_read(&node->x_count));
-+ jdrop(node);
-+ }
-+
-+ j_hash_done(&tree->jhash_table);
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * init_jnodes - create jnode cache
-+ *
-+ * Initializes slab cache jnodes. It is part of reiser4 module initialization.
-+ */
-+int init_jnodes(void)
-+{
-+ assert("umka-168", _jnode_slab == NULL);
-+
-+ _jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL);
-+ if (_jnode_slab == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ return 0;
-+}
-+
-+/**
-+ * done_znodes - delete znode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_jnodes(void)
-+{
-+ destroy_reiser4_cache(&_jnode_slab);
-+}
-+
-+/* Initialize a jnode. */
-+void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type)
-+{
-+ assert("umka-175", node != NULL);
-+
-+ memset(node, 0, sizeof(jnode));
-+ ON_DEBUG(node->magic = JMAGIC);
-+ jnode_set_type(node, type);
-+ atomic_set(&node->d_count, 0);
-+ atomic_set(&node->x_count, 0);
-+ spin_lock_init(&node->guard);
-+ spin_lock_init(&node->load);
-+ node->atom = NULL;
-+ node->tree = tree;
-+ INIT_LIST_HEAD(&node->capture_link);
-+
-+ ASSIGN_NODE_LIST(node, NOT_CAPTURED);
-+
-+ INIT_RCU_HEAD(&node->rcu);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(tree->super);
-+ spin_lock_irq(&sbinfo->all_guard);
-+ list_add(&node->jnodes, &sbinfo->all_jnodes);
-+ spin_unlock_irq(&sbinfo->all_guard);
-+ }
-+#endif
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Remove jnode from ->all_jnodes list.
-+ */
-+static void jnode_done(jnode * node, reiser4_tree * tree)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(tree->super);
-+
-+ spin_lock_irq(&sbinfo->all_guard);
-+ assert("nikita-2422", !list_empty(&node->jnodes));
-+ list_del_init(&node->jnodes);
-+ spin_unlock_irq(&sbinfo->all_guard);
-+}
-+#endif
-+
-+/* return already existing jnode of page */
-+jnode *jnode_by_page(struct page *pg)
-+{
-+ assert("nikita-2066", pg != NULL);
-+ assert("nikita-2400", PageLocked(pg));
-+ assert("nikita-2068", PagePrivate(pg));
-+ assert("nikita-2067", jprivate(pg) != NULL);
-+ return jprivate(pg);
-+}
-+
-+/* exported functions to allocate/free jnode objects outside this file */
-+jnode *jalloc(void)
-+{
-+ jnode *jal = kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get());
-+ return jal;
-+}
-+
-+/* return jnode back to the slab allocator */
-+inline void jfree(jnode * node)
-+{
-+ assert("zam-449", node != NULL);
-+
-+ assert("nikita-2663", (list_empty_careful(&node->capture_link) &&
-+ NODE_LIST(node) == NOT_CAPTURED));
-+ assert("nikita-3222", list_empty(&node->jnodes));
-+ assert("nikita-3221", jnode_page(node) == NULL);
-+
-+ /* not yet phash_jnode_destroy(node); */
-+
-+ kmem_cache_free(_jnode_slab, node);
-+}
-+
-+/*
-+ * This function is supplied as RCU callback. It actually frees jnode when
-+ * last reference to it is gone.
-+ */
-+static void jnode_free_actor(struct rcu_head *head)
-+{
-+ jnode *node;
-+ jnode_type jtype;
-+
-+ node = container_of(head, jnode, rcu);
-+ jtype = jnode_get_type(node);
-+
-+ ON_DEBUG(jnode_done(node, jnode_get_tree(node)));
-+
-+ switch (jtype) {
-+ case JNODE_IO_HEAD:
-+ case JNODE_BITMAP:
-+ case JNODE_UNFORMATTED_BLOCK:
-+ jfree(node);
-+ break;
-+ case JNODE_FORMATTED_BLOCK:
-+ zfree(JZNODE(node));
-+ break;
-+ case JNODE_INODE:
-+ default:
-+ wrong_return_value("nikita-3197", "Wrong jnode type");
-+ }
-+}
-+
-+/*
-+ * Free a jnode. Post a callback to be executed later through RCU when all
-+ * references to @node are released.
-+ */
-+static inline void jnode_free(jnode * node, jnode_type jtype)
-+{
-+ if (jtype != JNODE_INODE) {
-+ /*assert("nikita-3219", list_empty(&node->rcu.list)); */
-+ call_rcu(&node->rcu, jnode_free_actor);
-+ } else
-+ jnode_list_remove(node);
-+}
-+
-+/* allocate new unformatted jnode */
-+static jnode *jnew_unformatted(void)
-+{
-+ jnode *jal;
-+
-+ jal = jalloc();
-+ if (jal == NULL)
-+ return NULL;
-+
-+ jnode_init(jal, current_tree, JNODE_UNFORMATTED_BLOCK);
-+ jal->key.j.mapping = NULL;
-+ jal->key.j.index = (unsigned long)-1;
-+ jal->key.j.objectid = 0;
-+ return jal;
-+}
-+
-+/* look for jnode with given mapping and offset within hash table */
-+jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index)
-+{
-+ struct jnode_key jkey;
-+ jnode *node;
-+
-+ assert("nikita-2353", tree != NULL);
-+
-+ jkey.objectid = objectid;
-+ jkey.index = index;
-+
-+ /*
-+ * hash table is _not_ protected by any lock during lookups. All we
-+ * have to do is to disable preemption to keep RCU happy.
-+ */
-+
-+ rcu_read_lock();
-+ node = j_hash_find(&tree->jhash_table, &jkey);
-+ if (node != NULL) {
-+ /* protect @node from recycling */
-+ jref(node);
-+ assert("nikita-2955", jnode_invariant(node, 0, 0));
-+ node = jnode_rip_check(tree, node);
-+ }
-+ rcu_read_unlock();
-+ return node;
-+}
-+
-+/* per inode radix tree of jnodes is protected by tree's read write spin lock */
-+static jnode *jfind_nolock(struct address_space *mapping, unsigned long index)
-+{
-+ assert("vs-1694", mapping->host != NULL);
-+
-+ return radix_tree_lookup(jnode_tree_by_inode(mapping->host), index);
-+}
-+
-+jnode *jfind(struct address_space * mapping, unsigned long index)
-+{
-+ reiser4_tree *tree;
-+ jnode *node;
-+
-+ assert("vs-1694", mapping->host != NULL);
-+ tree = reiser4_tree_by_inode(mapping->host);
-+
-+ read_lock_tree(tree);
-+ node = jfind_nolock(mapping, index);
-+ if (node != NULL)
-+ jref(node);
-+ read_unlock_tree(tree);
-+ return node;
-+}
-+
-+static void inode_attach_jnode(jnode * node)
-+{
-+ struct inode *inode;
-+ reiser4_inode *info;
-+ struct radix_tree_root *rtree;
-+
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+ assert("zam-1043", node->key.j.mapping != NULL);
-+ inode = node->key.j.mapping->host;
-+ info = reiser4_inode_data(inode);
-+ rtree = jnode_tree_by_reiser4_inode(info);
-+ if (rtree->rnode == NULL) {
-+ /* prevent inode from being pruned when it has jnodes attached
-+ to it */
-+ write_lock_irq(&inode->i_data.tree_lock);
-+ inode->i_data.nrpages++;
-+ write_unlock_irq(&inode->i_data.tree_lock);
-+ }
-+ assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0));
-+ check_me("zam-1045",
-+ !radix_tree_insert(rtree, node->key.j.index, node));
-+ ON_DEBUG(info->nr_jnodes++);
-+}
-+
-+static void inode_detach_jnode(jnode * node)
-+{
-+ struct inode *inode;
-+ reiser4_inode *info;
-+ struct radix_tree_root *rtree;
-+
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+ assert("zam-1044", node->key.j.mapping != NULL);
-+ inode = node->key.j.mapping->host;
-+ info = reiser4_inode_data(inode);
-+ rtree = jnode_tree_by_reiser4_inode(info);
-+
-+ assert("zam-1051", info->nr_jnodes != 0);
-+ assert("zam-1052", rtree->rnode != NULL);
-+ ON_DEBUG(info->nr_jnodes--);
-+
-+ /* delete jnode from inode's radix tree of jnodes */
-+ check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index));
-+ if (rtree->rnode == NULL) {
-+ /* inode can be pruned now */
-+ write_lock_irq(&inode->i_data.tree_lock);
-+ inode->i_data.nrpages--;
-+ write_unlock_irq(&inode->i_data.tree_lock);
-+ }
-+}
-+
-+/* put jnode into hash table (where they can be found by flush who does not know
-+ mapping) and to inode's tree of jnodes (where they can be found (hopefully
-+ faster) in places where mapping is known). Currently it is used by
-+ fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
-+ created */
-+static void
-+hash_unformatted_jnode(jnode * node, struct address_space *mapping,
-+ unsigned long index)
-+{
-+ j_hash_table *jtable;
-+
-+ assert("vs-1446", jnode_is_unformatted(node));
-+ assert("vs-1442", node->key.j.mapping == 0);
-+ assert("vs-1443", node->key.j.objectid == 0);
-+ assert("vs-1444", node->key.j.index == (unsigned long)-1);
-+ assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
-+
-+ node->key.j.mapping = mapping;
-+ node->key.j.objectid = get_inode_oid(mapping->host);
-+ node->key.j.index = index;
-+
-+ jtable = &jnode_get_tree(node)->jhash_table;
-+
-+ /* race with some other thread inserting jnode into the hash table is
-+ * impossible, because we keep the page lock. */
-+ /*
-+ * following assertion no longer holds because of RCU: it is possible
-+ * jnode is in the hash table, but with JNODE_RIP bit set.
-+ */
-+ /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
-+ j_hash_insert_rcu(jtable, node);
-+ inode_attach_jnode(node);
-+}
-+
-+static void unhash_unformatted_node_nolock(jnode * node)
-+{
-+ assert("vs-1683", node->key.j.mapping != NULL);
-+ assert("vs-1684",
-+ node->key.j.objectid ==
-+ get_inode_oid(node->key.j.mapping->host));
-+
-+ /* remove jnode from hash-table */
-+ j_hash_remove_rcu(&node->tree->jhash_table, node);
-+ inode_detach_jnode(node);
-+ node->key.j.mapping = NULL;
-+ node->key.j.index = (unsigned long)-1;
-+ node->key.j.objectid = 0;
-+
-+}
-+
-+/* remove jnode from hash table and from inode's tree of jnodes. This is used in
-+ reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes ->
-+ reiser4_uncapture_jnode */
-+void unhash_unformatted_jnode(jnode * node)
-+{
-+ assert("vs-1445", jnode_is_unformatted(node));
-+
-+ write_lock_tree(node->tree);
-+ unhash_unformatted_node_nolock(node);
-+ write_unlock_tree(node->tree);
-+}
-+
-+/*
-+ * search hash table for a jnode with given oid and index. If not found,
-+ * allocate new jnode, insert it, and also insert into radix tree for the
-+ * given inode/mapping.
-+ */
-+static jnode *find_get_jnode(reiser4_tree * tree,
-+ struct address_space *mapping,
-+ oid_t oid, unsigned long index)
-+{
-+ jnode *result;
-+ jnode *shadow;
-+ int preload;
-+
-+ result = jnew_unformatted();
-+
-+ if (unlikely(result == NULL))
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ preload = radix_tree_preload(reiser4_ctx_gfp_mask_get());
-+ if (preload != 0)
-+ return ERR_PTR(preload);
-+
-+ write_lock_tree(tree);
-+ shadow = jfind_nolock(mapping, index);
-+ if (likely(shadow == NULL)) {
-+ /* add new jnode to hash table and inode's radix tree of jnodes */
-+ jref(result);
-+ hash_unformatted_jnode(result, mapping, index);
-+ } else {
-+ /* jnode is found in inode's radix tree of jnodes */
-+ jref(shadow);
-+ jnode_free(result, JNODE_UNFORMATTED_BLOCK);
-+ assert("vs-1498", shadow->key.j.mapping == mapping);
-+ result = shadow;
-+ }
-+ write_unlock_tree(tree);
-+
-+ assert("nikita-2955",
-+ ergo(result != NULL, jnode_invariant(result, 0, 0)));
-+ radix_tree_preload_end();
-+ return result;
-+}
-+
-+/* jget() (a la zget() but for unformatted nodes). Returns (and possibly
-+ creates) jnode corresponding to page @pg. jnode is attached to page and
-+ inserted into jnode hash-table. */
-+static jnode *do_jget(reiser4_tree * tree, struct page *pg)
-+{
-+ /*
-+ * There are two ways to create jnode: starting with pre-existing page
-+ * and without page.
-+ *
-+ * When page already exists, jnode is created
-+ * (jnode_of_page()->do_jget()) under page lock. This is done in
-+ * ->writepage(), or when capturing anonymous page dirtied through
-+ * mmap.
-+ *
-+ * Jnode without page is created by index_extent_jnode().
-+ *
-+ */
-+
-+ jnode *result;
-+ oid_t oid = get_inode_oid(pg->mapping->host);
-+
-+ assert("umka-176", pg != NULL);
-+ assert("nikita-2394", PageLocked(pg));
-+
-+ result = jprivate(pg);
-+ if (likely(result != NULL))
-+ return jref(result);
-+
-+ tree = reiser4_tree_by_page(pg);
-+
-+ /* check hash-table first */
-+ result = jfind(pg->mapping, pg->index);
-+ if (unlikely(result != NULL)) {
-+ spin_lock_jnode(result);
-+ jnode_attach_page(result, pg);
-+ spin_unlock_jnode(result);
-+ result->key.j.mapping = pg->mapping;
-+ return result;
-+ }
-+
-+ /* since page is locked, jnode should be allocated with GFP_NOFS flag */
-+ reiser4_ctx_gfp_mask_force(GFP_NOFS);
-+ result = find_get_jnode(tree, pg->mapping, oid, pg->index);
-+ if (unlikely(IS_ERR(result)))
-+ return result;
-+ /* attach jnode to page */
-+ spin_lock_jnode(result);
-+ jnode_attach_page(result, pg);
-+ spin_unlock_jnode(result);
-+ return result;
-+}
-+
-+/*
-+ * return jnode for @pg, creating it if necessary.
-+ */
-+jnode *jnode_of_page(struct page * pg)
-+{
-+ jnode *result;
-+
-+ assert("umka-176", pg != NULL);
-+ assert("nikita-2394", PageLocked(pg));
-+
-+ result = do_jget(reiser4_tree_by_page(pg), pg);
-+
-+ if (REISER4_DEBUG && !IS_ERR(result)) {
-+ assert("nikita-3210", result == jprivate(pg));
-+ assert("nikita-2046", jnode_page(jprivate(pg)) == pg);
-+ if (jnode_is_unformatted(jprivate(pg))) {
-+ assert("nikita-2364",
-+ jprivate(pg)->key.j.index == pg->index);
-+ assert("nikita-2367",
-+ jprivate(pg)->key.j.mapping == pg->mapping);
-+ assert("nikita-2365",
-+ jprivate(pg)->key.j.objectid ==
-+ get_inode_oid(pg->mapping->host));
-+ assert("vs-1200",
-+ jprivate(pg)->key.j.objectid ==
-+ pg->mapping->host->i_ino);
-+ assert("nikita-2356",
-+ jnode_is_unformatted(jnode_by_page(pg)));
-+ }
-+ assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0));
-+ }
-+ return result;
-+}
-+
-+/* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
-+ * page.*/
-+void jnode_attach_page(jnode * node, struct page *pg)
-+{
-+ assert("nikita-2060", node != NULL);
-+ assert("nikita-2061", pg != NULL);
-+
-+ assert("nikita-2050", jprivate(pg) == 0ul);
-+ assert("nikita-2393", !PagePrivate(pg));
-+ assert("vs-1741", node->pg == NULL);
-+
-+ assert("nikita-2396", PageLocked(pg));
-+ assert_spin_locked(&(node->guard));
-+
-+ page_cache_get(pg);
-+ set_page_private(pg, (unsigned long)node);
-+ node->pg = pg;
-+ SetPagePrivate(pg);
-+}
-+
-+/* Dual to jnode_attach_page: break a binding between page and jnode */
-+void page_clear_jnode(struct page *page, jnode * node)
-+{
-+ assert("nikita-2424", page != NULL);
-+ assert("nikita-2425", PageLocked(page));
-+ assert("nikita-2426", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+ assert("nikita-2428", PagePrivate(page));
-+
-+ assert("nikita-3551", !PageWriteback(page));
-+
-+ JF_CLR(node, JNODE_PARSED);
-+ set_page_private(page, 0ul);
-+ ClearPagePrivate(page);
-+ node->pg = NULL;
-+ page_cache_release(page);
-+}
-+
-+#if 0
-+/* it is only used in one place to handle error */
-+void
-+page_detach_jnode(struct page *page, struct address_space *mapping,
-+ unsigned long index)
-+{
-+ assert("nikita-2395", page != NULL);
-+
-+ lock_page(page);
-+ if ((page->mapping == mapping) && (page->index == index)
-+ && PagePrivate(page)) {
-+ jnode *node;
-+
-+ node = jprivate(page);
-+ spin_lock_jnode(node);
-+ page_clear_jnode(page, node);
-+ spin_unlock_jnode(node);
-+ }
-+ unlock_page(page);
-+}
-+#endif /* 0 */
-+
-+/* return @node page locked.
-+
-+ Locking ordering requires that one first takes page lock and afterwards
-+ spin lock on node attached to this page. Sometimes it is necessary to go in
-+ the opposite direction. This is done through standard trylock-and-release
-+ loop.
-+*/
-+static struct page *jnode_lock_page(jnode * node)
-+{
-+ struct page *page;
-+
-+ assert("nikita-2052", node != NULL);
-+ assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode));
-+
-+ while (1) {
-+
-+ spin_lock_jnode(node);
-+ page = jnode_page(node);
-+ if (page == NULL) {
-+ break;
-+ }
-+
-+ /* no need to page_cache_get( page ) here, because page cannot
-+ be evicted from memory without detaching it from jnode and
-+ this requires spin lock on jnode that we already hold.
-+ */
-+ if (!TestSetPageLocked(page)) {
-+ /* We won a lock on jnode page, proceed. */
-+ break;
-+ }
-+
-+ /* Page is locked by someone else. */
-+ page_cache_get(page);
-+ spin_unlock_jnode(node);
-+ wait_on_page_locked(page);
-+ /* it is possible that page was detached from jnode and
-+ returned to the free pool, or re-assigned while we were
-+ waiting on locked bit. This will be rechecked on the next
-+ loop iteration.
-+ */
-+ page_cache_release(page);
-+
-+ /* try again */
-+ }
-+ return page;
-+}
-+
-+/*
-+ * is JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
-+ * validness of jnode content.
-+ */
-+static inline int jparse(jnode * node)
-+{
-+ int result;
-+
-+ assert("nikita-2466", node != NULL);
-+
-+ spin_lock_jnode(node);
-+ if (likely(!jnode_is_parsed(node))) {
-+ result = jnode_ops(node)->parse(node);
-+ if (likely(result == 0))
-+ JF_SET(node, JNODE_PARSED);
-+ } else
-+ result = 0;
-+ spin_unlock_jnode(node);
-+ return result;
-+}
-+
-+/* Lock a page attached to jnode, create and attach page to jnode if it had no
-+ * one. */
-+static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags)
-+{
-+ struct page *page;
-+
-+ spin_lock_jnode(node);
-+ page = jnode_page(node);
-+
-+ if (page == NULL) {
-+ spin_unlock_jnode(node);
-+ page = find_or_create_page(jnode_get_mapping(node),
-+ jnode_get_index(node), gfp_flags);
-+ if (page == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ } else {
-+ if (!TestSetPageLocked(page)) {
-+ spin_unlock_jnode(node);
-+ return page;
-+ }
-+ page_cache_get(page);
-+ spin_unlock_jnode(node);
-+ lock_page(page);
-+ assert("nikita-3134", page->mapping == jnode_get_mapping(node));
-+ }
-+
-+ spin_lock_jnode(node);
-+ if (!jnode_page(node))
-+ jnode_attach_page(node, page);
-+ spin_unlock_jnode(node);
-+
-+ page_cache_release(page);
-+ assert("zam-894", jnode_page(node) == page);
-+ return page;
-+}
-+
-+/* Start read operation for jnode's page if page is not up-to-date. */
-+static int jnode_start_read(jnode * node, struct page *page)
-+{
-+ assert("zam-893", PageLocked(page));
-+
-+ if (PageUptodate(page)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ return reiser4_page_io(page, node, READ, reiser4_ctx_gfp_mask_get());
-+}
-+
-+#if REISER4_DEBUG
-+static void check_jload(jnode * node, struct page *page)
-+{
-+ if (jnode_is_znode(node)) {
-+ node40_header *nh;
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ if (znode_is_any_locked(z)) {
-+ nh = (node40_header *) kmap(page);
-+ /* this only works for node40-only file systems. For
-+ * debugging. */
-+ assert("nikita-3253",
-+ z->nr_items == le16_to_cpu(get_unaligned(&nh->nr_items)));
-+ kunmap(page);
-+ }
-+ assert("nikita-3565", znode_invariant(z));
-+ }
-+}
-+#else
-+#define check_jload(node, page) noop
-+#endif
-+
-+/* prefetch jnode to speed up next call to jload. Call this when you are going
-+ * to call jload() shortly. This will bring appropriate portion of jnode into
-+ * CPU cache. */
-+void jload_prefetch(jnode * node)
-+{
-+ prefetchw(&node->x_count);
-+}
-+
-+/* load jnode's data into memory */
-+int jload_gfp(jnode * node /* node to load */ ,
-+ gfp_t gfp_flags /* allocation flags */ ,
-+ int do_kmap /* true if page should be kmapped */ )
-+{
-+ struct page *page;
-+ int result = 0;
-+ int parsed;
-+
-+ assert("nikita-3010", reiser4_schedulable());
-+
-+ prefetchw(&node->pg);
-+
-+ /* taking d-reference implies taking x-reference. */
-+ jref(node);
-+
-+ /*
-+ * acquiring d-reference to @jnode and check for JNODE_PARSED bit
-+ * should be atomic, otherwise there is a race against
-+ * reiser4_releasepage().
-+ */
-+ spin_lock(&(node->load));
-+ add_d_ref(node);
-+ parsed = jnode_is_parsed(node);
-+ spin_unlock(&(node->load));
-+
-+ if (unlikely(!parsed)) {
-+ page = jnode_get_page_locked(node, gfp_flags);
-+ if (unlikely(IS_ERR(page))) {
-+ result = PTR_ERR(page);
-+ goto failed;
-+ }
-+
-+ result = jnode_start_read(node, page);
-+ if (unlikely(result != 0))
-+ goto failed;
-+
-+ wait_on_page_locked(page);
-+ if (unlikely(!PageUptodate(page))) {
-+ result = RETERR(-EIO);
-+ goto failed;
-+ }
-+
-+ if (do_kmap)
-+ node->data = kmap(page);
-+
-+ result = jparse(node);
-+ if (unlikely(result != 0)) {
-+ if (do_kmap)
-+ kunmap(page);
-+ goto failed;
-+ }
-+ check_jload(node, page);
-+ } else {
-+ page = jnode_page(node);
-+ check_jload(node, page);
-+ if (do_kmap)
-+ node->data = kmap(page);
-+ }
-+
-+ if (!is_writeout_mode())
-+ /* We do not mark pages active if jload is called as a part of
-+ * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
-+ * and write_logs() add no value to cached data, there is no
-+ * sense to mark pages as active when they go to disk, it just
-+ * confuses vm scanning routines because clean page could be
-+ * moved out from inactive list as a result of this
-+ * mark_page_accessed() call. */
-+ mark_page_accessed(page);
-+
-+ return 0;
-+
-+ failed:
-+ jrelse_tail(node);
-+ return result;
-+
-+}
-+
-+/* start asynchronous reading for given jnode's page. */
-+int jstartio(jnode * node)
-+{
-+ struct page *page;
-+
-+ page = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get());
-+ if (IS_ERR(page))
-+ return PTR_ERR(page);
-+
-+ return jnode_start_read(node, page);
-+}
-+
-+/* Initialize a node by calling appropriate plugin instead of reading
-+ * node from disk as in jload(). */
-+int jinit_new(jnode * node, gfp_t gfp_flags)
-+{
-+ struct page *page;
-+ int result;
-+
-+ jref(node);
-+ add_d_ref(node);
-+
-+ page = jnode_get_page_locked(node, gfp_flags);
-+ if (IS_ERR(page)) {
-+ result = PTR_ERR(page);
-+ goto failed;
-+ }
-+
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+
-+ node->data = kmap(page);
-+
-+ if (!jnode_is_parsed(node)) {
-+ jnode_plugin *jplug = jnode_ops(node);
-+ spin_lock_jnode(node);
-+ result = jplug->init(node);
-+ spin_unlock_jnode(node);
-+ if (result) {
-+ kunmap(page);
-+ goto failed;
-+ }
-+ JF_SET(node, JNODE_PARSED);
-+ }
-+
-+ return 0;
-+
-+ failed:
-+ jrelse(node);
-+ return result;
-+}
-+
-+/* release a reference to jnode acquired by jload(), decrement ->d_count */
-+void jrelse_tail(jnode * node /* jnode to release references to */ )
-+{
-+ assert("nikita-489", atomic_read(&node->d_count) > 0);
-+ atomic_dec(&node->d_count);
-+ /* release reference acquired in jload_gfp() or jinit_new() */
-+ jput(node);
-+ if (jnode_is_unformatted(node) || jnode_is_znode(node))
-+ LOCK_CNT_DEC(d_refs);
-+}
-+
-+/* drop reference to node data. When last reference is dropped, data are
-+ unloaded. */
-+void jrelse(jnode * node /* jnode to release references to */ )
-+{
-+ struct page *page;
-+
-+ assert("nikita-487", node != NULL);
-+ assert_spin_not_locked(&(node->guard));
-+
-+ page = jnode_page(node);
-+ if (likely(page != NULL)) {
-+ /*
-+ * it is safe not to lock jnode here, because at this point
-+ * @node->d_count is greater than zero (if jrelse() is used
-+ * correctly, that is). JNODE_PARSED may be not set yet, if,
-+ * for example, we got here as a result of error handling path
-+ * in jload(). Anyway, page cannot be detached by
-+ * reiser4_releasepage(). truncate will invalidate page
-+ * regardless, but this should not be a problem.
-+ */
-+ kunmap(page);
-+ }
-+ jrelse_tail(node);
-+}
-+
-+/* called from jput() to wait for io completion */
-+static void jnode_finish_io(jnode * node)
-+{
-+ struct page *page;
-+
-+ assert("nikita-2922", node != NULL);
-+
-+ spin_lock_jnode(node);
-+ page = jnode_page(node);
-+ if (page != NULL) {
-+ page_cache_get(page);
-+ spin_unlock_jnode(node);
-+ wait_on_page_writeback(page);
-+ page_cache_release(page);
-+ } else
-+ spin_unlock_jnode(node);
-+}
-+
-+/*
-+ * This is called by jput() when last reference to jnode is released. This is
-+ * separate function, because we want fast path of jput() to be inline and,
-+ * therefore, small.
-+ */
-+void jput_final(jnode * node)
-+{
-+ int r_i_p;
-+
-+ /* A fast check for keeping node in cache. We always keep node in cache
-+ * if its page is present and node was not marked for deletion */
-+ if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
-+ rcu_read_unlock();
-+ return;
-+ }
-+ r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP);
-+ /*
-+ * if r_i_p is true, we were first to set JNODE_RIP on this node. In
-+ * this case it is safe to access node after unlock.
-+ */
-+ rcu_read_unlock();
-+ if (r_i_p) {
-+ jnode_finish_io(node);
-+ if (JF_ISSET(node, JNODE_HEARD_BANSHEE))
-+ /* node is removed from the tree. */
-+ jdelete(node);
-+ else
-+ jnode_try_drop(node);
-+ }
-+ /* if !r_i_p some other thread is already killing it */
-+}
-+
-+int jwait_io(jnode * node, int rw)
-+{
-+ struct page *page;
-+ int result;
-+
-+ assert("zam-447", node != NULL);
-+ assert("zam-448", jnode_page(node) != NULL);
-+
-+ page = jnode_page(node);
-+
-+ result = 0;
-+ if (rw == READ) {
-+ wait_on_page_locked(page);
-+ } else {
-+ assert("nikita-2227", rw == WRITE);
-+ wait_on_page_writeback(page);
-+ }
-+ if (PageError(page))
-+ result = RETERR(-EIO);
-+
-+ return result;
-+}
-+
-+/*
-+ * jnode types and plugins.
-+ *
-+ * jnode by itself is a "base type". There are several different jnode
-+ * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
-+ * has to do different things based on jnode type. In the standard reiser4 way
-+ * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
-+ *
-+ * Functions below deal with jnode types and define methods of jnode plugin.
-+ *
-+ */
-+
-+/* set jnode type. This is done during jnode initialization. */
-+static void jnode_set_type(jnode * node, jnode_type type)
-+{
-+ static unsigned long type_to_mask[] = {
-+ [JNODE_UNFORMATTED_BLOCK] = 1,
-+ [JNODE_FORMATTED_BLOCK] = 0,
-+ [JNODE_BITMAP] = 2,
-+ [JNODE_IO_HEAD] = 6,
-+ [JNODE_INODE] = 4
-+ };
-+
-+ assert("zam-647", type < LAST_JNODE_TYPE);
-+ assert("nikita-2815", !jnode_is_loaded(node));
-+ assert("nikita-3386", node->state == 0);
-+
-+ node->state |= (type_to_mask[type] << JNODE_TYPE_1);
-+}
-+
-+/* ->init() method of jnode plugin for jnodes that don't require plugin
-+ * specific initialization. */
-+static int init_noinit(jnode * node UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* ->parse() method of jnode plugin for jnodes that don't require plugin
-+ * specific pasring. */
-+static int parse_noparse(jnode * node UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* ->mapping() method for unformatted jnode */
-+struct address_space *mapping_jnode(const jnode * node)
-+{
-+ struct address_space *map;
-+
-+ assert("nikita-2713", node != NULL);
-+
-+ /* mapping is stored in jnode */
-+
-+ map = node->key.j.mapping;
-+ assert("nikita-2714", map != NULL);
-+ assert("nikita-2897", is_reiser4_inode(map->host));
-+ assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid);
-+ return map;
-+}
-+
-+/* ->index() method for unformatted jnodes */
-+unsigned long index_jnode(const jnode * node)
-+{
-+ /* index is stored in jnode */
-+ return node->key.j.index;
-+}
-+
-+/* ->remove() method for unformatted jnodes */
-+static inline void remove_jnode(jnode * node, reiser4_tree * tree)
-+{
-+ /* remove jnode from hash table and radix tree */
-+ if (node->key.j.mapping)
-+ unhash_unformatted_node_nolock(node);
-+}
-+
-+/* ->mapping() method for znodes */
-+static struct address_space *mapping_znode(const jnode * node)
-+{
-+ /* all znodes belong to fake inode */
-+ return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping;
-+}
-+
-+/* ->index() method for znodes */
-+static unsigned long index_znode(const jnode * node)
-+{
-+ unsigned long addr;
-+ assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode));
-+
-+ /* index of znode is just its address (shifted) */
-+ addr = (unsigned long)node;
-+ return (addr - PAGE_OFFSET) >> znode_shift_order;
-+}
-+
-+/* ->mapping() method for bitmap jnode */
-+static struct address_space *mapping_bitmap(const jnode * node)
-+{
-+ /* all bitmap blocks belong to special bitmap inode */
-+ return get_super_private(jnode_get_tree(node)->super)->bitmap->
-+ i_mapping;
-+}
-+
-+/* ->index() method for jnodes that are indexed by address */
-+static unsigned long index_is_address(const jnode * node)
-+{
-+ unsigned long ind;
-+
-+ ind = (unsigned long)node;
-+ return ind - PAGE_OFFSET;
-+}
-+
-+/* resolve race with jput */
-+jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node)
-+{
-+ /*
-+ * This is used as part of RCU-based jnode handling.
-+ *
-+ * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
-+ * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
-+ * not protected during this, so concurrent thread may execute
-+ * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
-+ * freed in jput_final(). To avoid such races, jput_final() sets
-+ * JNODE_RIP on jnode (under tree lock). All places that work with
-+ * unreferenced jnodes call this function. It checks for JNODE_RIP bit
-+ * (first without taking tree lock), and if this bit is set, released
-+ * reference acquired by the current thread and returns NULL.
-+ *
-+ * As a result, if jnode is being concurrently freed, NULL is returned
-+ * and caller should pretend that jnode wasn't found in the first
-+ * place.
-+ *
-+ * Otherwise it's safe to release "rcu-read-lock" and continue with
-+ * jnode.
-+ */
-+ if (unlikely(JF_ISSET(node, JNODE_RIP))) {
-+ read_lock_tree(tree);
-+ if (JF_ISSET(node, JNODE_RIP)) {
-+ dec_x_ref(node);
-+ node = NULL;
-+ }
-+ read_unlock_tree(tree);
-+ }
-+ return node;
-+}
-+
-+reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key)
-+{
-+ struct inode *inode;
-+ item_plugin *iplug;
-+ loff_t off;
-+
-+ assert("nikita-3092", node != NULL);
-+ assert("nikita-3093", key != NULL);
-+ assert("nikita-3094", jnode_is_unformatted(node));
-+
-+ off = ((loff_t) index_jnode(node)) << PAGE_CACHE_SHIFT;
-+ inode = mapping_jnode(node)->host;
-+
-+ if (node->parent_item_id != 0)
-+ iplug = item_plugin_by_id(node->parent_item_id);
-+ else
-+ iplug = NULL;
-+
-+ if (iplug != NULL && iplug->f.key_by_offset)
-+ iplug->f.key_by_offset(inode, off, key);
-+ else {
-+ file_plugin *fplug;
-+
-+ fplug = inode_file_plugin(inode);
-+ assert("zam-1007", fplug != NULL);
-+ assert("zam-1008", fplug->key_by_inode != NULL);
-+
-+ fplug->key_by_inode(inode, off, key);
-+ }
-+
-+ return key;
-+}
-+
-+/* ->parse() method for formatted nodes */
-+static int parse_znode(jnode * node)
-+{
-+ return zparse(JZNODE(node));
-+}
-+
-+/* ->delete() method for formatted nodes */
-+static void delete_znode(jnode * node, reiser4_tree * tree)
-+{
-+ znode *z;
-+
-+ assert_rw_write_locked(&(tree->tree_lock));
-+ assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ z = JZNODE(node);
-+ assert("vs-899", z->c_count == 0);
-+
-+ /* delete znode from sibling list. */
-+ sibling_list_remove(z);
-+
-+ znode_remove(z, tree);
-+}
-+
-+/* ->remove() method for formatted nodes */
-+static int remove_znode(jnode * node, reiser4_tree * tree)
-+{
-+ znode *z;
-+
-+ assert_rw_write_locked(&(tree->tree_lock));
-+ z = JZNODE(node);
-+
-+ if (z->c_count == 0) {
-+ /* detach znode from sibling list. */
-+ sibling_list_drop(z);
-+ /* this is called with tree spin-lock held, so call
-+ znode_remove() directly (rather than znode_lock_remove()). */
-+ znode_remove(z, tree);
-+ return 0;
-+ }
-+ return RETERR(-EBUSY);
-+}
-+
-+/* ->init() method for formatted nodes */
-+static int init_znode(jnode * node)
-+{
-+ znode *z;
-+
-+ z = JZNODE(node);
-+ /* call node plugin to do actual initialization */
-+ return z->nplug->init(z);
-+}
-+
-+/* ->clone() method for formatted nodes */
-+static jnode *clone_formatted(jnode * node)
-+{
-+ znode *clone;
-+
-+ assert("vs-1430", jnode_is_znode(node));
-+ clone = zalloc(reiser4_ctx_gfp_mask_get());
-+ if (clone == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ zinit(clone, NULL, current_tree);
-+ jnode_set_block(ZJNODE(clone), jnode_get_block(node));
-+ /* ZJNODE(clone)->key.z is not initialized */
-+ clone->level = JZNODE(node)->level;
-+
-+ return ZJNODE(clone);
-+}
-+
-+/* jplug->clone for unformatted nodes */
-+static jnode *clone_unformatted(jnode * node)
-+{
-+ jnode *clone;
-+
-+ assert("vs-1431", jnode_is_unformatted(node));
-+ clone = jalloc();
-+ if (clone == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM));
-+
-+ jnode_init(clone, current_tree, JNODE_UNFORMATTED_BLOCK);
-+ jnode_set_block(clone, jnode_get_block(node));
-+
-+ return clone;
-+
-+}
-+
-+/*
-+ * Setup jnode plugin methods for various jnode types.
-+ */
-+jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = {
-+ [JNODE_UNFORMATTED_BLOCK] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_UNFORMATTED_BLOCK,
-+ .pops = NULL,
-+ .label = "unformatted",
-+ .desc = "unformatted node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_jnode,
-+ .index = index_jnode,
-+ .clone = clone_unformatted
-+ },
-+ [JNODE_FORMATTED_BLOCK] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_FORMATTED_BLOCK,
-+ .pops = NULL,
-+ .label = "formatted",
-+ .desc = "formatted tree node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_znode,
-+ .parse = parse_znode,
-+ .mapping = mapping_znode,
-+ .index = index_znode,
-+ .clone = clone_formatted
-+ },
-+ [JNODE_BITMAP] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_BITMAP,
-+ .pops = NULL,
-+ .label = "bitmap",
-+ .desc = "bitmap node",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_bitmap,
-+ .index = index_is_address,
-+ .clone = NULL
-+ },
-+ [JNODE_IO_HEAD] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_IO_HEAD,
-+ .pops = NULL,
-+ .label = "io head",
-+ .desc = "io head",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = init_noinit,
-+ .parse = parse_noparse,
-+ .mapping = mapping_bitmap,
-+ .index = index_is_address,
-+ .clone = NULL
-+ },
-+ [JNODE_INODE] = {
-+ .h = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .id = JNODE_INODE,
-+ .pops = NULL,
-+ .label = "inode",
-+ .desc = "inode's builtin jnode",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = NULL,
-+ .parse = NULL,
-+ .mapping = NULL,
-+ .index = NULL,
-+ .clone = NULL
-+ }
-+};
-+
-+/*
-+ * jnode destruction.
-+ *
-+ * Thread may use a jnode after it acquired a reference to it. References are
-+ * counted in ->x_count field. Reference protects jnode from being
-+ * recycled. This is different from protecting jnode data (that are stored in
-+ * jnode page) from being evicted from memory. Data are protected by jload()
-+ * and released by jrelse().
-+ *
-+ * If thread already possesses a reference to the jnode it can acquire another
-+ * one through jref(). Initial reference is obtained (usually) by locating
-+ * jnode in some indexing structure that depends on jnode type: formatted
-+ * nodes are kept in global hash table, where they are indexed by block
-+ * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
-+ * table, which is indexed by oid and offset within file, and in per-inode
-+ * radix tree.
-+ *
-+ * Reference to jnode is released by jput(). If last reference is released,
-+ * jput_final() is called. This function determines whether jnode has to be
-+ * deleted (this happens when corresponding node is removed from the file
-+ * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
-+ * should be just "removed" (deleted from memory).
-+ *
-+ * Jnode destruction is signally delicate dance because of locking and RCU.
-+ */
-+
-+/*
-+ * Returns true if jnode cannot be removed right now. This check is called
-+ * under tree lock. If it returns true, jnode is irrevocably committed to be
-+ * deleted/removed.
-+ */
-+static inline int jnode_is_busy(const jnode * node, jnode_type jtype)
-+{
-+ /* if other thread managed to acquire a reference to this jnode, don't
-+ * free it. */
-+ if (atomic_read(&node->x_count) > 0)
-+ return 1;
-+ /* also, don't free znode that has children in memory */
-+ if (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0)
-+ return 1;
-+ return 0;
-+}
-+
-+/*
-+ * this is called as part of removing jnode. Based on jnode type, call
-+ * corresponding function that removes jnode from indices and returns it back
-+ * to the appropriate slab (through RCU).
-+ */
-+static inline void
-+jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree)
-+{
-+ switch (jtype) {
-+ case JNODE_UNFORMATTED_BLOCK:
-+ remove_jnode(node, tree);
-+ break;
-+ case JNODE_IO_HEAD:
-+ case JNODE_BITMAP:
-+ break;
-+ case JNODE_INODE:
-+ break;
-+ case JNODE_FORMATTED_BLOCK:
-+ remove_znode(node, tree);
-+ break;
-+ default:
-+ wrong_return_value("nikita-3196", "Wrong jnode type");
-+ }
-+}
-+
-+/*
-+ * this is called as part of deleting jnode. Based on jnode type, call
-+ * corresponding function that removes jnode from indices and returns it back
-+ * to the appropriate slab (through RCU).
-+ *
-+ * This differs from jnode_remove() only for formatted nodes---for them
-+ * sibling list handling is different for removal and deletion.
-+ */
-+static inline void
-+jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG)
-+{
-+ switch (jtype) {
-+ case JNODE_UNFORMATTED_BLOCK:
-+ remove_jnode(node, tree);
-+ break;
-+ case JNODE_IO_HEAD:
-+ case JNODE_BITMAP:
-+ break;
-+ case JNODE_FORMATTED_BLOCK:
-+ delete_znode(node, tree);
-+ break;
-+ case JNODE_INODE:
-+ default:
-+ wrong_return_value("nikita-3195", "Wrong jnode type");
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * remove jnode from the debugging list of all jnodes hanging off super-block.
-+ */
-+void jnode_list_remove(jnode * node)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(jnode_get_tree(node)->super);
-+
-+ spin_lock_irq(&sbinfo->all_guard);
-+ assert("nikita-2422", !list_empty(&node->jnodes));
-+ list_del_init(&node->jnodes);
-+ spin_unlock_irq(&sbinfo->all_guard);
-+}
-+#endif
-+
-+/*
-+ * this is called by jput_final() to remove jnode when last reference to it is
-+ * released.
-+ *
-+ * The caller must already have set JNODE_RIP on @node (asserted below).
-+ *
-+ * Return value:
-+ *
-+ * 0: @node was removed via jnode_remove() and freed via jnode_free(); it
-+ * must not be touched afterwards.
-+ *
-+ * -EBUSY (through RETERR()): @node still has a page attached; it is left
-+ * alone and JNODE_RIP is cleared again.
-+ *
-+ * other non-zero: whatever jnode_is_busy() returned; JNODE_RIP is cleared
-+ * again and @node stays valid.
-+ */
-+static int jnode_try_drop(jnode * node)
-+{
-+ int result;
-+ reiser4_tree *tree;
-+ jnode_type jtype;
-+
-+ assert("nikita-2491", node != NULL);
-+ assert("nikita-2583", JF_ISSET(node, JNODE_RIP));
-+
-+ tree = jnode_get_tree(node);
-+ jtype = jnode_get_type(node);
-+
-+ spin_lock_jnode(node); /* jnode spin lock first, then the tree lock */
-+ write_lock_tree(tree);
-+ /*
-+ * if jnode has a page---leave it alone. Memory pressure will
-+ * eventually kill page and jnode.
-+ */
-+ if (jnode_page(node) != NULL) {
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ JF_CLR(node, JNODE_RIP);
-+ return RETERR(-EBUSY);
-+ }
-+
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (result == 0) {
-+ assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("jmacd-511/b", atomic_read(&node->d_count) == 0);
-+
-+ spin_unlock_jnode(node);
-+ /* no page and no references---despatch him. Removal is done
-+ * while the tree lock is still write-held; the node is freed
-+ * only after the tree lock has been released. */
-+ jnode_remove(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype);
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ JF_CLR(node, JNODE_RIP);
-+ }
-+ return result;
-+}
-+
-+/* jdelete() -- Delete jnode from the tree and file system
-+
-+ The caller must already have set JNODE_RIP on @node (asserted below).
-+
-+ Return value:
-+
-+ 0: @node was detached from its page (if it had one), taken out of the
-+ tree with jnode_delete() and freed with jnode_free(); the page, if any,
-+ is then handed to reiser4_drop_page(). @node must not be used afterwards.
-+
-+ non-zero: the value returned by jnode_is_busy(); JNODE_RIP is cleared,
-+ the page (if any) is unlocked and @node stays valid.
-+*/
-+static int jdelete(jnode * node /* jnode to finish with */ )
-+{
-+ struct page *page;
-+ int result;
-+ reiser4_tree *tree;
-+ jnode_type jtype;
-+
-+ assert("nikita-467", node != NULL);
-+ assert("nikita-2531", JF_ISSET(node, JNODE_RIP));
-+
-+ jtype = jnode_get_type(node);
-+
-+ page = jnode_lock_page(node); /* may be NULL, see the checks below */
-+ assert_spin_locked(&(node->guard)); /* jnode spin lock is held from here on */
-+
-+ tree = jnode_get_tree(node);
-+
-+ write_lock_tree(tree);
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (likely(!result)) {
-+ assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("jmacd-511", atomic_read(&node->d_count) == 0);
-+
-+ /* detach page */
-+ if (page != NULL) {
-+ /*
-+ * FIXME this is racy against jnode_extent_write().
-+ */
-+ page_clear_jnode(page, node);
-+ }
-+ spin_unlock_jnode(node);
-+ /* goodbye */
-+ jnode_delete(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype);
-+ /* @node is no longer valid pointer */
-+ if (page != NULL)
-+ reiser4_drop_page(page);
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ JF_CLR(node, JNODE_RIP);
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ if (page != NULL)
-+ unlock_page(page);
-+ }
-+ return result;
-+}
-+
-+/* drop jnode on the floor.
-+
-+ Must be called without the tree lock held (asserted below) and for a node
-+ that does not have JNODE_HEARD_BANSHEE set.
-+
-+ Return value:
-+
-+ -EBUSY: failed to drop jnode, because there are still references to it
-+ (NOTE(review): the value is whatever jnode_is_busy() returns; that it is
-+ -EBUSY is taken from this comment, not from code visible here). In this
-+ case JNODE_RIP is cleared and the page, if any, is unlocked.
-+
-+ 0: successfully dropped jnode. The page, if any, was detached from the
-+ node and passed to reiser4_drop_page(); @node has been freed.
-+
-+*/
-+static int jdrop_in_tree(jnode * node, reiser4_tree * tree)
-+{
-+ struct page *page;
-+ jnode_type jtype;
-+ int result;
-+
-+ assert("zam-602", node != NULL);
-+ assert_rw_not_read_locked(&(tree->tree_lock));
-+ assert_rw_not_write_locked(&(tree->tree_lock));
-+ assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ jtype = jnode_get_type(node);
-+
-+ page = jnode_lock_page(node); /* may be NULL, see the checks below */
-+ assert_spin_locked(&(node->guard)); /* jnode spin lock is held from here on */
-+
-+ write_lock_tree(tree);
-+
-+ /* re-check ->x_count under tree lock. */
-+ result = jnode_is_busy(node, jtype);
-+ if (!result) {
-+ assert("nikita-2488", page == jnode_page(node));
-+ assert("nikita-2533", atomic_read(&node->d_count) == 0);
-+ if (page != NULL) {
-+ assert("nikita-2126", !PageDirty(page));
-+ assert("nikita-2127", PageUptodate(page));
-+ assert("nikita-2181", PageLocked(page));
-+ page_clear_jnode(page, node);
-+ }
-+ spin_unlock_jnode(node);
-+ jnode_remove(node, jtype, tree);
-+ write_unlock_tree(tree);
-+ jnode_free(node, jtype); /* @node must not be used after this */
-+ if (page != NULL) {
-+ reiser4_drop_page(page);
-+ }
-+ } else {
-+ /* busy check failed: reference was acquired by concurrent
-+ * thread. */
-+ JF_CLR(node, JNODE_RIP);
-+ write_unlock_tree(tree);
-+ spin_unlock_jnode(node);
-+ if (page != NULL)
-+ unlock_page(page);
-+ }
-+ return result;
-+}
-+
-+/* Free @node "if possible": thin wrapper that looks up the tree @node
-+   belongs to and hands both to jdrop_in_tree(). The result of
-+   jdrop_in_tree() is discarded, so the caller is not told whether the node
-+   was actually dropped. */
-+void jdrop(jnode * node)
-+{
-+	reiser4_tree *tree = jnode_get_tree(node);
-+
-+	jdrop_in_tree(node, tree);
-+}
-+
-+/* IO head jnode implementation; The io heads are simple j-nodes with limited
-+ functionality (these j-nodes are not in any hash table) just for reading
-+ from and writing to disk. */
-+
-+/*
-+ * Allocate an IO-head jnode for @block in the current tree.
-+ *
-+ * Returns the new jnode with one reference (x_count) already taken, or NULL
-+ * if jalloc() failed. The reference is taken only on success: calling
-+ * jref() on a NULL pointer would dereference it.
-+ */
-+jnode *reiser4_alloc_io_head(const reiser4_block_nr * block)
-+{
-+	jnode *jal = jalloc();
-+
-+	if (jal == NULL)
-+		return NULL;
-+
-+	jnode_init(jal, current_tree, JNODE_IO_HEAD);
-+	jnode_set_block(jal, block);
-+	jref(jal);
-+
-+	return jal;
-+}
-+
-+/* Release an IO-head jnode: put one reference, then try to free the node
-+ with jdrop(). @node must be of type JNODE_IO_HEAD (asserted). */
-+void reiser4_drop_io_head(jnode * node)
-+{
-+ assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD);
-+
-+ jput(node); /* presumably pairs with the jref() in reiser4_alloc_io_head() */
-+ jdrop(node);
-+}
-+
-+/* Protect jnode data from reiser4_releasepage() by taking an extra reference
-+   on the page attached to @node. The node must have a page (asserted). */
-+void pin_jnode_data(jnode * node)
-+{
-+	struct page *pg;
-+
-+	assert("zam-671", jnode_page(node) != NULL);
-+	pg = jnode_page(node);
-+	page_cache_get(pg);
-+}
-+
-+/* Undo pin_jnode_data(): release one reference on the page attached to
-+   @node, making the data free-able again. The node must have a page
-+   (asserted). */
-+void unpin_jnode_data(jnode * node)
-+{
-+	struct page *pg;
-+
-+	assert("zam-672", jnode_page(node) != NULL);
-+	pg = jnode_page(node);
-+	page_cache_release(pg);
-+}
-+
-+/* Return the address space of @node as reported by the ->mapping() method
-+   of the jnode plugin selected by the node's type. */
-+struct address_space *jnode_get_mapping(const jnode * node)
-+{
-+	jnode_plugin *ops;
-+
-+	assert("nikita-3162", node != NULL);
-+	ops = jnode_ops(node);
-+	return ops->mapping(node);
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging aid: jnode invariant
-+
-+ Evaluates the conjunction of the conditions below, left to right, stopping
-+ at the first one that fails. Each condition stores its own source text in
-+ *@msg before it is evaluated, so when 0 is returned *@msg names the failed
-+ condition. Returns non-zero when every condition holds.
-+
-+ The helper macros _ergo() and _check() are defined here and are not
-+ #undef-ed afterwards. */
-+int jnode_invariant_f(const jnode * node, char const **msg)
-+{
-+#define _ergo(ant, con) \
-+ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
-+#define _check(exp) ((*msg) = #exp, (exp))
-+
-+ return _check(node != NULL) &&
-+ /* [jnode-queued] */
-+ /* only relocated node can be queued, except that when znode
-+ * is being deleted, its JNODE_RELOC bit is cleared */
-+ _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED),
-+ JF_ISSET(node, JNODE_RELOC) ||
-+ JF_ISSET(node, JNODE_HEARD_BANSHEE)) &&
-+ _check(node->jnodes.prev != NULL) &&
-+ _check(node->jnodes.next != NULL) &&
-+ /* [jnode-dirty] invariant */
-+ /* dirty jnode is part of atom */
-+ _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) &&
-+ /* [jnode-oid] invariant */
-+ /* for unformatted node ->objectid and ->mapping fields are
-+ * consistent */
-+ _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL,
-+ node->key.j.objectid ==
-+ get_inode_oid(node->key.j.mapping->host)) &&
-+ /* [jnode-atom-valid] invariant */
-+ /* node atom has valid state */
-+ _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) &&
-+ /* [jnode-page-binding] invariant */
-+ /* if node points to page, it points back to node */
-+ _ergo(node->pg != NULL, jprivate(node->pg) == node) &&
-+ /* [jnode-refs] invariant */
-+ /* only referenced jnode can be loaded */
-+ _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count));
-+
-+}
-+
-+/*
-+ * debugging aid: return a human readable name for jnode type @type.
-+ *
-+ * Known types map to string literals. Any other value is formatted into a
-+ * function-local static buffer, so that result is overwritten by the next
-+ * call with an unknown type and must not be relied upon across calls.
-+ */
-+static const char *jnode_type_name(jnode_type type)
-+{
-+	switch (type) {
-+	case JNODE_UNFORMATTED_BLOCK:
-+		return "unformatted";
-+	case JNODE_FORMATTED_BLOCK:
-+		return "formatted";
-+	case JNODE_BITMAP:
-+		return "bitmap";
-+	case JNODE_IO_HEAD:
-+		return "io head";
-+	case JNODE_INODE:
-+		return "inode";
-+	case LAST_JNODE_TYPE:
-+		return "last";
-+	default:{
-+			static char unknown[30];
-+
-+			/* bounded write: never run past the static buffer */
-+			snprintf(unknown, sizeof(unknown), "unknown %i", type);
-+			return unknown;
-+		}
-+	}
-+}
-+
-+/* Expands to the name of @flag followed by "|" when @flag is set in @node,
-+ and to "" otherwise. The "+6" skips the first six characters of the
-+ stringified flag, i.e. the "JNODE_" prefix of the flag names used with
-+ this macro. */
-+#define jnode_state_name( node, flag ) \
-+ ( JF_ISSET( ( node ), ( flag ) ) ? ((#flag "|")+6) : "" )
-+
-+/*
-+ * debugging aid: output human readable information about @node.
-+ *
-+ * Prints "<prefix>: null" for a NULL @node. Otherwise prints the node
-+ * address, raw state word, the names of the set state flags, level, block,
-+ * reference counters, page, atom and type; for unformatted nodes the object
-+ * id and index are appended. The output is not terminated by a newline.
-+ */
-+static void info_jnode(const char *prefix /* prefix to print */ ,
-+		       const jnode * node /* node to print */ )
-+{
-+	assert("umka-068", prefix != NULL);
-+
-+	if (node == NULL) {
-+		printk("%s: null\n", prefix);
-+		return;
-+	}
-+
-+	/* the two zeroes fill the "lock: %i:%i" placeholders */
-+	printk
-+	    ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
-+	     " block: %s, d_count: %d, x_count: %d, "
-+	     "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node,
-+	     node->state,
-+	     jnode_state_name(node, JNODE_PARSED),
-+	     jnode_state_name(node, JNODE_HEARD_BANSHEE),
-+	     jnode_state_name(node, JNODE_LEFT_CONNECTED),
-+	     jnode_state_name(node, JNODE_RIGHT_CONNECTED),
-+	     jnode_state_name(node, JNODE_ORPHAN),
-+	     jnode_state_name(node, JNODE_CREATED),
-+	     jnode_state_name(node, JNODE_RELOC),
-+	     jnode_state_name(node, JNODE_OVRWR),
-+	     jnode_state_name(node, JNODE_DIRTY),
-+	     jnode_state_name(node, JNODE_IS_DYING),
-+	     jnode_state_name(node, JNODE_RIP),
-+	     jnode_state_name(node, JNODE_MISSED_IN_CAPTURE),
-+	     jnode_state_name(node, JNODE_WRITEBACK),
-+	     jnode_state_name(node, JNODE_NEW),
-+	     jnode_state_name(node, JNODE_DKSET),
-+	     jnode_state_name(node, JNODE_REPACK),
-+	     jnode_state_name(node, JNODE_CLUSTER_PAGE),
-+	     jnode_get_level(node), sprint_address(jnode_get_block(node)),
-+	     atomic_read(&node->d_count), atomic_read(&node->x_count),
-+	     jnode_page(node), node->atom, 0, 0,
-+	     jnode_type_name(jnode_get_type(node)));
-+	if (jnode_is_unformatted(node)) {
-+		/* ->objectid is __u64, which is not "unsigned long long" on
-+		 * every architecture: cast it to match %llu */
-+		printk("inode: %llu, index: %lu, ",
-+		       (unsigned long long)node->key.j.objectid,
-+		       node->key.j.index);
-+	}
-+}
-+
-+/*
-+ * debugging aid: check jnode invariant. When it does not hold, the node is
-+ * dumped with info_jnode() and a warning naming the failed condition is
-+ * issued.
-+ *
-+ * @tlocked: non-zero if the caller already holds the tree lock
-+ * @jlocked: non-zero if the caller already holds the jnode spin lock
-+ *
-+ * Locks the caller does not hold are taken here for the duration of the
-+ * check; the jnode spin lock is only taken when the tree lock is not already
-+ * held either. Returns the result of jnode_invariant_f(): non-zero when the
-+ * invariant holds.
-+ */
-+static int jnode_invariant(const jnode * node, int tlocked, int jlocked)
-+{
-+	char const *failed_msg;
-+	int result;
-+	reiser4_tree *tree;
-+
-+	/* validate @node before it is used to look up the tree */
-+	assert("umka-063312", node != NULL);
-+	tree = jnode_get_tree(node);
-+	assert("umka-064321", tree != NULL);
-+
-+	if (!jlocked && !tlocked)
-+		spin_lock_jnode((jnode *) node);
-+	if (!tlocked)
-+		read_lock_tree(tree);
-+	result = jnode_invariant_f(node, &failed_msg);
-+	if (!result) {
-+		info_jnode("corrupted node", node);
-+		warning("jmacd-555", "Condition %s failed", failed_msg);
-+	}
-+	if (!tlocked)
-+		read_unlock_tree(tree);
-+	if (!jlocked && !tlocked)
-+		spin_unlock_jnode((jnode *) node);
-+	return result;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/jnode.h linux-2.6.24/fs/reiser4/jnode.h
---- linux-2.6.24.orig/fs/reiser4/jnode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/jnode.h 2008-01-25 11:39:06.940208719 +0300
-@@ -0,0 +1,702 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of jnode. See jnode.c for details. */
-+
-+#ifndef __JNODE_H__
-+#define __JNODE_H__
-+
-+#include "forward.h"
-+#include "type_safe_hash.h"
-+#include "txnmgr.h"
-+#include "key.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "page_cache.h"
-+#include "context.h"
-+
-+#include "plugin/plugin.h"
-+
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <linux/bitops.h>
-+#include <linux/list.h>
-+#include <linux/rcupdate.h>
-+
-+/* declare hash table of jnodes (jnodes proper, that is, unformatted
-+ nodes) */
-+TYPE_SAFE_HASH_DECLARE(j, jnode);
-+
-+/* declare hash table of znodes */
-+TYPE_SAFE_HASH_DECLARE(z, znode);
-+
-+struct jnode_key {
-+ __u64 objectid; /* object id; the [jnode-oid] invariant ties it to ->mapping->host */
-+ unsigned long index; /* index within the object */
-+ struct address_space *mapping; /* may be NULL */
-+};
-+
-+/*
-+ Jnode is the "base class" of other nodes in reiser4. It also happens to
-+ be exactly the node we use for unformatted tree nodes.
-+
-+ Jnode provides following basic functionality:
-+
-+ . reference counting and indexing.
-+
-+ . integration with page cache. Jnode has ->pg reference to which page can
-+ be attached.
-+
-+ . interface to transaction manager. It is jnode that is kept in transaction
-+ manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
-+ means, there should be special type of jnode for inode.)
-+
-+ Locking:
-+
-+ Spin lock: the following fields are protected by the per-jnode spin lock:
-+
-+ ->state
-+ ->atom
-+ ->capture_link
-+
-+ Following fields are protected by the global tree lock:
-+
-+ ->link
-+ ->key.z (content of ->key.z is only changed in znode_rehash())
-+ ->key.j
-+
-+ Atomic counters
-+
-+ ->x_count
-+ ->d_count
-+
-+ ->pg, and ->data are protected by spin lock for unused jnode and are
-+ immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
-+ is false).
-+
-+ ->tree is immutable after creation
-+
-+ Unclear
-+
-+ ->blocknr: should be under jnode spin-lock, but current interface is based
-+ on passing of block address.
-+
-+ If you ever need to spin lock two nodes at once, do this in "natural"
-+ memory order: lock znode with lower address first. (See lock_two_nodes().)
-+
-+ Invariants involving this data-type:
-+
-+ [jnode-dirty]
-+ [jnode-refs]
-+ [jnode-oid]
-+ [jnode-queued]
-+ [jnode-atom-valid]
-+ [jnode-page-binding]
-+*/
-+
-+struct jnode {
-+#if REISER4_DEBUG
-+#define JMAGIC 0x52654973 /* "ReIs" */
-+ int magic;
-+#endif
-+ /* FIRST CACHE LINE (16 bytes): data used by jload */
-+ /* NOTE(review): the numeric offsets in the comments below look like
-+ they assume 4-byte longs, pointers and spinlocks -- confirm before
-+ relying on them. */
-+
-+ /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
-+ /* 0 */ unsigned long state;
-+
-+ /* lock, protecting jnode's fields. */
-+ /* NOTE(review): spin_lock_jnode() takes ->guard below, not ->load. */
-+ /* 4 */ spinlock_t load;
-+
-+ /* counter of references to jnode itself. Increased on jref().
-+ Decreased on jput().
-+ */
-+ /* 8 */ atomic_t x_count;
-+
-+ /* counter of references to jnode's data. Pin data page(s) in
-+ memory while this is greater than 0. Increased on jload().
-+ Decreased on jrelse().
-+ */
-+ /* 12 */ atomic_t d_count;
-+
-+ /* SECOND CACHE LINE: data used by hash table lookups */
-+
-+ /* 16 */ union {
-+ /* znodes are hashed by block number */
-+ reiser4_block_nr z;
-+ /* unformatted nodes are hashed by mapping plus offset */
-+ struct jnode_key j;
-+ } key;
-+
-+ /* THIRD CACHE LINE */
-+
-+ /* 32 */ union {
-+ /* pointers to maintain hash-table */
-+ z_hash_link z;
-+ j_hash_link j;
-+ } link;
-+
-+ /* pointer to jnode page. */
-+ /* 36 */ struct page *pg;
-+ /* pointer to node itself. This is page_address(node->pg) when page is
-+ attached to the jnode
-+ */
-+ /* 40 */ void *data;
-+
-+ /* tree this jnode belongs to; immutable after creation */
-+ /* 44 */ reiser4_tree *tree;
-+
-+ /* FOURTH CACHE LINE: atom related fields */
-+
-+ /* per-jnode spin lock, taken by spin_lock_jnode() */
-+ /* 48 */ spinlock_t guard;
-+
-+ /* atom the block is in, if any */
-+ /* 52 */ txn_atom *atom;
-+
-+ /* capture list */
-+ /* 56 */ struct list_head capture_link;
-+
-+ /* FIFTH CACHE LINE */
-+
-+ /* 64 */ struct rcu_head rcu;
-+ /* crosses cache line */
-+
-+ /* SIXTH CACHE LINE */
-+
-+ /* the real blocknr (where io is going to/from) */
-+ /* 80 */ reiser4_block_nr blocknr;
-+ /* Parent item type, unformatted and CRC need it for offset => key conversion. */
-+ /* NOTE: this parent_item_id looks like jnode type. */
-+ /* 88 */ reiser4_plugin_id parent_item_id;
-+ /* 92 */
-+#if REISER4_DEBUG
-+ /* list of all jnodes for debugging purposes. */
-+ struct list_head jnodes;
-+ /* how many times this jnode was written in one transaction */
-+ int written;
-+ /* this indicates which atom's list the jnode is on */
-+ atom_list list;
-+#endif
-+} __attribute__ ((aligned(16)));
-+
-+/*
-+ * jnode types. Enumeration of existing jnode types.
-+ *
-+ * The numeric value of a type is also used as a jnode plugin id (see
-+ * jnode_ops_of()), and LAST_JNODE_TYPE sizes the jnode_plugins[] array, so
-+ * it must stay the last enumerator. jnode_get_type() additionally returns
-+ * LAST_JNODE_TYPE for type bit patterns that do not name a type.
-+ */
-+typedef enum {
-+ JNODE_UNFORMATTED_BLOCK, /* unformatted block */
-+ JNODE_FORMATTED_BLOCK, /* formatted block, znode */
-+ JNODE_BITMAP, /* bitmap */
-+ JNODE_IO_HEAD, /* jnode representing a block in the
-+ * wandering log */
-+ JNODE_INODE, /* jnode embedded into inode */
-+ LAST_JNODE_TYPE
-+} jnode_type;
-+
-+/* jnode states. Each enumerator is a bit number within jnode->state; the
-+ bits are manipulated with JF_SET(), JF_CLR(), JF_ISSET() and
-+ JF_TEST_AND_SET(). */
-+typedef enum {
-+ /* jnode's page is loaded and data checked */
-+ JNODE_PARSED = 0,
-+ /* node was deleted, not all locks on it were released. This
-+ node is empty and is going to be removed from the tree
-+ shortly. */
-+ JNODE_HEARD_BANSHEE = 1,
-+ /* left sibling pointer is valid */
-+ JNODE_LEFT_CONNECTED = 2,
-+ /* right sibling pointer is valid */
-+ JNODE_RIGHT_CONNECTED = 3,
-+
-+ /* znode was just created and doesn't yet have a pointer from
-+ its parent */
-+ JNODE_ORPHAN = 4,
-+
-+ /* this node was created by its transaction and has not been assigned
-+ a block address. */
-+ JNODE_CREATED = 5,
-+
-+ /* this node is currently relocated */
-+ JNODE_RELOC = 6,
-+ /* this node is currently wandered */
-+ JNODE_OVRWR = 7,
-+
-+ /* this znode has been modified */
-+ JNODE_DIRTY = 8,
-+
-+ /* znode lock is being invalidated */
-+ JNODE_IS_DYING = 9,
-+
-+ /* THIS PLACE IS INTENTIONALLY LEFT BLANK */
-+
-+ /* jnode is queued for flushing. */
-+ JNODE_FLUSH_QUEUED = 12,
-+
-+ /* In the following bits jnode type is encoded; they are decoded by
-+ jnode_get_type(). */
-+ JNODE_TYPE_1 = 13,
-+ JNODE_TYPE_2 = 14,
-+ JNODE_TYPE_3 = 15,
-+
-+ /* jnode is being destroyed */
-+ JNODE_RIP = 16,
-+
-+ /* znode was not captured during locking (it might so be because
-+ ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
-+ JNODE_MISSED_IN_CAPTURE = 17,
-+
-+ /* write is in progress */
-+ JNODE_WRITEBACK = 18,
-+
-+ /* FIXME: now it is used by crypto-compress plugin only */
-+ JNODE_NEW = 19,
-+
-+ /* delimiting keys are already set for this znode. */
-+ JNODE_DKSET = 20,
-+
-+ /* when this bit is set page and jnode can not be disconnected */
-+ JNODE_WRITE_PREPARED = 21,
-+
-+ JNODE_CLUSTER_PAGE = 22, /* tested by jnode_is_cluster_page() */
-+ /* Jnode is marked for repacking, that means the reiser4 flush and the
-+ * block allocator should process this node special way */
-+ JNODE_REPACK = 23,
-+ /* node should be converted by flush in squalloc phase */
-+ JNODE_CONVERTIBLE = 24,
-+ /*
-+ * When jnode is dirtied for the first time in given transaction,
-+ * do_jnode_make_dirty() checks whether this jnode can possibly become
-+ * a member of the overwrite set. If so, this bit is set, and one block
-+ * is reserved in the ->flush_reserved space of atom.
-+ *
-+ * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
-+ *
-+ * (1) flush decides that we want this block to go into relocate
-+ * set after all.
-+ *
-+ * (2) wandering log is allocated (by log writer)
-+ *
-+ * (3) extent is allocated
-+ *
-+ */
-+ JNODE_FLUSH_RESERVED = 29
-+} reiser4_jnode_state;
-+
-+/* Macros for accessing the jnode state. */
-+
-+/* Clear state bit number @f in @j->state using clear_bit(). */
-+static inline void JF_CLR(jnode * j, int f)
-+{
-+	unsigned long *flags = &j->state;
-+
-+	assert("unknown-1", j->magic == JMAGIC);
-+	clear_bit(f, flags);
-+}
-+/* Test state bit number @f in @j->state; returns the result of test_bit().
-+   Constness of @j is cast away only to obtain the address of ->state. */
-+static inline int JF_ISSET(const jnode * j, int f)
-+{
-+	jnode *writable = (jnode *) j;
-+
-+	assert("unknown-2", j->magic == JMAGIC);
-+	return test_bit(f, &writable->state);
-+}
-+/* Set state bit number @f in @j->state using set_bit(). */
-+static inline void JF_SET(jnode * j, int f)
-+{
-+	unsigned long *flags = &j->state;
-+
-+	assert("unknown-3", j->magic == JMAGIC);
-+	set_bit(f, flags);
-+}
-+
-+/* Set state bit number @f in @j->state and return the result of
-+   test_and_set_bit(), which reports the bit's previous value. */
-+static inline int JF_TEST_AND_SET(jnode * j, int f)
-+{
-+	unsigned long *flags = &j->state;
-+
-+	assert("unknown-4", j->magic == JMAGIC);
-+	return test_and_set_bit(f, flags);
-+}
-+
-+static inline void spin_lock_jnode(jnode *node)
-+{
-+ /* check that spinlocks of lower priorities are not held: the tree
-+ lock, txnh and zlock spin locks and the dk lock must not be held
-+ when a jnode is locked. The last term presumably allows at most
-+ one other jnode spin lock to be held already -- confirm against
-+ the LOCK_CNT_LT() definition. */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_LT(spin_locked_jnode, 2)));
-+
-+ spin_lock(&(node->guard)); /* the per-jnode lock is ->guard */
-+
-+ LOCK_CNT_INC(spin_locked_jnode); /* lock counters, checked in spin_unlock_jnode() */
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_unlock_jnode(jnode *node)
-+{
-+ assert_spin_locked(&(node->guard)); /* must pair with spin_lock_jnode() */
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_jnode); /* undo the counting done in spin_lock_jnode() */
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(node->guard));
-+}
-+
-+/* Report whether @node has the JNODE_RELOC state bit set; that bit is the
-+   only thing this predicate looks at. */
-+static inline int jnode_is_in_deleteset(const jnode * node)
-+{
-+	int relocated = JF_ISSET(node, JNODE_RELOC);
-+
-+	return relocated;
-+}
-+
-+extern int init_jnodes(void);
-+extern void done_jnodes(void);
-+
-+/* Jnode routines */
-+extern jnode *jalloc(void);
-+extern void jfree(jnode * node) NONNULL;
-+extern jnode *jclone(jnode *);
-+extern jnode *jlookup(reiser4_tree * tree,
-+ oid_t objectid, unsigned long ind) NONNULL;
-+extern jnode *jfind(struct address_space *, unsigned long index) NONNULL;
-+extern jnode *jnode_by_page(struct page *pg) NONNULL;
-+extern jnode *jnode_of_page(struct page *pg) NONNULL;
-+void jnode_attach_page(jnode * node, struct page *pg);
-+
-+void unhash_unformatted_jnode(jnode *);
-+extern jnode *page_next_jnode(jnode * node) NONNULL;
-+extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL;
-+extern void jnode_make_dirty(jnode * node) NONNULL;
-+extern void jnode_make_clean(jnode * node) NONNULL;
-+extern void jnode_make_wander_nolock(jnode * node) NONNULL;
-+extern void jnode_make_wander(jnode *) NONNULL;
-+extern void znode_make_reloc(znode *, flush_queue_t *) NONNULL;
-+extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL;
-+extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL;
-+
-+/**
-+ * jnode_get_block - address of the block number stored in a jnode
-+ * @node: jnode to query
-+ *
-+ * Returns a pointer to @node->blocknr itself, not a copy, so the pointer is
-+ * only meaningful while @node exists.
-+ */
-+static inline const reiser4_block_nr *jnode_get_block(const jnode *node)
-+{
-+	const reiser4_block_nr *blk;
-+
-+	assert("nikita-528", node != NULL);
-+
-+	blk = &node->blocknr;
-+	return blk;
-+}
-+
-+/**
-+ * jnode_set_block - store a block number in a jnode
-+ * @node: jnode to update
-+ * @blocknr: new block nr
-+ *
-+ * The value pointed to by @blocknr is copied into @node->blocknr; the
-+ * pointer itself is not retained. No lock is taken here.
-+ */
-+static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr)
-+{
-+ assert("nikita-2020", node != NULL);
-+ assert("umka-055", blocknr != NULL);
-+ node->blocknr = *blocknr;
-+}
-+
-+
-+/* block number for IO. In the code below this is always the same as
-+ * jnode_get_block(); the only difference is that the jnode spin lock
-+ * (->guard) must be held by the caller (asserted).
-+ * NOTE(review): an earlier wording mentioned a separate block number chosen
-+ * by emergency flush (eflush); nothing in this function implements that. */
-+static inline const reiser4_block_nr *jnode_get_io_block(jnode * node)
-+{
-+ assert("nikita-2768", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+
-+ return jnode_get_block(node);
-+}
-+
-+/* Jnode flush interface. */
-+extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t * pos);
-+extern flush_queue_t *reiser4_pos_fq(flush_pos_t * pos);
-+
-+/* FIXME-VS: these are used in plugin/item/extent.c */
-+
-+/* does extent_get_block have to be called
-+ * NOTE(review): JNODE_MAPPED is not among the reiser4_jnode_state
-+ * enumerators in this header, so these two macros only compile if it is
-+ * defined elsewhere -- confirm, or treat them as dead. */
-+#define jnode_mapped(node) JF_ISSET (node, JNODE_MAPPED)
-+#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED)
-+
-+/* the node should be converted during flush squalloc phase */
-+#define jnode_convertible(node) JF_ISSET (node, JNODE_CONVERTIBLE)
-+#define jnode_set_convertible(node) JF_SET (node, JNODE_CONVERTIBLE)
-+
-+/* Macros to convert from jnode to znode, znode to jnode. These are macros
-+ because C doesn't allow overloading of const prototypes.
-+
-+ ZJNODE(x) yields the address of the ->zjnode member of x.
-+
-+ JZNODE(x) is a statement expression: it evaluates x exactly once, asserts
-+ that the result is a znode (jnode_is_znode()) and yields it cast to
-+ znode *. The cast also drops any const qualifier x may have had. */
-+#define ZJNODE(x) (& (x) -> zjnode)
-+#define JZNODE(x) \
-+({ \
-+ typeof (x) __tmp_x; \
-+ \
-+ __tmp_x = (x); \
-+ assert ("jmacd-1300", jnode_is_znode (__tmp_x)); \
-+ (znode*) __tmp_x; \
-+})
-+
-+extern int jnodes_tree_init(reiser4_tree * tree);
-+extern int jnodes_tree_done(reiser4_tree * tree);
-+
-+#if REISER4_DEBUG
-+
-+extern int znode_is_any_locked(const znode * node);
-+extern void jnode_list_remove(jnode * node);
-+
-+#else
-+
-+#define jnode_list_remove(node) noop
-+
-+#endif
-+
-+int znode_is_root(const znode * node) NONNULL;
-+
-+/* Take one more reference on @node: atomically increment ->x_count and
-+   bump the x_refs lock counter. */
-+static inline void add_x_ref(jnode * node /* node to increase x_count of */ )
-+{
-+	atomic_t *refs;
-+
-+	assert("nikita-1911", node != NULL);
-+
-+	refs = &node->x_count;
-+	atomic_inc(refs);
-+	LOCK_CNT_INC(x_refs);
-+}
-+
-+/* Give up one reference on @node: atomically decrement ->x_count (which
-+   must be positive, asserted) and the x_refs lock counter. Unlike jput(),
-+   nothing is done here when the counter reaches zero. */
-+static inline void dec_x_ref(jnode * node)
-+{
-+	atomic_t *refs;
-+
-+	assert("nikita-3215", node != NULL);
-+	assert("nikita-3216", atomic_read(&node->x_count) > 0);
-+
-+	refs = &node->x_count;
-+	atomic_dec(refs);
-+	assert("nikita-3217", LOCK_CNT_GTZ(x_refs));
-+	LOCK_CNT_DEC(x_refs);
-+}
-+
-+/* jref() - increase counter of references to jnode/znode (x_count)
-+
-+ @node must be a valid pointer: neither NULL nor an ERR_PTR value
-+ (asserted). Returns @node itself so that the call can be used inside an
-+ expression. */
-+static inline jnode *jref(jnode * node)
-+{
-+ assert("jmacd-508", (node != NULL) && !IS_ERR(node));
-+ add_x_ref(node);
-+ return node;
-+}
-+
-+/* get the page of jnode: returns node->pg as is, which may be NULL (callers
-+ in this file compare the result against NULL). No check of @node and no
-+ locking is done here. */
-+static inline struct page *jnode_page(const jnode * node)
-+{
-+ return node->pg;
-+}
-+
-+/* return pointer to jnode data (node->data). The jnode must have a page
-+ attached (asserted). */
-+static inline char *jdata(const jnode * node)
-+{
-+ assert("nikita-1415", node != NULL);
-+ assert("nikita-3198", jnode_page(node) != NULL);
-+ return node->data;
-+}
-+
-+/* Return true if the data reference counter (->d_count) of @node is
-+   positive at the moment it is read. */
-+static inline int jnode_is_loaded(const jnode * node)
-+{
-+	int data_refs;
-+
-+	assert("zam-506", node != NULL);
-+	data_refs = atomic_read(&node->d_count);
-+	return data_refs > 0;
-+}
-+
-+extern void page_clear_jnode(struct page *page, jnode * node) NONNULL;
-+
-+/* Mark @node as relocated by setting JNODE_RELOC. The node must not be
-+ marked JNODE_OVRWR at the same time (asserted). */
-+static inline void jnode_set_reloc(jnode * node)
-+{
-+ assert("nikita-2431", node != NULL);
-+ assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR));
-+ JF_SET(node, JNODE_RELOC);
-+}
-+
-+/* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
-+
-+extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL;
-+
-+/* Convenience wrapper around jload_gfp(): uses the gfp mask returned by
-+   reiser4_ctx_gfp_mask_get() and passes 1 for do_kmap. Returns whatever
-+   jload_gfp() returns. */
-+static inline int jload(jnode *node)
-+{
-+	gfp_t gfp_mask = reiser4_ctx_gfp_mask_get();
-+
-+	return jload_gfp(node, gfp_mask, 1);
-+}
-+
-+extern int jinit_new(jnode *, gfp_t) NONNULL;
-+extern int jstartio(jnode *) NONNULL;
-+
-+extern void jdrop(jnode *) NONNULL;
-+extern int jwait_io(jnode *, int rw) NONNULL;
-+
-+void jload_prefetch(jnode *);
-+
-+extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL;
-+extern void reiser4_drop_io_head(jnode * node) NONNULL;
-+
-+/* Return the tree @node belongs to (node->tree). */
-+static inline reiser4_tree *jnode_get_tree(const jnode * node)
-+{
-+	reiser4_tree *owner;
-+
-+	assert("nikita-2691", node != NULL);
-+	owner = node->tree;
-+	return owner;
-+}
-+
-+extern void pin_jnode_data(jnode *);
-+extern void unpin_jnode_data(jnode *);
-+
-+/*
-+ * Decode the jnode type stored in the JNODE_TYPE_1..JNODE_TYPE_3 bits of
-+ * node->state. Bit patterns that do not name a type yield LAST_JNODE_TYPE.
-+ */
-+static inline jnode_type jnode_get_type(const jnode * node)
-+{
-+	/* indexed by the three type bits, written in the order
-+	 * JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
-+	static jnode_type mask_to_type[] = {
-+		[0] = JNODE_FORMATTED_BLOCK,	/* 000 */
-+		[1] = JNODE_UNFORMATTED_BLOCK,	/* 001 */
-+		[2] = JNODE_BITMAP,		/* 010 */
-+		[3] = LAST_JNODE_TYPE,		/* 011, invalid */
-+		[4] = JNODE_INODE,		/* 100 */
-+		[5] = LAST_JNODE_TYPE,		/* 101 */
-+		[6] = JNODE_IO_HEAD,		/* 110 */
-+		[7] = LAST_JNODE_TYPE,		/* 111, invalid */
-+	};
-+	static const unsigned long state_mask =
-+	    (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3);
-+	unsigned long type_bits;
-+
-+	/* isolate the type bits and move them down to bit 0 */
-+	type_bits = (node->state & state_mask) >> JNODE_TYPE_1;
-+	return mask_to_type[type_bits];
-+}
-+
-+/* returns true if node is a znode, that is, if its type as decoded by
-+ jnode_get_type() is JNODE_FORMATTED_BLOCK */
-+static inline int jnode_is_znode(const jnode * node)
-+{
-+ return jnode_get_type(node) == JNODE_FORMATTED_BLOCK;
-+}
-+
-+/* Return 1 if @node is clean (JNODE_DIRTY not set), relocated (JNODE_RELOC)
-+   or wandered (JNODE_OVRWR), and 0 otherwise. The caller must hold the
-+   jnode spin lock (asserted). */
-+static inline int jnode_is_flushprepped(jnode * node)
-+{
-+	assert("jmacd-78212", node != NULL);
-+	assert_spin_locked(&(node->guard));
-+
-+	if (!JF_ISSET(node, JNODE_DIRTY))
-+		return 1;
-+	if (JF_ISSET(node, JNODE_RELOC))
-+		return 1;
-+	/* normalize to 0/1, as the original boolean expression did */
-+	return JF_ISSET(node, JNODE_OVRWR) ? 1 : 0;
-+}
-+
-+/* Return true if @node has already been processed by the squeeze and allocate
-+ process. This implies the block address has been finalized for the
-+ duration of this atom (or it is clean and will remain in place). If this
-+ returns true you may use the block number as a hint.
-+
-+ This is the self-locking variant of jnode_is_flushprepped(): it takes and
-+ releases the jnode spin lock around the check, so the caller must not
-+ hold that lock. The answer may be stale as soon as the lock is dropped. */
-+static inline int jnode_check_flushprepped(jnode * node)
-+{
-+ int result;
-+
-+ /* It must be clean or relocated or wandered. New allocations are set to relocate. */
-+ spin_lock_jnode(node);
-+ result = jnode_is_flushprepped(node);
-+ spin_unlock_jnode(node);
-+ return result;
-+}
-+
-+/* returns true if node is unformatted, that is, if its type as decoded by
-+ jnode_get_type() is JNODE_UNFORMATTED_BLOCK */
-+static inline int jnode_is_unformatted(const jnode * node)
-+{
-+ assert("jmacd-0123", node != NULL);
-+ return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
-+}
-+
-+/* returns true if node represents a cluster cache page, that is, if the
-+ JNODE_CLUSTER_PAGE state bit is set */
-+static inline int jnode_is_cluster_page(const jnode * node)
-+{
-+ assert("edward-50", node != NULL);
-+ return (JF_ISSET(node, JNODE_CLUSTER_PAGE));
-+}
-+
-+/* returns true if node is builtin inode's jnode, that is, if its type as
-+ decoded by jnode_get_type() is JNODE_INODE */
-+static inline int jnode_is_inode(const jnode * node)
-+{
-+ assert("vs-1240", node != NULL);
-+ return jnode_get_type(node) == JNODE_INODE;
-+}
-+
-+/* Return the jnode plugin for jnode type @type. The type value is used
-+   directly as the plugin id; @type must be below LAST_JNODE_TYPE
-+   (asserted). */
-+static inline jnode_plugin *jnode_ops_of(const jnode_type type)
-+{
-+	reiser4_plugin_id id = (reiser4_plugin_id) type;
-+
-+	assert("nikita-2367", type < LAST_JNODE_TYPE);
-+	return jnode_plugin_by_id(id);
-+}
-+
-+/* Return the jnode plugin that matches the type of @node. */
-+static inline jnode_plugin *jnode_ops(const jnode * node)
-+{
-+	jnode_type type;
-+
-+	assert("nikita-2366", node != NULL);
-+
-+	type = jnode_get_type(node);
-+	return jnode_ops_of(type);
-+}
-+
-+/* Get the index of a block, as computed by the ->index() method of the
-+   jnode plugin selected by the node's type. */
-+static inline unsigned long jnode_get_index(jnode * node)
-+{
-+	jnode_plugin *ops = jnode_ops(node);
-+
-+	return ops->index(node);
-+}
-+
-+/* Return 1 if "node" is a znode and znode_is_root() holds for it, and 0
-+   otherwise. Jnodes of any other type are never the root. */
-+static inline int jnode_is_root(const jnode * node)
-+{
-+	if (!jnode_is_znode(node))
-+		return 0;
-+	/* normalize to 0/1, as the original "&&" expression did */
-+	return znode_is_root(JZNODE(node)) != 0;
-+}
-+
-+extern struct address_space *mapping_jnode(const jnode * node);
-+extern unsigned long index_jnode(const jnode * node);
-+
-+static inline void jput(jnode * node);
-+extern void jput_final(jnode * node);
-+
-+/* bump data counter on @node: ->d_count is incremented atomically for every
-+ node, while the d_refs lock counter is only bumped for unformatted nodes
-+ and znodes */
-+static inline void add_d_ref(jnode * node /* node to increase d_count of */ )
-+{
-+ assert("nikita-1962", node != NULL);
-+
-+ atomic_inc(&node->d_count);
-+ if (jnode_is_unformatted(node) || jnode_is_znode(node))
-+ LOCK_CNT_INC(d_refs);
-+}
-+
-+/* jput() - decrement x_count reference counter on znode.
-+
-+ Count may drop to 0, jnode stays in cache until memory pressure causes the
-+ eviction of its page. The c_count variable also ensures that children are
-+ pressured out of memory before the parent. The jnode remains hashed as
-+ long as the VM allows its page to stay in memory.
-+
-+ The caller must hold a reference (x_count > 0, asserted) and must be in a
-+ schedulable context (asserted on entry and on exit). When this call drops
-+ the last reference, jput_final() is invoked.
-+*/
-+static inline void jput(jnode * node)
-+{
-+ assert("jmacd-509", node != NULL);
-+ assert("jmacd-510", atomic_read(&node->x_count) > 0);
-+ assert("zam-926", reiser4_schedulable());
-+ LOCK_CNT_DEC(x_refs);
-+
-+ rcu_read_lock();
-+ /*
-+ * we don't need any kind of lock here--jput_final() uses RCU.
-+ *
-+ * NOTE(review): on the last-reference path rcu_read_unlock() is not
-+ * called here; presumably jput_final() leaves the RCU read-side
-+ * section itself -- confirm in jnode.c.
-+ */
-+ if (unlikely(atomic_dec_and_test(&node->x_count))) {
-+ jput_final(node);
-+ } else
-+ rcu_read_unlock();
-+ assert("nikita-3473", reiser4_schedulable());
-+}
-+
-+extern void jrelse(jnode * node);
-+extern void jrelse_tail(jnode * node);
-+
-+extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node);
-+
-+/* Resolve race with jput: when @node is not marked JNODE_RIP (the expected
-+   case) it is returned unchanged; otherwise the result of jnode_rip_sync()
-+   is returned in its place. */
-+static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node)
-+{
-+	if (likely(!JF_ISSET(node, JNODE_RIP)))
-+		return node;
-+	return jnode_rip_sync(tree, node);
-+}
-+
-+extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key);
-+
-+#if REISER4_DEBUG
-+extern int jnode_invariant_f(const jnode *node, char const **msg);
-+#endif
-+
-+extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE];
-+
-+/* __JNODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/kassign.c linux-2.6.24/fs/reiser4/kassign.c
---- linux-2.6.24.orig/fs/reiser4/kassign.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/kassign.c 2008-01-25 11:55:43.900543447 +0300
-@@ -0,0 +1,677 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Key assignment policy implementation */
-+
-+/*
-+ * In reiser4 every piece of file system data and meta-data has a key. Keys
-+ * are used to store information in and retrieve it from reiser4 internal
-+ * tree. In addition to this, keys define _ordering_ of all file system
-+ * information: things having close keys are placed into the same or
-+ * neighboring (in the tree order) nodes of the tree. As our block allocator
-+ * tries to respect tree order (see flush.c), keys also define order in which
-+ * things are laid out on the disk, and hence, affect performance directly.
-+ *
-+ * Obviously, assignment of keys to data and meta-data should be consistent
-+ * across whole file system. Algorithm that calculates a key for a given piece
-+ * of data or meta-data is referred to as "key assignment".
-+ *
-+ * Key assignment is too expensive to be implemented as a plugin (that is,
-+ * with an ability to support different key assignment schemas in the same
-+ * compiled kernel image). As a compromise, all key-assignment functions and
-+ * data-structures are collected in this single file, so that modifications to
-+ * key assignment algorithm can be localized. Additional changes may be
-+ * required in key.[ch].
-+ *
-+ * Current default reiser4 key assignment algorithm is dubbed "Plan A". As one
-+ * may guess, there is "Plan B" too.
-+ *
-+ */
-+
-+/*
-+ * Additional complication with key assignment implementation is a requirement
-+ * to support different key length.
-+ */
-+
-+/*
-+ * KEY ASSIGNMENT: PLAN A, LONG KEYS.
-+ *
-+ * DIRECTORY ITEMS
-+ *
-+ * | 60 | 4 | 7 |1| 56 | 64 | 64 |
-+ * +--------------+---+---+-+-------------+------------------+-----------------+
-+ * | dirid | 0 | F |H| prefix-1 | prefix-2 | prefix-3/hash |
-+ * +--------------+---+---+-+-------------+------------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * dirid objectid of directory this item is for
-+ *
-+ * F fibration, see fs/reiser4/plugin/fibration.[ch]
-+ *
-+ * H 1 if last 8 bytes of the key contain hash,
-+ * 0 if last 8 bytes of the key contain prefix-3
-+ *
-+ * prefix-1 first 7 characters of file name.
-+ * Padded by zeroes if name is not long enough.
-+ *
-+ * prefix-2 next 8 characters of the file name.
-+ *
-+ * prefix-3 next 8 characters of the file name.
-+ *
-+ * hash hash of the rest of file name (i.e., portion of file
-+ * name not included into prefix-1 and prefix-2).
-+ *
-+ * File names shorter than 23 (== 7 + 8 + 8) characters are completely encoded
-+ * in the key. Such file names are called "short". They are distinguished by H
-+ * bit set 0 in the key.
-+ *
-+ * Other file names are "long". For long name, H bit is 1, and first 15 (== 7
-+ * + 8) characters are encoded in prefix-1 and prefix-2 portions of the
-+ * key. Last 8 bytes of the key are occupied by hash of the remaining
-+ * characters of the name.
-+ *
-+ * This key assignment reaches following important goals:
-+ *
-+ * (1) directory entries are sorted in approximately lexicographical
-+ * order.
-+ *
-+ * (2) collisions (when multiple directory items have the same key), while
-+ * principally unavoidable in a tree with fixed length keys, are rare.
-+ *
-+ * STAT DATA
-+ *
-+ * | 60 | 4 | 64 | 4 | 60 | 64 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | locality id | 1 | ordering | 0 | objectid | 0 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * ordering copy of second 8-byte portion of the key of directory
-+ * entry for the first name of this object. Ordering has a form
-+ * {
-+ * fibration :7;
-+ * h :1;
-+ * prefix1 :56;
-+ * }
-+ * see description of key for directory entry above.
-+ *
-+ * objectid object id for this object
-+ *
-+ * This key assignment policy is designed to keep stat-data in the same order
-+ * as corresponding directory items, thus speeding up readdir/stat types of
-+ * workload.
-+ *
-+ * FILE BODY
-+ *
-+ * | 60 | 4 | 64 | 4 | 60 | 64 |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | locality id | 4 | ordering | 0 | objectid | offset |
-+ * +--------------+---+-----------------+---+--------------+-----------------+
-+ * | | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * ordering the same as in the key of stat-data for this object
-+ *
-+ * objectid object id for this object
-+ *
-+ * offset logical offset from the beginning of this file.
-+ * Measured in bytes.
-+ *
-+ *
-+ * KEY ASSIGNMENT: PLAN A, SHORT KEYS.
-+ *
-+ * DIRECTORY ITEMS
-+ *
-+ * | 60 | 4 | 7 |1| 56 | 64 |
-+ * +--------------+---+---+-+-------------+-----------------+
-+ * | dirid | 0 | F |H| prefix-1 | prefix-2/hash |
-+ * +--------------+---+---+-+-------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * dirid objectid of directory this item is for
-+ *
-+ * F fibration, see fs/reiser4/plugin/fibration.[ch]
-+ *
-+ * H 1 if last 8 bytes of the key contain hash,
-+ * 0 if last 8 bytes of the key contain prefix-2
-+ *
-+ * prefix-1 first 7 characters of file name.
-+ * Padded by zeroes if name is not long enough.
-+ *
-+ * prefix-2 next 8 characters of the file name.
-+ *
-+ * hash hash of the rest of file name (i.e., portion of file
-+ * name not included into prefix-1).
-+ *
-+ * File names shorter than 15 (== 7 + 8) characters are completely encoded in
-+ * the key. Such file names are called "short". They are distinguished by H
-+ * bit set in the key.
-+ *
-+ * Other file names are "long". For long name, H bit is 0, and first 7
-+ * characters are encoded in prefix-1 portion of the key. Last 8 bytes of the
-+ * key are occupied by hash of the remaining characters of the name.
-+ *
-+ * STAT DATA
-+ *
-+ * | 60 | 4 | 4 | 60 | 64 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | locality id | 1 | 0 | objectid | 0 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * objectid object id for this object
-+ *
-+ * FILE BODY
-+ *
-+ * | 60 | 4 | 4 | 60 | 64 |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | locality id | 4 | 0 | objectid | offset |
-+ * +--------------+---+---+--------------+-----------------+
-+ * | | | |
-+ * | 8 bytes | 8 bytes | 8 bytes |
-+ *
-+ * locality id object id of a directory where first name was created for
-+ * the object
-+ *
-+ * objectid object id for this object
-+ *
-+ * offset logical offset from the beginning of this file.
-+ * Measured in bytes.
-+ *
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "key.h"
-+#include "kassign.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "dscale.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block, etc */
-+
-+/* bitmask for H bit (see comment at the beginning of this file */
-+static const __u64 longname_mark = 0x0100000000000000ull;
-+/* bitmask for F and H portions of the key. */
-+static const __u64 fibration_mask = 0xff00000000000000ull;
-+
-+/* return true if name is not completely encoded in @key */
-+int is_longname_key(const reiser4_key * key)
-+{
-+ __u64 highpart;
-+
-+ assert("nikita-2863", key != NULL);
-+ if (get_key_type(key) != KEY_FILE_NAME_MINOR)
-+ reiser4_print_key("oops", key);
-+ assert("nikita-2864", get_key_type(key) == KEY_FILE_NAME_MINOR);
-+
-+ if (REISER4_LARGE_KEY)
-+ highpart = get_key_ordering(key);
-+ else
-+ highpart = get_key_objectid(key);
-+
-+ return (highpart & longname_mark) ? 1 : 0;
-+}
-+
-+/* return true if @name is too long to be completely encoded in the key */
-+int is_longname(const char *name UNUSED_ARG, int len)
-+{
-+ if (REISER4_LARGE_KEY)
-+ return len > 23;
-+ else
-+ return len > 15;
-+}
-+
-+/* code ascii string into __u64.
-+
-+ Put characters of @name into result (@str) one after another starting
-+ from @start_idx-th highest (arithmetically) byte. This produces
-+ endian-safe encoding. memcpy(2) will not do.
-+
-+*/
-+static __u64 pack_string(const char *name /* string to encode */ ,
-+ int start_idx /* highest byte in result from
-+ * which to start encoding */ )
-+{
-+ unsigned i;
-+ __u64 str;
-+
-+ str = 0;
-+ for (i = 0; (i < sizeof str - start_idx) && name[i]; ++i) {
-+ str <<= 8;
-+ str |= (unsigned char)name[i];
-+ }
-+ str <<= (sizeof str - i - start_idx) << 3;
-+ return str;
-+}
-+
-+/* opposite to pack_string(). Takes value produced by pack_string(), restores
-+ * string encoded in it and stores result in @buf */
-+char * reiser4_unpack_string(__u64 value, char *buf)
-+{
-+ do {
-+ *buf = value >> (64 - 8);
-+ if (*buf)
-+ ++buf;
-+ value <<= 8;
-+ } while (value != 0);
-+ *buf = 0;
-+ return buf;
-+}
-+
-+/* obtain name encoded in @key and store it in @buf */
-+char *extract_name_from_key(const reiser4_key * key, char *buf)
-+{
-+ char *c;
-+
-+ assert("nikita-2868", !is_longname_key(key));
-+
-+ c = buf;
-+ if (REISER4_LARGE_KEY) {
-+ c = reiser4_unpack_string(get_key_ordering(key) &
-+ ~fibration_mask, c);
-+ c = reiser4_unpack_string(get_key_fulloid(key), c);
-+ } else
-+ c = reiser4_unpack_string(get_key_fulloid(key) &
-+ ~fibration_mask, c);
-+ reiser4_unpack_string(get_key_offset(key), c);
-+ return buf;
-+}
-+
-+/**
-+ * complete_entry_key - calculate entry key by name
-+ * @dir: directory where entry is (or will be) in
-+ * @name: name to calculate key of
-+ * @len: lenth of name
-+ * @result: place to store result in
-+ *
-+ * Sets fields of entry key @result which depend on file name.
-+ * When REISER4_LARGE_KEY is defined three fields of @result are set: ordering,
-+ * objectid and offset. Otherwise, objectid and offset are set.
-+ */
-+void complete_entry_key(const struct inode *dir, const char *name,
-+ int len, reiser4_key *result)
-+{
-+#if REISER4_LARGE_KEY
-+ __u64 ordering;
-+ __u64 objectid;
-+ __u64 offset;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1142", result != NULL);
-+ assert("nikita-2867", strlen(name) == len);
-+
-+ /*
-+ * key allocation algorithm for directory entries in case of large
-+ * keys:
-+ *
-+ * If name is not longer than 7 + 8 + 8 = 23 characters, put first 7
-+ * characters into ordering field of key, next 8 charactes (if any)
-+ * into objectid field of key and next 8 ones (of any) into offset
-+ * field of key
-+ *
-+ * If file name is longer than 23 characters, put first 7 characters
-+ * into key's ordering, next 8 to objectid and hash of remaining
-+ * characters into offset field.
-+ *
-+ * To distinguish above cases, in latter set up unused high bit in
-+ * ordering field.
-+ */
-+
-+ /* [0-6] characters to ordering */
-+ ordering = pack_string(name, 1);
-+ if (len > 7) {
-+ /* [7-14] characters to objectid */
-+ objectid = pack_string(name + 7, 0);
-+ if (len > 15) {
-+ if (len <= 23) {
-+ /* [15-23] characters to offset */
-+ offset = pack_string(name + 15, 0);
-+ } else {
-+ /* note in a key the fact that offset contains hash. */
-+ ordering |= longname_mark;
-+
-+ /* offset is the hash of the file name's tail. */
-+ offset = inode_hash_plugin(dir)->hash(name + 15,
-+ len - 15);
-+ }
-+ } else {
-+ offset = 0ull;
-+ }
-+ } else {
-+ objectid = 0ull;
-+ offset = 0ull;
-+ }
-+
-+ assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
-+ ordering |= inode_fibration_plugin(dir)->fibre(dir, name, len);
-+
-+ set_key_ordering(result, ordering);
-+ set_key_fulloid(result, objectid);
-+ set_key_offset(result, offset);
-+ return;
-+
-+#else
-+ __u64 objectid;
-+ __u64 offset;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1142", result != NULL);
-+ assert("nikita-2867", strlen(name) == len);
-+
-+ /*
-+ * key allocation algorithm for directory entries in case of not large
-+ * keys:
-+ *
-+ * If name is not longer than 7 + 8 = 15 characters, put first 7
-+ * characters into objectid field of key, next 8 charactes (if any)
-+ * into offset field of key
-+ *
-+ * If file name is longer than 15 characters, put first 7 characters
-+ * into key's objectid, and hash of remaining characters into offset
-+ * field.
-+ *
-+ * To distinguish above cases, in latter set up unused high bit in
-+ * objectid field.
-+ */
-+
-+ /* [0-6] characters to objectid */
-+ objectid = pack_string(name, 1);
-+ if (len > 7) {
-+ if (len <= 15) {
-+ /* [7-14] characters to offset */
-+ offset = pack_string(name + 7, 0);
-+ } else {
-+ /* note in a key the fact that offset contains hash. */
-+ objectid |= longname_mark;
-+
-+ /* offset is the hash of the file name. */
-+ offset = inode_hash_plugin(dir)->hash(name + 7,
-+ len - 7);
-+ }
-+ } else
-+ offset = 0ull;
-+
-+ assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
-+ objectid |= inode_fibration_plugin(dir)->fibre(dir, name, len);
-+
-+ set_key_fulloid(result, objectid);
-+ set_key_offset(result, offset);
-+ return;
-+#endif /* ! REISER4_LARGE_KEY */
-+}
-+
-+/* true, if @key is the key of "." */
-+int is_dot_key(const reiser4_key * key /* key to check */ )
-+{
-+ assert("nikita-1717", key != NULL);
-+ assert("nikita-1718", get_key_type(key) == KEY_FILE_NAME_MINOR);
-+ return
-+ (get_key_ordering(key) == 0ull) &&
-+ (get_key_objectid(key) == 0ull) && (get_key_offset(key) == 0ull);
-+}
-+
-+/* build key for stat-data.
-+
-+ return key of stat-data of this object. This should became sd plugin
-+ method in the future. For now, let it be here.
-+
-+*/
-+reiser4_key *build_sd_key(const struct inode * target /* inode of an object */ ,
-+ reiser4_key * result /* resulting key of @target
-+ stat-data */ )
-+{
-+ assert("nikita-261", result != NULL);
-+
-+ reiser4_key_init(result);
-+ set_key_locality(result, reiser4_inode_data(target)->locality_id);
-+ set_key_ordering(result, get_inode_ordering(target));
-+ set_key_objectid(result, get_inode_oid(target));
-+ set_key_type(result, KEY_SD_MINOR);
-+ set_key_offset(result, (__u64) 0);
-+ return result;
-+}
-+
-+/* encode part of key into &obj_key_id
-+
-+ This encodes into @id part of @key sufficient to restore @key later,
-+ given that latter is key of object (key of stat-data).
-+
-+ See &obj_key_id
-+*/
-+int build_obj_key_id(const reiser4_key * key /* key to encode */ ,
-+ obj_key_id * id /* id where key is encoded in */ )
-+{
-+ assert("nikita-1151", key != NULL);
-+ assert("nikita-1152", id != NULL);
-+
-+ memcpy(id, key, sizeof *id);
-+ return 0;
-+}
-+
-+/* encode reference to @obj in @id.
-+
-+ This is like build_obj_key_id() above, but takes inode as parameter. */
-+int build_inode_key_id(const struct inode *obj /* object to build key of */ ,
-+ obj_key_id * id /* result */ )
-+{
-+ reiser4_key sdkey;
-+
-+ assert("nikita-1166", obj != NULL);
-+ assert("nikita-1167", id != NULL);
-+
-+ build_sd_key(obj, &sdkey);
-+ build_obj_key_id(&sdkey, id);
-+ return 0;
-+}
-+
-+/* decode @id back into @key
-+
-+ Restore key of object stat-data from @id. This is dual to
-+ build_obj_key_id() above.
-+*/
-+int extract_key_from_id(const obj_key_id * id /* object key id to extract key
-+ * from */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-1153", id != NULL);
-+ assert("nikita-1154", key != NULL);
-+
-+ reiser4_key_init(key);
-+ memcpy(key, id, sizeof *id);
-+ return 0;
-+}
-+
-+/* extract objectid of directory from key of directory entry within said
-+ directory.
-+ */
-+oid_t extract_dir_id_from_key(const reiser4_key * de_key /* key of
-+ * directory
-+ * entry */ )
-+{
-+ assert("nikita-1314", de_key != NULL);
-+ return get_key_locality(de_key);
-+}
-+
-+/* encode into @id key of directory entry.
-+
-+ Encode into @id information sufficient to later distinguish directory
-+ entries within the same directory. This is not whole key, because all
-+ directory entries within directory item share locality which is equal
-+ to objectid of their directory.
-+
-+*/
-+int build_de_id(const struct inode *dir /* inode of directory */ ,
-+ const struct qstr *name /* name to be given to @obj by
-+ * directory entry being
-+ * constructed */ ,
-+ de_id * id /* short key of directory entry */ )
-+{
-+ reiser4_key key;
-+
-+ assert("nikita-1290", dir != NULL);
-+ assert("nikita-1292", id != NULL);
-+
-+ /* NOTE-NIKITA this is suboptimal. */
-+ inode_dir_plugin(dir)->build_entry_key(dir, name, &key);
-+ return build_de_id_by_key(&key, id);
-+}
-+
-+/* encode into @id key of directory entry.
-+
-+ Encode into @id information sufficient to later distinguish directory
-+ entries within the same directory. This is not whole key, because all
-+ directory entries within directory item share locality which is equal
-+ to objectid of their directory.
-+
-+*/
-+int build_de_id_by_key(const reiser4_key * entry_key /* full key of directory
-+ * entry */ ,
-+ de_id * id /* short key of directory entry */ )
-+{
-+ memcpy(id, ((__u64 *) entry_key) + 1, sizeof *id);
-+ return 0;
-+}
-+
-+/* restore from @id key of directory entry.
-+
-+ Function dual to build_de_id(): given @id and locality, build full
-+ key of directory entry within directory item.
-+
-+*/
-+int extract_key_from_de_id(const oid_t locality /* locality of directory
-+ * entry */ ,
-+ const de_id * id /* directory entry id */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ /* no need to initialise key here: all fields are overwritten */
-+ memcpy(((__u64 *) key) + 1, id, sizeof *id);
-+ set_key_locality(key, locality);
-+ set_key_type(key, KEY_FILE_NAME_MINOR);
-+ return 0;
-+}
-+
-+/* compare two &de_id's */
-+cmp_t de_id_cmp(const de_id * id1 /* first &de_id to compare */ ,
-+ const de_id * id2 /* second &de_id to compare */ )
-+{
-+ /* NOTE-NIKITA ugly implementation */
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ extract_key_from_de_id((oid_t) 0, id1, &k1);
-+ extract_key_from_de_id((oid_t) 0, id2, &k2);
-+ return keycmp(&k1, &k2);
-+}
-+
-+/* compare &de_id with key */
-+cmp_t de_id_key_cmp(const de_id * id /* directory entry id to compare */ ,
-+ const reiser4_key * key /* key to compare */ )
-+{
-+ cmp_t result;
-+ reiser4_key *k1;
-+
-+ k1 = (reiser4_key *) (((unsigned long)id) - sizeof key->el[0]);
-+ result = KEY_DIFF_EL(k1, key, 1);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, key, 2);
-+ if (REISER4_LARGE_KEY && result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, key, 3);
-+ }
-+ }
-+ return result;
-+}
-+
-+/*
-+ * return number of bytes necessary to encode @inode identity.
-+ */
-+int inode_onwire_size(const struct inode *inode)
-+{
-+ int result;
-+
-+ result = dscale_bytes_to_write(get_inode_oid(inode));
-+ result += dscale_bytes_to_write(get_inode_locality(inode));
-+
-+ /*
-+ * ordering is large (it usually has highest bits set), so it makes
-+ * little sense to dscale it.
-+ */
-+ if (REISER4_LARGE_KEY)
-+ result += sizeof(get_inode_ordering(inode));
-+ return result;
-+}
-+
-+/*
-+ * encode @inode identity at @start
-+ */
-+char *build_inode_onwire(const struct inode *inode, char *start)
-+{
-+ start += dscale_write(start, get_inode_locality(inode));
-+ start += dscale_write(start, get_inode_oid(inode));
-+
-+ if (REISER4_LARGE_KEY) {
-+ put_unaligned(cpu_to_le64(get_inode_ordering(inode)), (__le64 *)start);
-+ start += sizeof(get_inode_ordering(inode));
-+ }
-+ return start;
-+}
-+
-+/*
-+ * extract key that was previously encoded by build_inode_onwire() at @addr
-+ */
-+char *extract_obj_key_id_from_onwire(char *addr, obj_key_id * key_id)
-+{
-+ __u64 val;
-+
-+ addr += dscale_read(addr, &val);
-+ val = (val << KEY_LOCALITY_SHIFT) | KEY_SD_MINOR;
-+ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->locality);
-+ addr += dscale_read(addr, &val);
-+ put_unaligned(cpu_to_le64(val), (__le64 *)key_id->objectid);
-+#if REISER4_LARGE_KEY
-+ memcpy(&key_id->ordering, addr, sizeof key_id->ordering);
-+ addr += sizeof key_id->ordering;
-+#endif
-+ return addr;
-+}
-+
-+/*
-+ * skip a key that was previously encoded by build_inode_onwire() at @addr
-+ * FIXME: handle IO errors.
-+ */
-+char * locate_obj_key_id_onwire(char * addr)
-+{
-+ /* locality */
-+ addr += dscale_bytes_to_read(addr);
-+ /* objectid */
-+ addr += dscale_bytes_to_read(addr);
-+#if REISER4_LARGE_KEY
-+ addr += sizeof ((obj_key_id *)0)->ordering;
-+#endif
-+ return addr;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/kassign.h linux-2.6.24/fs/reiser4/kassign.h
---- linux-2.6.24.orig/fs/reiser4/kassign.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/kassign.h 2008-01-25 11:55:43.900543447 +0300
-@@ -0,0 +1,111 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Key assignment policy interface. See kassign.c for details. */
-+
-+#if !defined( __KASSIGN_H__ )
-+#define __KASSIGN_H__
-+
-+#include "forward.h"
-+#include "key.h"
-+#include "dformat.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block, etc */
-+#include <linux/dcache.h> /* for struct qstr */
-+
-+/* key assignment functions */
-+
-+/* Information from which key of file stat-data can be uniquely
-+ restored. This depends on key assignment policy for
-+ stat-data. Currently it's enough to store object id and locality id
-+ (60+60==120) bits, because minor packing locality and offset of
-+ stat-data key are always known constants: KEY_SD_MINOR and 0
-+ respectively. For simplicity 4 bits are wasted in each id, and just
-+ two 64 bit integers are stored.
-+
-+ This field has to be byte-aligned, because we don't want to waste
-+ space in directory entries. There is another side of a coin of
-+ course: we waste CPU and bus bandwidth in stead, by copying data back
-+ and forth.
-+
-+ Next optimization: &obj_key_id is mainly used to address stat data from
-+ directory entries. Under the assumption that majority of files only have
-+ only name (one hard link) from *the* parent directory it seems reasonable
-+ to only store objectid of stat data and take its locality from key of
-+ directory item.
-+
-+ This requires some flag to be added to the &obj_key_id to distinguish
-+ between these two cases. Remaining bits in flag byte are then asking to be
-+ used to store file type.
-+
-+ This optimization requires changes in directory item handling code.
-+
-+*/
-+typedef struct obj_key_id {
-+ d8 locality[sizeof(__u64)];
-+ ON_LARGE_KEY(d8 ordering[sizeof(__u64)];
-+ )
-+ d8 objectid[sizeof(__u64)];
-+}
-+obj_key_id;
-+
-+/* Information sufficient to uniquely identify directory entry within
-+ compressed directory item.
-+
-+ For alignment issues see &obj_key_id above.
-+*/
-+typedef struct de_id {
-+ ON_LARGE_KEY(d8 ordering[sizeof(__u64)];)
-+ d8 objectid[sizeof(__u64)];
-+ d8 offset[sizeof(__u64)];
-+}
-+de_id;
-+
-+extern int inode_onwire_size(const struct inode *obj);
-+extern char *build_inode_onwire(const struct inode *obj, char *area);
-+extern char *locate_obj_key_id_onwire(char *area);
-+extern char *extract_obj_key_id_from_onwire(char *area, obj_key_id * key_id);
-+
-+extern int build_inode_key_id(const struct inode *obj, obj_key_id * id);
-+extern int extract_key_from_id(const obj_key_id * id, reiser4_key * key);
-+extern int build_obj_key_id(const reiser4_key * key, obj_key_id * id);
-+extern oid_t extract_dir_id_from_key(const reiser4_key * de_key);
-+extern int build_de_id(const struct inode *dir, const struct qstr *name,
-+ de_id * id);
-+extern int build_de_id_by_key(const reiser4_key * entry_key, de_id * id);
-+extern int extract_key_from_de_id(const oid_t locality, const de_id * id,
-+ reiser4_key * key);
-+extern cmp_t de_id_cmp(const de_id * id1, const de_id * id2);
-+extern cmp_t de_id_key_cmp(const de_id * id, const reiser4_key * key);
-+
-+extern int build_readdir_key_common(struct file *dir, reiser4_key * result);
-+extern void build_entry_key_common(const struct inode *dir,
-+ const struct qstr *name,
-+ reiser4_key * result);
-+extern void build_entry_key_stable_entry(const struct inode *dir,
-+ const struct qstr *name,
-+ reiser4_key * result);
-+extern int is_dot_key(const reiser4_key * key);
-+extern reiser4_key *build_sd_key(const struct inode *target,
-+ reiser4_key * result);
-+
-+extern int is_longname_key(const reiser4_key * key);
-+extern int is_longname(const char *name, int len);
-+extern char *extract_name_from_key(const reiser4_key * key, char *buf);
-+extern char *reiser4_unpack_string(__u64 value, char *buf);
-+extern void complete_entry_key(const struct inode *dir, const char *name,
-+ int len, reiser4_key *result);
-+
-+/* __KASSIGN_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/Kconfig linux-2.6.24/fs/reiser4/Kconfig
---- linux-2.6.24.orig/fs/reiser4/Kconfig 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/Kconfig 2008-01-25 11:39:06.944209750 +0300
-@@ -0,0 +1,34 @@
-+config REISER4_FS
-+ tristate "Reiser4 (EXPERIMENTAL)"
-+ depends on EXPERIMENTAL
-+ select ZLIB_INFLATE
-+ select ZLIB_DEFLATE
-+ select LZO_COMPRESS
-+ select LZO_DECOMPRESS
-+ select CRYPTO
-+ help
-+ Reiser4 is a filesystem that performs all filesystem operations
-+ as atomic transactions, which means that it either performs a
-+ write, or it does not, and in the event of a crash it does not
-+ partially perform it or corrupt it.
-+
-+ It stores files in dancing trees, which are like balanced trees but
-+ faster. It packs small files together so that they share blocks
-+ without wasting space. This means you can use it to store really
-+ small files. It also means that it saves you disk space. It avoids
-+ hassling you with anachronisms like having a maximum number of
-+ inodes, and wasting space if you use less than that number.
-+
-+ Reiser4 is a distinct filesystem type from reiserfs (V3).
-+ It's therefore not possible to use reiserfs file systems
-+ with reiser4.
-+
-+ To learn more about reiser4, go to http://www.namesys.com
-+
-+config REISER4_DEBUG
-+ bool "Enable reiser4 debug mode"
-+ depends on REISER4_FS
-+ help
-+ Don't use this unless you are debugging reiser4.
-+
-+ If unsure, say N.
-diff -urN linux-2.6.24.orig/fs/reiser4/key.c linux-2.6.24/fs/reiser4/key.c
---- linux-2.6.24.orig/fs/reiser4/key.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/key.c 2008-01-25 11:39:06.944209750 +0300
-@@ -0,0 +1,137 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Key manipulations. */
-+
-+#include "debug.h"
-+#include "key.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/* Minimal possible key: all components are zero. It is presumed that this is
-+ independent of key scheme. */
-+static const reiser4_key MINIMAL_KEY = {
-+ .el = {
-+ 0ull,
-+ ON_LARGE_KEY(0ull,)
-+ 0ull,
-+ 0ull
-+ }
-+};
-+
-+/* Maximal possible key: all components are ~0. It is presumed that this is
-+ independent of key scheme. */
-+static const reiser4_key MAXIMAL_KEY = {
-+ .el = {
-+ __constant_cpu_to_le64(~0ull),
-+ ON_LARGE_KEY(__constant_cpu_to_le64(~0ull),)
-+ __constant_cpu_to_le64(~0ull),
-+ __constant_cpu_to_le64(~0ull)
-+ }
-+};
-+
-+/* Initialize key. */
-+void reiser4_key_init(reiser4_key * key /* key to init */ )
-+{
-+ assert("nikita-1169", key != NULL);
-+ memset(key, 0, sizeof *key);
-+}
-+
-+/* minimal possible key in the tree. Return pointer to the static storage. */
-+const reiser4_key *reiser4_min_key(void)
-+{
-+ return &MINIMAL_KEY;
-+}
-+
-+/* maximum possible key in the tree. Return pointer to the static storage. */
-+const reiser4_key *reiser4_max_key(void)
-+{
-+ return &MAXIMAL_KEY;
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging aid: print symbolic name of key type */
-+static const char *type_name(unsigned int key_type /* key type */ )
-+{
-+ switch (key_type) {
-+ case KEY_FILE_NAME_MINOR:
-+ return "file name";
-+ case KEY_SD_MINOR:
-+ return "stat data";
-+ case KEY_ATTR_NAME_MINOR:
-+ return "attr name";
-+ case KEY_ATTR_BODY_MINOR:
-+ return "attr body";
-+ case KEY_BODY_MINOR:
-+ return "file body";
-+ default:
-+ return "unknown";
-+ }
-+}
-+
-+/* debugging aid: print human readable information about key */
-+void reiser4_print_key(const char *prefix /* prefix to print */ ,
-+ const reiser4_key * key /* key to print */ )
-+{
-+ /* turn bold on */
-+ /* printf ("\033[1m"); */
-+ if (key == NULL)
-+ printk("%s: null key\n", prefix);
-+ else {
-+ if (REISER4_LARGE_KEY)
-+ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx:%Lx)", prefix,
-+ get_key_locality(key),
-+ get_key_type(key),
-+ get_key_ordering(key),
-+ get_key_band(key),
-+ get_key_objectid(key), get_key_offset(key));
-+ else
-+ printk("%s: (%Lx:%x:%Lx:%Lx:%Lx)", prefix,
-+ get_key_locality(key),
-+ get_key_type(key),
-+ get_key_band(key),
-+ get_key_objectid(key), get_key_offset(key));
-+ /*
-+ * if this is a key of directory entry, try to decode part of
-+ * a name stored in the key, and output it.
-+ */
-+ if (get_key_type(key) == KEY_FILE_NAME_MINOR) {
-+ char buf[DE_NAME_BUF_LEN];
-+ char *c;
-+
-+ c = buf;
-+ c = reiser4_unpack_string(get_key_ordering(key), c);
-+ reiser4_unpack_string(get_key_fulloid(key), c);
-+ printk("[%s", buf);
-+ if (is_longname_key(key))
-+ /*
-+ * only part of the name is stored in the key.
-+ */
-+ printk("...]\n");
-+ else {
-+ /*
-+ * whole name is stored in the key.
-+ */
-+ reiser4_unpack_string(get_key_offset(key), buf);
-+ printk("%s]\n", buf);
-+ }
-+ } else {
-+ printk("[%s]\n", type_name(get_key_type(key)));
-+ }
-+ }
-+ /* turn bold off */
-+ /* printf ("\033[m\017"); */
-+}
-+
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/key.h linux-2.6.24/fs/reiser4/key.h
---- linux-2.6.24.orig/fs/reiser4/key.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/key.h 2008-01-25 11:39:06.944209750 +0300
-@@ -0,0 +1,384 @@
-+/* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declarations of key-related data-structures and operations on keys. */
-+
-+#if !defined( __REISER4_KEY_H__ )
-+#define __REISER4_KEY_H__
-+
-+#include "dformat.h"
-+#include "forward.h"
-+#include "debug.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+
-+/* Operations on keys in reiser4 tree */
-+
-+/* No access to any of these fields shall be done except via a
-+ wrapping macro/function, and that wrapping macro/function shall
-+ convert to little endian order. Compare keys will consider cpu byte order. */
-+
-+/* A storage layer implementation difference between a regular unix file body and its attributes is in the typedef below
-+ which causes all of the attributes of a file to be near in key to all of the other attributes for all of the files
-+ within that directory, and not near to the file itself. It is interesting to consider whether this is the wrong
-+ approach, and whether there should be no difference at all. For current usage patterns this choice is probably the
-+ right one. */
-+
-+/* possible values for minor packing locality (4 bits required) */
-+typedef enum {
-+ /* file name */
-+ KEY_FILE_NAME_MINOR = 0,
-+ /* stat-data */
-+ KEY_SD_MINOR = 1,
-+ /* file attribute name */
-+ KEY_ATTR_NAME_MINOR = 2,
-+ /* file attribute value */
-+ KEY_ATTR_BODY_MINOR = 3,
-+ /* file body (tail or extent) */
-+ KEY_BODY_MINOR = 4,
-+} key_minor_locality;
-+
-+/* everything stored in the tree has a unique key, which means that the tree is (logically) fully ordered by key.
-+ Physical order is determined by dynamic heuristics that attempt to reflect key order when allocating available space,
-+ and by the repacker. It is stylistically better to put aggregation information into the key. Thus, if you want to
-+ segregate extents from tails, it is better to give them distinct minor packing localities rather than changing
-+ block_alloc.c to check the node type when deciding where to allocate the node.
-+
-+ The need to randomly displace new directories and large files disturbs this symmetry unfortunately. However, it
-+ should be noted that this is a need that is not clearly established given the existence of a repacker. Also, in our
-+ current implementation tails have a different minor packing locality from extents, and no files have both extents and
-+ tails, so maybe symmetry can be had without performance cost after all. Symmetry is what we ship for now....
-+*/
-+
-+/* Arbitrary major packing localities can be assigned to objects using
-+ the reiser4(filenameA/..packing<=some_number) system call.
-+
-+ In reiser4, the creat() syscall creates a directory
-+
-+ whose default flow (that which is referred to if the directory is
-+ read as a file) is the traditional unix file body.
-+
-+ whose directory plugin is the 'filedir'
-+
-+ whose major packing locality is that of the parent of the object created.
-+
-+ The static_stat item is a particular commonly used directory
-+ compression (the one for normal unix files).
-+
-+ The filedir plugin checks to see if the static_stat item exists.
-+ There is a unique key for static_stat. If yes, then it uses the
-+ static_stat item for all of the values that it contains. The
-+ static_stat item contains a flag for each stat it contains which
-+ indicates whether one should look outside the static_stat item for its
-+ contents.
-+*/
-+
-+/* Offsets of fields in reiser4_key. The value of each member of this enum
-+ is the index, within the key viewed as an array of __u64's, of the element
-+ that holds the field. Several fields share one element and are told apart
-+ by their KEY_*_MASK / KEY_*_SHIFT pair. The "Nth element" remarks below
-+ describe the layout without KEY_ORDERING_INDEX; presumably ON_LARGE_KEY()
-+ emits its argument only when REISER4_LARGE_KEY is set, in which case every
-+ following index moves up by one - TODO confirm. */
-+typedef enum {
-+ /* major "locale", aka dirid. Sits in 1st element */
-+ KEY_LOCALITY_INDEX = 0,
-+ /* minor "locale", aka item type. Sits in 1st element */
-+ KEY_TYPE_INDEX = 0,
-+ ON_LARGE_KEY(KEY_ORDERING_INDEX,)
-+ /* "object band". Sits in 2nd element */
-+ KEY_BAND_INDEX,
-+ /* objectid. Sits in 2nd element */
-+ KEY_OBJECTID_INDEX = KEY_BAND_INDEX,
-+ /* full objectid. Sits in 2nd element */
-+ KEY_FULLOID_INDEX = KEY_BAND_INDEX,
-+ /* Offset. Sits in 3rd element */
-+ KEY_OFFSET_INDEX,
-+ /* Name hash. Sits in 3rd element */
-+ KEY_HASH_INDEX = KEY_OFFSET_INDEX,
-+ /* element that prefetchkey() prefetches in addition to the key start */
-+ KEY_CACHELINE_END = KEY_OFFSET_INDEX,
-+ /* number of elements in a key; sizes reiser4_key.el[] */
-+ KEY_LAST_INDEX
-+} reiser4_key_field_index;
-+
-+/* key in reiser4 internal "balanced" tree. It is just an array of
-+ KEY_LAST_INDEX 64bit integers in disk byte order (little-endian). This
-+ array is indexed by reiser4_key_field_index. Each __u64 within
-+ this array is called "element". Logical key components encoded within
-+ elements are called "fields".
-+
-+ We declare this as union with second component dummy to suppress
-+ inconvenient array<->pointer casts implied in C. */
-+union reiser4_key {
-+ __le64 el[KEY_LAST_INDEX];
-+ int pad;
-+};
-+
-+/* bitmasks showing where within reiser4_key particular key is stored. */
-+/* major locality occupies higher 60 bits of the first element */
-+#define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
-+
-+/* minor locality occupies lower 4 bits of the first element */
-+#define KEY_TYPE_MASK 0xfull
-+
-+/* controversial band occupies higher 4 bits of the 2nd element */
-+#define KEY_BAND_MASK 0xf000000000000000ull
-+
-+/* objectid occupies lower 60 bits of the 2nd element */
-+#define KEY_OBJECTID_MASK 0x0fffffffffffffffull
-+
-+/* full 64bit objectid*/
-+#define KEY_FULLOID_MASK 0xffffffffffffffffull
-+
-+/* offset is just 3rd element itself */
-+#define KEY_OFFSET_MASK 0xffffffffffffffffull
-+
-+/* ordering is whole second element */
-+#define KEY_ORDERING_MASK 0xffffffffffffffffull
-+
-+/* bit position of each field within its key element: the getters generated
-+ by DEFINE_KEY_FIELD() mask the element and shift it right by this many
-+ bits, the setters shift the new value left by this many bits before
-+ OR-ing it into the element */
-+typedef enum {
-+ KEY_LOCALITY_SHIFT = 4,
-+ KEY_TYPE_SHIFT = 0,
-+ KEY_BAND_SHIFT = 60,
-+ KEY_OBJECTID_SHIFT = 0,
-+ KEY_FULLOID_SHIFT = 0,
-+ KEY_OFFSET_SHIFT = 0,
-+ KEY_ORDERING_SHIFT = 0,
-+} reiser4_key_field_shift;
-+
-+/* Fetch element number @off of @key and hand it back in CPU byte order.
-+   The element is read through get_unaligned(), so @key needs no particular
-+   alignment. @off has to be below KEY_LAST_INDEX. */
-+static inline __u64
-+get_key_el(const reiser4_key * key, reiser4_key_field_index off)
-+{
-+	__le64 raw;
-+
-+	assert("nikita-753", key != NULL);
-+	assert("nikita-754", off < KEY_LAST_INDEX);
-+
-+	raw = get_unaligned(&key->el[off]);
-+	return le64_to_cpu(raw);
-+}
-+
-+/* Store @value, converted to little-endian, into element number @off of
-+   @key. The store goes through put_unaligned(), so @key needs no particular
-+   alignment. @off has to be below KEY_LAST_INDEX. */
-+static inline void
-+set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value)
-+{
-+	__le64 raw;
-+
-+	assert("nikita-755", key != NULL);
-+	assert("nikita-756", off < KEY_LAST_INDEX);
-+
-+	raw = cpu_to_le64(value);
-+	put_unaligned(raw, &key->el[off]);
-+}
-+
-+/* macro to define getter get_key_L() and setter set_key_L() for one key
-+ field. L is the lower-case field name used in the function names, U is the
-+ upper-case field name used to select KEY_U_INDEX, KEY_U_MASK and
-+ KEY_U_SHIFT, and T is the C type the field is returned and passed as.
-+ The getter masks the element and shifts it right; the setter clears the
-+ field bits and ORs in the shifted value without masking it. */
-+#define DEFINE_KEY_FIELD( L, U, T ) \
-+static inline T get_key_ ## L ( const reiser4_key *key ) \
-+{ \
-+ assert( "nikita-750", key != NULL ); \
-+ return ( T ) ( get_key_el( key, KEY_ ## U ## _INDEX ) & \
-+ KEY_ ## U ## _MASK ) >> KEY_ ## U ## _SHIFT; \
-+} \
-+ \
-+static inline void set_key_ ## L ( reiser4_key *key, T loc ) \
-+{ \
-+ __u64 el; \
-+ \
-+ assert( "nikita-752", key != NULL ); \
-+ \
-+ el = get_key_el( key, KEY_ ## U ## _INDEX ); \
-+ /* clear field bits in the key */ \
-+ el &= ~KEY_ ## U ## _MASK; \
-+ /* actually it should be \
-+ \
-+ el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
-+ \
-+ but we trust user to never pass values that wouldn't fit \
-+ into field. Clearing extra bits is one operation, but this \
-+ function is time-critical. \
-+ But check this in assertion. */ \
-+ assert( "nikita-759", ( ( loc << KEY_ ## U ## _SHIFT ) & \
-+ ~KEY_ ## U ## _MASK ) == 0 ); \
-+ el |= ( loc << KEY_ ## U ## _SHIFT ); \
-+ set_key_el( key, KEY_ ## U ## _INDEX, el ); \
-+}
-+
-+/* type used for the locality, objectid and fulloid key fields */
-+typedef __u64 oid_t;
-+
-+/* define get_key_locality(), set_key_locality() */
-+DEFINE_KEY_FIELD(locality, LOCALITY, oid_t);
-+/* define get_key_type(), set_key_type() */
-+DEFINE_KEY_FIELD(type, TYPE, key_minor_locality);
-+/* define get_key_band(), set_key_band() */
-+DEFINE_KEY_FIELD(band, BAND, __u64);
-+/* define get_key_objectid(), set_key_objectid() */
-+DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t);
-+/* define get_key_fulloid(), set_key_fulloid() */
-+DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t);
-+/* define get_key_offset(), set_key_offset() */
-+DEFINE_KEY_FIELD(offset, OFFSET, __u64);
-+#if (REISER4_LARGE_KEY)
-+/* define get_key_ordering(), set_key_ordering() */
-+DEFINE_KEY_FIELD(ordering, ORDERING, __u64);
-+#else
-+/* without large keys there is no ordering field: reading it always
-+ yields 0 */
-+static inline __u64 get_key_ordering(const reiser4_key * key)
-+{
-+ return 0;
-+}
-+
-+/* without large keys there is no ordering field: writing it is a no-op,
-+ @key is left untouched and @val is ignored */
-+static inline void set_key_ordering(reiser4_key * key, __u64 val)
-+{
-+}
-+#endif
-+
-+/* key comparison result, as produced by keycmp() and the KEY_DIFF*()
-+ helper macros */
-+typedef enum { LESS_THAN = -1, /* if first key is less than second */
-+ EQUAL_TO = 0, /* if keys are equal */
-+ GREATER_THAN = +1 /* if first key is greater than second */
-+} cmp_t;
-+
-+/* initialize @key; defined outside this header. NOTE(review): presumably
-+ it zeroes the key - confirm against the definition. */
-+void reiser4_key_init(reiser4_key * key);
-+
-+/* minimal possible key in the tree. Return pointer to the static storage. */
-+extern const reiser4_key *reiser4_min_key(void);
-+/* counterpart of reiser4_min_key(); by its name, the maximal possible key
-+ in the tree - definition is outside this header */
-+extern const reiser4_key *reiser4_max_key(void);
-+
-+/* helper macro for keycmp(): reads field @field of keys @k1 and @k2 through
-+ get_key_<field>() and evaluates to LESS_THAN, EQUAL_TO or GREATER_THAN
-+ according to how the two decoded values compare. The temporaries take
-+ the getter's return type via typeof. */
-+#define KEY_DIFF(k1, k2, field) \
-+({ \
-+ typeof (get_key_ ## field (k1)) f1; \
-+ typeof (get_key_ ## field (k2)) f2; \
-+ \
-+ f1 = get_key_ ## field (k1); \
-+ f2 = get_key_ ## field (k2); \
-+ \
-+ (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
-+})
-+
-+/* helper macro for keycmp(): reads whole element number @off of keys @k1
-+ and @k2 through get_key_el() (i.e. in CPU byte order) and evaluates to
-+ LESS_THAN, EQUAL_TO or GREATER_THAN according to how the two unsigned
-+ 64bit values compare. */
-+#define KEY_DIFF_EL(k1, k2, off) \
-+({ \
-+ __u64 e1; \
-+ __u64 e2; \
-+ \
-+ e1 = get_key_el(k1, off); \
-+ e2 = get_key_el(k2, off); \
-+ \
-+ (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
-+})
-+
-+/* compare `k1' and `k2' and return LESS_THAN, EQUAL_TO or GREATER_THAN.
-+ This function is a heart of "key allocation policy". All you need to
-+ implement new policy is to add yet another clause here. */
-+static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ ,
-+ const reiser4_key * k2 /* second key to compare */ )
-+{
-+ cmp_t result;
-+
-+ /*
-+ * This function is the heart of reiser4 tree-routines. Key comparison
-+ * is among most heavily used operations in the file system.
-+ */
-+
-+ assert("nikita-439", k1 != NULL);
-+ assert("nikita-440", k2 != NULL);
-+
-+ /* there is no actual branch here: condition is compile time constant
-+ * and constant folding and propagation ensures that only one branch
-+ * is actually compiled in. */
-+
-+ if (REISER4_PLANA_KEY_ALLOCATION) {
-+ /* if physical order of fields in a key is identical
-+ with logical order, we can implement key comparison
-+ as three (four when REISER4_LARGE_KEY is set) 64bit
-+ comparisons of whole elements, using literal element
-+ numbers 0..3 instead of the KEY_*_INDEX names. */
-+ /* logical order of fields in plan-a:
-+ locality->type->objectid->offset. */
-+ /* compare locality and type at once */
-+ result = KEY_DIFF_EL(k1, k2, 0);
-+ if (result == EQUAL_TO) {
-+ /* compare objectid (and band if it's there) */
-+ result = KEY_DIFF_EL(k1, k2, 1);
-+ /* compare offset */
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, k2, 2);
-+ if (REISER4_LARGE_KEY && result == EQUAL_TO) {
-+ result = KEY_DIFF_EL(k1, k2, 3);
-+ }
-+ }
-+ }
-+ } else if (REISER4_3_5_KEY_ALLOCATION) {
-+ /* field-by-field comparison in the order
-+ locality->objectid->type->offset */
-+ result = KEY_DIFF(k1, k2, locality);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF(k1, k2, objectid);
-+ if (result == EQUAL_TO) {
-+ result = KEY_DIFF(k1, k2, type);
-+ if (result == EQUAL_TO)
-+ result = KEY_DIFF(k1, k2, offset);
-+ }
-+ }
-+ } else
-+ /* NOTE(review): result is not assigned on this path;
-+ presumably impossible() does not return - confirm */
-+ impossible("nikita-441", "Unknown key allocation scheme!");
-+ return result;
-+}
-+
-+/* Equality test for keys: non-zero when @k1 and @k2 are byte-for-byte
-+   identical, zero otherwise. The keys are compared with memcmp() over the
-+   whole reiser4_key, no field is decoded. */
-+static inline int keyeq(const reiser4_key * k1 /* first key to compare */ ,
-+			const reiser4_key * k2 /* second key to compare */ )
-+{
-+	assert("nikita-1879", k1 != NULL);
-+	assert("nikita-1880", k2 != NULL);
-+
-+	return memcmp(k1, k2, sizeof(*k1)) == 0;
-+}
-+
-+/* Strict "less than" for keys: non-zero when keycmp() orders @k1 before
-+   @k2, zero otherwise. */
-+static inline int keylt(const reiser4_key * k1 /* first key to compare */ ,
-+			const reiser4_key * k2 /* second key to compare */ )
-+{
-+	cmp_t order;
-+
-+	assert("nikita-1952", k1 != NULL);
-+	assert("nikita-1953", k2 != NULL);
-+
-+	order = keycmp(k1, k2);
-+	return order == LESS_THAN;
-+}
-+
-+/* "Less than or equal" for keys: non-zero unless keycmp() orders @k1 after
-+   @k2. */
-+static inline int keyle(const reiser4_key * k1 /* first key to compare */ ,
-+			const reiser4_key * k2 /* second key to compare */ )
-+{
-+	cmp_t order;
-+
-+	assert("nikita-1954", k1 != NULL);
-+	assert("nikita-1955", k2 != NULL);
-+
-+	order = keycmp(k1, k2);
-+	return order != GREATER_THAN;
-+}
-+
-+/* Strict "greater than" for keys: non-zero when keycmp() orders @k1 after
-+   @k2, zero otherwise. */
-+static inline int keygt(const reiser4_key * k1 /* first key to compare */ ,
-+			const reiser4_key * k2 /* second key to compare */ )
-+{
-+	cmp_t order;
-+
-+	assert("nikita-1959", k1 != NULL);
-+	assert("nikita-1960", k2 != NULL);
-+
-+	order = keycmp(k1, k2);
-+	return order == GREATER_THAN;
-+}
-+
-+/* "Greater than or equal" for keys: non-zero unless keycmp() orders @k1
-+   before @k2. */
-+static inline int keyge(const reiser4_key * k1 /* first key to compare */ ,
-+			const reiser4_key * k2 /* second key to compare */ )
-+{
-+	cmp_t order;
-+
-+	assert("nikita-1956", k1 != NULL);
-+	/* 1957 - October 4: sputnik launched, November 3: Laika */
-+	assert("nikita-1957", k2 != NULL);
-+
-+	order = keycmp(k1, k2);
-+	return order != LESS_THAN;
-+}
-+
-+/* Issue prefetches for @key: one for the start of the key and one for the
-+   element at index KEY_CACHELINE_END. */
-+static inline void prefetchkey(reiser4_key * key)
-+{
-+	__le64 *last = &key->el[KEY_CACHELINE_END];
-+
-+	prefetch(key);
-+	prefetch(last);
-+}
-+
-+/* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
-+ 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1
-+ (the terms above add up to 73, so 80 leaves some headroom) */
-+/* size of a buffer suitable to hold human readable key representation */
-+#define KEY_BUF_LEN (80)
-+
-+/* reiser4_print_key() is a real function only in debug builds; otherwise
-+ the call expands to `noop' and its arguments are not evaluated */
-+#if REISER4_DEBUG
-+extern void reiser4_print_key(const char *prefix, const reiser4_key * key);
-+#else
-+#define reiser4_print_key(p,k) noop
-+#endif
-+
-+/* __FS_REISERFS_KEY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/ktxnmgrd.c linux-2.6.24/fs/reiser4/ktxnmgrd.c
---- linux-2.6.24.orig/fs/reiser4/ktxnmgrd.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/ktxnmgrd.c 2008-01-25 11:39:06.944209750 +0300
-@@ -0,0 +1,214 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Transaction manager daemon. */
-+
-+/*
-+ * ktxnmgrd is a kernel daemon responsible for committing transactions. It is
-+ * needed/important for the following reasons:
-+ *
-+ * 1. in reiser4 atom is not committed immediately when last transaction
-+ * handle closes, unless atom is either too old or too large (see
-+ * atom_should_commit()). This is done to avoid committing too
-+ * frequently.
-+ *
-+ * 2. sometimes we don't want to commit atom when closing last transaction
-+ * handle even if it is old and fat enough. For example, because we are at
-+ * this point under directory semaphore, and committing would stall all
-+ * accesses to this directory.
-+ *
-+ * ktxnmgrd bides its time sleeping on a wait queue. When it awakes,
-+ * either due to (tunable) timeout or because it was explicitly woken up by
-+ * call to ktxnmgrd_kick(), it scans list of all atoms and commits ones
-+ * eligible.
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "tree.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/sched.h> /* for struct task_struct */
-+#include <linux/wait.h>
-+#include <linux/suspend.h>
-+#include <linux/kernel.h>
-+#include <linux/writeback.h>
-+#include <linux/kthread.h>
-+#include <linux/freezer.h>
-+
-+static int scan_mgr(struct super_block *);
-+
-+/*
-+ * change current->comm so that ps, top, and friends will see changed
-+ * state. This serves no useful purpose whatsoever, but also costs nothing.
-+ * Maybe it will make a lonely system administrator feel less alone at 3 A.M.
-+ *
-+ * The new name is "<function>:<super->s_id>:<state>", truncated to
-+ * sizeof(current->comm). The macro takes `super' from the scope it is
-+ * expanded in, so it can only be used where such a variable is visible.
-+ */
-+#define set_comm( state ) \
-+ snprintf( current -> comm, sizeof( current -> comm ), \
-+ "%s:%s:%s", __FUNCTION__, (super)->s_id, ( state ) )
-+
-+/**
-+ * ktxnmgrd - kernel txnmgr daemon
-+ * @arg: pointer to super block
-+ *
-+ * The background transaction manager daemon, started as a kernel thread during
-+ * reiser4 initialization.
-+ *
-+ * Each iteration sleeps on ctx->wait for at most ctx->timeout (or until it is
-+ * woken up) and then calls scan_mgr() to commit atoms. The loop ends, and the
-+ * function returns 0, once kthread_should_stop() reports true.
-+ */
-+static int ktxnmgrd(void *arg)
-+{
-+ struct super_block *super;
-+ ktxnmgrd_context *ctx;
-+ txn_mgr *mgr;
-+ int done = 0;
-+
-+ super = arg;
-+ mgr = &get_super_private(super)->tmgr;
-+
-+ /*
-+ * do_fork() just copies task_struct into the new thread. ->fs_context
-+ * shouldn't be copied of course. This shouldn't be a problem for the
-+ * rest of the code though.
-+ *
-+ * NOTE(review): the comment speaks of ->fs_context while the code
-+ * clears ->journal_info; presumably the field was renamed - confirm.
-+ */
-+ current->journal_info = NULL;
-+ ctx = mgr->daemon;
-+ while (1) {
-+ try_to_freeze();
-+ set_comm("wait");
-+ {
-+ DEFINE_WAIT(__wait);
-+
-+ /*
-+ * register on ctx->wait first, then check for a stop
-+ * request; sleep only if none is pending
-+ */
-+ prepare_to_wait(&ctx->wait, &__wait, TASK_INTERRUPTIBLE);
-+ if (kthread_should_stop()) {
-+ done = 1;
-+ } else
-+ schedule_timeout(ctx->timeout);
-+ finish_wait(&ctx->wait, &__wait);
-+ }
-+ if (done)
-+ break;
-+ set_comm("run");
-+ spin_lock(&ctx->guard);
-+ /*
-+ * wait timed out or ktxnmgrd was woken up by explicit request
-+ * to commit something. Scan list of atoms in txnmgr and look
-+ * for too old atoms.
-+ *
-+ * NOTE(review): ctx->guard is taken above and taken again after
-+ * scan_mgr() with no unlock visible in this file; presumably it
-+ * is released inside commit_some_atoms() - confirm.
-+ */
-+ do {
-+ ctx->rescan = 0;
-+ scan_mgr(super);
-+ spin_lock(&ctx->guard);
-+ if (ctx->rescan) {
-+ /*
-+ * the list could be modified while ctx
-+ * spinlock was released, we have to repeat
-+ * scanning from the beginning
-+ *
-+ * NOTE(review): the break below leaves the
-+ * loop instead of repeating it, so the body
-+ * runs exactly once per wakeup - confirm
-+ * this is intended.
-+ */
-+ break;
-+ }
-+ } while (ctx->rescan);
-+ spin_unlock(&ctx->guard);
-+ }
-+ return 0;
-+}
-+
-+#undef set_comm
-+
-+/**
-+ * reiser4_init_ktxnmgrd - set up the ktxnmgrd context and start the daemon
-+ * @super: super block whose transaction manager gets the daemon
-+ *
-+ * Allocates a zeroed ktxnmgrd_context, fills in its wait queue, spin lock,
-+ * timeout (REISER4_TXNMGR_TIMEOUT) and rescan flag, attaches it to the
-+ * transaction manager of @super and starts the "ktxnmgrd" kernel thread.
-+ * This is called on mount.
-+ *
-+ * Returns 0 on success, RETERR(-ENOMEM) when the context cannot be
-+ * allocated, or RETERR() of the kthread_run() error; in the latter case the
-+ * context is freed again and the manager's daemon pointer is reset to NULL.
-+ */
-+int reiser4_init_ktxnmgrd(struct super_block *super)
-+{
-+	ktxnmgrd_context *ctx;
-+	txn_mgr *mgr = &get_super_private(super)->tmgr;
-+	int err;
-+
-+	assert("zam-1014", mgr->daemon == NULL);
-+
-+	ctx = kzalloc(sizeof(*ctx), reiser4_ctx_gfp_mask_get());
-+	if (ctx == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	assert("nikita-2442", ctx != NULL);
-+
-+	init_waitqueue_head(&ctx->wait);
-+	spin_lock_init(&ctx->guard);
-+	ctx->timeout = REISER4_TXNMGR_TIMEOUT;
-+	ctx->rescan = 1;
-+
-+	/* attach the context before the thread starts: ktxnmgrd() reads
-+	   mgr->daemon */
-+	mgr->daemon = ctx;
-+
-+	ctx->tsk = kthread_run(ktxnmgrd, super, "ktxnmgrd");
-+	if (!IS_ERR(ctx->tsk))
-+		return 0;
-+
-+	/* thread creation failed: undo the attachment and drop the context */
-+	err = PTR_ERR(ctx->tsk);
-+	mgr->daemon = NULL;
-+	kfree(ctx);
-+	return RETERR(err);
-+}
-+
-+/* Wake up whoever sleeps on the wait queue of the daemon context attached
-+   to @mgr. Both @mgr and its daemon context must exist. */
-+void ktxnmgrd_kick(txn_mgr *mgr)
-+{
-+	ktxnmgrd_context *daemon;
-+
-+	assert("nikita-3234", mgr != NULL);
-+	assert("nikita-3235", mgr->daemon != NULL);
-+
-+	daemon = mgr->daemon;
-+	wake_up(&daemon->wait);
-+}
-+
-+/* Non-zero when the calling task is the daemon task recorded in the
-+   transaction manager of the current super block. */
-+int is_current_ktxnmgrd(void)
-+{
-+	ktxnmgrd_context *daemon = get_current_super_private()->tmgr.daemon;
-+
-+	return daemon->tsk == current;
-+}
-+
-+/**
-+ * scan_mgr - run one commit pass over the atoms of a super block
-+ * @super: super block to commit atoms of
-+ *
-+ * Enters an on-stack reiser4 context for @super, calls commit_some_atoms()
-+ * on the super block's transaction manager and leaves the context again.
-+ * Returns whatever commit_some_atoms() returned.
-+ */
-+static int scan_mgr(struct super_block *super)
-+{
-+	reiser4_context context;
-+	txn_mgr *mgr;
-+	int result;
-+
-+	init_stack_context(&context, super);
-+
-+	mgr = &get_super_private(super)->tmgr;
-+	result = commit_some_atoms(mgr);
-+
-+	reiser4_exit_context(&context);
-+
-+	return result;
-+}
-+
-+/**
-+ * reiser4_done_ktxnmgrd - stop the kernel thread and free the ktxnmgrd context
-+ * @super: super block whose daemon is shut down
-+ *
-+ * This is called on umount. Stops the ktxnmgrd thread with kthread_stop(),
-+ * frees the daemon context and resets the transaction manager's daemon
-+ * pointer to NULL. The daemon context must exist.
-+ */
-+void reiser4_done_ktxnmgrd(struct super_block *super)
-+{
-+	txn_mgr *mgr = &get_super_private(super)->tmgr;
-+	ktxnmgrd_context *daemon;
-+
-+	assert("zam-1012", mgr->daemon != NULL);
-+
-+	daemon = mgr->daemon;
-+	kthread_stop(daemon->tsk);
-+	kfree(daemon);
-+	mgr->daemon = NULL;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/ktxnmgrd.h linux-2.6.24/fs/reiser4/ktxnmgrd.h
---- linux-2.6.24.orig/fs/reiser4/ktxnmgrd.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/ktxnmgrd.h 2008-01-25 11:39:06.944209750 +0300
-@@ -0,0 +1,52 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Transaction manager daemon. See ktxnmgrd.c for comments. */
-+
-+#ifndef __KTXNMGRD_H__
-+#define __KTXNMGRD_H__
-+
-+#include "txnmgr.h"
-+
-+#include <linux/fs.h>
-+#include <linux/wait.h>
-+#include <linux/completion.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* in this structure all data necessary to start up, shut down and communicate
-+ * with ktxnmgrd are kept. One instance is allocated per transaction manager
-+ * by reiser4_init_ktxnmgrd() and freed by reiser4_done_ktxnmgrd(). */
-+struct ktxnmgrd_context {
-+ /* wait queue head on which ktxnmgrd sleeps; ktxnmgrd_kick() wakes it */
-+ wait_queue_head_t wait;
-+ /* spin lock protecting all fields of this structure */
-+ spinlock_t guard;
-+ /* timeout of sleeping on ->wait, passed to schedule_timeout() */
-+ signed long timeout;
-+ /* kernel thread running ktxnmgrd */
-+ struct task_struct *tsk;
-+ /* list of all file systems served by this ktxnmgrd.
-+ * NOTE(review): not referenced by the ktxnmgrd.c code - confirm
-+ * whether it is still used. */
-+ struct list_head queue;
-+ /* should ktxnmgrd repeat scanning of atoms? */
-+ unsigned int rescan:1;
-+};
-+
-+/* allocate the daemon context for a super block and start its ktxnmgrd
-+ * thread / stop the thread and free the context */
-+extern int reiser4_init_ktxnmgrd(struct super_block *);
-+extern void reiser4_done_ktxnmgrd(struct super_block *);
-+
-+/* wake up the daemon attached to @mgr */
-+extern void ktxnmgrd_kick(txn_mgr * mgr);
-+/* non-zero if the calling task is the ktxnmgrd of the current super block */
-+extern int is_current_ktxnmgrd(void);
-+
-+/* __KTXNMGRD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/lock.c linux-2.6.24/fs/reiser4/lock.c
---- linux-2.6.24.orig/fs/reiser4/lock.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/lock.c 2008-01-25 11:39:06.948210780 +0300
-@@ -0,0 +1,1232 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Traditional deadlock avoidance is achieved by acquiring all locks in a single
-+ order. V4 balances the tree from the bottom up, and searches the tree from
-+ the top down, and that is really the way we want it, so tradition won't work
-+ for us.
-+
-+ Instead we have two lock orderings, a high priority lock ordering, and a low
-+ priority lock ordering. Each node in the tree has a lock in its znode.
-+
-+ Suppose we have a set of processes which lock (R/W) tree nodes. Each process
-+ has a set (maybe empty) of already locked nodes ("process locked set"). Each
-+ process may have a pending lock request to a node locked by another process.
-+ Note: we lock and unlock, but do not transfer locks: it is possible
-+ transferring locks instead would save some bus locking....
-+
-+ Deadlock occurs when we have a loop constructed from process locked sets and
-+ lock request vectors.
-+
-+ NOTE: The reiser4 "tree" is a tree on disk, but its cached representation in
-+ memory is extended with "znodes" with which we connect nodes with their left
-+ and right neighbors using sibling pointers stored in the znodes. When we
-+ perform balancing operations we often go from left to right and from right to
-+ left.
-+
-+ +-P1-+ +-P3-+
-+ |+--+| V1 |+--+|
-+ ||N1|| -------> ||N3||
-+ |+--+| |+--+|
-+ +----+ +----+
-+ ^ |
-+ |V2 |V3
-+ | v
-+ +---------P2---------+
-+ |+--+ +--+|
-+ ||N2| -------- |N4||
-+ |+--+ +--+|
-+ +--------------------+
-+
-+ We solve this by ensuring that only low priority processes lock in top to
-+ bottom order and from right to left, and high priority processes lock from
-+ bottom to top and left to right.
-+
-+ ZAM-FIXME-HANS: order not just node locks in this way, order atom locks, and
-+ kill those damn busy loops.
-+ ANSWER(ZAM): atom locks (which are introduced by ASTAGE_CAPTURE_WAIT atom
-+ stage) cannot be ordered that way. There are no rules what nodes can belong
-+ to the atom and what nodes cannot. We cannot define what is right or left
-+ direction, what is top or bottom. We can take immediate parent or side
-+ neighbor of one node, but nobody guarantees that, say, left neighbor node is
-+ not a far right neighbor for other nodes from the same atom. It breaks
-+ deadlock avoidance rules and hi-low priority locking cannot be applied for
-+ atom locks.
-+
-+ How does it help to avoid deadlocks ?
-+
-+ Suppose we have a deadlock with n processes. Processes from one priority
-+ class never deadlock because they take locks in one consistent
-+ order.
-+
-+ So, any possible deadlock loop must have low priority as well as high
-+ priority processes. There are no other lock priority levels except low and
-+ high. We know that any deadlock loop contains at least one node locked by a
-+ low priority process and requested by a high priority process. If this
-+ situation is caught and resolved it is sufficient to avoid deadlocks.
-+
-+ V4 DEADLOCK PREVENTION ALGORITHM IMPLEMENTATION.
-+
-+ The deadlock prevention algorithm is based on comparing
-+ priorities of node owners (processes which keep znode locked) and
-+ requesters (processes which want to acquire a lock on znode). We
-+ implement a scheme where low-priority owners yield locks to
-+ high-priority requesters. We created a signal passing system that
-+ is used to ask low-priority processes to yield one or more locked
-+ znodes.
-+
-+ The condition when a znode needs to change its owners is described by the
-+ following formula:
-+
-+ #############################################
-+ # #
-+ # (number of high-priority requesters) > 0 #
-+ # AND #
-+ # (numbers of high-priority owners) == 0 #
-+ # #
-+ #############################################
-+
-+ Note that a low-priority process delays node releasing if another
-+ high-priority process owns this node. So, slightly more strictly speaking,
-+ to have a deadlock capable cycle you must have a loop in which a high
-+ priority process is waiting on a low priority process to yield a node, which
-+ is slightly different from saying a high priority process is waiting on a
-+ node owned by a low priority process.
-+
-+ It is enough to avoid deadlocks if we prevent any low-priority process from
-+ falling asleep if its locked set contains a node which satisfies the
-+ deadlock condition.
-+
-+ That condition is implicitly or explicitly checked in all places where new
-+ high-priority requests may be added or removed from node request queue or
-+ high-priority process takes or releases a lock on node. The main
-+ goal of these checks is to never lose the moment when node becomes "has
-+ wrong owners" and send "must-yield-this-lock" signals to its low-pri owners
-+ at that time.
-+
-+ The information about received signals is stored in the per-process
-+ structure (lock stack) and analyzed before a low-priority process goes to
-+ sleep but after a "fast" attempt to lock a node fails. Any signal wakes
-+ sleeping process up and forces him to re-check lock status and received
-+ signal info. If "must-yield-this-lock" signals were received the locking
-+ primitive (longterm_lock_znode()) fails with -E_DEADLOCK error code.
-+
-+ V4 LOCKING DRAWBACKS
-+
-+ If we have already balanced on one level, and we are propagating our changes
-+ upward to a higher level, it could be very messy to surrender all locks on
-+ the lower level because we put so much computational work into it, and
-+ reverting them to their state before they were locked might be very complex.
-+ We also don't want to acquire all locks before performing balancing because
-+ that would either be almost as much work as the balancing, or it would be
-+ too conservative and lock too much. We want balancing to be done only at
-+ high priority. Yet, we might want to go to the left one node and use some
-+ of its empty space... So we make one attempt at getting the node to the left
-+ using try_lock, and if it fails we do without it, because we didn't really
-+ need it, it was only a nice to have.
-+
-+ LOCK STRUCTURES DESCRIPTION
-+
-+ The following data structures are used in the reiser4 locking
-+ implementation:
-+
-+ All fields related to long-term locking are stored in znode->lock.
-+
-+ The lock stack is a per thread object. It owns all znodes locked by the
-+ thread. One znode may be locked by several threads in case of read lock or
-+ one znode may be write locked by one thread several times. The special link
-+ objects (lock handles) support n<->m relation between znodes and lock
-+ owners.
-+
-+ <Thread 1> <Thread 2>
-+
-+ +---------+ +---------+
-+ | LS1 | | LS2 |
-+ +---------+ +---------+
-+ ^ ^
-+ |---------------+ +----------+
-+ v v v v
-+ +---------+ +---------+ +---------+ +---------+
-+ | LH1 | | LH2 | | LH3 | | LH4 |
-+ +---------+ +---------+ +---------+ +---------+
-+ ^ ^ ^ ^
-+ | +------------+ |
-+ v v v
-+ +---------+ +---------+ +---------+
-+ | Z1 | | Z2 | | Z3 |
-+ +---------+ +---------+ +---------+
-+
-+ Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The
-+ picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and
-+ LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode
-+ Z1 is locked by only one thread, znode has only one lock handle LH1 on its
-+ list, similar situation is for Z3 which is locked by the thread 2 only. Z2
-+ is locked (for read) twice by different threads and two lock handles are on
-+ its list. Each lock handle represents a single relation of a locking of a
-+ znode by a thread. Locking of a znode is an establishing of a locking
-+ relation between the lock stack and the znode by adding of a new lock handle
-+ to a list of lock handles, the lock stack. The lock stack links all lock
-+ handles for all znodes locked by the lock stack. The znode list groups all
-+ lock handles for all locks stacks which locked the znode.
-+
-+ Yet another relation may exist between znode and lock owners. If lock
-+ procedure cannot immediately take lock on an object it adds the lock owner
-+ on special `requestors' list belongs to znode. That list represents a
-+ queue of pending lock requests. Because one lock owner may request only
-+ one lock object at a time, it is a 1->n relation between lock objects
-+ and a lock owner implemented as it is described above. Full information
-+ (priority, pointers to lock and link objects) about each lock request is
-+ stored in lock owner structure in `request' field.
-+
-+ SHORT_TERM LOCKING
-+
-+ This is a list of primitive operations over lock stacks / lock handles /
-+ znodes and locking descriptions for them.
-+
-+ 1. locking / unlocking which is done by two list insertion/deletion, one
-+ to/from znode's list of lock handles, another one is to/from lock stack's
-+ list of lock handles. The first insertion is protected by
-+ znode->lock.guard spinlock. The list owned by the lock stack can be
-+ modified only by thread who owns the lock stack and nobody else can
-+ modify/read it. There is nothing to be protected by a spinlock or
-+ something else.
-+
-+ 2. adding/removing a lock request to/from znode requesters list. The rule is
-+ that znode->lock.guard spinlock should be taken for this.
-+
-+ 3. we can traverse list of lock handles and use references to lock stacks who
-+ locked given znode if znode->lock.guard spinlock is taken.
-+
-+ 4. If a lock stack is associated with a znode as a lock requestor or lock
-+ owner its existence is guaranteed by znode->lock.guard spinlock. Some of its
-+ (lock stack's) fields should be protected from being accessed in parallel
-+ by two or more threads. Please look at lock_stack structure definition
-+ for the info how those fields are protected. */
-+
-+/* Znode lock and capturing intertwining. */
-+/* In current implementation we capture formatted nodes before locking
-+ them. Take a look at longterm_lock_znode(): the reiser4_try_capture() request
-+ precedes locking requests. The longterm_lock_znode function unconditionally
-+ captures znode before even checking of locking conditions.
-+
-+ Another variant is to capture znode after locking it. It was not tested, but
-+ at least one deadlock condition is supposed to be there. One thread has
-+ locked a znode (Node-1) and calls reiser4_try_capture() for it.
-+ reiser4_try_capture() sleeps because znode's atom has CAPTURE_WAIT state.
-+ Second thread is a flushing thread, its current atom is the atom Node-1
-+ belongs to. Second thread wants to lock Node-1 and sleeps because Node-1
-+ is locked by the first thread. The described situation is a deadlock. */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "znode.h"
-+#include "jnode.h"
-+#include "tree.h"
-+#include "plugin/node/node.h"
-+#include "super.h"
-+
-+#include <linux/spinlock.h>
-+
-+#if REISER4_DEBUG
-+static int request_is_deadlock_safe(znode *, znode_lock_mode,
-+ znode_lock_request);
-+#endif
-+
-+/* Hands back the lock stack (lock owner) embedded in the reiser4 context of
-+   the current thread. */
-+lock_stack *get_current_lock_stack(void)
-+{
-+	reiser4_context *ctx = get_current_context();
-+
-+	return &ctx->stack;
-+}
-+
-+/* Signals the owners of @node about a possible deadlock. Walks the znode's
-+   list of lock handles; each handle that has not been signaled yet is marked
-+   as signaled, its owner's nr_signaled counter is bumped and the owner is
-+   woken up. Must be called with node->lock.guard held. */
-+static void wake_up_all_lopri_owners(znode * node)
-+{
-+	lock_handle *lh;
-+
-+	assert_spin_locked(&(node->lock.guard));
-+
-+	list_for_each_entry(lh, &node->lock.owners, owners_link) {
-+		assert("nikita-1832", lh->node == node);
-+
-+		/* a handle is counted in owner->nr_signaled only once */
-+		if (lh->signaled)
-+			continue;
-+
-+		lh->signaled = 1;
-+		atomic_inc(&lh->owner->nr_signaled);
-+		/* wake up the process owning this handle */
-+		reiser4_wake_up(lh->owner);
-+	}
-+}
-+
-+/* Adds a lock to a lock owner, which means creating a link to the lock and
-+ putting the link into the two lists all links are on (the doubly linked list
-+ that forms the lock_stack, and the doubly linked list of links attached
-+ to a lock).
-+
-+ @handle must be free (handle->owner == NULL) and node->lock.guard must be
-+ held by the caller. On return @handle points at @owner and @node and its
-+ signaled flag is cleared.
-+*/
-+static inline void
-+link_object(lock_handle * handle, lock_stack * owner, znode * node)
-+{
-+ assert("jmacd-810", handle->owner == NULL);
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ handle->owner = owner;
-+ handle->node = node;
-+
-+ assert("reiser4-4",
-+ ergo(list_empty_careful(&owner->locks), owner->nr_locks == 0));
-+
-+ /* add lock handle to the end of lock_stack's list of locks */
-+ list_add_tail(&handle->locks_link, &owner->locks);
-+ ON_DEBUG(owner->nr_locks++);
-+ /* NOTE(review): presumably re-derives the context's allocation mask
-+ now that the set of held locks changed - confirm */
-+ reiser4_ctx_gfp_mask_set();
-+
-+ /* add lock handle to the head of znode's list of owners; recursive()
-+ inspects the head entry of this list */
-+ list_add(&handle->owners_link, &node->lock.owners);
-+ handle->signaled = 0;
-+}
-+
-+/* Breaks a relation between a lock and its owner: removes @handle from both
-+ the lock stack's list of locks and the znode's list of owners and marks it
-+ free by clearing handle->node. The caller must hold the znode's lock.guard
-+ and must be the owner of the handle. */
-+static inline void unlink_object(lock_handle * handle)
-+{
-+ assert("zam-354", handle->owner != NULL);
-+ assert("nikita-1608", handle->node != NULL);
-+ assert_spin_locked(&(handle->node->lock.guard));
-+ assert("nikita-1829", handle->owner == get_current_lock_stack());
-+ assert("reiser4-5", handle->owner->nr_locks > 0);
-+
-+ /* remove lock handle from lock_stack's list of locks */
-+ list_del(&handle->locks_link);
-+ ON_DEBUG(handle->owner->nr_locks--);
-+ /* NOTE(review): presumably re-derives the context's allocation mask
-+ now that the set of held locks changed - confirm */
-+ reiser4_ctx_gfp_mask_set();
-+ assert("reiser4-6",
-+ ergo(list_empty_careful(&handle->owner->locks),
-+ handle->owner->nr_locks == 0));
-+ /* remove lock handle from znode's list of owners */
-+ list_del(&handle->owners_link);
-+ /* indicates that lock handle is free now */
-+ handle->node = NULL;
-+#if REISER4_DEBUG
-+ /* debug builds additionally reset the list links and the owner
-+ pointer of the freed handle */
-+ INIT_LIST_HEAD(&handle->locks_link);
-+ INIT_LIST_HEAD(&handle->owners_link);
-+ handle->owner = NULL;
-+#endif
-+}
-+
-+/* Grants the lock described by owner->request; the caller has already
-+   established that the lock can be taken and holds the znode's lock.guard.
-+   A read lock increments nr_readers, a write lock decrements it. The
-+   request's handle is then linked to @owner and the znode, and
-+   nr_hipri_owners is bumped when @owner runs at high priority. */
-+static void lock_object(lock_stack * owner)
-+{
-+	struct lock_request *req = &owner->request;
-+	znode *target = req->node;
-+
-+	assert_spin_locked(&(target->lock.guard));
-+
-+	if (req->mode != ZNODE_READ_LOCK) {
-+		/* make sure we are not switching from read to write lock */
-+		assert("nikita-1840", target->lock.nr_readers <= 0);
-+		/* recursive locking is allowed: the same process may lock
-+		   a node for write several times, one decrement each */
-+		target->lock.nr_readers--;
-+	} else {
-+		target->lock.nr_readers++;
-+	}
-+
-+	link_object(req->handle, owner, target);
-+
-+	if (owner->curpri)
-+		target->lock.nr_hipri_owners++;
-+}
-+
-+/* Checks for recursive write locking: returns non-zero when the first owner
-+   of the node requested by @owner is @owner itself. Must be called by the
-+   requesting thread, on a node with a non-empty owners list, with the zlock
-+   guard held. */
-+static int recursive(lock_stack * owner)
-+{
-+	znode *target = owner->request.node;
-+	lock_handle *first;
-+	int same_owner;
-+
-+	/* Owners list is not empty for a locked node */
-+	assert("zam-314", !list_empty_careful(&target->lock.owners));
-+	assert("nikita-1841", owner == get_current_lock_stack());
-+	assert_spin_locked(&(target->lock.guard));
-+
-+	first = list_entry(target->lock.owners.next, lock_handle, owners_link);
-+	same_owner = (first->owner == owner);
-+
-+	/* Recursive read locking should be done the usual way */
-+	assert("zam-315",
-+	       !same_owner || owner->request.mode == ZNODE_WRITE_LOCK);
-+	/* mixing of read/write locks is not allowed */
-+	assert("zam-341", !same_owner || znode_is_wlocked(target));
-+
-+	return same_owner;
-+}
-+
-+#if REISER4_DEBUG
-+/* Debugging helper. Returns 1 if @node is locked and one of the lock handles
-+   on the current thread's lock stack points to it, 0 otherwise. */
-+int znode_is_any_locked(const znode * node)
-+{
-+	lock_stack *self;
-+	lock_handle *lh;
-+	int found = 0;
-+
-+	if (!znode_is_locked(node))
-+		return 0;
-+
-+	self = get_current_lock_stack();
-+
-+	/* scan the handles of this thread under the lock stack spin lock */
-+	spin_lock_stack(self);
-+	list_for_each_entry(lh, &self->locks, locks_link) {
-+		if (lh->node == node) {
-+			found = 1;
-+			break;
-+		}
-+	}
-+	spin_unlock_stack(self);
-+
-+	return found;
-+}
-+
-+#endif
-+
-+/* Returns true if @node is write locked and the write lock is held by the
-+   calling thread. */
-+int znode_is_write_locked(const znode * node)
-+{
-+	lock_stack *self;
-+	lock_handle *first_owner;
-+
-+	assert("jmacd-8765", node != NULL);
-+
-+	if (!znode_is_wlocked(node))
-+		return 0;
-+
-+	self = get_current_lock_stack();
-+
-+	/*
-+	 * All owner handles of a write locked znode point to the same lock
-+	 * stack, so it is enough to look at the first handle on the znode's
-+	 * owner list.
-+	 */
-+	first_owner =
-+	    list_entry(node->lock.owners.next, lock_handle, owners_link);
-+
-+	return first_owner->owner == self;
-+}
-+
-+/* This "deadlock" condition is the essential part of the reiser4 locking
-+ implementation. This condition is checked explicitly by calling
-+ check_deadlock_condition() or implicitly in all places where znode lock
-+ state (set of owners and request queue) is changed. Locking code is
-+ designed to use this condition to trigger the procedure of passing an object
-+ from low priority owner(s) to high priority one(s).
-+
-+ The procedure results in passing an event (setting the lock_handle->signaled
-+ flag), counting this event in the nr_signaled field of the owner's lock
-+ stack object and waking up the owner's process.
-+
-+ Returns non-zero when @node has at least one pending high priority request
-+ and no high priority owners. The zlock guard of @node must be held.
-+*/
-+static inline int check_deadlock_condition(znode * node)
-+{
-+ assert_spin_locked(&(node->lock.guard));
-+ return node->lock.nr_hipri_requests > 0
-+ && node->lock.nr_hipri_owners == 0;
-+}
-+
-+/* "Livelock" condition: returns non-zero for a read request (@mode ==
-+   ZNODE_READ_LOCK) on a node that is not write locked (nr_readers >= 0) while
-+   high priority write requests are pending on it. can_lock_object() uses this
-+   to refuse such requests from high priority lockers. */
-+static int check_livelock_condition(znode * node, znode_lock_mode mode)
-+{
-+	const zlock *zl = &node->lock;
-+
-+	if (mode != ZNODE_READ_LOCK)
-+		return 0;
-+
-+	return zl->nr_readers >= 0 && zl->nr_hipri_write_requests > 0;
-+}
-+
-+/* Checks lock/request compatibility for the request stored in owner->request.
-+
-+ Returns 0 when the requested lock can be taken right now, -EINVAL when the
-+ node is marked JNODE_IS_DYING, and -E_REPEAT when the lock cannot be taken at
-+ the moment (deadlock condition for a low priority requestor, livelock
-+ condition for a high priority requestor, or incompatible lock mode). Error
-+ codes are passed through RETERR(). The zlock guard of the requested node
-+ must be held. */
-+static int can_lock_object(lock_stack * owner)
-+{
-+ znode *node = owner->request.node;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* See if the node is disconnected. */
-+ if (unlikely(ZF_ISSET(node, JNODE_IS_DYING)))
-+ return RETERR(-EINVAL);
-+
-+ /* Do not ever try to take a lock if we are going in low priority
-+ direction and a node has a high priority request without high
-+ priority owners. */
-+ if (unlikely(!owner->curpri && check_deadlock_condition(node)))
-+ return RETERR(-E_REPEAT);
-+ /* A high priority requestor backs off if the livelock condition holds
-+ for its request mode. */
-+ if (unlikely(owner->curpri && check_livelock_condition(node, owner->request.mode)))
-+ return RETERR(-E_REPEAT);
-+ /* Finally, the requested mode has to be compatible with the current
-+ state of the lock. */
-+ if (unlikely(!is_lock_compatible(node, owner->request.mode)))
-+ return RETERR(-E_REPEAT);
-+ return 0;
-+}
-+
-+/* Switches the lock stack of the current thread to high priority. Every node
-+   already locked by @owner gets one more high priority owner and the
-+   "signaled" flags are cleared, because a znode locked by a high-priority
-+   process can't satisfy our "deadlock condition". */
-+static void set_high_priority(lock_stack * owner)
-+{
-+	lock_handle *held;
-+
-+	assert("nikita-1846", owner == get_current_lock_stack());
-+
-+	/* Nothing to do if the current priority is already high */
-+	if (owner->curpri)
-+		return;
-+
-+	/* owner->locks is walked without locking, because this function is
-+	 * only called with the lock stack of the current thread, and no other
-+	 * thread can play with the owner->locks list and/or change ->node
-+	 * pointers of lock handles in this list.
-+	 *
-+	 * (Interrupts also are not involved.)
-+	 */
-+	list_for_each_entry(held, &owner->locks, locks_link) {
-+		zlock *zl = &held->node->lock;
-+
-+		spin_lock_zlock(zl);
-+
-+		zl->nr_hipri_owners++;
-+
-+		/* signaled can safely be reset: the increment of
-+		   nr_hipri_owners above guarantees that it will never be set
-+		   again. */
-+		held->signaled = 0;
-+
-+		spin_unlock_zlock(zl);
-+	}
-+
-+	owner->curpri = 1;
-+	atomic_set(&owner->nr_signaled, 0);
-+}
-+
-+/* Switches the lock stack of the current thread to low priority. Every node
-+   locked by @owner loses one high priority owner; where this creates the
-+   deadlock condition, the corresponding lock handle is marked as signaled. */
-+static void set_low_priority(lock_stack * owner)
-+{
-+	lock_handle *held;
-+
-+	assert("nikita-3075", owner == get_current_lock_stack());
-+
-+	/* Nothing to do if the current priority is already low */
-+	if (!owner->curpri)
-+		return;
-+
-+	/* scan all locks (lock handles) held by @owner, which is actually the
-+	   current thread, and check whether a deadlock becomes possible
-+	   anywhere. */
-+	list_for_each_entry(held, &owner->locks, locks_link) {
-+		znode *locked = held->node;
-+
-+		spin_lock_zlock(&locked->lock);
-+
-+		/* this thread was a hipri owner of @locked until now, so
-+		   nr_hipri_owners has to be greater than zero. */
-+		assert("nikita-1835", locked->lock.nr_hipri_owners > 0);
-+		locked->lock.nr_hipri_owners--;
-+
-+		/* On deadlock condition adjust the nr_signaled field. It is
-+		   enough to set the "signaled" flag only for the current
-+		   process: other low-pri owners will be signaled and woken up
-+		   after the current process unlocks this object and a
-+		   high-priority requestor takes control. */
-+		if (check_deadlock_condition(locked) && !held->signaled) {
-+			held->signaled = 1;
-+			atomic_inc(&owner->nr_signaled);
-+		}
-+
-+		spin_unlock_zlock(&locked->lock);
-+	}
-+
-+	owner->curpri = 0;
-+}
-+
-+/* Takes @requestor off the requestors list of the node it is waiting for and
-+ undoes the accounting of its request: a high priority requestor gives back
-+ one nr_hipri_requests and, for a write request, one
-+ nr_hipri_write_requests. All callers visible in this file hold the zlock
-+ guard of the node. */
-+static void remove_lock_request(lock_stack * requestor)
-+{
-+ zlock * lock = &requestor->request.node->lock;
-+
-+ if (requestor->curpri) {
-+ assert("nikita-1838", lock->nr_hipri_requests > 0);
-+ lock->nr_hipri_requests--;
-+ if (requestor->request.mode == ZNODE_WRITE_LOCK)
-+ lock->nr_hipri_write_requests --;
-+ }
-+ list_del(&requestor->requestors_link);
-+}
-+
-+/* Fails every pending lock request on @node: each requestor is removed from
-+ the requestors list, gets -EINVAL as the result of its request and is woken
-+ up. The zlock guard of @node must be held. */
-+static void invalidate_all_lock_requests(znode * node)
-+{
-+ lock_stack *requestor, *tmp;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* the _safe iterator is needed because remove_lock_request() unlinks
-+ the current entry */
-+ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) {
-+ remove_lock_request(requestor);
-+ requestor->request.ret_code = -EINVAL;
-+ reiser4_wake_up(requestor);
-+ /* ZNODE_NO_LOCK in request.mode is what longterm_lock_znode()
-+ tests after waking up to see that its request was completed
-+ on its behalf */
-+ requestor->request.mode = ZNODE_NO_LOCK;
-+ }
-+}
-+
-+/* Walks the queue of pending lock requests on @node and grants every request
-+ that can be satisfied now: the lock is taken on behalf of the requestor, the
-+ request is removed from the queue, its result is set to 0 and the requestor
-+ is woken up. The zlock guard of @node must be held. */
-+static void dispatch_lock_requests(znode * node)
-+{
-+ lock_stack *requestor, *tmp;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* the _safe iterator is needed because remove_lock_request() unlinks
-+ the current entry */
-+ list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) {
-+ /* NOTE(review): znode_is_write_locked() is true only when the
-+ write lock is held by the *calling* thread, so this does not
-+ stop the scan after a write lock was just granted to a
-+ requestor; later requests are then presumably rejected by
-+ can_lock_object() instead -- confirm whether
-+ znode_is_wlocked() was intended */
-+ if (znode_is_write_locked(node))
-+ break;
-+ /* can_lock_object() returns 0 when the lock can be taken */
-+ if (!can_lock_object(requestor)) {
-+ lock_object(requestor);
-+ remove_lock_request(requestor);
-+ requestor->request.ret_code = 0;
-+ reiser4_wake_up(requestor);
-+ /* tells the woken thread that its request is done */
-+ requestor->request.mode = ZNODE_NO_LOCK;
-+ }
-+ }
-+}
-+
-+/*
-+ * Releases a long-term lock acquired by longterm_lock_znode().
-+ *
-+ * @handle: lock handle owned by the current thread and linked to a locked
-+ * znode. On return the handle is unlinked (->node == NULL) and the reference
-+ * the handle held on the znode has been dropped with zput().
-+ *
-+ * If this releases the only write lock of a node marked JNODE_HEARD_BANSHEE,
-+ * the node is passed to forget_znode() instead of being unlocked normally.
-+ */
-+void longterm_unlock_znode(lock_handle * handle)
-+{
-+	znode *node;
-+	lock_stack *oldowner;
-+	int hipri;
-+	int readers;
-+	int rdelta;
-+	int youdie;
-+
-+	/*
-+	 * this is time-critical and highly optimized code. Modify carefully.
-+	 */
-+
-+	/* validate @handle before it is dereferenced for the first time */
-+	assert("jmacd-1021", handle != NULL);
-+	assert("jmacd-1022", handle->owner != NULL);
-+	assert("nikita-1392", LOCK_CNT_GTZ(long_term_locked_znode));
-+
-+	node = handle->node;
-+	oldowner = handle->owner;
-+
-+	assert("zam-130", oldowner == get_current_lock_stack());
-+
-+	LOCK_CNT_DEC(long_term_locked_znode);
-+
-+	/*
-+	 * to minimize amount of operations performed under lock, pre-compute
-+	 * all variables used within critical section. This makes code
-+	 * obscure.
-+	 *
-+	 * NOTE(review): ->nr_readers is read here without the zlock guard;
-+	 * only its sign is used, which presumably cannot change while the
-+	 * current thread still holds its lock on the node -- confirm.
-+	 */
-+
-+	/* was this lock of hi or lo priority */
-+	hipri = oldowner->curpri ? 1 : 0;
-+	/* number of readers */
-+	readers = node->lock.nr_readers;
-+	/* +1 if write lock, -1 if read lock */
-+	rdelta = (readers > 0) ? -1 : +1;
-+	/* true if node is to die and write lock is released */
-+	youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0);
-+
-+	spin_lock_zlock(&node->lock);
-+
-+	assert("zam-101", znode_is_locked(node));
-+
-+	/* Adjust a number of high priority owners of this lock */
-+	assert("nikita-1836", node->lock.nr_hipri_owners >= hipri);
-+	node->lock.nr_hipri_owners -= hipri;
-+
-+	/* Handle znode deallocation on last write-lock release. */
-+	if (znode_is_wlocked_once(node)) {
-+		if (youdie) {
-+			/* NOTE(review): the zlock guard is not released on
-+			   this path; forget_znode() is expected to end up in
-+			   reiser4_invalidate_lock(), which drops it --
-+			   confirm */
-+			forget_znode(handle);
-+			assert("nikita-2191", znode_invariant(node));
-+			zput(node);
-+			return;
-+		}
-+	}
-+
-+	if (handle->signaled)
-+		atomic_dec(&oldowner->nr_signaled);
-+
-+	/* Unlocking means owner<->object link deletion */
-+	unlink_object(handle);
-+
-+	/* This is enough to be sure whether an object is completely
-+	   unlocked. */
-+	node->lock.nr_readers += rdelta;
-+
-+	/* If the node is locked it must have an owners list. Likewise, if
-+	   the node is unlocked it must have an empty owners list. */
-+	assert("zam-319", equi(znode_is_locked(node),
-+			       !list_empty_careful(&node->lock.owners)));
-+
-+#if REISER4_DEBUG
-+	if (!znode_is_locked(node))
-+		++node->times_locked;
-+#endif
-+
-+	/* If there are pending lock requests we wake up a requestor */
-+	if (!znode_is_wlocked(node))
-+		dispatch_lock_requests(node);
-+	if (check_deadlock_condition(node))
-+		wake_up_all_lopri_owners(node);
-+	spin_unlock_zlock(&node->lock);
-+
-+	/* minus one reference from handle->node */
-+	assert("nikita-2190", znode_invariant(node));
-+	ON_DEBUG(check_lock_data());
-+	ON_DEBUG(check_lock_node_data(node));
-+	zput(node);
-+}
-+
-+/* Final portion of longterm-lock.
-+
-+ Called with the zlock guard of the requested node held; the guard is
-+ released here on every path.
-+
-+ @owner: lock stack of the requesting thread, owner->request describes the
-+ request.
-+ @ok: 0 if the lock can be taken, in which case it is taken here; any other
-+ value is an error code and nothing is locked.
-+ @mode: not used by this function.
-+
-+ Returns @ok unchanged. */
-+static int
-+lock_tail(lock_stack * owner, int ok, znode_lock_mode mode)
-+{
-+ znode *node = owner->request.node;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* If we broke with (ok == 0) it means we can_lock, now do it. */
-+ if (ok == 0) {
-+ lock_object(owner);
-+ /* NOTE(review): 0 is presumably ZNODE_NO_LOCK -- confirm */
-+ owner->request.mode = 0;
-+ /* count a reference from lockhandle->node
-+
-+ znode was already referenced at the entry to this function,
-+ hence taking spin-lock here is not necessary (see comment
-+ in the zref()).
-+ */
-+ zref(node);
-+
-+ LOCK_CNT_INC(long_term_locked_znode);
-+ }
-+ spin_unlock_zlock(&node->lock);
-+ ON_DEBUG(check_lock_data());
-+ ON_DEBUG(check_lock_node_data(node));
-+ return ok;
-+}
-+
-+/*
-+ * version of longterm_lock_znode() optimized for the most common case: read
-+ * lock without any special flags. This is the kind of lock that any tree
-+ * traversal takes on the root node of the tree, which is very frequent.
-+ *
-+ * The request must already be stored in owner->request.
-+ *
-+ * Returns 1 when the caller has to fall back to the generic path of
-+ * longterm_lock_znode(); any other value (0 on success, or the error returned
-+ * by reiser4_try_capture() or can_lock_object()) is the final result of the
-+ * lock request.
-+ */
-+static int longterm_lock_tryfast(lock_stack * owner)
-+{
-+ int result;
-+ znode *node;
-+ zlock *lock;
-+
-+ node = owner->request.node;
-+ lock = &node->lock;
-+
-+ assert("nikita-3340", reiser4_schedulable());
-+ assert("nikita-3341", request_is_deadlock_safe(node,
-+ ZNODE_READ_LOCK,
-+ ZNODE_LOCK_LOPRI));
-+ /* first check, only used to filter out a dying node */
-+ spin_lock_zlock(lock);
-+ result = can_lock_object(owner);
-+ spin_unlock_zlock(lock);
-+
-+ if (likely(result != -EINVAL)) {
-+ /* capture is attempted under the znode spin lock, with the
-+ zlock guard dropped */
-+ spin_lock_znode(node);
-+ result = reiser4_try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0);
-+ spin_unlock_znode(node);
-+ spin_lock_zlock(lock);
-+ if (unlikely(result != 0)) {
-+ /* capture failed: cancel the request, lock_tail()
-+ below only drops the zlock guard and returns the
-+ error */
-+ owner->request.mode = 0;
-+ } else {
-+ /* the zlock guard was dropped in between, so the
-+ availability of the lock is checked again */
-+ result = can_lock_object(owner);
-+ if (unlikely(result == -E_REPEAT)) {
-+ /* fall back to longterm_lock_znode() */
-+ spin_unlock_zlock(lock);
-+ return 1;
-+ }
-+ }
-+ return lock_tail(owner, result, ZNODE_READ_LOCK);
-+ } else
-+ return 1;
-+}
-+
-+/* Locks given lock object: takes a long term lock of @mode on @node on behalf
-+ of the current thread and links it to @handle.
-+
-+ Returns 0 on success. Failures visible in this function are: -EINVAL when
-+ the node is dying, -E_REPEAT when a non-blocking request cannot be satisfied
-+ immediately, the error returned by reiser4_prepare_to_sleep() (-E_DEADLOCK)
-+ when the thread was asked to give up its locks, and any error returned by
-+ reiser4_try_capture(). When the thread had to sleep, the result is the
-+ ret_code stored into its request by the thread that completed it. */
-+int longterm_lock_znode(
-+ /* local link object (allocated by lock owner thread, usually on its own
-+ * stack) */
-+ lock_handle * handle,
-+ /* znode we want to lock. */
-+ znode * node,
-+ /* {ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}; */
-+ znode_lock_mode mode,
-+ /* bit mask of ZNODE_LOCK_* request flags; ZNODE_LOCK_HIPRI,
-+ * ZNODE_LOCK_NONBLOCK and ZNODE_LOCK_DONT_FUSE are tested below. */
-+ znode_lock_request request) {
-+ int ret;
-+ int hipri = (request & ZNODE_LOCK_HIPRI) != 0;
-+ int non_blocking = 0;
-+ int has_atom;
-+ txn_capture cap_flags;
-+ zlock *lock;
-+ txn_handle *txnh;
-+ tree_level level;
-+
-+ /* Get current process context */
-+ lock_stack *owner = get_current_lock_stack();
-+
-+ /* Check that the lock handle is initialized and isn't already being
-+ * used. */
-+ assert("jmacd-808", handle->owner == NULL);
-+ assert("nikita-3026", reiser4_schedulable());
-+ assert("nikita-3219", request_is_deadlock_safe(node, mode, request));
-+ assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0);
-+ /* long term locks are not allowed in the VM contexts (->writepage(),
-+ * prune_{d,i}cache()).
-+ *
-+ * FIXME this doesn't work due to unused-dentry-with-unlinked-inode
-+ * bug caused by d_splice_alias() only working for directories.
-+ */
-+ assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0));
-+ assert ("zam-1055", mode != ZNODE_NO_LOCK);
-+
-+ cap_flags = 0;
-+ if (request & ZNODE_LOCK_NONBLOCK) {
-+ cap_flags |= TXN_CAPTURE_NONBLOCKING;
-+ non_blocking = 1;
-+ }
-+
-+ if (request & ZNODE_LOCK_DONT_FUSE)
-+ cap_flags |= TXN_CAPTURE_DONT_FUSE;
-+
-+ /* If we are changing our process priority we must adjust a number
-+ of high priority owners for each znode that we already lock */
-+ if (hipri) {
-+ set_high_priority(owner);
-+ } else {
-+ set_low_priority(owner);
-+ }
-+
-+ /* NOTE(review): @level is assigned here but not read anywhere else in
-+ this function */
-+ level = znode_get_level(node);
-+
-+ /* Fill request structure with our values. */
-+ owner->request.mode = mode;
-+ owner->request.handle = handle;
-+ owner->request.node = node;
-+
-+ txnh = get_current_context()->trans;
-+ lock = &node->lock;
-+
-+ /* plain read lock without flags: try the fast path first; a positive
-+ result means "fall back to the generic path below" */
-+ if (mode == ZNODE_READ_LOCK && request == 0) {
-+ ret = longterm_lock_tryfast(owner);
-+ if (ret <= 0)
-+ return ret;
-+ }
-+
-+ has_atom = (txnh->atom != NULL);
-+
-+ /* Synchronize on node's zlock guard lock. */
-+ spin_lock_zlock(lock);
-+
-+ /* recursive write lock by the current owner is granted at once */
-+ if (znode_is_locked(node) &&
-+ mode == ZNODE_WRITE_LOCK && recursive(owner))
-+ return lock_tail(owner, 0, mode);
-+
-+ for (;;) {
-+ /* Check the lock's availability: if it is unavailable we get
-+ E_REPEAT, 0 indicates "can_lock", otherwise the node is
-+ invalid. */
-+ ret = can_lock_object(owner);
-+
-+ if (unlikely(ret == -EINVAL)) {
-+ /* @node is dying. Leave it alone. */
-+ break;
-+ }
-+
-+ if (unlikely(ret == -E_REPEAT && non_blocking)) {
-+ /* either locking of @node by the current thread will
-+ * lead to the deadlock, or lock modes are
-+ * incompatible. */
-+ break;
-+ }
-+
-+ assert("nikita-1844", (ret == 0)
-+ || ((ret == -E_REPEAT) && !non_blocking));
-+ /* If we can get the lock... Try to capture first before
-+ taking the lock. */
-+
-+ /* first handle commonest case where node and txnh are already
-+ * in the same atom. */
-+ /* safe to do without taking locks, because:
-+ *
-+ * 1. read of aligned word is atomic with respect to writes to
-+ * this word
-+ *
-+ * 2. false negatives are handled in reiser4_try_capture().
-+ *
-+ * 3. false positives are impossible.
-+ *
-+ * PROOF: left as an exercise to the curious reader.
-+ *
-+ * Just kidding. Here is one:
-+ *
-+ * At the time T0 txnh->atom is stored in txnh_atom.
-+ *
-+ * At the time T1 node->atom is stored in node_atom.
-+ *
-+ * At the time T2 we observe that
-+ *
-+ * txnh_atom != NULL && node_atom == txnh_atom.
-+ *
-+ * Imagine that at this moment we acquire node and txnh spin
-+ * lock in this order. Suppose that under spin lock we have
-+ *
-+ * node->atom != txnh->atom, (S1)
-+ *
-+ * at the time T3.
-+ *
-+ * txnh->atom != NULL still, because txnh is open by the
-+ * current thread.
-+ *
-+ * Suppose node->atom == NULL, that is, node was un-captured
-+ * between T1, and T3. But un-capturing of formatted node is
-+ * always preceded by the call to reiser4_invalidate_lock(),
-+ * which marks znode as JNODE_IS_DYING under zlock spin
-+ * lock. Contradiction, because can_lock_object() above checks
-+ * for JNODE_IS_DYING. Hence, node->atom != NULL at T3.
-+ *
-+ * Suppose that node->atom != node_atom, that is, atom, node
-+ * belongs to was fused into another atom: node_atom was fused
-+ * into node->atom. Atom of txnh was equal to node_atom at T2,
-+ * which means that under spin lock, txnh->atom == node->atom,
-+ * because txnh->atom can only follow fusion
-+ * chain. Contradicts S1.
-+ *
-+ * The same for hypothesis txnh->atom != txnh_atom. Hence,
-+ * node->atom == node_atom == txnh_atom == txnh->atom. Again
-+ * contradicts S1. Hence S1 is false. QED.
-+ *
-+ */
-+
-+ if (likely(has_atom && ZJNODE(node)->atom == txnh->atom)) {
-+ ;
-+ } else {
-+ /*
-+ * unlock zlock spin lock here. It is possible for
-+ * longterm_unlock_znode() to sneak in here, but there
-+ * is no harm: reiser4_invalidate_lock() will mark znode
-+ * as JNODE_IS_DYING and this will be noted by
-+ * can_lock_object() below.
-+ */
-+ spin_unlock_zlock(lock);
-+ spin_lock_znode(node);
-+ ret = reiser4_try_capture(ZJNODE(node), mode, cap_flags);
-+ spin_unlock_znode(node);
-+ spin_lock_zlock(lock);
-+ if (unlikely(ret != 0)) {
-+ /* In the failure case, the txnmgr releases
-+ the znode's lock (or in some cases, it was
-+ released a while ago). There's no need to
-+ reacquire it so we should return here,
-+ avoid releasing the lock. */
-+ owner->request.mode = 0;
-+ break;
-+ }
-+
-+ /* Check the lock's availability again -- this is
-+ because under some circumstances the capture code
-+ has to release and reacquire the znode spinlock. */
-+ ret = can_lock_object(owner);
-+ }
-+
-+ /* This time, a return of (ret == 0) means we can lock, so we
-+ should break out of the loop. */
-+ if (likely(ret != -E_REPEAT || non_blocking))
-+ break;
-+
-+ /* Lock is unavailable, we have to wait. */
-+ ret = reiser4_prepare_to_sleep(owner);
-+ if (unlikely(ret != 0))
-+ break;
-+
-+ assert_spin_locked(&(node->lock.guard));
-+ if (hipri) {
-+ /* If we are going in high priority direction then
-+ increase high priority requests counter for the
-+ node */
-+ lock->nr_hipri_requests++;
-+ if (mode == ZNODE_WRITE_LOCK)
-+ lock->nr_hipri_write_requests ++;
-+ /* If there are no high priority owners for a node,
-+ then immediately wake up low priority owners, so
-+ they can detect possible deadlock */
-+ if (lock->nr_hipri_owners == 0)
-+ wake_up_all_lopri_owners(node);
-+ }
-+ list_add_tail(&owner->requestors_link, &lock->requestors);
-+
-+ /* Ok, here we have prepared a lock request, so unlock
-+ a znode ... */
-+ spin_unlock_zlock(lock);
-+ /* ... and sleep */
-+ reiser4_go_to_sleep(owner);
-+ /* request.mode == ZNODE_NO_LOCK means that another thread
-+ completed the request for us (see dispatch_lock_requests()
-+ and invalidate_all_lock_requests()); it is tested once
-+ without and once with the zlock guard held */
-+ if (owner->request.mode == ZNODE_NO_LOCK)
-+ goto request_is_done;
-+ spin_lock_zlock(lock);
-+ if (owner->request.mode == ZNODE_NO_LOCK) {
-+ spin_unlock_zlock(lock);
-+ request_is_done:
-+ if (owner->request.ret_code == 0) {
-+ LOCK_CNT_INC(long_term_locked_znode);
-+ zref(node);
-+ }
-+ return owner->request.ret_code;
-+ }
-+ /* woken up without the request being completed: withdraw it
-+ and try again from the top of the loop */
-+ remove_lock_request(owner);
-+ }
-+
-+ return lock_tail(owner, ret, mode);
-+}
-+
-+/* Lock object invalidation means changing the lock object state to `INVALID'
-+ and making all other processes cancel their lock requests.
-+
-+ As asserted below, the caller must own @handle, hold the only write lock on
-+ the node, and hold the zlock guard of the node; the node must be marked
-+ JNODE_HEARD_BANSHEE and must not be connected to its left or right
-+ neighbor. The node is marked JNODE_IS_DYING, @handle is unlinked, all
-+ pending requests are failed with -EINVAL, and the zlock guard is released
-+ before returning. */
-+void reiser4_invalidate_lock(lock_handle * handle /* path to lock
-+ * owner and lock
-+ * object is being
-+ * invalidated. */ )
-+{
-+ znode *node = handle->node;
-+ lock_stack *owner = handle->owner;
-+
-+ assert("zam-325", owner == get_current_lock_stack());
-+ assert("zam-103", znode_is_write_locked(node));
-+ assert("nikita-1393", !ZF_ISSET(node, JNODE_LEFT_CONNECTED));
-+ assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED));
-+ assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert("nikita-3097", znode_is_wlocked_once(node));
-+ assert_spin_locked(&(node->lock.guard));
-+
-+ /* the signal delivered through this handle is no longer pending */
-+ if (handle->signaled)
-+ atomic_dec(&owner->nr_signaled);
-+
-+ ZF_SET(node, JNODE_IS_DYING);
-+ unlink_object(handle);
-+ /* the single write lock is gone: the node is unlocked now */
-+ node->lock.nr_readers = 0;
-+
-+ invalidate_all_lock_requests(node);
-+ spin_unlock_zlock(&node->lock);
-+}
-+
-+/* Initializes a lock_stack: empty list of locks, unlinked requestors link,
-+   unlocked guard, empty wait queue. A fresh lock stack starts with high
-+   priority (curpri == 1). */
-+void init_lock_stack(lock_stack * owner	/* pointer to allocated
-+					 * structure. */ )
-+{
-+	owner->curpri = 1;
-+	spin_lock_init(&owner->sguard);
-+	init_waitqueue_head(&owner->wait);
-+	INIT_LIST_HEAD(&owner->locks);
-+	INIT_LIST_HEAD(&owner->requestors_link);
-+}
-+
-+/* Initializes a lock object: all counters are zeroed, the guard is unlocked
-+   and both the owners and the requestors lists are empty. */
-+void reiser4_init_lock(zlock * lock	/* pointer to an allocated,
-+					 * uninitialized lock object
-+					 * structure. */ )
-+{
-+	memset(lock, 0, sizeof(*lock));
-+	spin_lock_init(&lock->guard);
-+	INIT_LIST_HEAD(&lock->owners);
-+	INIT_LIST_HEAD(&lock->requestors);
-+}
-+
-+/* Transfer a lock handle (presumably so that variables can be moved between stack and
-+ heap locations).
-+
-+ @new: unused lock handle (new->owner == NULL is asserted) that gets linked
-+ to the node and owner of @old.
-+ @old: lock handle owned by the current thread.
-+ @unlink_old: if non-zero, @old is unlinked and @new takes over its lock
-+ (move). If zero, @old stays linked and @new becomes one more lock on the
-+ same node (copy): the reader/writer counter, the high priority owners
-+ counter, the signal counter and the znode reference count are adjusted
-+ accordingly. */
-+static void
-+move_lh_internal(lock_handle * new, lock_handle * old, int unlink_old)
-+{
-+ znode *node = old->node;
-+ lock_stack *owner = old->owner;
-+ int signaled;
-+
-+ /* locks_list, modified by link_object() is not protected by
-+ anything. This is valid because only current thread ever modifies
-+ locks_list of its lock_stack.
-+ */
-+ assert("nikita-1827", owner == get_current_lock_stack());
-+ assert("nikita-1831", new->owner == NULL);
-+
-+ spin_lock_zlock(&node->lock);
-+
-+ /* remembered here because link_object() resets ->signaled of the
-+ handle it links */
-+ signaled = old->signaled;
-+ if (unlink_old) {
-+ unlink_object(old);
-+ } else {
-+ /* one more read lock, or one more level of write locking */
-+ if (node->lock.nr_readers > 0) {
-+ node->lock.nr_readers += 1;
-+ } else {
-+ node->lock.nr_readers -= 1;
-+ }
-+ /* the copy carries the signal of the original as well */
-+ if (signaled) {
-+ atomic_inc(&owner->nr_signaled);
-+ }
-+ if (owner->curpri) {
-+ node->lock.nr_hipri_owners += 1;
-+ }
-+ LOCK_CNT_INC(long_term_locked_znode);
-+
-+ /* the new handle holds its own reference to the znode */
-+ zref(node);
-+ }
-+ link_object(new, owner, node);
-+ new->signaled = signaled;
-+
-+ spin_unlock_zlock(&node->lock);
-+}
-+
-+/* Moves the lock held through @old to @new: @old is unlinked and becomes
-+ free, @new takes its place. @new must be an unused handle. */
-+void move_lh(lock_handle * new, lock_handle * old)
-+{
-+ move_lh_internal(new, old, /*unlink_old */ 1);
-+}
-+
-+/* Duplicates the lock held through @old: @old stays linked and @new becomes
-+ an additional lock of the current thread on the same node. @new must be an
-+ unused handle. */
-+void copy_lh(lock_handle * new, lock_handle * old)
-+{
-+ move_lh_internal(new, old, /*unlink_old */ 0);
-+}
-+
-+/* Returns non-zero while the lock stack of the current thread still has
-+   signals counted in nr_signaled. After getting -E_DEADLOCK we unlock znodes
-+   until this function returns false. */
-+int reiser4_check_deadlock(void)
-+{
-+	return atomic_read(&get_current_lock_stack()->nr_signaled) != 0;
-+}
-+
-+/* Before going to sleep we re-check "release lock" requests which might come
-+   from threads with hi-pri lock priorities.
-+
-+   Returns 0 if the thread may sleep, or -E_DEADLOCK (through RETERR()) if one
-+   or more "give me the lock" messages are counted in nr_signaled. */
-+int reiser4_prepare_to_sleep(lock_stack * owner)
-+{
-+	assert("nikita-1847", owner == get_current_lock_stack());
-+
-+	if (likely(atomic_read(&owner->nr_signaled) == 0))
-+		return 0;
-+
-+	/* only a low priority lock stack is expected to be signaled */
-+	assert("zam-959", !owner->curpri);
-+	return RETERR(-E_DEADLOCK);
-+}
-+
-+/* Wakes up a single thread: sets the ->wakeup flag of @owner, which is the
-+ condition reiser4_go_to_sleep() waits for, and wakes up its wait queue. */
-+void __reiser4_wake_up(lock_stack * owner)
-+{
-+ atomic_set(&owner->wakeup, 1);
-+ wake_up(&owner->wait);
-+}
-+
-+/* Puts a thread to sleep until the ->wakeup flag of @owner is set (see
-+ __reiser4_wake_up()); the flag is cleared again before returning. */
-+void reiser4_go_to_sleep(lock_stack * owner)
-+{
-+ /* Well, we might sleep here, so holding of any spinlocks is no-no */
-+ assert("nikita-3027", reiser4_schedulable());
-+
-+ wait_event(owner->wait, atomic_read(&owner->wakeup));
-+ /* consume the wake-up so that the next sleep blocks again */
-+ atomic_set(&owner->wakeup, 0);
-+}
-+
-+/* Returns 1 if @owner holds no long term locks (its list of locks is empty),
-+   0 otherwise. A clean lock stack must not have signals pending. */
-+int lock_stack_isclean(lock_stack * owner)
-+{
-+	if (!list_empty_careful(&owner->locks))
-+		return 0;
-+
-+	assert("zam-353", atomic_read(&owner->nr_signaled) == 0);
-+	return 1;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/*
-+ * debugging functions
-+ */
-+
-+/* checks that every entry of the list @head has non-NULL links and that its
-+   neighbours point back to it */
-+static void list_check(struct list_head *head)
-+{
-+	struct list_head *cur;
-+
-+	list_for_each(cur, head) {
-+		assert("", (cur->prev != NULL && cur->next != NULL &&
-+			    cur->prev->next == cur && cur->next->prev == cur));
-+	}
-+}
-+
-+/* check consistency of locking data-structures hanging off the @stack: the
-+ list of locks is verified under the lock stack spin lock */
-+static void check_lock_stack(lock_stack * stack)
-+{
-+ spin_lock_stack(stack);
-+ /* check that stack->locks is not corrupted */
-+ list_check(&stack->locks);
-+ spin_unlock_stack(stack);
-+}
-+
-+/* check consistency of locking data structures of the current context, i.e.
-+ of the lock stack embedded in it */
-+void check_lock_data(void)
-+{
-+ check_lock_stack(&get_current_context()->stack);
-+}
-+
-+/* check consistency of locking data structures for @node: both the owners and
-+ the requestors list of its lock are verified under the zlock guard, which
-+ is taken here (so the caller must not hold it) */
-+void check_lock_node_data(znode * node)
-+{
-+ spin_lock_zlock(&node->lock);
-+ list_check(&node->lock.owners);
-+ list_check(&node->lock.requestors);
-+ spin_unlock_zlock(&node->lock);
-+}
-+
-+/* check that given lock request is dead lock safe. This check is, of course,
-+ * not exhaustive.
-+ *
-+ * Returns 0 if a blocking high priority request for @node (with a non-zero
-+ * level) is issued while the current thread holds a lock on a node of a
-+ * higher level, 1 otherwise. Held nodes of level 0 are ignored. @mode is not
-+ * used. */
-+static int
-+request_is_deadlock_safe(znode * node, znode_lock_mode mode,
-+			 znode_lock_request request)
-+{
-+	lock_stack *owner = get_current_lock_stack();
-+	lock_handle *held;
-+
-+	/*
-+	 * only blocking hipri requests on nodes with a non-zero level are
-+	 * checked: such a request must not be issued when there are locked
-+	 * nodes at the higher levels.
-+	 */
-+	if (!(request & ZNODE_LOCK_HIPRI) || (request & ZNODE_LOCK_NONBLOCK) ||
-+	    znode_get_level(node) == 0)
-+		return 1;
-+
-+	list_for_each_entry(held, &owner->locks, locks_link) {
-+		znode *other = held->node;
-+
-+		if (znode_get_level(other) == 0)
-+			continue;
-+		if (znode_get_level(other) > znode_get_level(node))
-+			return 0;
-+	}
-+	return 1;
-+}
-+
-+#endif
-+
-+/* Returns a pointer to static storage holding the name of a lock mode, for
-+   debugging. For unknown modes the name is formatted into a static buffer
-+   shared by all callers, so the result is overwritten by the next such
-+   call. */
-+const char *lock_mode_name(znode_lock_mode lock /* lock mode to get name of */ )
-+{
-+	static char buf[30];
-+
-+	switch (lock) {
-+	case ZNODE_READ_LOCK:
-+		return "read";
-+	case ZNODE_WRITE_LOCK:
-+		return "write";
-+	default:
-+		sprintf(buf, "unknown: %i", lock);
-+		return buf;
-+	}
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 79
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/lock.h linux-2.6.24/fs/reiser4/lock.h
---- linux-2.6.24.orig/fs/reiser4/lock.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/lock.h 2008-01-25 11:39:06.948210780 +0300
-@@ -0,0 +1,249 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Long term locking data structures. See lock.c for details. */
-+
-+#ifndef __LOCK_H__
-+#define __LOCK_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/node/node.h"
-+#include "txnmgr.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-+#include <asm/atomic.h>
-+#include <linux/wait.h>
-+
-+/* Per-znode lock object */
-+struct zlock {
-+ /* Spin lock guarding this lock object ("zlock spin lock") */
-+ spinlock_t guard;
-+ /* The number of readers if positive; the number of recursively taken
-+ write locks if negative. Protected by zlock spin lock. */
-+ int nr_readers;
-+ /* A number of processes (lock_stacks) that have this object
-+ locked with high priority */
-+ unsigned nr_hipri_owners;
-+ /* A number of attempts to lock znode in high priority direction */
-+ unsigned nr_hipri_requests;
-+ /* A number of pending high priority requests that ask for a write
-+ lock */
-+ unsigned nr_hipri_write_requests;
-+ /* A linked list of lock_handle objects that contains pointers
-+ for all lock_stacks which have this lock object locked */
-+ struct list_head owners;
-+ /* A linked list of lock_stacks that wait for this lock */
-+ struct list_head requestors;
-+};
-+
-+/* Takes the guard spin lock of @lock and updates the lock counters used by
-+ the debugging assertions. */
-+static inline void spin_lock_zlock(zlock *lock)
-+{
-+ /* check that zlock is not locked */
-+ assert("", LOCK_CNT_NIL(spin_locked_zlock));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ spin_lock(&lock->guard);
-+
-+ LOCK_CNT_INC(spin_locked_zlock);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+/* Releases the guard spin lock of @lock taken by spin_lock_zlock(); the lock
-+ counters are decremented before the spin lock is dropped. */
-+static inline void spin_unlock_zlock(zlock *lock)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_zlock));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_zlock);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&lock->guard);
-+}
-+
-+/* Predicates on the state of a zlock, all derived from ->nr_readers: positive
-+ means read locked, negative means write locked (possibly recursively), zero
-+ means unlocked. */
-+#define lock_is_locked(lock) ((lock)->nr_readers != 0)
-+#define lock_is_rlocked(lock) ((lock)->nr_readers > 0)
-+#define lock_is_wlocked(lock) ((lock)->nr_readers < 0)
-+#define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1)
-+#define lock_can_be_rlocked(lock) ((lock)->nr_readers >=0)
-+/* A write request is compatible only with an unlocked lock; a read request is
-+ compatible with any lock that is not write locked. */
-+#define lock_mode_compatible(lock, mode) \
-+ (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \
-+ ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock)))
-+
-+/* Since we have R/W znode locks we need additional bidirectional `link'
-+ objects to implement n<->m relationship between lock owners and lock
-+ objects. We call them `lock handles'.
-+
-+ Locking: see lock.c/"SHORT-TERM LOCKING"
-+*/
-+struct lock_handle {
-+ /* This flag indicates that a signal to yield a lock was passed to
-+ lock owner and counted in owner->nr_signaled
-+
-+ Locking: this is accessed under spin lock on ->node.
-+ */
-+ int signaled;
-+ /* A link to owner of a lock */
-+ lock_stack *owner;
-+ /* A link to znode locked; NULL while the handle is not in use */
-+ znode *node;
-+ /* A list of all locks for a process */
-+ struct list_head locks_link;
-+ /* A list of all owners for a znode */
-+ struct list_head owners_link;
-+};
-+
-+/* Description of the lock request a lock stack is currently making */
-+struct lock_request {
-+ /* A pointer to uninitialized link object */
-+ lock_handle *handle;
-+ /* A pointer to the object we want to lock */
-+ znode *node;
-+ /* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK); set to
-+ ZNODE_NO_LOCK once the request has been completed on behalf of the
-+ requestor */
-+ znode_lock_mode mode;
-+ /* how dispatch_lock_requests() and invalidate_all_lock_requests()
-+ return the lock request result code to the sleeping requestor */
-+ int ret_code;
-+};
-+
-+/* A lock stack structure for accumulating locks owned by a process */
-+struct lock_stack {
-+ /* A guard lock protecting a lock stack */
-+ spinlock_t sguard;
-+ /* number of znodes which were requested by high priority processes */
-+ atomic_t nr_signaled;
-+ /* Current priority of a process
-+
-+ This is only accessed by the current thread and thus requires no
-+ locking.
-+ */
-+ int curpri;
-+ /* A list of all locks owned by this process. Elements can be added to
-+ * this list only by the current thread. ->node pointers in this list
-+ * can be only changed by the current thread. */
-+ struct list_head locks;
-+ /* When lock_stack waits for the lock, it puts itself on double-linked
-+ requestors list of that lock */
-+ struct list_head requestors_link;
-+ /* Current lock request info.
-+
-+ This is only accessed by the current thread and thus requires no
-+ locking.
-+
-+ NOTE(review): ->mode and ->ret_code are also written by
-+ dispatch_lock_requests() and invalidate_all_lock_requests() in
-+ lock.c when another thread completes the request, under the zlock
-+ guard of the requested node -- the statement above appears not to
-+ cover this case.
-+ */
-+ struct lock_request request;
-+ /* the following two fields are the lock stack's
-+ * synchronization object to use with the standard linux/wait.h
-+ * interface. See reiser4_go_to_sleep and __reiser4_wake_up for
-+ * usage details. */
-+ wait_queue_head_t wait;
-+ atomic_t wakeup;
-+#if REISER4_DEBUG
-+ int nr_locks; /* number of lock handles in the above list */
-+#endif
-+};
-+
-+/*
-+ User-visible znode locking functions
-+*/
-+
-+extern int longterm_lock_znode(lock_handle * handle,
-+ znode * node,
-+ znode_lock_mode mode,
-+ znode_lock_request request);
-+
-+extern void longterm_unlock_znode(lock_handle * handle);
-+
-+extern int reiser4_check_deadlock(void);
-+
-+extern lock_stack *get_current_lock_stack(void);
-+
-+extern void init_lock_stack(lock_stack * owner);
-+extern void reiser4_init_lock(zlock * lock);
-+
-+/* Initializes a lock handle so that it is recognized as unused
-+   (->node == NULL). Debug builds clear the whole handle and initialize its
-+   list links as well. */
-+static inline void init_lh(lock_handle *lh)
-+{
-+#if !REISER4_DEBUG
-+	lh->node = NULL;
-+#else
-+	memset(lh, 0, sizeof(*lh));
-+	INIT_LIST_HEAD(&lh->locks_link);
-+	INIT_LIST_HEAD(&lh->owners_link);
-+#endif
-+}
-+
-+/* Finishes the use of a lock handle: releases the long term lock it holds, if
-+   any. Does nothing for an unused handle (->node == NULL). */
-+static inline void done_lh(lock_handle *lh)
-+{
-+	assert("zam-342", lh != NULL);
-+
-+	if (lh->node == NULL)
-+		return;
-+
-+	longterm_unlock_znode(lh);
-+}
-+
-+extern void move_lh(lock_handle * new, lock_handle * old);
-+extern void copy_lh(lock_handle * new, lock_handle * old);
-+
-+extern int reiser4_prepare_to_sleep(lock_stack * owner);
-+extern void reiser4_go_to_sleep(lock_stack * owner);
-+extern void __reiser4_wake_up(lock_stack * owner);
-+
-+extern int lock_stack_isclean(lock_stack * owner);
-+
-+/* zlock object state check macros: only used in assertions. Both forms imply that the
-+ lock is held by the current thread. */
-+extern int znode_is_write_locked(const znode *);
-+extern void reiser4_invalidate_lock(lock_handle *);
-+
-+/* lock ordering is: first take zlock spin lock, then lock stack spin lock.
-+
-+ spin_ordering_pred_stack() is the precondition asserted by spin_lock_stack().
-+ It is the conjunction of LOCK_CNT_NIL() over the stack, txnmgr, inode,
-+ cbk cache and super eflush lock counters (presumably "counter is zero", i.e.
-+ none of these locks is currently held - confirm against LOCK_CNT_NIL()).
-+ The @stack argument itself is not examined. */
-+#define spin_ordering_pred_stack(stack) \
-+ (LOCK_CNT_NIL(spin_locked_stack) && \
-+ LOCK_CNT_NIL(spin_locked_txnmgr) && \
-+ LOCK_CNT_NIL(spin_locked_inode) && \
-+ LOCK_CNT_NIL(rw_locked_cbk_cache) && \
-+ LOCK_CNT_NIL(spin_locked_super_eflush) )
-+
-+/*
-+ * Take the spin lock (->sguard) guarding lock stack @stack. Asserts the lock
-+ * ordering predicate spin_ordering_pred_stack() first, and increments the
-+ * spin_locked_stack and spin_locked lock counters once the lock is held.
-+ */
-+static inline void spin_lock_stack(lock_stack *stack)
-+{
-+ assert("", spin_ordering_pred_stack(stack));
-+ spin_lock(&(stack->sguard));
-+ LOCK_CNT_INC(spin_locked_stack);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+/*
-+ * Release the spin lock (->sguard) of @stack; inverse of spin_lock_stack().
-+ * Asserts that the lock is held and that both lock counters are positive,
-+ * then decrements the counters before the lock is actually dropped.
-+ */
-+static inline void spin_unlock_stack(lock_stack *stack)
-+{
-+ assert_spin_locked(&(stack->sguard));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+ LOCK_CNT_DEC(spin_locked_stack);
-+ LOCK_CNT_DEC(spin_locked);
-+ spin_unlock(&(stack->sguard));
-+}
-+
-+/*
-+ * Locked wrapper around __reiser4_wake_up(): takes the spin lock of @owner,
-+ * calls __reiser4_wake_up(@owner) and releases the spin lock again.
-+ */
-+static inline void reiser4_wake_up(lock_stack * owner)
-+{
-+ spin_lock_stack(owner);
-+ __reiser4_wake_up(owner);
-+ spin_unlock_stack(owner);
-+}
-+
-+const char *lock_mode_name(znode_lock_mode lock);
-+
-+/*
-+ * Lock consistency checks. Debug builds declare real functions; in non-debug
-+ * builds both names expand to noop. The stub of check_lock_node_data() takes
-+ * one parameter, like the debug prototype, so that a call passing a node
-+ * compiles in both configurations (a zero-parameter macro would reject it).
-+ */
-+#if REISER4_DEBUG
-+extern void check_lock_data(void);
-+extern void check_lock_node_data(znode * node);
-+#else
-+#define check_lock_data() noop
-+#define check_lock_node_data(node) noop
-+#endif
-+
-+/* __LOCK_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/Makefile linux-2.6.24/fs/reiser4/Makefile
---- linux-2.6.24.orig/fs/reiser4/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/Makefile 2008-01-25 11:39:06.948210780 +0300
-@@ -0,0 +1,98 @@
-+#
-+# reiser4/Makefile
-+#
-+# Kbuild makefile of the reiser4 file system. reiser4.o is built when
-+# CONFIG_REISER4_FS is set (built-in for "y", module for "m") and is composed
-+# of the objects listed in reiser4-y: core objects first, then the plugin
-+# objects grouped by plugin directory. Lines holding only a backslash are
-+# visual group separators inside the single continued assignment.
-+#
-+
-+obj-$(CONFIG_REISER4_FS) += reiser4.o
-+
-+reiser4-y := \
-+ debug.o \
-+ jnode.o \
-+ znode.o \
-+ key.o \
-+ pool.o \
-+ tree_mod.o \
-+ estimate.o \
-+ carry.o \
-+ carry_ops.o \
-+ lock.o \
-+ tree.o \
-+ context.o \
-+ tap.o \
-+ coord.o \
-+ block_alloc.o \
-+ txnmgr.o \
-+ kassign.o \
-+ flush.o \
-+ wander.o \
-+ eottl.o \
-+ search.o \
-+ page_cache.o \
-+ seal.o \
-+ dscale.o \
-+ flush_queue.o \
-+ ktxnmgrd.o \
-+ blocknrset.o \
-+ super.o \
-+ super_ops.o \
-+ fsdata.o \
-+ export_ops.o \
-+ oid.o \
-+ tree_walk.o \
-+ inode.o \
-+ vfs_ops.o \
-+ as_ops.o \
-+ entd.o\
-+ readahead.o \
-+ status_flags.o \
-+ init_super.o \
-+ safe_link.o \
-+ \
-+ plugin/plugin.o \
-+ plugin/plugin_set.o \
-+ plugin/node/node.o \
-+ plugin/object.o \
-+ plugin/cluster.o \
-+ plugin/inode_ops.o \
-+ plugin/inode_ops_rename.o \
-+ plugin/file_ops.o \
-+ plugin/file_ops_readdir.o \
-+ plugin/file_plugin_common.o \
-+ plugin/file/file.o \
-+ plugin/file/tail_conversion.o \
-+ plugin/file/file_conversion.o \
-+ plugin/file/symlink.o \
-+ plugin/file/cryptcompress.o \
-+ plugin/dir_plugin_common.o \
-+ plugin/dir/hashed_dir.o \
-+ plugin/dir/seekable_dir.o \
-+ plugin/node/node40.o \
-+ \
-+ plugin/crypto/cipher.o \
-+ plugin/crypto/digest.o \
-+ \
-+ plugin/compress/compress.o \
-+ plugin/compress/compress_mode.o \
-+ \
-+ plugin/item/static_stat.o \
-+ plugin/item/sde.o \
-+ plugin/item/cde.o \
-+ plugin/item/blackbox.o \
-+ plugin/item/internal.o \
-+ plugin/item/tail.o \
-+ plugin/item/ctail.o \
-+ plugin/item/extent.o \
-+ plugin/item/extent_item_ops.o \
-+ plugin/item/extent_file_ops.o \
-+ plugin/item/extent_flush_ops.o \
-+ \
-+ plugin/hash.o \
-+ plugin/fibration.o \
-+ plugin/tail_policy.o \
-+ plugin/item/item.o \
-+ \
-+ plugin/security/perm.o \
-+ plugin/space/bitmap.o \
-+ \
-+ plugin/disk_format/disk_format40.o \
-+ plugin/disk_format/disk_format.o
-+
-diff -urN linux-2.6.24.orig/fs/reiser4/oid.c linux-2.6.24/fs/reiser4/oid.c
---- linux-2.6.24.orig/fs/reiser4/oid.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/oid.c 2008-01-25 11:39:06.952211810 +0300
-@@ -0,0 +1,141 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "debug.h"
-+#include "super.h"
-+#include "txnmgr.h"
-+
-+/* we used to have oid allocation plugin. It was removed because it
-+ was recognized as providing unneeded level of abstraction. If one
-+ ever will find it useful - look at yet_unneeded_abstractions/oid
-+*/
-+
-+/*
-+ * initialize in-memory data for oid allocator at @super. @nr_files and @next
-+ * are provided by disk format plugin that reads them from the disk during
-+ * mount.
-+ *
-+ * @next becomes the first oid oid_allocate() will hand out and @nr_files
-+ * seeds the in-use counter. Unlike the other functions in this file, the
-+ * fields are written without taking the super spin lock. Always returns 0.
-+ */
-+int oid_init_allocator(struct super_block *super, oid_t nr_files, oid_t next)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(super);
-+
-+ sbinfo->next_to_use = next;
-+ sbinfo->oids_in_use = nr_files;
-+ return 0;
-+}
-+
-+/*
-+ * Hand out the next free oid of @super and count it as in use. Both counters
-+ * are updated under the super spin lock. When the allocator is exhausted
-+ * (->next_to_use has reached ABSOLUTE_MAX_OID) nothing is changed and
-+ * ABSOLUTE_MAX_OID is returned.
-+ */
-+oid_t oid_allocate(struct super_block * super)
-+{
-+	reiser4_super_info_data *info = get_super_private(super);
-+	oid_t result = ABSOLUTE_MAX_OID;
-+
-+	spin_lock_reiser4_super(info);
-+	if (info->next_to_use != ABSOLUTE_MAX_OID) {
-+		result = info->next_to_use;
-+		info->next_to_use++;
-+		info->oids_in_use++;
-+	}
-+	spin_unlock_reiser4_super(info);
-+	return result;
-+}
-+
-+/*
-+ * Tell oid allocator that @oid is now free.
-+ *
-+ * The value of @oid is not used: only the in-use counter of @super is
-+ * decremented, under the super spin lock. Always returns 0.
-+ */
-+int oid_release(struct super_block *super, oid_t oid UNUSED_ARG)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ sbinfo->oids_in_use--;
-+ spin_unlock_reiser4_super(sbinfo);
-+ return 0;
-+}
-+
-+/*
-+ * return next @oid that would be allocated (i.e., returned by oid_allocate())
-+ * without actually allocating it. This is used by disk format plugin to save
-+ * oid allocator state on the disk.
-+ *
-+ * The value is sampled under the super spin lock; allocator state is not
-+ * modified.
-+ */
-+oid_t oid_next(const struct super_block * super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ oid_t oid;
-+
-+ sbinfo = get_super_private(super);
-+
-+ spin_lock_reiser4_super(sbinfo);
-+ oid = sbinfo->next_to_use;
-+ spin_unlock_reiser4_super(sbinfo);
-+ return oid;
-+}
-+
-+/*
-+ * returns number of currently used oids. This is used by statfs(2) to report
-+ * number of "inodes" and by disk format plugin to save oid allocator state on
-+ * the disk.
-+ *
-+ * The counter is sampled under the super spin lock. If it is below the
-+ * largest positive long it is returned as is, otherwise -1 is returned.
-+ */
-+long oids_used(const struct super_block *super)
-+{
-+	reiser4_super_info_data *sbinfo;
-+	oid_t used;
-+
-+	sbinfo = get_super_private(super);
-+
-+	spin_lock_reiser4_super(sbinfo);
-+	used = sbinfo->oids_in_use;
-+	spin_unlock_reiser4_super(sbinfo);
-+	/*
-+	 * ~0UL >> 1 is the largest positive long whatever the width of long.
-+	 * The bound must be computed in unsigned long: converting (long)~0
-+	 * to __u64 first sign-extends it, so shifting afterwards yields
-+	 * 2^63 - 1 even where long is 32 bits wide, and counts above the
-+	 * range of long would then be truncated by the cast below instead
-+	 * of being reported as -1.
-+	 */
-+	if (used < (__u64) (~0UL >> 1))
-+		return (long)used;
-+	else
-+		return (long)-1;
-+}
-+
-+/*
-+ * Count oid as allocated in atom. This is done after call to oid_allocate()
-+ * at the point when we are irrevocably committed to creation of the new file
-+ * (i.e., when oid allocation cannot be any longer rolled back due to some
-+ * error).
-+ *
-+ * Increments ->nr_objects_created of the current atom.
-+ */
-+void oid_count_allocated(void)
-+{
-+ txn_atom *atom;
-+
-+ /* presumably returns the current atom with its spin lock held (going by
-+ * the name); the lock is dropped by spin_unlock_atom() below */
-+ atom = get_current_atom_locked();
-+ atom->nr_objects_created++;
-+ spin_unlock_atom(atom);
-+}
-+
-+/*
-+ * Count oid as free in atom. This is done after call to oid_release() at the
-+ * point when we are irrevocably committed to the deletion of the file (i.e.,
-+ * when oid release cannot be any longer rolled back due to some error).
-+ *
-+ * Increments ->nr_objects_deleted of the current atom.
-+ */
-+void oid_count_released(void)
-+{
-+ txn_atom *atom;
-+
-+ /* presumably returns the current atom with its spin lock held (going by
-+ * the name); the lock is dropped by spin_unlock_atom() below */
-+ atom = get_current_atom_locked();
-+ atom->nr_objects_deleted++;
-+ spin_unlock_atom(atom);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/page_cache.c linux-2.6.24/fs/reiser4/page_cache.c
---- linux-2.6.24.orig/fs/reiser4/page_cache.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/page_cache.c 2008-01-25 11:54:46.665843146 +0300
-@@ -0,0 +1,714 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Memory pressure hooks. Fake inodes handling. */
-+
-+/* GLOSSARY
-+
-+ . Formatted and unformatted nodes.
-+ Elements of reiser4 balanced tree to store data and metadata.
-+ Unformatted nodes are pointed to by extent pointers. Such nodes
-+ are used to store data of large objects. Unlike unformatted nodes,
-+ formatted ones have associated format described by node4X plugin.
-+
-+ . Jnode (or journal node)
-+ The in-memory header which is used to track formatted and unformatted
-+ nodes, bitmap nodes, etc. In particular, jnodes are used to track
-+ transactional information associated with each block(see reiser4/jnode.c
-+ for details).
-+
-+ . Znode
-+ The in-memory header which is used to track formatted nodes. Contains
-+ embedded jnode (see reiser4/znode.c for details).
-+*/
-+
-+/* We store all file system meta data (and data, of course) in the page cache.
-+
-+ What does this mean? Instead of using bread/brelse we create special
-+ "fake" inode (one per super block) and store content of formatted nodes
-+ into pages bound to this inode in the page cache. In newer kernels bread()
-+ already uses inode attached to block device (bd_inode). Advantage of having
-+ our own fake inode is that we can install appropriate methods in its
-+ address_space operations. Such methods are called by VM on memory pressure
-+ (or during background page flushing) and we can use them to react
-+ appropriately.
-+
-+ In initial version we only support one block per page. Support for multiple
-+ blocks per page is complicated by relocation.
-+
-+ To each page, used by reiser4, jnode is attached. jnode is analogous to
-+ buffer head. Difference is that jnode is bound to the page permanently:
-+ jnode cannot be removed from memory until its backing page is.
-+
-+ jnode contains pointer to page (->pg field) and page contains pointer to
-+ jnode in ->private field. Pointer from jnode to page is protected by
-+ jnode's spinlock and pointer from page to jnode is protected by page lock
-+ (PG_locked bit). Lock ordering is: first take page lock, then jnode spin
-+ lock. To go into reverse direction use jnode_lock_page() function that uses
-+ standard try-lock-and-release device.
-+
-+ Properties:
-+
-+ 1. when jnode-to-page mapping is established (by jnode_attach_page()), page
-+ reference counter is increased.
-+
-+ 2. when jnode-to-page mapping is destroyed (by page_clear_jnode()), page
-+ reference counter is decreased.
-+
-+ 3. on jload() reference counter on jnode page is increased, page is
-+ kmapped and `referenced'.
-+
-+ 4. on jrelse() inverse operations are performed.
-+
-+ 5. kmapping/kunmapping of unformatted pages is done by read/write methods.
-+
-+ DEADLOCKS RELATED TO MEMORY PRESSURE. [OUTDATED. Only interesting
-+ historically.]
-+
-+ [In the following discussion, `lock' invariably means long term lock on
-+ znode.] (What about page locks?)
-+
-+ There is some special class of deadlock possibilities related to memory
-+ pressure. Locks acquired by other reiser4 threads are accounted for in
-+ deadlock prevention mechanism (lock.c), but when ->vm_writeback() is
-+ invoked additional hidden arc is added to the locking graph: thread that
-+ tries to allocate memory waits for ->vm_writeback() to finish. If this
-+ thread keeps lock and ->vm_writeback() tries to acquire this lock, deadlock
-+ prevention is useless.
-+
-+ Another related problem is possibility for ->vm_writeback() to run out of
-+ memory itself. This is not a problem for ext2 and friends, because their
-+ ->vm_writeback() don't allocate much memory, but reiser4 flush is
-+ definitely able to allocate huge amounts of memory.
-+
-+ It seems that there is no reliable way to cope with the problems above.
-+ Instead it was decided that ->vm_writeback() (as invoked in the kswapd
-+ context) wouldn't perform any flushing itself, but rather should just wake
-+ up some auxiliary thread dedicated for this purpose (or, the same thread
-+ that does periodic commit of old atoms (ktxnmgrd.c)).
-+
-+ Details:
-+
-+ 1. Page is called `reclaimable' against particular reiser4 mount F if this
-+ page can be ultimately released by try_to_free_pages() under presumptions
-+ that:
-+
-+ a. ->vm_writeback() for F is no-op, and
-+
-+ b. none of the threads accessing F are making any progress, and
-+
-+ c. other reiser4 mounts obey the same memory reservation protocol as F
-+ (described below).
-+
-+ For example, clean un-pinned page, or page occupied by ext2 data are
-+ reclaimable against any reiser4 mount.
-+
-+ When there is more than one reiser4 mount in a system, condition (c) makes
-+ reclaim-ability not easily verifiable beyond trivial cases mentioned above.
-+
-+ THIS COMMENT IS VALID FOR "MANY BLOCKS ON PAGE" CASE
-+
-+ Fake inode is used to bind formatted nodes and each node is indexed within
-+ fake inode by its block number. If block size is smaller than page size, it
-+ may so happen that block mapped to the page with formatted node is occupied
-+ by unformatted node or is unallocated. This leads to some complications,
-+ because flushing whole page can lead to an incorrect overwrite of
-+ unformatted node that, moreover, can be cached in some other place as
-+ part of the file body. To avoid this, buffers for unformatted nodes are
-+ never marked dirty. Also pages in the fake inode are never marked dirty.
-+ This rules out usage of ->writepage() as memory pressure hook. Instead
-+ ->releasepage() is used.
-+
-+ Josh is concerned that page->buffer is going to die. This should not pose
-+ significant problem though, because we need to add some data structures to
-+ the page anyway (jnode) and all necessary book keeping can be put there.
-+
-+*/
-+
-+/* Life cycle of pages/nodes.
-+
-+ jnode contains reference to page and page contains reference back to
-+ jnode. This reference is counted in page ->count. Thus, page bound to jnode
-+ cannot be released back into free pool.
-+
-+ 1. Formatted nodes.
-+
-+ 1. formatted node is represented by znode. When new znode is created its
-+ ->pg pointer is NULL initially.
-+
-+ 2. when node content is loaded into znode (by call to zload()) for the
-+ first time following happens (in call to ->read_node() or
-+ ->allocate_node()):
-+
-+ 1. new page is added to the page cache.
-+
-+ 2. this page is attached to znode and its ->count is increased.
-+
-+ 3. page is kmapped.
-+
-+ 3. if more calls to zload() follow (without corresponding zrelses), page
-+ counter is left intact and in its stead ->d_count is increased in znode.
-+
-+ 4. each call to zrelse decreases ->d_count. When ->d_count drops to zero
-+ ->release_node() is called and page is kunmapped as result.
-+
-+ 5. at some moment node can be captured by a transaction. Its ->x_count
-+ is then increased by transaction manager.
-+
-+ 6. if node is removed from the tree (empty node with JNODE_HEARD_BANSHEE
-+ bit set) following will happen (also see comment at the top of znode.c):
-+
-+ 1. when last lock is released, node will be uncaptured from
-+ transaction. This releases the reference that transaction manager acquired
-+ at the step 5.
-+
-+ 2. when last reference is released, zput() detects that node is
-+ actually deleted and calls ->delete_node()
-+ operation. page_cache_delete_node() implementation detaches jnode from
-+ page and releases page.
-+
-+ 7. otherwise (node wasn't removed from the tree), last reference to
-+ znode will be released after transaction manager committed transaction
-+ node was in. This implies squallocing of this node (see
-+ flush.c). Nothing special happens at this point. Znode is still in the
-+ hash table and page is still attached to it.
-+
-+ 8. znode is actually removed from the memory because of the memory
-+ pressure, or during umount (znodes_tree_done()). Anyway, znode is
-+ removed by the call to zdrop(). At this moment, page is detached from
-+ znode and removed from the inode address space.
-+
-+*/
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "super.h"
-+#include "entd.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/swap.h> /* for struct page */
-+#include <linux/pagemap.h>
-+#include <linux/bio.h>
-+#include <linux/writeback.h>
-+#include <linux/blkdev.h>
-+
-+static struct bio *page_bio(struct page *, jnode *, int rw, gfp_t gfp);
-+
-+static struct address_space_operations formatted_fake_as_ops;
-+
-+static const oid_t fake_ino = 0x1;
-+static const oid_t bitmap_ino = 0x2;
-+static const oid_t cc_ino = 0x3;
-+
-+/*
-+ * Finish set-up of freshly obtained inode @fake: install the address space
-+ * operations for formatted nodes on its mapping, publish it through *@pfake
-+ * and clear its "new" state with unlock_new_inode(). @fake is asserted to be
-+ * I_NEW. @super is not used.
-+ */
-+static void
-+init_fake_inode(struct super_block *super, struct inode *fake,
-+ struct inode **pfake)
-+{
-+ assert("nikita-2168", fake->i_state & I_NEW);
-+ fake->i_mapping->a_ops = &formatted_fake_as_ops;
-+ *pfake = fake;
-+ /* NOTE-NIKITA something else? */
-+ unlock_new_inode(fake);
-+}
-+
-+/**
-+ * reiser4_init_formatted_fake - set up the fake, bitmap and cc inodes
-+ * @super: super block the inodes are obtained for
-+ *
-+ * Obtains with iget_locked() the three special inodes (fake_ino, bitmap_ino
-+ * and cc_ino, in this order), initializes each one with init_fake_inode() and
-+ * stores it in the private data of @super. Returns 0 on success. If one of
-+ * the iget_locked() calls fails, the inodes obtained so far are released with
-+ * iput(), their slots are reset to NULL and -ENOMEM is returned.
-+ */
-+int reiser4_init_formatted_fake(struct super_block *super)
-+{
-+	reiser4_super_info_data *sinfo;
-+	struct inode *inode;
-+
-+	assert("nikita-1703", super != NULL);
-+
-+	sinfo = get_super_private_nocheck(super);
-+
-+	/* inode formatted nodes are bound to */
-+	inode = iget_locked(super, oid_to_ino(fake_ino));
-+	if (inode == NULL)
-+		return RETERR(-ENOMEM);
-+	init_fake_inode(super, inode, &sinfo->fake);
-+
-+	/* inode for bitmaps */
-+	inode = iget_locked(super, oid_to_ino(bitmap_ino));
-+	if (inode == NULL) {
-+		iput(sinfo->fake);
-+		sinfo->fake = NULL;
-+		return RETERR(-ENOMEM);
-+	}
-+	init_fake_inode(super, inode, &sinfo->bitmap);
-+
-+	/* third special inode, cc_ino */
-+	inode = iget_locked(super, oid_to_ino(cc_ino));
-+	if (inode == NULL) {
-+		iput(sinfo->fake);
-+		iput(sinfo->bitmap);
-+		sinfo->fake = NULL;
-+		sinfo->bitmap = NULL;
-+		return RETERR(-ENOMEM);
-+	}
-+	init_fake_inode(super, inode, &sinfo->cc);
-+	return 0;
-+}
-+
-+/**
-+ * reiser4_done_formatted_fake - release the fake, bitmap and cc inodes
-+ * @super: super block whose special inodes are released
-+ *
-+ * For each of the ->fake, ->bitmap and ->cc slots of the private data of
-+ * @super (in this order): if the slot is set, the inode is released with
-+ * iput() and the slot is reset to NULL. Slots that are already NULL are
-+ * skipped.
-+ */
-+void reiser4_done_formatted_fake(struct super_block *super)
-+{
-+	reiser4_super_info_data *sinfo;
-+	struct inode **slots[3];
-+	int i;
-+
-+	sinfo = get_super_private_nocheck(super);
-+
-+	slots[0] = &sinfo->fake;
-+	slots[1] = &sinfo->bitmap;
-+	slots[2] = &sinfo->cc;
-+
-+	for (i = 0; i < 3; ++i) {
-+		if (*slots[i] == NULL)
-+			continue;
-+		iput(*slots[i]);
-+		*slots[i] = NULL;
-+	}
-+}
-+
-+/*
-+ * Wait until writeback of locked @page completes. The page lock is dropped
-+ * for the duration of the wait and re-taken afterwards; the cycle repeats
-+ * while the page is found under writeback again after re-locking. On return
-+ * @page is locked and not under writeback.
-+ */
-+void reiser4_wait_page_writeback(struct page *page)
-+{
-+ assert("zam-783", PageLocked(page));
-+
-+ do {
-+ unlock_page(page);
-+ wait_on_page_writeback(page);
-+ lock_page(page);
-+ } while (PageWriteback(page));
-+}
-+
-+/* return tree @page is in: the ->tree embedded in the private data of the
-+ super block reached through page->mapping->host->i_sb. @page must not be
-+ NULL; ->mapping and ->host are dereferenced without checks. */
-+reiser4_tree *reiser4_tree_by_page(const struct page *page /* page to query */ )
-+{
-+ assert("nikita-2461", page != NULL);
-+ return &get_super_private(page->mapping->host->i_sb)->tree;
-+}
-+
-+/* completion handler for single page bio-based read.
-+
-+ mpage_end_io_read() would also do. But it's static.
-+
-+ Looks at the page of the first bio segment only. If BIO_UPTODATE is set in
-+ the bio flags the page is marked up-to-date, otherwise its up-to-date bit
-+ is cleared and the page error bit is set. In both cases the page is then
-+ unlocked and the bio reference is dropped. @err is not used.
-+*/
-+static void
-+end_bio_single_page_read(struct bio *bio, int err UNUSED_ARG)
-+{
-+ struct page *page;
-+
-+ page = bio->bi_io_vec[0].bv_page;
-+
-+ if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+ SetPageUptodate(page);
-+ } else {
-+ ClearPageUptodate(page);
-+ SetPageError(page);
-+ }
-+ unlock_page(page);
-+ bio_put(bio);
-+}
-+
-+/* completion handler for single page bio-based write.
-+
-+ mpage_end_io_write() would also do. But it's static.
-+
-+ Looks at the page of the first bio segment only. The page error bit is set
-+ if BIO_UPTODATE is not set in the bio flags. Writeback on the page is then
-+ ended and the bio reference is dropped. The page is not unlocked here:
-+ reiser4_page_io() unlocks it before submitting a write. @err is not used.
-+*/
-+static void
-+end_bio_single_page_write(struct bio *bio, int err UNUSED_ARG)
-+{
-+ struct page *page;
-+
-+ page = bio->bi_io_vec[0].bv_page;
-+
-+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-+ SetPageError(page);
-+ end_page_writeback(page);
-+ bio_put(bio);
-+}
-+
-+/* ->readpage() method for formatted nodes.
-+
-+ Submits a single page read for @page through reiser4_page_io(), using the
-+ jnode stored in the page private field and the gfp mask of the current
-+ reiser4 context. The page is asserted to have a jnode attached. @f is not
-+ used. Returns the result of reiser4_page_io(). */
-+static int formatted_readpage(struct file *f UNUSED_ARG,
-+ struct page *page /* page to read */ )
-+{
-+ assert("nikita-2412", PagePrivate(page) && jprivate(page));
-+ return reiser4_page_io(page, jprivate(page), READ,
-+ reiser4_ctx_gfp_mask_get());
-+}
-+
-+/**
-+ * reiser4_page_io - submit single-page bio request
-+ * @page: page to perform io for
-+ * @node: jnode of page
-+ * @rw: read or write
-+ * @gfp: gfp mask for bio allocation
-+ *
-+ * Submits single page read or write. @page must be locked by the caller.
-+ *
-+ * Page lock handling: for a write the page is put under writeback and
-+ * unlocked before the bio is submitted; for a read the page stays locked and
-+ * is unlocked by the read completion handler. The page is also unlocked when
-+ * nothing is submitted (write on a read-only super block, or bio
-+ * construction failure).
-+ *
-+ * Returns 0 if the bio was submitted or the write was skipped because the
-+ * super block is read-only, otherwise the error page_bio() failed with.
-+ */
-+int reiser4_page_io(struct page *page, jnode *node, int rw, gfp_t gfp)
-+{
-+ struct bio *bio;
-+ int result;
-+
-+ assert("nikita-2094", page != NULL);
-+ assert("nikita-2226", PageLocked(page));
-+ assert("nikita-2634", node != NULL);
-+ assert("nikita-2893", rw == READ || rw == WRITE);
-+
-+ /* any non-zero @rw is treated as a write here: writes to a read-only
-+ * file system are silently dropped and reported as success */
-+ if (rw) {
-+ if (unlikely(page->mapping->host->i_sb->s_flags & MS_RDONLY)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ }
-+
-+ bio = page_bio(page, node, rw, gfp);
-+ if (!IS_ERR(bio)) {
-+ if (rw == WRITE) {
-+ /* writeback bit is set before the page lock is dropped */
-+ set_page_writeback(page);
-+ unlock_page(page);
-+ }
-+ reiser4_submit_bio(rw, bio);
-+ result = 0;
-+ } else {
-+ unlock_page(page);
-+ result = PTR_ERR(bio);
-+ }
-+
-+ return result;
-+}
-+
-+/* helper function to construct bio for page
-+
-+ Allocates a one-segment bio with @gfp, points it at the block device of the
-+ super block @page belongs to and at the sector of the io block of @node,
-+ adds @page to it and installs the read or write completion handler
-+ according to @rw.
-+
-+ Returns the bio on success. On failure returns ERR_PTR(-ENOMEM) if the bio
-+ could not be allocated, or ERR_PTR(-EINVAL) if @page could not be added to
-+ it; in the latter case the bio is released before returning, so nothing is
-+ left allocated on any failure path.
-+*/
-+static struct bio *page_bio(struct page *page, jnode * node, int rw, gfp_t gfp)
-+{
-+	struct bio *bio;
-+	assert("nikita-2092", page != NULL);
-+	assert("nikita-2633", node != NULL);
-+
-+	/* Simple implementation in the assumption that blocksize == pagesize.
-+
-+	   We only have to submit one block, but submit_bh() will allocate bio
-+	   anyway, so lets use all the bells-and-whistles of bio code.
-+	 */
-+
-+	bio = bio_alloc(gfp, 1);
-+	if (bio != NULL) {
-+		int blksz;
-+		struct super_block *super;
-+		reiser4_block_nr blocknr;
-+
-+		super = page->mapping->host->i_sb;
-+		assert("nikita-2029", super != NULL);
-+		blksz = super->s_blocksize;
-+		assert("nikita-2028", blksz == (int)PAGE_CACHE_SIZE);
-+
-+		/* io block number is sampled under the jnode spin lock */
-+		spin_lock_jnode(node);
-+		blocknr = *jnode_get_io_block(node);
-+		spin_unlock_jnode(node);
-+
-+		assert("nikita-2275", blocknr != (reiser4_block_nr) 0);
-+		assert("nikita-2276", !reiser4_blocknr_is_fake(&blocknr));
-+
-+		bio->bi_bdev = super->s_bdev;
-+		/* fill bio->bi_sector before calling bio_add_page(), because
-+		 * q->merge_bvec_fn may want to inspect it (see
-+		 * drivers/md/linear.c:linear_mergeable_bvec() for example. */
-+		/* block number converted to 512-byte sectors */
-+		bio->bi_sector = blocknr * (blksz >> 9);
-+
-+		if (!bio_add_page(bio, page, blksz, 0)) {
-+			warning("nikita-3452",
-+				"Single page bio cannot be constructed");
-+			/* the caller only gets an error pointer and cannot
-+			 * release the bio: drop the reference obtained from
-+			 * bio_alloc() here, otherwise the bio is leaked */
-+			bio_put(bio);
-+			return ERR_PTR(RETERR(-EINVAL));
-+		}
-+
-+		/* bio -> bi_idx is filled by bio_init() */
-+		bio->bi_end_io = (rw == READ) ?
-+		    end_bio_single_page_read : end_bio_single_page_write;
-+
-+		return bio;
-+	} else
-+		return ERR_PTR(RETERR(-ENOMEM));
-+}
-+
-+/* this function is internally called by jnode_make_dirty()
-+
-+ Sets the dirty bit of @page. If the bit was not set before, the
-+ NR_FILE_DIRTY zone counter is bumped (when the mapping accounts dirty
-+ pages) and the host inode is marked I_DIRTY_PAGES. @page must have a
-+ mapping, otherwise BUG_ON() fires. Always returns 0. */
-+int reiser4_set_page_dirty_internal(struct page *page)
-+{
-+ struct address_space *mapping;
-+
-+ mapping = page->mapping;
-+ BUG_ON(mapping == NULL);
-+
-+ /* accounting is done only on the clean -> dirty transition */
-+ if (!TestSetPageDirty(page)) {
-+ if (mapping_cap_account_dirty(mapping))
-+ inc_zone_page_state(page, NR_FILE_DIRTY);
-+
-+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-+ }
-+
-+ /* znode must be dirty ? */
-+ /* the check applies only to pages of the fake inode of the super block */
-+ if (mapping->host == reiser4_get_super_fake(mapping->host->i_sb))
-+ assert("", JF_ISSET(jprivate(page), JNODE_DIRTY));
-+ return 0;
-+}
-+
-+/* Compiled out. The only reference to can_hit_entd() is the commented-out
-+ assertion in reiser4_writepage(). Returns 1 when there is no valid reiser4
-+ context or the context belongs to another super block; returns 0 when the
-+ caller is the entd task of @s, holds long term locks, or has an atom;
-+ returns 1 otherwise. */
-+#if 0
-+static int can_hit_entd(reiser4_context *ctx, struct super_block *s)
-+{
-+ if (ctx == NULL || ((unsigned long)ctx->magic) != context_magic)
-+ return 1;
-+ if (ctx->super != s)
-+ return 1;
-+ if (get_super_private(s)->entd.tsk == current)
-+ return 0;
-+ if (!lock_stack_isclean(&ctx->stack))
-+ return 0;
-+ if (ctx->trans->atom != NULL)
-+ return 0;
-+ return 1;
-+}
-+#endif
-+
-+/**
-+ * reiser4_writepage - writepage of struct address_space_operations
-+ * @page: page to write
-+ * @wbc: writeback control, passed through to write_page_by_ent()
-+ *
-+ * Common memory pressure notification. @page must be locked. All work is
-+ * delegated to write_page_by_ent(), whose result is returned.
-+ */
-+int reiser4_writepage(struct page *page,
-+ struct writeback_control *wbc)
-+{
-+ struct super_block *s;
-+ reiser4_context *ctx;
-+
-+ assert("vs-828", PageLocked(page));
-+
-+ /* NOTE(review): @s and @ctx are consumed only by the commented-out
-+ * assertion below and are otherwise unused */
-+ s = page->mapping->host->i_sb;
-+ ctx = get_current_context_check();
-+
-+ //assert("", can_hit_entd(ctx, s));
-+ return write_page_by_ent(page, wbc);
-+}
-+
-+/* ->set_page_dirty() method of formatted address_space.
-+
-+ Not expected to be called: the body hits BUG() unconditionally, so the
-+ return statement is reached only if BUG() returns. */
-+static int formatted_set_page_dirty(struct page *page)
-+{
-+ assert("nikita-2173", page != NULL);
-+ BUG();
-+ return __set_page_dirty_nobuffers(page);
-+}
-+
-+/* writepages method of address space operations in reiser4 is used to involve
-+ into transactions pages which are dirtied via mmap. Only regular files can
-+ have such pages. Fake inode is used to access formatted nodes via page
-+ cache. As formatted nodes can never be mmaped, fake inode's writepages has
-+ nothing to do.
-+
-+ Both arguments are ignored; always returns 0. */
-+static int
-+writepages_fake(struct address_space *mapping, struct writeback_control *wbc)
-+{
-+ return 0;
-+}
-+
-+/* address space operations for the fake inode. Installed on the inode
-+ mapping by init_fake_inode(). Methods set to NULL below are not provided
-+ for this address space. */
-+static struct address_space_operations formatted_fake_as_ops = {
-+ /* Perform a writeback of a single page as a memory-freeing
-+ * operation. */
-+ .writepage = reiser4_writepage,
-+ /* this is called to read formatted node */
-+ .readpage = formatted_readpage,
-+ /* ->sync_page() method of fake inode address space operations. Called
-+ from wait_on_page() and lock_page().
-+
-+ This is a most annoyingly misnamed method. Actually it is called
-+ from wait_on_page_bit() and lock_page() and its purpose is to
-+ actually start io by jabbing device drivers.
-+ */
-+ .sync_page = block_sync_page,
-+ /* Write back some dirty pages from this mapping. Called from sync.
-+ called during sync (pdflush) */
-+ .writepages = writepages_fake,
-+ /* Set a page dirty */
-+ .set_page_dirty = formatted_set_page_dirty,
-+ /* used for read-ahead. Not applicable */
-+ .readpages = NULL,
-+ .prepare_write = NULL,
-+ .commit_write = NULL,
-+ .bmap = NULL,
-+ /* called just before page is being detached from inode mapping and
-+ removed from memory. Called on truncate, cut/squeeze, and
-+ umount. */
-+ .invalidatepage = reiser4_invalidatepage,
-+ /* this is called by shrink_cache() so that file system can try to
-+ release objects (jnodes, buffers, journal heads) attached to page
-+ and, may be, make page itself free-able.
-+ */
-+ .releasepage = reiser4_releasepage,
-+ .direct_IO = NULL
-+};
-+
-+/* called just before page is released (no longer used by reiser4). Callers:
-+ jdelete() and extent2tail().
-+
-+ @page must be locked. Its dirty and up-to-date bits are cleared (and the
-+ "skipped" bit too, on kernels that define PG_skipped), then the page is
-+ unlocked. */
-+void reiser4_drop_page(struct page *page)
-+{
-+ assert("nikita-2181", PageLocked(page));
-+ clear_page_dirty_for_io(page);
-+ ClearPageUptodate(page);
-+#if defined(PG_skipped)
-+ ClearPageSkipped(page);
-+#endif
-+ unlock_page(page);
-+}
-+
-+#define JNODE_GANG_SIZE (16)
-+
-+/* find all jnodes from range specified and invalidate them.
-+
-+ Walks the jnode radix tree of @inode in gangs of JNODE_GANG_SIZE, starting
-+ at page index @from, and processes every jnode whose index is below
-+ @from + @count: the jnode is marked JNODE_HEARD_BANSHEE, uncaptured,
-+ unhashed and its reference is put. Does nothing for files of the
-+ cryptcompress file plugin. Returns the number of jnodes processed. */
-+static int
-+truncate_jnodes_range(struct inode *inode, pgoff_t from, pgoff_t count)
-+{
-+ reiser4_inode *info;
-+ int truncated_jnodes;
-+ reiser4_tree *tree;
-+ unsigned long index;
-+ unsigned long end;
-+
-+ if (inode_file_plugin(inode) ==
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
-+ /*
-+ * No need to get rid of jnodes here: if the single jnode of
-+ * page cluster did not have page, then it was found and killed
-+ * before in
-+ * truncate_complete_page_cluster()->jput()->jput_final(),
-+ * otherwise it will be dropped by reiser4_invalidatepage()
-+ */
-+ return 0;
-+ truncated_jnodes = 0;
-+
-+ info = reiser4_inode_data(inode);
-+ tree = reiser4_tree_by_inode(inode);
-+
-+ index = from;
-+ end = from + count;
-+
-+ while (1) {
-+ jnode *gang[JNODE_GANG_SIZE];
-+ int taken;
-+ int i;
-+ jnode *node;
-+
-+ assert("nikita-3466", index <= end);
-+
-+ /* phase 1, under the tree read lock: look up the next gang and
-+ * take a reference on every jnode inside the range; slots of
-+ * jnodes at or beyond @end are cleared */
-+ read_lock_tree(tree);
-+ taken =
-+ radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info),
-+ (void **)gang, index,
-+ JNODE_GANG_SIZE);
-+ for (i = 0; i < taken; ++i) {
-+ node = gang[i];
-+ if (index_jnode(node) < end)
-+ jref(node);
-+ else
-+ gang[i] = NULL;
-+ }
-+ read_unlock_tree(tree);
-+
-+ /* phase 2, tree lock dropped: kill referenced jnodes in order,
-+ * stopping at the first cleared slot */
-+ for (i = 0; i < taken; ++i) {
-+ node = gang[i];
-+ if (node != NULL) {
-+ /* the next lookup restarts from the highest index
-+ * seen so far */
-+ index = max(index, index_jnode(node));
-+ spin_lock_jnode(node);
-+ assert("edward-1457", node->pg == NULL);
-+ /* this is always called after
-+ truncate_inode_pages_range(). Therefore, here
-+ jnode can not have page. New pages can not be
-+ created because truncate_jnodes_range goes
-+ under exclusive access on file obtained,
-+ whereas new page creation requires
-+ non-exclusive access obtained */
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ /* NOTE(review): the jnode spin lock taken above is not
-+ * released in this function; presumably
-+ * reiser4_uncapture_jnode() drops it - confirm */
-+ reiser4_uncapture_jnode(node);
-+ unhash_unformatted_jnode(node);
-+ truncated_jnodes++;
-+ jput(node);
-+ } else
-+ break;
-+ }
-+ /* done when a jnode beyond the range was met (i != taken) or
-+ * the lookup found nothing */
-+ if (i != taken || taken == 0)
-+ break;
-+ }
-+ return truncated_jnodes;
-+}
-+
-+/* Truncating files in reiser4: problems and solutions.
-+
-+ VFS calls fs's truncate after it has called truncate_inode_pages()
-+ to get rid of pages corresponding to part of file being truncated.
-+ In reiser4 it may cause existence of unallocated extents which do
-+ not have jnodes. Flush code does not expect that. Solution of this
-+ problem is straightforward. As vfs's truncate is implemented using
-+ setattr operation, it seems reasonable to have ->setattr() that
-+ will cut file body. However, flush code also does not expect dirty
-+ pages without parent items, so it is impossible to cut all items,
-+ then truncate all pages in two steps. We resolve this problem by
-+ cutting items one-by-one. Each such fine-grained step performed
-+ under longterm znode lock calls at the end ->kill_hook() method of
-+ a killed item to remove its bound pages and jnodes.
-+
-+ The following function is a common part of mentioned kill hooks.
-+ Also, this is called before tail-to-extent conversion (to not manage
-+ few copies of the data).
-+*/
-+/* Get rid of @count pages of @mapping starting at page index @from, together
-+ with their jnodes: the byte range is unmapped from user address spaces
-+ (@even_cows is passed on to unmap_mapping_range()), the pages are truncated
-+ from the page cache, and finally the jnodes of the range are invalidated.
-+ Nothing is done when @count is 0. */
-+void reiser4_invalidate_pages(struct address_space *mapping, pgoff_t from,
-+ unsigned long count, int even_cows)
-+{
-+ loff_t from_bytes, count_bytes;
-+
-+ if (count == 0)
-+ return;
-+ /* convert page indices to byte offsets; widened to loff_t before the
-+ * shift */
-+ from_bytes = ((loff_t) from) << PAGE_CACHE_SHIFT;
-+ count_bytes = ((loff_t) count) << PAGE_CACHE_SHIFT;
-+
-+ unmap_mapping_range(mapping, from_bytes, count_bytes, even_cows);
-+ /* the end offset passed here is inclusive, hence the "- 1" */
-+ truncate_inode_pages_range(mapping, from_bytes,
-+ from_bytes + count_bytes - 1);
-+ /* must follow page truncation: truncate_jnodes_range() asserts that
-+ * jnodes have no pages */
-+ truncate_jnodes_range(mapping->host, from, count);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/page_cache.h linux-2.6.24/fs/reiser4/page_cache.h
---- linux-2.6.24.orig/fs/reiser4/page_cache.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/page_cache.h 2008-01-25 11:39:06.952211810 +0300
-@@ -0,0 +1,68 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Memory pressure hooks. Fake inodes handling. See page_cache.c. */
-+
-+#if !defined( __REISER4_PAGE_CACHE_H__ )
-+#define __REISER4_PAGE_CACHE_H__
-+
-+#include "forward.h"
-+#include "context.h" /* for reiser4_ctx_gfp_mask_get() */
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/pagemap.h> /* for lock_page() */
-+#include <linux/vmalloc.h> /* for __vmalloc() */
-+
-+extern int reiser4_init_formatted_fake(struct super_block *);
-+extern void reiser4_done_formatted_fake(struct super_block *);
-+
-+extern reiser4_tree *reiser4_tree_by_page(const struct page *);
-+
-+extern int reiser4_set_page_dirty_internal(struct page *);
-+
-+#define reiser4_submit_bio(rw, bio) submit_bio((rw), (bio))
-+
-+extern void reiser4_wait_page_writeback(struct page *);
-+/* Lock @page and, if it is under writeback, call
-+ reiser4_wait_page_writeback() on it. Returns with @page locked. */
-+static inline void lock_and_wait_page_writeback(struct page *page)
-+{
-+ lock_page(page);
-+ if (unlikely(PageWriteback(page)))
-+ reiser4_wait_page_writeback(page);
-+}
-+
-+#define jprivate(page) ((jnode *)page_private(page))
-+
-+extern int reiser4_page_io(struct page *, jnode *, int rw, gfp_t);
-+extern void reiser4_drop_page(struct page *);
-+extern void reiser4_invalidate_pages(struct address_space *, pgoff_t from,
-+ unsigned long count, int even_cows);
-+extern void capture_reiser4_inodes(struct super_block *,
-+ struct writeback_control *);
-+/* Allocate @size bytes with __vmalloc(), using the gfp mask of the current
-+ reiser4 context with __GFP_HIGHMEM added, and PAGE_KERNEL protection.
-+ Returns whatever __vmalloc() returns. */
-+static inline void * reiser4_vmalloc (unsigned long size)
-+{
-+ return __vmalloc(size,
-+ reiser4_ctx_gfp_mask_get() | __GFP_HIGHMEM,
-+ PAGE_KERNEL);
-+}
-+
-+#define PAGECACHE_TAG_REISER4_MOVED PAGECACHE_TAG_DIRTY
-+
-+#if REISER4_DEBUG
-+extern void print_page(const char *prefix, struct page *page);
-+#else
-+#define print_page(prf, p) noop
-+#endif
-+
-+/* __REISER4_PAGE_CACHE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/cluster.c linux-2.6.24/fs/reiser4/plugin/cluster.c
---- linux-2.6.24.orig/fs/reiser4/plugin/cluster.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/cluster.c 2008-01-25 11:39:06.952211810 +0300
-@@ -0,0 +1,71 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Contains reiser4 cluster plugins (see
-+ http://www.namesys.com/cryptcompress_design.html
-+ "Concepts of clustering" for details). */
-+
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../inode.h"
-+
-+/*
-+ * ->change() method of the cluster plugins: install @plugin as the cluster
-+ * plugin in the plugin set of @inode.
-+ *
-+ * @inode:  reiser4 inode whose plugin set is modified
-+ * @plugin: plugin of type REISER4_CLUSTER_PLUGIN_TYPE to install
-+ * @memb:   plugin set member; not used here
-+ *
-+ * Returns 0 if @plugin is already the inode's cluster plugin, -EINVAL if
-+ * the inode's file plugin is not of the directory group, otherwise the
-+ * result of aset_set_unsafe().
-+ */
-+static int change_cluster(struct inode *inode,
-+			  reiser4_plugin * plugin,
-+			  pset_member memb)
-+{
-+	assert("edward-1324", inode != NULL);
-+	assert("edward-1325", plugin != NULL);
-+	assert("edward-1326", is_reiser4_inode(inode));
-+	assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE);
-+
-+	/* Can't change the cluster plugin for already existent regular files. */
-+	if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+		return RETERR(-EINVAL);
-+
-+	/*
-+	 * If matches, nothing to change. The comparison has to be made
-+	 * against the currently installed *cluster* plugin: comparing with
-+	 * the hash plugin would silently skip the update whenever the two
-+	 * unrelated plugin ids happen to be numerically equal.
-+	 */
-+	if (inode_cluster_plugin(inode) != NULL &&
-+	    inode_cluster_plugin(inode)->h.id == plugin->h.id)
-+		return 0;
-+
-+	return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+			       PSET_CLUSTER, plugin);
-+}
-+
-+/* Generic plugin operations shared by all cluster plugins: only ->change()
-+ is implemented. */
-+static reiser4_plugin_ops cluster_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = &change_cluster
-+};
-+
-+/* Expands to the designated initializer of one cluster_plugins[] entry.
-+ @SHIFT is stored in .shift, @ID is pasted into CLUSTER_<ID>_ID to form
-+ the array index and plugin id, @LABEL and @DESC fill the plugin header. */
-+#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \
-+ [CLUSTER_ ## ID ## _ID] = { \
-+ .h = { \
-+ .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \
-+ .id = CLUSTER_ ## ID ## _ID, \
-+ .pops = &cluster_plugin_ops, \
-+ .label = LABEL, \
-+ .desc = DESC, \
-+ .linkage = {NULL, NULL} \
-+ }, \
-+ .shift = SHIFT \
-+ }
-+
-+/* Table of supported cluster plugins, indexed by cluster plugin id.
-+ The first argument of each entry is the plugin's .shift (16 for the
-+ "64K" plugin down to 12 for the "4K" plugin). */
-+cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = {
-+ SUPPORT_CLUSTER(16, 64K, "64K", "Large"),
-+ SUPPORT_CLUSTER(15, 32K, "32K", "Big"),
-+ SUPPORT_CLUSTER(14, 16K, "16K", "Average"),
-+ SUPPORT_CLUSTER(13, 8K, "8K", "Small"),
-+ SUPPORT_CLUSTER(12, 4K, "4K", "Minimal")
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/cluster.h linux-2.6.24/fs/reiser4/plugin/cluster.h
---- linux-2.6.24.orig/fs/reiser4/plugin/cluster.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/cluster.h 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,409 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This file contains size/offset translators, modulators
-+ and other helper functions. */
-+
-+#if !defined( __FS_REISER4_CLUSTER_H__ )
-+#define __FS_REISER4_CLUSTER_H__
-+
-+#include "../inode.h"
-+
-+/* return the shift stored in the cluster plugin of @inode */
-+static inline int inode_cluster_shift(struct inode *inode)
-+{
-+ assert("edward-92", inode != NULL);
-+ assert("edward-93", reiser4_inode_data(inode) != NULL);
-+
-+ return inode_cluster_plugin(inode)->shift;
-+}
-+
-+/* cluster shift in page units, i.e. cluster shift minus PAGE_CACHE_SHIFT */
-+static inline unsigned cluster_nrpages_shift(struct inode *inode)
-+{
-+ return inode_cluster_shift(inode) - PAGE_CACHE_SHIFT;
-+}
-+
-+/* cluster size in page units */
-+static inline unsigned cluster_nrpages(struct inode *inode)
-+{
-+ return 1U << cluster_nrpages_shift(inode);
-+}
-+
-+/* cluster size of @inode: 1 << inode_cluster_shift(inode) */
-+static inline size_t inode_cluster_size(struct inode *inode)
-+{
-+ assert("edward-96", inode != NULL);
-+
-+ return 1U << inode_cluster_shift(inode);
-+}
-+
-+/* page index -> index of the cluster containing that page */
-+static inline cloff_t pg_to_clust(pgoff_t idx, struct inode *inode)
-+{
-+ return idx >> cluster_nrpages_shift(inode);
-+}
-+
-+/* cluster index -> index of the first page of that cluster */
-+static inline pgoff_t clust_to_pg(cloff_t idx, struct inode *inode)
-+{
-+ return idx << cluster_nrpages_shift(inode);
-+}
-+
-+/* round page index @idx down to the first page of its cluster */
-+static inline pgoff_t pg_to_clust_to_pg(pgoff_t idx, struct inode *inode)
-+{
-+ return clust_to_pg(pg_to_clust(idx, inode), inode);
-+}
-+
-+/* offset -> index of the page containing it */
-+static inline pgoff_t off_to_pg(loff_t off)
-+{
-+ return (off >> PAGE_CACHE_SHIFT);
-+}
-+
-+/* page index -> offset of the start of that page */
-+static inline loff_t pg_to_off(pgoff_t idx)
-+{
-+ return ((loff_t) (idx) << PAGE_CACHE_SHIFT);
-+}
-+
-+/* offset -> index of the cluster containing it */
-+static inline cloff_t off_to_clust(loff_t off, struct inode *inode)
-+{
-+ return off >> inode_cluster_shift(inode);
-+}
-+
-+/* cluster index -> offset of the start of that cluster */
-+static inline loff_t clust_to_off(cloff_t idx, struct inode *inode)
-+{
-+ return (loff_t) idx << inode_cluster_shift(inode);
-+}
-+
-+/* round offset @off down to the start of its cluster */
-+static inline loff_t off_to_clust_to_off(loff_t off, struct inode *inode)
-+{
-+ return clust_to_off(off_to_clust(off, inode), inode);
-+}
-+
-+/* offset -> index of the first page of the cluster containing it */
-+static inline pgoff_t off_to_clust_to_pg(loff_t off, struct inode *inode)
-+{
-+ return clust_to_pg(off_to_clust(off, inode), inode);
-+}
-+
-+/* offset within the page that contains @off */
-+static inline unsigned off_to_pgoff(loff_t off)
-+{
-+ return off & (PAGE_CACHE_SIZE - 1);
-+}
-+
-+/* offset within the cluster that contains @off */
-+static inline unsigned off_to_cloff(loff_t off, struct inode *inode)
-+{
-+ return off & ((loff_t) (inode_cluster_size(inode)) - 1);
-+}
-+
-+/* position of @page inside its page cluster, counted in pages */
-+static inline pgoff_t offset_in_clust(struct page * page)
-+{
-+ assert("edward-1488", page != NULL);
-+ assert("edward-1489", page->mapping != NULL);
-+
-+ return page_index(page) & ((cluster_nrpages(page->mapping->host)) - 1);
-+}
-+
-+/* true if @page is the first page of its page cluster */
-+static inline int first_page_in_cluster(struct page * page)
-+{
-+ return offset_in_clust(page) == 0;
-+}
-+
-+/* true if @page is the last page of its page cluster */
-+static inline int last_page_in_cluster(struct page * page)
-+{
-+ return offset_in_clust(page) ==
-+ cluster_nrpages(page->mapping->host) - 1;
-+}
-+
-+/* offset, within its cluster, of the start of the page of index @idx */
-+static inline unsigned
-+pg_to_off_to_cloff(unsigned long idx, struct inode *inode)
-+{
-+ return off_to_cloff(pg_to_off(idx), inode);
-+}
-+
-+/*********************** Size translators **************************/
-+
-+/* Translate linear size.
-+ * New units are (1 << @blkbits) times larger than the old ones.
-+ * In other words, calculate the number of logical blocks occupied
-+ * by @count elements (rounding up).
-+ */
-+static inline unsigned long size_in_blocks(loff_t count, unsigned blkbits)
-+{
-+ return (count + (1UL << blkbits) - 1) >> blkbits;
-+}
-+
-+/* size in pages */
-+static inline pgoff_t size_in_pages(loff_t size)
-+{
-+ return size_in_blocks(size, PAGE_CACHE_SHIFT);
-+}
-+
-+/* size in logical clusters */
-+static inline cloff_t size_in_lc(loff_t size, struct inode *inode)
-+{
-+ return size_in_blocks(size, inode_cluster_shift(inode));
-+}
-+
-+/* size in pages to the size in page clusters */
-+static inline cloff_t sp_to_spcl(pgoff_t size, struct inode *inode)
-+{
-+ return size_in_blocks(size, cluster_nrpages_shift(inode));
-+}
-+
-+/*********************** Size modulators ***************************/
-+
-+/*
-+ Modulate linear size by nominated block size and offset.
-+
-+ The "finite" function (which is zero almost everywhere).
-+ How much is a height of the figure at a position @pos,
-+ when trying to construct rectangle of height (1 << @blkbits),
-+ and square @size.
-+
-+ ******
-+ *******
-+ *******
-+ *******
-+ ----------> pos
-+
-+ In other words: the number of bytes of an object of @size bytes that
-+ fall into the block of index @pos, where blocks are (1 << @blkbits)
-+ bytes long. Full blocks yield (1 << @blkbits), the block containing
-+ the end of the object yields the remainder, blocks past it yield 0.
-+*/
-+static inline unsigned __mbb(loff_t size, unsigned long pos, int blkbits)
-+{
-+	/*
-+	 * Index of the block containing the end of the object. Kept as
-+	 * unsigned long (the type of @pos) so that it is not truncated to
-+	 * 32 bits before the comparisons below.
-+	 */
-+	unsigned long end = size >> blkbits;
-+
-+	if (pos < end)
-+		return 1U << blkbits;
-+	if (unlikely(pos > end))
-+		return 0;
-+	/* pos == end: bytes of the tail that fall into the last block */
-+	return size & ~(~0ull << blkbits);
-+}
-+
-+/* __mbb() with the block size fixed to the page size */
-+static inline unsigned __mbp(loff_t size, pgoff_t pos)
-+{
-+	const int page_bits = PAGE_CACHE_SHIFT;
-+
-+	return __mbb(size, pos, page_bits);
-+}
-+
-+/* how many bytes of the file live in the logical cluster @index */
-+static inline unsigned lbytes(cloff_t index, struct inode * inode)
-+{
-+	const int clust_bits = inode_cluster_shift(inode);
-+
-+	return __mbb(i_size_read(inode), index, clust_bits);
-+}
-+
-+/* how many bytes of the file live in the page @index */
-+static inline unsigned pbytes(pgoff_t index, struct inode * inode)
-+{
-+	const loff_t isize = i_size_read(inode);
-+
-+	return __mbp(isize, index);
-+}
-+
-+/**
-+ * number of pages occupied by @win->count bytes starting from
-+ * @win->off at logical cluster defined by @win. This is exactly
-+ * a number of pages to be modified and dirtied in any cluster operation.
-+ *
-+ * Computed as the page index just past the end of the range (rounded up)
-+ * minus the index of the page containing @win->off.
-+ */
-+static inline pgoff_t win_count_to_nrpages(struct reiser4_slide * win)
-+{
-+ return ((win->off + win->count +
-+ (1UL << PAGE_CACHE_SHIFT) - 1) >> PAGE_CACHE_SHIFT) -
-+ off_to_pg(win->off);
-+}
-+
-+/* return true, if logical cluster is not occupied by the file, i.e. the
-+ cluster of index @clust->index starts at or beyond the file size */
-+static inline int new_logical_cluster(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ return clust_to_off(clust->index, inode) >= i_size_read(inode);
-+}
-+
-+/* return true, if pages @p1 and @p2 are of the same page cluster */
-+static inline int same_page_cluster(struct page * p1, struct page * p2)
-+{
-+ assert("edward-1490", p1 != NULL);
-+ assert("edward-1491", p2 != NULL);
-+ assert("edward-1492", p1->mapping != NULL);
-+ assert("edward-1493", p2->mapping != NULL);
-+
-+ /* only cluster indices are compared; each index is computed with the
-+ cluster shift of the page's own host inode */
-+ return (pg_to_clust(page_index(p1), p1->mapping->host) ==
-+ pg_to_clust(page_index(p2), p2->mapping->host));
-+}
-+
-+/* return true, if @clust->tc.lsize equals the cluster size of @inode */
-+static inline int cluster_is_complete(struct cluster_handle * clust,
-+ struct inode * inode)
-+{
-+ return clust->tc.lsize == inode_cluster_size(inode);
-+}
-+
-+/* zero all fields of @win */
-+static inline void reiser4_slide_init(struct reiser4_slide * win)
-+{
-+ assert("edward-1084", win != NULL);
-+ memset(win, 0, sizeof *win);
-+}
-+
-+/* return the transform action recorded in @tc */
-+static inline tfm_action
-+cluster_get_tfm_act(struct tfm_cluster * tc)
-+{
-+ assert("edward-1356", tc != NULL);
-+ return tc->act;
-+}
-+
-+/* record transform action @act in @tc */
-+static inline void
-+cluster_set_tfm_act(struct tfm_cluster * tc, tfm_action act)
-+{
-+ /* NOTE(review): assertion label "edward-1356" is also used in
-+ cluster_get_tfm_act() - confirm whether labels must be unique */
-+ assert("edward-1356", tc != NULL);
-+ tc->act = act;
-+}
-+
-+/* Initialize cluster handle @clust for transform action @act: zero the
-+ whole handle, record @act, mark the disk cluster state as
-+ INVAL_DISK_CLUSTER and attach @window. */
-+static inline void cluster_init_act(struct cluster_handle * clust,
-+ tfm_action act,
-+ struct reiser4_slide * window)
-+{
-+ assert("edward-84", clust != NULL);
-+ memset(clust, 0, sizeof *clust);
-+ cluster_set_tfm_act(&clust->tc, act);
-+ clust->dstat = INVAL_DISK_CLUSTER;
-+ clust->win = window;
-+}
-+
-+/* initialize @clust for TFMA_READ */
-+static inline void cluster_init_read(struct cluster_handle * clust,
-+ struct reiser4_slide * window)
-+{
-+ cluster_init_act (clust, TFMA_READ, window);
-+}
-+
-+/* initialize @clust for TFMA_WRITE */
-+static inline void cluster_init_write(struct cluster_handle * clust,
-+ struct reiser4_slide * window)
-+{
-+ cluster_init_act (clust, TFMA_WRITE, window);
-+}
-+
-+/* true if @p1 and @p2 are items of the same disk cluster.
-+ The decision is delegated to the ->b.mergeable() method of the item
-+ plugin found at @p1; only @p1 is asserted to be a CTAIL_ID item. */
-+static inline int same_disk_cluster(const coord_t * p1, const coord_t * p2)
-+{
-+ /* drop this if you have other items to aggregate */
-+ assert("edward-1494", item_id_by_coord(p1) == CTAIL_ID);
-+
-+ return item_plugin_by_coord(p1)->b.mergeable(p1, p2);
-+}
-+
-+/* Accessors for the ctail part of the extended coord kept in @hint
-+ (hint->ext_coord.extension.ctail). */
-+
-+/* read ctail.dsize */
-+static inline int dclust_get_extension_dsize(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.dsize;
-+}
-+
-+/* store @dsize into ctail.dsize */
-+static inline void dclust_set_extension_dsize(hint_t * hint, int dsize)
-+{
-+ hint->ext_coord.extension.ctail.dsize = dsize;
-+}
-+
-+/* read ctail.shift */
-+static inline int dclust_get_extension_shift(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.shift;
-+}
-+
-+/* read ctail.ncount */
-+static inline int dclust_get_extension_ncount(hint_t * hint)
-+{
-+ return hint->ext_coord.extension.ctail.ncount;
-+}
-+
-+/* increment ctail.ncount by one */
-+static inline void dclust_inc_extension_ncount(hint_t * hint)
-+{
-+ hint->ext_coord.extension.ctail.ncount ++;
-+}
-+
-+/* zero the whole ctail extension */
-+static inline void dclust_init_extension(hint_t * hint)
-+{
-+ memset(&hint->ext_coord.extension.ctail, 0,
-+ sizeof(hint->ext_coord.extension.ctail));
-+}
-+
-+/* true if the shift recorded in the (valid) @hint equals UCTAIL_SHIFT */
-+static inline int hint_is_unprepped_dclust(hint_t * hint)
-+{
-+ assert("edward-1451", hint_is_valid(hint));
-+ return dclust_get_extension_shift(hint) == (int)UCTAIL_SHIFT;
-+}
-+
-+/* Make sure @coord points between items: if it does not already, set it
-+ to AFTER_ITEM with unit position 0. In debug builds the node is loaded
-+ with zload() around the check (the load is asserted to succeed) and
-+ released with zrelse() afterwards. */
-+static inline void coord_set_between_clusters(coord_t * coord)
-+{
-+#if REISER4_DEBUG
-+ int result;
-+ result = zload(coord->node);
-+ assert("edward-1296", !result);
-+#endif
-+ if (!coord_is_between_items(coord)) {
-+ coord->between = AFTER_ITEM;
-+ coord->unit_pos = 0;
-+ }
-+#if REISER4_DEBUG
-+ zrelse(coord->node);
-+#endif
-+}
-+
-+int reiser4_inflate_cluster(struct cluster_handle *, struct inode *);
-+int find_disk_cluster(struct cluster_handle *, struct inode *, int read,
-+ znode_lock_mode mode);
-+int checkout_logical_cluster(struct cluster_handle *, jnode *, struct inode *);
-+int reiser4_deflate_cluster(struct cluster_handle *, struct inode *);
-+void truncate_complete_page_cluster(struct inode *inode, cloff_t start,
-+ int even_cows);
-+void invalidate_hint_cluster(struct cluster_handle * clust);
-+int get_disk_cluster_locked(struct cluster_handle * clust, struct inode * inode,
-+ znode_lock_mode lock_mode);
-+void reset_cluster_params(struct cluster_handle * clust);
-+int set_cluster_by_page(struct cluster_handle * clust, struct page * page,
-+ int count);
-+int prepare_page_cluster(struct inode *inode, struct cluster_handle * clust,
-+ rw_op rw);
-+void __put_page_cluster(int from, int count,
-+ struct page ** pages, struct inode * inode);
-+void put_page_cluster(struct cluster_handle * clust,
-+ struct inode * inode, rw_op rw);
-+void put_cluster_handle(struct cluster_handle * clust);
-+int grab_tfm_stream(struct inode *inode, struct tfm_cluster * tc, tfm_stream_id id);
-+int tfm_cluster_is_uptodate(struct tfm_cluster * tc);
-+void tfm_cluster_set_uptodate(struct tfm_cluster * tc);
-+void tfm_cluster_clr_uptodate(struct tfm_cluster * tc);
-+
-+/* move cluster handle to the target position
-+ specified by the page of index @pgidx.
-+
-+ The handle's parameters are reset first. If the handle already had a
-+ valid index and the new cluster is not the one immediately following
-+ it, the hint is unset and invalidated. Finally the new cluster index is
-+ recorded and marked valid. */
-+static inline void move_cluster_forward(struct cluster_handle * clust,
-+ struct inode *inode,
-+ pgoff_t pgidx)
-+{
-+ assert("edward-1297", clust != NULL);
-+ assert("edward-1298", inode != NULL);
-+
-+ reset_cluster_params(clust);
-+ if (clust->index_valid &&
-+ /* Hole in the indices. Hint became invalid and can not be
-+ used by find_cluster_item() even if seal/node versions
-+ will coincide */
-+ pg_to_clust(pgidx, inode) != clust->index + 1) {
-+ reiser4_unset_hint(clust->hint);
-+ invalidate_hint_cluster(clust);
-+ }
-+ clust->index = pg_to_clust(pgidx, inode);
-+ clust->index_valid = 1;
-+}
-+
-+/* Allocate the clust->pages array with kmalloc(), using the gfp mask of
-+ the current reiser4 context. Returns 0 on success, -ENOMEM on failure. */
-+static inline int alloc_clust_pages(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ assert("edward-791", clust != NULL);
-+ assert("edward-792", inode != NULL);
-+ /* NOTE(review): the element size is shifted by inode_cluster_shift(),
-+ i.e. room for (1 << cluster shift) entries, not for
-+ cluster_nrpages() entries. This looks like an over-allocation
-+ (cluster_nrpages_shift() may have been intended) - confirm against
-+ the users of clust->pages before changing. */
-+ clust->pages =
-+ kmalloc(sizeof(*clust->pages) << inode_cluster_shift(inode),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+/* kfree() the clust->pages array; the pointer itself is left as is */
-+static inline void free_clust_pages(struct cluster_handle * clust)
-+{
-+ kfree(clust->pages);
-+}
-+
-+#endif /* __FS_REISER4_CLUSTER_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/compress/compress.c linux-2.6.24/fs/reiser4/plugin/compress/compress.c
---- linux-2.6.24.orig/fs/reiser4/plugin/compress/compress.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/compress/compress.c 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,367 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* reiser4 compression transform plugins */
-+
-+#include "../../debug.h"
-+#include "../../inode.h"
-+#include "../plugin.h"
-+
-+#include <linux/lzo.h>
-+#include <linux/zlib.h>
-+#include <linux/types.h>
-+#include <linux/hardirq.h>
-+
-+/* ->change() method of the compression plugins: install @plugin as the
-+ compression plugin in the plugin set of @inode. @memb is not used.
-+ Returns -EINVAL if the inode's file plugin is not of the directory
-+ group, 0 if the "matches" check below succeeds, otherwise the result of
-+ aset_set_unsafe(). */
-+static int change_compression(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ assert("edward-1316", inode != NULL);
-+ assert("edward-1317", plugin != NULL);
-+ assert("edward-1318", is_reiser4_inode(inode));
-+ assert("edward-1319",
-+ plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE);
-+
-+ /* cannot change compression plugin of already existing regular object */
-+ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+ return RETERR(-EINVAL);
-+
-+ /* If matches, nothing to change. */
-+ /* NOTE(review): this compares @plugin with the inode's *hash* plugin,
-+ although a compression plugin is being installed. It looks like a
-+ copy-and-paste slip; presumably the inode's current compression
-+ plugin should be compared instead - confirm. */
-+ if (inode_hash_plugin(inode) != NULL &&
-+ inode_hash_plugin(inode)->h.id == plugin->h.id)
-+ return 0;
-+
-+ return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_COMPRESSION, plugin);
-+}
-+
-+/* Generic plugin operations shared by all compression plugins: only
-+ ->change() is implemented. */
-+static reiser4_plugin_ops compression_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = &change_compression
-+};
-+
-+/******************************************************************************/
-+/* gzip1 compression */
-+/******************************************************************************/
-+
-+#define GZIP1_DEF_LEVEL Z_BEST_SPEED
-+#define GZIP1_DEF_WINBITS 15
-+#define GZIP1_DEF_MEMLEVEL MAX_MEM_LEVEL
-+
-+/* ->init() of gzip1: succeeds only when REISER4_ZLIB is enabled,
-+   otherwise warns and returns -EINVAL */
-+static int gzip1_init(void)
-+{
-+#if REISER4_ZLIB
-+	return 0;
-+#else
-+	warning("edward-1337", "Zlib not compiled into kernel");
-+	return -EINVAL;
-+#endif
-+}
-+
-+/* ->overrun() of gzip1: always 0, whatever @src_len is */
-+static int gzip1_overrun(unsigned src_len UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* ->alloc() of gzip1: allocate the zlib workspace needed for @act
-+   (deflate workspace for TFMA_WRITE, inflate workspace for TFMA_READ).
-+   Returns the workspace, ERR_PTR(-ENOMEM) if the allocation failed, or
-+   NULL when REISER4_ZLIB is disabled. */
-+static coa_t gzip1_alloc(tfm_action act)
-+{
-+	coa_t workspace = NULL;
-+#if REISER4_ZLIB
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+		workspace = reiser4_vmalloc(zlib_deflate_workspacesize());
-+		break;
-+	case TFMA_READ:	/* decompress */
-+		workspace = reiser4_vmalloc(zlib_inflate_workspacesize());
-+		break;
-+	default:
-+		impossible("edward-767",
-+			   "trying to alloc workspace for unknown tfm action");
-+		/* nothing was allocated, so this is not an -ENOMEM case */
-+		return workspace;
-+	}
-+	if (workspace == NULL) {
-+		warning("edward-768",
-+			"alloc workspace for gzip1 (tfm action = %d) failed\n",
-+			act);
-+		return ERR_PTR(-ENOMEM);
-+	}
-+#endif
-+	return workspace;
-+}
-+
-+/* ->free() of gzip1: release workspace @coa obtained for @act */
-+static void gzip1_free(coa_t coa, tfm_action act)
-+{
-+	assert("edward-769", coa != NULL);
-+
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+	case TFMA_READ:	/* decompress */
-+		/* both workspaces are released with vfree() */
-+		vfree(coa);
-+		break;
-+	default:
-+		impossible("edward-770", "unknown tfm action");
-+	}
-+}
-+
-+/* ->min_size_deflate() of gzip1: constant 64 */
-+static int gzip1_min_size_deflate(void)
-+{
-+ return 64;
-+}
-+
-+/* ->compress() of gzip1.
-+ Deflate @src_len bytes at @src_first into @dst_first using workspace
-+ @coa. On entry *@dst_len is the space available at @dst_first; on
-+ success it is set to the number of bytes produced. On any zlib failure,
-+ or when zlib_deflate() does not reach Z_STREAM_END, *@dst_len is set to
-+ @src_len. When REISER4_ZLIB is disabled nothing is done and *@dst_len is
-+ left untouched. */
-+static void
-+gzip1_compress(coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len)
-+{
-+#if REISER4_ZLIB
-+ int ret = 0;
-+ struct z_stream_s stream;
-+
-+ assert("edward-842", coa != NULL);
-+ assert("edward-875", src_len != 0);
-+
-+ stream.workspace = coa;
-+ /* negative window bits: raw deflate stream */
-+ ret = zlib_deflateInit2(&stream, GZIP1_DEF_LEVEL, Z_DEFLATED,
-+ -GZIP1_DEF_WINBITS, GZIP1_DEF_MEMLEVEL,
-+ Z_DEFAULT_STRATEGY);
-+ if (ret != Z_OK) {
-+ warning("edward-771", "zlib_deflateInit2 returned %d\n", ret);
-+ goto rollback;
-+ }
-+ ret = zlib_deflateReset(&stream);
-+ if (ret != Z_OK) {
-+ warning("edward-772", "zlib_deflateReset returned %d\n", ret);
-+ goto rollback;
-+ }
-+ stream.next_in = src_first;
-+ stream.avail_in = src_len;
-+ stream.next_out = dst_first;
-+ stream.avail_out = *dst_len;
-+
-+ ret = zlib_deflate(&stream, Z_FINISH);
-+ if (ret != Z_STREAM_END) {
-+ /* Z_OK here is not warned about; other codes are */
-+ if (ret != Z_OK)
-+ warning("edward-773",
-+ "zlib_deflate returned %d\n", ret);
-+ goto rollback;
-+ }
-+ *dst_len = stream.total_out;
-+ return;
-+ rollback:
-+ /* report the source length as the result length */
-+ *dst_len = src_len;
-+#endif
-+ return;
-+}
-+
-+/* ->decompress() of gzip1.
-+ Inflate @src_len bytes at @src_first into @dst_first using workspace
-+ @coa. On entry *@dst_len is the space available at @dst_first; on
-+ success it is set to the number of bytes produced. On failure a warning
-+ is issued and *@dst_len is left untouched. When REISER4_ZLIB is disabled
-+ nothing is done. */
-+static void
-+gzip1_decompress(coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len)
-+{
-+#if REISER4_ZLIB
-+ int ret = 0;
-+ struct z_stream_s stream;
-+
-+ assert("edward-843", coa != NULL);
-+ assert("edward-876", src_len != 0);
-+
-+ stream.workspace = coa;
-+ /* negative window bits: raw deflate stream */
-+ ret = zlib_inflateInit2(&stream, -GZIP1_DEF_WINBITS);
-+ if (ret != Z_OK) {
-+ warning("edward-774", "zlib_inflateInit2 returned %d\n", ret);
-+ return;
-+ }
-+ ret = zlib_inflateReset(&stream);
-+ if (ret != Z_OK) {
-+ warning("edward-775", "zlib_inflateReset returned %d\n", ret);
-+ return;
-+ }
-+
-+ stream.next_in = src_first;
-+ stream.avail_in = src_len;
-+ stream.next_out = dst_first;
-+ stream.avail_out = *dst_len;
-+
-+ ret = zlib_inflate(&stream, Z_SYNC_FLUSH);
-+ /*
-+ * Work around a bug in zlib, which sometimes wants to taste an extra
-+ * byte when being used in the (undocumented) raw deflate mode.
-+ * (From USAGI).
-+ */
-+ if (ret == Z_OK && !stream.avail_in && stream.avail_out) {
-+ /* feed one extra zero byte and ask zlib to finish */
-+ u8 zerostuff = 0;
-+ stream.next_in = &zerostuff;
-+ stream.avail_in = 1;
-+ ret = zlib_inflate(&stream, Z_FINISH);
-+ }
-+ if (ret != Z_STREAM_END) {
-+ warning("edward-776", "zlib_inflate returned %d\n", ret);
-+ return;
-+ }
-+ *dst_len = stream.total_out;
-+#endif
-+ return;
-+}
-+
-+/******************************************************************************/
-+/* lzo1 compression */
-+/******************************************************************************/
-+
-+/* ->init() of lzo1: always succeeds */
-+static int lzo1_init(void)
-+{
-+ return 0;
-+}
-+
-+/* ->overrun() of lzo1: @in_len / 64 + 16 + 3.
-+ NOTE(review): presumably the worst-case growth of LZO1X output over its
-+ input - confirm against the LZO documentation. */
-+static int lzo1_overrun(unsigned in_len)
-+{
-+ return in_len / 64 + 16 + 3;
-+}
-+
-+/* ->alloc() of lzo1: allocate the LZO1X_1_MEM_COMPRESS bytes workspace for
-+   TFMA_WRITE. No workspace is allocated for TFMA_READ, NULL is returned.
-+   Returns ERR_PTR(-ENOMEM) if the allocation failed. */
-+static coa_t lzo1_alloc(tfm_action act)
-+{
-+	coa_t workspace = NULL;
-+
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+		workspace = reiser4_vmalloc(LZO1X_1_MEM_COMPRESS);
-+		if (workspace == NULL) {
-+			warning("edward-878",
-+				"alloc workspace for lzo1 (tfm action = %d) failed\n",
-+				act);
-+			return ERR_PTR(-ENOMEM);
-+		}
-+		break;
-+	case TFMA_READ:	/* decompress */
-+		break;
-+	default:
-+		impossible("edward-877",
-+			   "trying to alloc workspace for unknown tfm action");
-+	}
-+	return workspace;
-+}
-+
-+/* ->free() of lzo1: release workspace @coa obtained for @act.
-+   Only TFMA_WRITE has a workspace (see lzo1_alloc()); being called for
-+   TFMA_READ or for an unknown action is reported via impossible(). */
-+static void lzo1_free(coa_t coa, tfm_action act)
-+{
-+	assert("edward-879", coa != NULL);
-+
-+	switch (act) {
-+	case TFMA_WRITE:	/* compress */
-+		vfree(coa);
-+		break;
-+	case TFMA_READ:	/* decompress */
-+		impossible("edward-1304",
-+			   "trying to free non-allocated workspace");
-+		/* do not fall through into the "unknown action" report */
-+		break;
-+	default:
-+		impossible("edward-880", "unknown tfm action");
-+	}
-+	return;
-+}
-+
-+/* ->min_size_deflate() of lzo1: constant 256 */
-+static int lzo1_min_size_deflate(void)
-+{
-+ return 256;
-+}
-+
-+/* ->compress() of lzo1.
-+   Compress @src_len bytes at @src_first into @dst_first using workspace
-+   @coa. On success *@dst_len is set to the compressed length, which is
-+   then strictly less than @src_len. If lzo1x_1_compress() fails, or the
-+   result is not smaller than the source, *@dst_len is set to @src_len. */
-+static void
-+lzo1_compress(coa_t coa, __u8 * src_first, unsigned src_len,
-+	      __u8 * dst_first, unsigned *dst_len)
-+{
-+	int result;
-+	/*
-+	 * lzo1x_1_compress() takes a size_t * for the output length. Passing
-+	 * the caller's unsigned * directly is wrong where size_t is wider
-+	 * than unsigned, so go through a local of the right type.
-+	 */
-+	size_t out_len = *dst_len;
-+
-+	assert("edward-846", coa != NULL);
-+	assert("edward-847", src_len != 0);
-+
-+	result = lzo1x_1_compress(src_first, src_len, dst_first, &out_len, coa);
-+	if (unlikely(result != LZO_E_OK)) {
-+		warning("edward-849", "lzo1x_1_compress failed\n");
-+		goto out;
-+	}
-+	if (out_len >= src_len) {
-+		/* incompressible data: not worth a warning */
-+		goto out;
-+	}
-+	*dst_len = out_len;
-+	return;
-+ out:
-+	*dst_len = src_len;
-+	return;
-+}
-+
-+/* ->decompress() of lzo1.
-+   Decompress @src_len bytes at @src_first into @dst_first. No workspace
-+   is used, @coa must be NULL. On entry *@dst_len is the space available
-+   at @dst_first; on return it holds the length reported by
-+   lzo1x_decompress_safe(). A failure is only reported by a warning. */
-+static void
-+lzo1_decompress(coa_t coa, __u8 * src_first, unsigned src_len,
-+		__u8 * dst_first, unsigned *dst_len)
-+{
-+	int result;
-+	/*
-+	 * lzo1x_decompress_safe() uses a size_t * as an in/out length.
-+	 * Passing the caller's unsigned * directly is wrong where size_t is
-+	 * wider than unsigned (garbage is read as the buffer size and the
-+	 * store overruns *dst_len), so go through a local of the right type.
-+	 */
-+	size_t out_len = *dst_len;
-+
-+	assert("edward-851", coa == NULL);
-+	assert("edward-852", src_len != 0);
-+
-+	result = lzo1x_decompress_safe(src_first, src_len, dst_first, &out_len);
-+	if (result != LZO_E_OK)
-+		warning("edward-853", "lzo1x_1_decompress failed\n");
-+	*dst_len = out_len;
-+	return;
-+}
-+
-+/* Table of supported compression plugins, indexed by compression id.
-+ Both entries share compression_plugin_ops as generic plugin operations
-+ and reiser4_adler32 as checksum; the remaining methods are the lzo1_* or
-+ gzip1_* functions of this file. */
-+compression_plugin compression_plugins[LAST_COMPRESSION_ID] = {
-+ [LZO1_COMPRESSION_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = LZO1_COMPRESSION_ID,
-+ .pops = &compression_plugin_ops,
-+ .label = "lzo1",
-+ .desc = "lzo1 compression transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = lzo1_init,
-+ .overrun = lzo1_overrun,
-+ .alloc = lzo1_alloc,
-+ .free = lzo1_free,
-+ .min_size_deflate = lzo1_min_size_deflate,
-+ .checksum = reiser4_adler32,
-+ .compress = lzo1_compress,
-+ .decompress = lzo1_decompress
-+ },
-+ [GZIP1_COMPRESSION_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .id = GZIP1_COMPRESSION_ID,
-+ .pops = &compression_plugin_ops,
-+ .label = "gzip1",
-+ .desc = "gzip1 compression transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init = gzip1_init,
-+ .overrun = gzip1_overrun,
-+ .alloc = gzip1_alloc,
-+ .free = gzip1_free,
-+ .min_size_deflate = gzip1_min_size_deflate,
-+ .checksum = reiser4_adler32,
-+ .compress = gzip1_compress,
-+ .decompress = gzip1_decompress
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/compress/compress.h linux-2.6.24/fs/reiser4/plugin/compress/compress.h
---- linux-2.6.24.orig/fs/reiser4/plugin/compress/compress.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/compress/compress.h 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,43 @@
-+#if !defined( __FS_REISER4_COMPRESS_H__ )
-+#define __FS_REISER4_COMPRESS_H__
-+
-+#include <linux/types.h>
-+#include <linux/string.h>
-+
-+/* transform direction */
-+typedef enum {
-+ TFMA_READ, /* decrypt, decompress */
-+ TFMA_WRITE, /* encrypt, compress */
-+ TFMA_LAST
-+} tfm_action;
-+
-+/* supported compression algorithms */
-+typedef enum {
-+ LZO1_COMPRESSION_ID,
-+ GZIP1_COMPRESSION_ID,
-+ LAST_COMPRESSION_ID,
-+} reiser4_compression_id;
-+
-+/* the same as pgoff, but units are page clusters */
-+typedef unsigned long cloff_t;
-+
-+/* working data of a (de)compression algorithm */
-+typedef void *coa_t;
-+
-+/* table for all supported (de)compression algorithms */
-+typedef coa_t coa_set[LAST_COMPRESSION_ID][TFMA_LAST];
-+
-+__u32 reiser4_adler32(char *data, __u32 len);
-+
-+#endif /* __FS_REISER4_COMPRESS_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/compress/compress_mode.c linux-2.6.24/fs/reiser4/plugin/compress/compress_mode.c
---- linux-2.6.24.orig/fs/reiser4/plugin/compress/compress_mode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/compress/compress_mode.c 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,162 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* This file contains Reiser4 compression mode plugins.
-+
-+ Compression mode plugin is a set of handlers called by compressor
-+ at flush time and represent some heuristics including the ones
-+ which are to avoid compression of incompressible data, see
-+ http://www.namesys.com/cryptcompress_design.html for more details.
-+*/
-+#include "../../inode.h"
-+#include "../plugin.h"
-+
-+/* ->should_deflate() of the "none" mode: never deflate */
-+static int should_deflate_none(struct inode * inode, cloff_t index)
-+{
-+ return 0;
-+}
-+
-+/* common ->should_deflate(): deflate iff compression is currently on for
-+ @inode; @index is ignored */
-+static int should_deflate_common(struct inode * inode, cloff_t index)
-+{
-+ return compression_is_on(cryptcompress_inode_data(inode));
-+}
-+
-+/* ->discard_hook() of the "ultim" mode: turn compression off for @inode.
-+ Always returns 0. */
-+static int discard_hook_ultim(struct inode *inode, cloff_t index)
-+{
-+ turn_off_compression(cryptcompress_inode_data(inode));
-+ return 0;
-+}
-+
-+/* ->discard_hook() of the "lattd" mode: turn compression off for @inode
-+ and double the lattice factor unless it has already reached
-+ MAX_LATTICE_FACTOR. Always returns 0. */
-+static int discard_hook_lattd(struct inode *inode, cloff_t index)
-+{
-+ struct cryptcompress_info * info = cryptcompress_inode_data(inode);
-+
-+ assert("edward-1462",
-+ get_lattice_factor(info) >= MIN_LATTICE_FACTOR &&
-+ get_lattice_factor(info) <= MAX_LATTICE_FACTOR);
-+
-+ turn_off_compression(info);
-+ if (get_lattice_factor(info) < MAX_LATTICE_FACTOR)
-+ set_lattice_factor(info, get_lattice_factor(info) << 1);
-+ return 0;
-+}
-+
-+/* ->accept_hook() of the "lattd" mode: turn compression on for @inode and
-+ reset the lattice factor to MIN_LATTICE_FACTOR. Always returns 0. */
-+static int accept_hook_lattd(struct inode *inode, cloff_t index)
-+{
-+ turn_on_compression(cryptcompress_inode_data(inode));
-+ set_lattice_factor(cryptcompress_inode_data(inode), MIN_LATTICE_FACTOR);
-+ return 0;
-+}
-+
-+/* Check on dynamic lattice, the adaptive compression modes which
-+ defines the following behavior:
-+
-+ Compression is on: try to compress everything and turn
-+ it off, whenever cluster is incompressible.
-+
-+ Compression is off: try to compress clusters of indexes
-+ k * FACTOR (k = 0, 1, 2, ...) and turn it on, if some of
-+ them is compressible. If incompressible, then increase FACTOR */
-+
-+/* check if @index belongs to the one-dimensional lattice
-+   of sparse factor @factor, i.e. is a multiple of @factor;
-+   with a zero factor only index 0 is on the lattice */
-+static int is_on_lattice(cloff_t index, int factor)
-+{
-+	if (factor == 0)
-+		return index == 0;
-+	return index % factor == 0;
-+}
-+
-+/* ->should_deflate() of the "lattd" mode: deflate when compression is on,
-+   and otherwise only the clusters whose index lies on the lattice */
-+static int should_deflate_lattd(struct inode * inode, cloff_t index)
-+{
-+	if (should_deflate_common(inode, index))
-+		return 1;
-+
-+	return is_on_lattice(index,
-+			     get_lattice_factor(cryptcompress_inode_data(inode)));
-+}
-+
-+/* compression mode plugins: table indexed by compression mode id.
-+ Each entry supplies ->should_deflate(), ->accept_hook() and
-+ ->discard_hook(); hooks a mode does not need are NULL. */
-+compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = {
-+ [NONE_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = NONE_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "none",
-+ .desc = "Compress nothing",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_none,
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ },
-+ /* Check-on-dynamic-lattice adaptive compression mode */
-+ [LATTD_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = LATTD_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "lattd",
-+ .desc = "Check on dynamic lattice",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_lattd,
-+ .accept_hook = accept_hook_lattd,
-+ .discard_hook = discard_hook_lattd
-+ },
-+ /* Check-ultimately compression mode:
-+ Turn off compression forever as soon as we meet
-+ incompressible data */
-+ [ULTIM_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = ULTIM_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "ultim",
-+ .desc = "Check ultimately",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_common,
-+ .accept_hook = NULL,
-+ .discard_hook = discard_hook_ultim
-+ },
-+ /* Force-to-compress-everything compression mode */
-+ [FORCE_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = FORCE_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "force",
-+ .desc = "Force to compress everything",
-+ .linkage = {NULL, NULL}
-+ },
-+ /* NOTE(review): presumably callers treat a NULL ->should_deflate()
-+ as "always deflate" - confirm against the call sites */
-+ .should_deflate = NULL,
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ },
-+ /* Convert-to-extent compression mode.
-+ In this mode items will be converted to extents and management
-+ will be passed to (classic) unix file plugin as soon as ->write()
-+ detects that the first complete logical cluster (of index #0) is
-+ incompressible. */
-+ [CONVX_COMPRESSION_MODE_ID] = {
-+ .h = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .id = CONVX_COMPRESSION_MODE_ID,
-+ .pops = NULL,
-+ .label = "conv",
-+ .desc = "Convert to extent",
-+ .linkage = {NULL, NULL}
-+ },
-+ .should_deflate = should_deflate_common,
-+ .accept_hook = NULL,
-+ .discard_hook = NULL
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/compress/Makefile linux-2.6.24/fs/reiser4/plugin/compress/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/compress/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/compress/Makefile 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,5 @@
-+# Build compress_plugins.o whenever reiser4 is enabled (built-in or module,
-+# following CONFIG_REISER4_FS).
-+obj-$(CONFIG_REISER4_FS) += compress_plugins.o
-+
-+# Objects that are linked together into compress_plugins.o
-+compress_plugins-objs := \
-+ compress.o \
-+ compress_mode.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/crypto/cipher.c linux-2.6.24/fs/reiser4/plugin/crypto/cipher.c
---- linux-2.6.24.orig/fs/reiser4/plugin/crypto/cipher.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/crypto/cipher.c 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,37 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser,
-+ licensing governed by reiser4/README */
-+/* Reiser4 cipher transform plugins */
-+
-+#include "../../debug.h"
-+#include "../plugin.h"
-+
-+cipher_plugin cipher_plugins[LAST_CIPHER_ID] = { /* table of cipher plugins, indexed by cipher id */
-+ [NONE_CIPHER_ID] = { /* the "none" entry: every operation hook below is NULL */
-+ .h = {
-+ .type_id = REISER4_CIPHER_PLUGIN_TYPE,
-+ .id = NONE_CIPHER_ID,
-+ .pops = NULL,
-+ .label = "none",
-+ .desc = "no cipher transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .alloc = NULL,
-+ .free = NULL,
-+ .scale = NULL,
-+ .align_stream = NULL,
-+ .setkey = NULL,
-+ .encrypt = NULL,
-+ .decrypt = NULL
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/crypto/cipher.h linux-2.6.24/fs/reiser4/plugin/crypto/cipher.h
---- linux-2.6.24.orig/fs/reiser4/plugin/crypto/cipher.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/crypto/cipher.h 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,55 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* This file contains definitions for the objects operated
-+ by reiser4 key manager, which is something like keyring
-+ wrapped by appropriate reiser4 plugin */
-+
-+#if !defined( __FS_REISER4_CRYPT_H__ )
-+#define __FS_REISER4_CRYPT_H__
-+
-+#include <linux/crypto.h>
-+
-+/* key info imported from user space: the uninstantiated key and the passphrase supplied with it */
-+struct reiser4_crypto_data {
-+ int keysize; /* uninstantiated key size (presumably in bytes -- TODO confirm) */
-+ __u8 * key; /* uninstantiated key */
-+ int keyid_size; /* size of passphrase */
-+ __u8 * keyid; /* passphrase */
-+};
-+
-+/* This object contains all the infrastructure needed to implement a
-+ cipher transform. It is operated on (allocated, inherited,
-+ validated, bound to the host inode, etc.) by the reiser4 key manager.
-+
-+ This info can be allocated in two cases:
-+ 1. importing a key from user space.
-+ 2. reading inode from disk */
-+struct reiser4_crypto_info {
-+ struct inode * host; /* inode this info is bound to */
-+ struct crypto_hash * digest; /* hash transform (presumably the one that produces keyid -- confirm) */
-+ struct crypto_blkcipher * cipher; /* block cipher transform */
-+#if 0
-+ cipher_key_plugin * kplug; /* key manager */
-+#endif
-+ __u8 * keyid; /* key fingerprint, created by the digest plugin
-+ from the uninstantiated key and the passphrase;
-+ supposed to be stored in disk stat-data */
-+ int inst; /* indicates whether the cipher key is
-+ instantiated (case 1 above) */
-+ int keysize; /* uninstantiated key size (bytes), supposed
-+ to be stored in disk stat-data */
-+ int keyload_count; /* number of objects that have this
-+ crypto-stat attached */
-+};
-+
-+#endif /* __FS_REISER4_CRYPT_H__ */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/crypto/digest.c linux-2.6.24/fs/reiser4/plugin/crypto/digest.c
---- linux-2.6.24.orig/fs/reiser4/plugin/crypto/digest.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/crypto/digest.c 2008-01-25 11:39:06.956212841 +0300
-@@ -0,0 +1,58 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* reiser4 digest transform plugin (is used by cryptcompress object plugin) */
-+/* EDWARD-FIXME-HANS: and it does what? a digest is a what? */
-+#include "../../debug.h"
-+#include "../plugin_header.h"
-+#include "../plugin.h"
-+#include "../file/cryptcompress.h"
-+
-+#include <linux/types.h>
-+
-+extern digest_plugin digest_plugins[LAST_DIGEST_ID];
-+/* Allocate the "sha256" hash transform; when REISER4_SHA256 is not enabled, warn and return ERR_PTR(-EINVAL) instead. */
-+static struct crypto_hash * alloc_sha256 (void)
-+{
-+#if REISER4_SHA256
-+ return crypto_alloc_hash ("sha256", 0, CRYPTO_ALG_ASYNC); /* NOTE(review): type 0 with mask CRYPTO_ALG_ASYNC presumably restricts the lookup to synchronous implementations -- confirm against the crypto API */
-+#else
-+ warning("edward-1418", "sha256 unsupported");
-+ return ERR_PTR(-EINVAL);
-+#endif
-+}
-+/* Release a transform obtained from alloc_sha256(); does nothing when REISER4_SHA256 is not enabled. */
-+static void free_sha256 (struct crypto_hash * tfm)
-+{
-+#if REISER4_SHA256
-+ crypto_free_hash(tfm);
-+#endif
-+ return;
-+}
-+
-+/* digest plugins: table indexed by digest id; this initializer fills in only the sha256_32 entry */
-+digest_plugin digest_plugins[LAST_DIGEST_ID] = {
-+ [SHA256_32_DIGEST_ID] = {
-+ .h = {
-+ .type_id = REISER4_DIGEST_PLUGIN_TYPE,
-+ .id = SHA256_32_DIGEST_ID,
-+ .pops = NULL,
-+ .label = "sha256_32",
-+ .desc = "sha256_32 digest transform",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fipsize = sizeof(__u32), /* presumably the fingerprint size in bytes (the "_32" of the name) -- confirm */
-+ .alloc = alloc_sha256,
-+ .free = free_sha256
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/dir/dir.h linux-2.6.24/fs/reiser4/plugin/dir/dir.h
---- linux-2.6.24.orig/fs/reiser4/plugin/dir/dir.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/dir/dir.h 2008-01-25 11:39:06.960213871 +0300
-@@ -0,0 +1,36 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* this file contains declarations of methods implementing directory plugins */
-+
-+#if !defined( __REISER4_DIR_H__ )
-+#define __REISER4_DIR_H__
-+
-+/*#include "../../key.h"
-+
-+#include <linux/fs.h>*/
-+
-+/* declarations of functions implementing HASHED_DIR_PLUGIN_ID dir plugin */
-+
-+/* "hashed" directory methods of dir plugin */
-+void build_entry_key_hashed(const struct inode *, const struct qstr *,
-+ reiser4_key *);
-+
-+/* declarations of functions implementing SEEKABLE_HASHED_DIR_PLUGIN_ID dir plugin */
-+
-+/* "seekable" directory methods of dir plugin */
-+void build_entry_key_seekable(const struct inode *, const struct qstr *,
-+ reiser4_key *);
-+
-+/* __REISER4_DIR_H__ */
-+#endif
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/dir/hashed_dir.c linux-2.6.24/fs/reiser4/plugin/dir/hashed_dir.c
---- linux-2.6.24.orig/fs/reiser4/plugin/dir/hashed_dir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/dir/hashed_dir.c 2008-01-25 11:39:06.960213871 +0300
-@@ -0,0 +1,81 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Directory plugin using hashes (see fs/reiser4/plugin/hash.c) to map file
-+ names to the files. */
-+
-+/*
-+ * Hashed directory logically consists of persistent directory
-+ * entries. Directory entry is a pair of a file name and a key of stat-data of
-+ * a file that has this name in the given directory.
-+ *
-+ * Directory entries are stored in the tree in the form of directory
-+ * items. Directory item should implement dir_entry_ops portion of item plugin
-+ * interface (see plugin/item/item.h). Hashed directory interacts with
-+ * directory item plugin exclusively through dir_entry_ops operations.
-+ *
-+ * Currently there are two implementations of directory items: "simple
-+ * directory item" (plugin/item/sde.[ch]), and "compound directory item"
-+ * (plugin/item/cde.[ch]) with the latter being the default.
-+ *
-+ * There is, however some delicate way through which directory code interferes
-+ * with item plugin: key assignment policy. A key for a directory item is
-+ * chosen by directory code, and as described in kassign.c, this key contains
-+ * a portion of file name. Directory item uses this knowledge to avoid storing
-+ * this portion of file name twice: in the key and in the directory item body.
-+ *
-+ */
-+
-+#include "../../inode.h"
-+
-+void complete_entry_key(const struct inode *, const char *name,
-+ int len, reiser4_key * result);
-+
-+/* Implementation of the build_entry_key method of the dir plugin for
-+ HASHED_DIR_PLUGIN_ID: initializes @result, sets its locality to the oid of @dir and its type to KEY_FILE_NAME_MINOR, then (for every name except ".") calls complete_entry_key() to fill in the part that depends on the file name.
-+ */
-+void build_entry_key_hashed(const struct inode *dir, /* directory where entry is
-+ * (or will be) in.*/
-+ const struct qstr *qname, /* name of file referenced
-+ * by this entry */
-+ reiser4_key * result /* resulting key of directory
-+ * entry */ )
-+{
-+ const char *name;
-+ int len;
-+
-+ assert("nikita-1139", dir != NULL);
-+ assert("nikita-1140", qname != NULL);
-+ assert("nikita-1141", qname->name != NULL);
-+ assert("nikita-1142", result != NULL);
-+
-+ name = qname->name;
-+ len = qname->len;
-+
-+ assert("nikita-2867", strlen(name) == len); /* the stored length must match the NUL-terminated length */
-+
-+ reiser4_key_init(result);
-+ /* locality of directory entry's key is objectid of parent
-+ directory */
-+ set_key_locality(result, get_inode_oid(dir));
-+ /* minor packing locality is constant */
-+ set_key_type(result, KEY_FILE_NAME_MINOR);
-+ /* dot is a special case---we always want it to be the first entry in
-+ a directory. Actually, we just want it to have the smallest
-+ directory entry key, so the name-dependent part is left as reiser4_key_init() set it.
-+ */
-+ if (len == 1 && name[0] == '.')
-+ return;
-+
-+ /* initialize part of entry key which depends on file name */
-+ complete_entry_key(dir, name, len, result);
-+}
-+
-+/* Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/dir/Makefile linux-2.6.24/fs/reiser4/plugin/dir/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/dir/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/dir/Makefile 2008-01-25 11:39:06.960213871 +0300
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += dir_plugins.o
-+
-+dir_plugins-objs := \
-+ hashed_dir.o \
-+ seekable_dir.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/dir/seekable_dir.c linux-2.6.24/fs/reiser4/plugin/dir/seekable_dir.c
---- linux-2.6.24.orig/fs/reiser4/plugin/dir/seekable_dir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/dir/seekable_dir.c 2008-01-25 11:39:06.960213871 +0300
-@@ -0,0 +1,46 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../../inode.h"
-+
-+/* Implementation of the build_entry_key method of the dir
-+ plugin for SEEKABLE_HASHED_DIR_PLUGIN_ID.
-+ This is for directories where we want repeatable and restartable readdir()
-+ even with a 32bit user level struct dirent (readdir(3)): the key's objectid is the low 31 bits of the name hash and its offset is always 0.
-+*/
-+void
-+build_entry_key_seekable(const struct inode *dir, const struct qstr *name,
-+ reiser4_key * result)
-+{
-+ oid_t objectid;
-+
-+ assert("nikita-2283", dir != NULL);
-+ assert("nikita-2284", name != NULL);
-+ assert("nikita-2285", name->name != NULL);
-+ assert("nikita-2286", result != NULL);
-+
-+ reiser4_key_init(result);
-+ /* locality of directory entry's key is objectid of parent
-+ directory */
-+ set_key_locality(result, get_inode_oid(dir));
-+ /* minor packing locality is constant */
-+ set_key_type(result, KEY_FILE_NAME_MINOR);
-+ /* dot is a special case---we always want it to be the first entry in
-+ a directory. Actually, we just want it to have the smallest
-+ directory entry key, so objectid and offset are left as reiser4_key_init() set them.
-+ */
-+ if ((name->len == 1) && (name->name[0] == '.'))
-+ return;
-+
-+ /* objectid of key is 31 lowest bits of hash, as computed by the directory's hash plugin. */
-+ objectid =
-+ inode_hash_plugin(dir)->hash(name->name,
-+ (int)name->len) & 0x7fffffff;
-+
-+ assert("nikita-2303", !(objectid & ~KEY_OBJECTID_MASK));
-+ set_key_objectid(result, objectid);
-+
-+ /* offset is always 0. */
-+ set_key_offset(result, (__u64) 0);
-+ return;
-+}
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/dir_plugin_common.c linux-2.6.24/fs/reiser4/plugin/dir_plugin_common.c
---- linux-2.6.24.orig/fs/reiser4/plugin/dir_plugin_common.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/dir_plugin_common.c 2008-01-25 11:39:06.964214902 +0300
-@@ -0,0 +1,872 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for most of methods of
-+ directory plugin
-+*/
-+
-+#include "../inode.h"
-+
-+int reiser4_find_entry(struct inode *dir, struct dentry *name,
-+ lock_handle *, znode_lock_mode, reiser4_dir_entry_desc *);
-+int reiser4_lookup_name(struct inode *parent, struct dentry *dentry, reiser4_key * key);
-+void check_light_weight(struct inode *inode, struct inode *parent);
-+
-+/* Common implementation of the get_parent method of the dir plugin:
-+ looks up the ".." entry of @child, reads the inode it names and returns an anonymous dentry for it, or an ERR_PTR on failure.
-+ This is used by NFS kernel server to "climb" up directory tree to check permissions.
-+ */
-+struct dentry *get_parent_common(struct inode *child)
-+{
-+ struct super_block *s;
-+ struct inode *parent;
-+ struct dentry dotdot;
-+ struct dentry *dentry;
-+ reiser4_key key;
-+ int result;
-+
-+ /*
-+ * lookup dotdot entry, using a temporary on-stack dentry named "..".
-+ */
-+
-+ s = child->i_sb;
-+ memset(&dotdot, 0, sizeof(dotdot));
-+ dotdot.d_name.name = "..";
-+ dotdot.d_name.len = 2;
-+ dotdot.d_op = &get_super_private(s)->ops.dentry; /* reiser4_lookup_name() asserts this d_op */
-+
-+ result = reiser4_lookup_name(child, &dotdot, &key);
-+ if (result != 0)
-+ return ERR_PTR(result);
-+
-+ parent = reiser4_iget(s, &key, 1);
-+ if (!IS_ERR(parent)) {
-+ /*
-+ * FIXME-NIKITA dubious: attributes are inherited from @child
-+ * to @parent. But:
-+ *
-+ * (*) this is the only thing we can do
-+ *
-+ * (*) attributes of light-weight object are inherited
-+ * from a parent through which object was looked up first,
-+ * so it is ambiguous anyway.
-+ *
-+ */
-+ check_light_weight(parent, child);
-+ reiser4_iget_complete(parent);
-+ dentry = d_alloc_anon(parent);
-+ if (dentry == NULL) {
-+ iput(parent); /* drop the inode reference taken by reiser4_iget() */
-+ dentry = ERR_PTR(RETERR(-ENOMEM));
-+ } else
-+ dentry->d_op = &get_super_private(s)->ops.dentry;
-+ } else if (PTR_ERR(parent) == -ENOENT)
-+ dentry = ERR_PTR(RETERR(-ESTALE)); /* a missing parent inode is reported as -ESTALE */
-+ else
-+ dentry = (void *)parent; /* pass any other ERR_PTR from reiser4_iget() through unchanged */
-+ return dentry;
-+}
-+
-+/* Common implementation of the is_name_acceptable method of the dir
-+ plugin: returns non-zero iff @len does not exceed reiser4_max_filename_len(@inode). @name is only checked by an assertion.
-+ */
-+int is_name_acceptable_common(const struct inode *inode, /* directory to check */
-+ const char *name UNUSED_ARG, /* name to check */
-+ int len /* @name's length */ )
-+{
-+ assert("nikita-733", inode != NULL);
-+ assert("nikita-734", name != NULL);
-+ assert("nikita-735", len > 0);
-+
-+ return len <= reiser4_max_filename_len(inode);
-+}
-+
-+/* there is no common implementation of build_entry_key method of dir
-+ plugin. See plugin/dir/hashed_dir.c:build_entry_key_hashed() or
-+ plugin/dir/seekable_dir.c:build_entry_key_seekable() for example
-+*/
-+
-+/* Common implementation of the build_readdir_key method of the dir
-+ plugin: builds in @result the key for the readdir position saved in the file's fsdata.
-+ Returns the result of extract_key_from_de_id(), or the error from reiser4_get_file_fsdata(). See reiser4_readdir_common for more details.
-+*/
-+int build_readdir_key_common(struct file *dir /* directory being read */ ,
-+ reiser4_key * result /* where to store key */ )
-+{
-+ reiser4_file_fsdata *fdata;
-+ struct inode *inode;
-+
-+ assert("nikita-1361", dir != NULL);
-+ assert("nikita-1362", result != NULL);
-+ assert("nikita-1363", dir->f_dentry != NULL);
-+ inode = dir->f_dentry->d_inode;
-+ assert("nikita-1373", inode != NULL);
-+
-+ fdata = reiser4_get_file_fsdata(dir);
-+ if (IS_ERR(fdata))
-+ return PTR_ERR(fdata);
-+ assert("nikita-1364", fdata != NULL);
-+ return extract_key_from_de_id(get_inode_oid(inode),
-+ &fdata->dir.readdir.position.
-+ dir_entry_key, result);
-+
-+}
-+
-+void reiser4_adjust_dir_file(struct inode *, const struct dentry *, int offset,
-+ int adj);
-+
-+/* Common implementation of the add_entry method of the dir plugin: reserves space, checks that the name in @where is not yet present in @object, then hands the insertion to the directory item plugin and bumps the directory's i_size.
-+ Returns 0 on success, -EEXIST if the name already exists, -ENOSPC if space cannot be reserved, or another error from reiser4_get_dentry_fsdata(), the lookup or the insertion. */
-+int reiser4_add_entry_common(struct inode *object, /* directory to add new name
-+ * in */
-+ struct dentry *where, /* new name */
-+ reiser4_object_create_data * data, /* parameters of
-+ * new object; not used in this function */
-+ reiser4_dir_entry_desc * entry /* parameters of
-+ * new directory
-+ * entry */)
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ struct reiser4_dentry_fsdata *fsdata;
-+ reiser4_block_nr reserve;
-+
-+ assert("nikita-1114", object != NULL);
-+ assert("nikita-1250", where != NULL);
-+
-+ fsdata = reiser4_get_dentry_fsdata(where);
-+ if (unlikely(IS_ERR(fsdata)))
-+ return PTR_ERR(fsdata);
-+
-+ reserve = inode_dir_plugin(object)->estimate.add_entry(object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ init_lh(&lh);
-+ coord = &fsdata->dec.entry_coord;
-+ coord_clear_iplug(coord);
-+
-+ /* check for this entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(object, where, &lh, ZNODE_WRITE_LOCK,
-+ entry);
-+ if (likely(result == -ENOENT)) {
-+ /* the name is not there yet: add new entry. Just pass control
-+ to the directory item plugin. */
-+ assert("nikita-1709", inode_dir_item_plugin(object));
-+ assert("nikita-2230", coord->node == lh.node);
-+ reiser4_seal_done(&fsdata->dec.entry_seal);
-+ result =
-+ inode_dir_item_plugin(object)->s.dir.add_entry(object,
-+ coord, &lh,
-+ where,
-+ entry);
-+ if (result == 0) {
-+ reiser4_adjust_dir_file(object, where,
-+ fsdata->dec.pos + 1, +1);
-+ INODE_INC_FIELD(object, i_size); /* i_size of a directory is used as its entry count here */
-+ }
-+ } else if (result == 0) { /* lookup succeeded, so the name is already taken */
-+ assert("nikita-2232", coord->node == lh.node);
-+ result = RETERR(-EEXIST);
-+ }
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+/**
-+ * rem_entry - remove entry from directory item
-+ * @dir: directory the entry is removed from
-+ * @dentry: dentry of the entry; supplies the name and the child inode
-+ * @entry: description of the entry, passed through to the item plugin
-+ * @coord: coord of the entry being removed
-+ * @lh: lock handle, passed through to the item plugin
-+ *
-+ * Checks (only if REISER4_DEBUG) that coordinate @coord really refers to an entry for the child inode of @dentry, and calls item plugin
-+ * method to cut entry. Returns the item plugin's result, or -EIO if the debug check fails.
-+ */
-+static int
-+rem_entry(struct inode *dir, struct dentry *dentry,
-+ reiser4_dir_entry_desc * entry, coord_t * coord, lock_handle * lh)
-+{
-+ item_plugin *iplug;
-+ struct inode *child;
-+
-+ iplug = inode_dir_item_plugin(dir);
-+ child = dentry->d_inode;
-+ assert("nikita-3399", child != NULL);
-+
-+ /* check that we are really destroying an entry for @child */
-+ if (REISER4_DEBUG) {
-+ int result;
-+ reiser4_key key;
-+
-+ result = iplug->s.dir.extract_key(coord, &key);
-+ if (result != 0)
-+ return result;
-+ if (get_key_objectid(&key) != get_inode_oid(child)) {
-+ warning("nikita-3397",
-+ "rem_entry: %#llx != %#llx\n",
-+ get_key_objectid(&key), /* NOTE(review): unlike the next argument this one is not cast to unsigned long long -- confirm it matches %llx on all arches */
-+ (unsigned long long)get_inode_oid(child));
-+ return RETERR(-EIO);
-+ }
-+ }
-+ return iplug->s.dir.rem_entry(dir, &dentry->d_name, coord, lh, entry);
-+}
-+
-+/**
-+ * reiser4_rem_entry_common - remove entry from a directory
-+ * @dir: directory to remove entry from
-+ * @dentry: name that is being removed
-+ * @entry: description of entry being removed
-+ *
-+ * This is common implementation of rem_entry method of dir plugin. Returns 0 on success, -ENOSPC if space cannot be reserved, -EIO if @dir has no entries left to account for, or the error from the lookup, reiser4_get_dentry_fsdata() or the item plugin.
-+ */
-+int reiser4_rem_entry_common(struct inode *dir,
-+ struct dentry *dentry,
-+ reiser4_dir_entry_desc *entry)
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ struct reiser4_dentry_fsdata *fsdata;
-+ __u64 tograb;
-+
-+ assert("nikita-1124", dir != NULL);
-+ assert("nikita-1125", dentry != NULL);
-+
-+ tograb = inode_dir_plugin(dir)->estimate.rem_entry(dir);
-+ result = reiser4_grab_space(tograb, BA_CAN_COMMIT | BA_RESERVED);
-+ if (result != 0)
-+ return RETERR(-ENOSPC);
-+
-+ init_lh(&lh);
-+
-+ /* check for this entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(dir, dentry, &lh, ZNODE_WRITE_LOCK, entry);
-+ fsdata = reiser4_get_dentry_fsdata(dentry);
-+ if (IS_ERR(fsdata)) {
-+ done_lh(&lh); /* release the lock handle before bailing out */
-+ return PTR_ERR(fsdata);
-+ }
-+
-+ coord = &fsdata->dec.entry_coord;
-+
-+ assert("nikita-3404",
-+ get_inode_oid(dentry->d_inode) != get_inode_oid(dir) ||
-+ dir->i_size <= 1);
-+
-+ coord_clear_iplug(coord);
-+ if (result == 0) {
-+ /* entry was found: remove it. Just pass control to the directory
-+ item plugin. */
-+ assert("vs-542", inode_dir_item_plugin(dir));
-+ reiser4_seal_done(&fsdata->dec.entry_seal);
-+ reiser4_adjust_dir_file(dir, dentry, fsdata->dec.pos, -1);
-+ result =
-+ WITH_COORD(coord,
-+ rem_entry(dir, dentry, entry, coord, &lh));
-+ if (result == 0) {
-+ if (dir->i_size >= 1)
-+ INODE_DEC_FIELD(dir, i_size);
-+ else { /* an entry was removed although i_size says the directory had none */
-+ warning("nikita-2509", "Dir %llu is runt",
-+ (unsigned long long)
-+ get_inode_oid(dir));
-+ result = RETERR(-EIO);
-+ }
-+
-+ assert("nikita-3405", dentry->d_inode->i_nlink != 1 ||
-+ dentry->d_inode->i_size != 2 ||
-+ inode_dir_plugin(dentry->d_inode) == NULL);
-+ }
-+ }
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+static reiser4_block_nr estimate_init(struct inode *parent,
-+ struct inode *object);
-+static int create_dot_dotdot(struct inode *object, struct inode *parent);
-+
-+/* Common implementation of the init method of the dir plugin:
-+ reserves the space estimated by estimate_init() and creates the "." and ".." entries. Returns -ENOSPC if the reservation fails, otherwise the result of create_dot_dotdot().
-+*/
-+int reiser4_dir_init_common(struct inode *object, /* new directory */
-+ struct inode *parent, /* parent directory */
-+ reiser4_object_create_data * data /* info passed
-+ * to us, this
-+ * is filled by
-+ * reiser4()
-+ * syscall in
-+ * particular; only checked by assertions here */)
-+{
-+ reiser4_block_nr reserve;
-+
-+ assert("nikita-680", object != NULL);
-+ assert("nikita-681", S_ISDIR(object->i_mode));
-+ assert("nikita-682", parent != NULL);
-+ assert("nikita-684", data != NULL);
-+ assert("nikita-686", data->id == DIRECTORY_FILE_PLUGIN_ID);
-+ assert("nikita-687", object->i_mode & S_IFDIR);
-+
-+ reserve = estimate_init(parent, object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ return create_dot_dotdot(object, parent);
-+}
-+
-+/* Common implementation of the done method of the dir plugin:
-+ removes the "." entry of @object. Failure to remove it is only warned about: the function returns 0 unless space cannot be reserved (-ENOSPC).
-+*/
-+int reiser4_dir_done_common(struct inode *object /* object being deleted */ )
-+{
-+ int result;
-+ reiser4_block_nr reserve;
-+ struct dentry goodby_dots;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-1449", object != NULL);
-+
-+ if (reiser4_inode_get_flag(object, REISER4_NO_SD)) /* nothing to do for an object flagged REISER4_NO_SD */
-+ return 0;
-+
-+ /* of course, this can be rewritten to sweep everything in one
-+ reiser4_cut_tree(). */
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* FIXME: this done method is called from reiser4_delete_dir_common which
-+ * reserved space already */
-+ reserve = inode_dir_plugin(object)->estimate.rem_entry(object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT | BA_RESERVED))
-+ return RETERR(-ENOSPC);
-+
-+ memset(&goodby_dots, 0, sizeof goodby_dots); /* temporary on-stack dentry naming "." */
-+ entry.obj = goodby_dots.d_inode = object;
-+ goodby_dots.d_name.name = ".";
-+ goodby_dots.d_name.len = 1;
-+ result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
-+ reiser4_free_dentry_fsdata(&goodby_dots);
-+ if (unlikely(result != 0 && result != -ENOMEM && result != -ENOENT))
-+ /* only worth a warning
-+
-+ "values of \ eB\ f will give rise to dom!\n"
-+ -- v6src/s2/mv.c:89
-+ */
-+ warning("nikita-2252", "Cannot remove dot of %llu: %i",
-+ (unsigned long long)get_inode_oid(object), result); /* %llu matches the unsigned cast */
-+ return 0;
-+}
-+
-+/* Common implementation of the attach method of the dir plugin: does nothing beyond asserting that both inodes are non-NULL, and returns 0.
-+*/
-+int reiser4_attach_common(struct inode *child UNUSED_ARG,
-+ struct inode *parent UNUSED_ARG)
-+{
-+ assert("nikita-2647", child != NULL);
-+ assert("nikita-2648", parent != NULL);
-+
-+ return 0;
-+}
-+
-+/* Common implementation of the detach method of the dir plugin:
-+ removes the ".." entry of @object and, if that succeeds, decreases nlink on @parent. Returns the result of the entry removal.
-+*/
-+int reiser4_detach_common(struct inode *object, struct inode *parent)
-+{
-+ int result;
-+ struct dentry goodby_dots;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-2885", object != NULL);
-+ assert("nikita-2886", !reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* NOTE-NIKITA this only works if @parent is -the- parent of
-+ @object, viz. object whose key is stored in dotdot
-+ entry. Wouldn't work with hard-links on directories. */
-+ memset(&goodby_dots, 0, sizeof goodby_dots); /* temporary on-stack dentry naming ".." */
-+ entry.obj = goodby_dots.d_inode = parent;
-+ goodby_dots.d_name.name = "..";
-+ goodby_dots.d_name.len = 2;
-+ result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
-+ reiser4_free_dentry_fsdata(&goodby_dots);
-+ if (result == 0) {
-+ /* the dot should be the only entry remaining at this time... */
-+ assert("nikita-3400",
-+ object->i_size == 1 && object->i_nlink <= 2);
-+#if 0
-+ /* and, together with the only name directory can have, they
-+ * provide the last 2 remaining references. If we get
-+ * here as part of error handling during mkdir, @object
-+ * possibly has no name yet, so its nlink == 1. If we get here
-+ * from rename (targeting empty directory), it has no name
-+ * already, so its nlink == 1. */
-+ assert("nikita-3401",
-+ object->i_nlink == 2 || object->i_nlink == 1);
-+#endif
-+
-+ /* decrement nlink of the directory that the removed ".." pointed
-+ to */
-+ reiser4_del_nlink(parent, NULL, 0);
-+ }
-+ return result;
-+}
-+
-+/* Common implementation of the estimate.add_entry method of the
-+ dir plugin.
-+ The estimate assumes that adding an entry inserts one
-+ unit into an item, so it is estimate_one_insert_into_item() for the tree of @inode.
-+*/
-+reiser4_block_nr estimate_add_entry_common(const struct inode * inode)
-+{
-+ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
-+}
-+
-+/* Common implementation of the estimate.rem_entry method of the dir
-+ plugin: returns estimate_one_item_removal() for the tree of @inode.
-+*/
-+reiser4_block_nr estimate_rem_entry_common(const struct inode * inode)
-+{
-+ return estimate_one_item_removal(reiser4_tree_by_inode(inode));
-+}
-+
-+/* Common implementation of the estimate.unlink method of the dir
-+ plugin: one entry removal on @object plus twice the stat-data update estimate of @parent.
-+*/
-+reiser4_block_nr
-+dir_estimate_unlink_common(const struct inode * parent,
-+ const struct inode * object)
-+{
-+ reiser4_block_nr res;
-+
-+ /* hashed_rem_entry(object) */
-+ res = inode_dir_plugin(object)->estimate.rem_entry(object);
-+ /* del_nlink(parent); NOTE(review): the factor of 2 is not explained here -- confirm what the second update accounts for */
-+ res += 2 * inode_file_plugin(parent)->estimate.update(parent);
-+
-+ return res;
-+}
-+
-+/*
-+ * helper for inode_ops ->lookup() and dir plugin's ->get_parent()
-+ * methods: if @inode is a light-weight file, set up its credentials (uid and gid, copied from @parent),
-+ * which are not stored in the stat-data in this case
-+ */
-+void check_light_weight(struct inode *inode, struct inode *parent)
-+{
-+ if (reiser4_inode_get_flag(inode, REISER4_LIGHT_WEIGHT)) {
-+ inode->i_uid = parent->i_uid;
-+ inode->i_gid = parent->i_gid;
-+ /* clear light-weight flag. If the inode is later read by any
-+ other name, its [ug]id will not change. */
-+ reiser4_inode_clr_flag(inode, REISER4_LIGHT_WEIGHT);
-+ }
-+}
-+
-+/* Looks for the name specified in @dentry in directory @parent; if the name is
-+ found, the key of the object the found entry points to is stored in @key. Returns 0 on success, -ENAMETOOLONG if the dir plugin rejects the name, or the error from reiser4_get_dentry_fsdata(), the lookup or the key extraction. */
-+int reiser4_lookup_name(struct inode *parent, /* inode of directory to lookup for
-+ * name in */
-+ struct dentry *dentry, /* name to look for */
-+ reiser4_key * key /* place to store key */ )
-+{
-+ int result;
-+ coord_t *coord;
-+ lock_handle lh;
-+ const char *name;
-+ int len;
-+ reiser4_dir_entry_desc entry; /* NOTE(review): not zeroed here, unlike in the other callers in this file -- confirm reiser4_find_entry() only writes it */
-+ struct reiser4_dentry_fsdata *fsdata;
-+
-+ assert("nikita-1247", parent != NULL);
-+ assert("nikita-1248", dentry != NULL);
-+ assert("nikita-1123", dentry->d_name.name != NULL);
-+ assert("vs-1486",
-+ dentry->d_op == &get_super_private(parent->i_sb)->ops.dentry);
-+
-+ name = dentry->d_name.name;
-+ len = dentry->d_name.len;
-+
-+ if (!inode_dir_plugin(parent)->is_name_acceptable(parent, name, len))
-+ /* some arbitrary error code to return */
-+ return RETERR(-ENAMETOOLONG);
-+
-+ fsdata = reiser4_get_dentry_fsdata(dentry);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+
-+ coord = &fsdata->dec.entry_coord;
-+ coord_clear_iplug(coord);
-+ init_lh(&lh);
-+
-+ /* find entry in a directory. This is plugin method. */
-+ result = reiser4_find_entry(parent, dentry, &lh, ZNODE_READ_LOCK,
-+ &entry);
-+ if (result == 0) {
-+ /* entry was found, extract object key from it. */
-+ result =
-+ WITH_COORD(coord,
-+ item_plugin_by_coord(coord)->s.dir.
-+ extract_key(coord, key));
-+ }
-+ done_lh(&lh);
-+ return result;
-+
-+}
-+
-+/* helper for reiser4_dir_init_common(): estimate number of blocks to reserve for adding the "." and ".." entries and for the nlink update that follows each of them (the steps of create_dot_dotdot()) */
-+static reiser4_block_nr
-+estimate_init(struct inode *parent, struct inode *object)
-+{
-+ reiser4_block_nr res = 0;
-+
-+ assert("vpf-321", parent != NULL);
-+ assert("vpf-322", object != NULL);
-+
-+ /* hashed_add_entry(object): the "." entry */
-+ res += inode_dir_plugin(object)->estimate.add_entry(object);
-+ /* reiser4_add_nlink(object) */
-+ res += inode_file_plugin(object)->estimate.update(object);
-+ /* hashed_add_entry(object): the ".." entry */
-+ res += inode_dir_plugin(object)->estimate.add_entry(object);
-+ /* reiser4_add_nlink(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+
-+ return res; /* hand the accumulated estimate to the caller instead of a constant 0 */
-+}
-+
-+/* helper function for reiser4_dir_init_common(). Creates "." and ".." in @object and bumps nlink of @object and @parent accordingly; returns 0 or the first error encountered. */
-+static int create_dot_dotdot(struct inode *object /* object to create dot and
-+ * dotdot for */ ,
-+ struct inode *parent /* parent of @object */)
-+{
-+ int result;
-+ struct dentry dots_entry;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-688", object != NULL);
-+ assert("nikita-689", S_ISDIR(object->i_mode));
-+ assert("nikita-691", parent != NULL);
-+
-+ /* We store dot and dotdot as normal directory entries. This is
-+ not necessary, because almost all information stored in them
-+ is already in the stat-data of directory, the only thing
-+ being missed is objectid of grand-parent directory that can
-+ easily be added there as extension.
-+
-+ But it is done the way it is done, because not storing dot
-+ and dotdot will lead to the following complications:
-+
-+ . special case handling in ->lookup().
-+ . addition of another extension to the sd.
-+ . dependency on key allocation policy for stat data.
-+
-+ */
-+
-+ memset(&entry, 0, sizeof entry);
-+ memset(&dots_entry, 0, sizeof dots_entry); /* temporary on-stack dentry, first used for ".", then reused for ".." */
-+ entry.obj = dots_entry.d_inode = object;
-+ dots_entry.d_name.name = ".";
-+ dots_entry.d_name.len = 1;
-+ result = reiser4_add_entry_common(object, &dots_entry, NULL, &entry);
-+ reiser4_free_dentry_fsdata(&dots_entry);
-+
-+ if (result == 0) {
-+ result = reiser4_add_nlink(object, object, 0);
-+ if (result == 0) {
-+ entry.obj = dots_entry.d_inode = parent;
-+ dots_entry.d_name.name = "..";
-+ dots_entry.d_name.len = 2;
-+ result = reiser4_add_entry_common(object,
-+ &dots_entry, NULL, &entry);
-+ reiser4_free_dentry_fsdata(&dots_entry);
-+ /* if creation of ".." failed, iput() will delete
-+ object with ".". */
-+ if (result == 0) {
-+ result = reiser4_add_nlink(parent, object, 0);
-+ if (result != 0)
-+ /*
-+ * if we failed to bump i_nlink, try
-+ * to remove ".."; the result of that attempt is ignored
-+ */
-+ reiser4_detach_common(object, parent);
-+ }
-+ }
-+ }
-+
-+ if (result != 0) {
-+ /*
-+ * in the case of error, at least update stat-data so that
-+ * ->i_nlink updates are not lingering.
-+ */
-+ reiser4_update_sd(object);
-+ reiser4_update_sd(parent);
-+ }
-+
-+ return result;
-+}
-+
-+/*
-+ * return 0 iff @coord contains a directory entry for the file with the name
-+ * @name; return 1 if the names differ, and -EIO if the item at @coord has no plugin or is not of the item type @dir is built of.
-+ */
-+static int
-+check_item(const struct inode *dir, const coord_t * coord, const char *name)
-+{
-+ item_plugin *iplug;
-+ char buf[DE_NAME_BUF_LEN]; /* scratch buffer handed to extract_name() */
-+
-+ iplug = item_plugin_by_coord(coord);
-+ if (iplug == NULL) {
-+ warning("nikita-1135", "Cannot get item plugin");
-+ print_coord("coord", coord, 1);
-+ return RETERR(-EIO);
-+ } else if (item_id_by_coord(coord) !=
-+ item_id_by_plugin(inode_dir_item_plugin(dir))) {
-+ /* item id of the current item does not match the id of the items
-+ the directory is built of */
-+ warning("nikita-1136", "Wrong item plugin");
-+ print_coord("coord", coord, 1);
-+ return RETERR(-EIO);
-+ }
-+ assert("nikita-1137", iplug->s.dir.extract_name);
-+
-+ /* Compare name stored in this entry with name we are looking for.
-+
-+ NOTE-NIKITA Here should go code for support of something like
-+ unicode, code tables, etc.
-+ */
-+ return !!strcmp(name, iplug->s.dir.extract_name(coord, buf)); /* !! normalizes strcmp()'s result to 0 or 1 */
-+}
-+/* Run check_item() on @coord for the name in @name, wrapped in WITH_COORD(); returns what that evaluates to. */
-+static int
-+check_entry(const struct inode *dir, coord_t * coord, const struct qstr *name)
-+{
-+ return WITH_COORD(coord, check_item(dir, coord, name->name));
-+}
-+
-+/*
-+ * argument package used by entry_actor() to scan entries with identical keys.
-+ */
-+struct entry_actor_args {
-+ /* name we are looking for */
-+ const char *name;
-+ /* key of directory entry. entry_actor() scans through the sequence of
-+ * items/units having the same key */
-+ reiser4_key *key;
-+ /* how many entries with a duplicate key were scanned so far. */
-+ int non_uniq;
-+#if REISER4_USE_COLLISION_LIMIT
-+ /* scan limit: entry_actor() fails with -EBUSY once non_uniq exceeds it */
-+ int max_non_uniq;
-+#endif
-+ /* return parameter: set to true, if ->name wasn't found */
-+ int not_found;
-+ /* what type of lock to take when moving to the next node during
-+ * scan */
-+ znode_lock_mode mode;
-+
-+ /* last coord that was visited during scan */
-+ coord_t last_coord;
-+ /* last node locked during scan */
-+ lock_handle last_lh;
-+ /* inode of directory */
-+ const struct inode *inode;
-+};
-+
-+/* Function called by reiser4_find_entry() (passed to reiser4_iterate_tree()) to look for given name in the directory.
-+ Returns check_item()'s result for the unit; 0 with ->not_found set once the key changes; negative on error. */
-+static int entry_actor(reiser4_tree * tree UNUSED_ARG /* tree being scanned */ ,
-+ coord_t * coord /* current coord */ ,
-+ lock_handle * lh /* current lock handle */ ,
-+ void *entry_actor_arg /* argument to scan */ )
-+{
-+ reiser4_key unit_key;
-+ struct entry_actor_args *args;
-+
-+ assert("nikita-1131", tree != NULL);
-+ assert("nikita-1132", coord != NULL);
-+ assert("nikita-1133", entry_actor_arg != NULL);
-+
-+ args = entry_actor_arg;
-+ ++args->non_uniq;
-+#if REISER4_USE_COLLISION_LIMIT
-+ if (args->non_uniq > args->max_non_uniq) {
-+ args->not_found = 1;
-+ /* hash collision overflow. */
-+ return RETERR(-EBUSY);
-+ }
-+#endif
-+
-+ /*
-+ * did we just reach the end of the sequence of items/units with
-+ * identical keys?
-+ */
-+ if (!keyeq(args->key, unit_key_by_coord(coord, &unit_key))) {
-+ assert("nikita-1791",
-+ keylt(args->key, unit_key_by_coord(coord, &unit_key)));
-+ args->not_found = 1;
-+ args->last_coord.between = AFTER_UNIT;
-+ return 0;
-+ }
-+
-+ coord_dup(&args->last_coord, coord);
-+ /*
-+ * did scan just move to the next node?
-+ */
-+ if (args->last_lh.node != lh->node) {
-+ int lock_result;
-+
-+ /*
-+ * if so, drop the lock on the previous node and lock the new one with the mode requested by the caller
-+ */
-+ done_lh(&args->last_lh);
-+ assert("nikita-1896", znode_is_any_locked(lh->node));
-+ lock_result = longterm_lock_znode(&args->last_lh, lh->node,
-+ args->mode, ZNODE_LOCK_HIPRI);
-+ if (lock_result != 0)
-+ return lock_result;
-+ }
-+ return check_item(args->inode, coord, args->name);
-+}
-+
-+/* Look for given @name within directory @dir.
-+
-+ This is called during lookup, creation and removal of directory
-+ entries and on reiser4_rename_common
-+
-+ First calculate key that directory entry for @name would have. Search for this key in the tree. If such key
-+ is found, scan all items with the same key, checking name in each directory entry along the way.
-+ Returns 0 if the entry was found, -ENOENT if the scan ended without a match; any other lookup/scan result as is.
-+*/
-+int reiser4_find_entry(struct inode *dir, /* directory to scan */
-+ struct dentry *de, /* name to search for */
-+ lock_handle * lh, /* resulting lock handle */
-+ znode_lock_mode mode, /* required lock mode */
-+ reiser4_dir_entry_desc * entry /* parameters of found
-+ directory entry */)
-+{
-+ const struct qstr *name;
-+ seal_t *seal;
-+ coord_t *coord;
-+ int result;
-+ __u32 flags;
-+ struct de_location *dec;
-+ struct reiser4_dentry_fsdata *fsdata;
-+
-+ assert("nikita-1130", lh != NULL);
-+ assert("nikita-1128", dir != NULL);
-+
-+ name = &de->d_name;
-+ assert("nikita-1129", name != NULL);
-+
-+ /* dentry private data don't require lock, because dentry
-+ manipulations are protected by i_mutex on parent.
-+
-+ This is not so for inodes, because there is no -the- parent in
-+ inode case.
-+ */
-+ fsdata = reiser4_get_dentry_fsdata(de);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+ dec = &fsdata->dec;
-+
-+ coord = &dec->entry_coord; /* the search works directly on the coord cached in the dentry */
-+ coord_clear_iplug(coord);
-+ seal = &dec->entry_seal;
-+ /* compose key of directory entry for @name */
-+ inode_dir_plugin(dir)->build_entry_key(dir, name, &entry->key);
-+
-+ if (reiser4_seal_is_set(seal)) {
-+ /* check seal */
-+ result = reiser4_seal_validate(seal, coord, &entry->key,
-+ lh, mode, ZNODE_LOCK_LOPRI);
-+ if (result == 0) {
-+ /* key was found. Check that it is really item we are
-+ looking for. */
-+ result = check_entry(dir, coord, name);
-+ if (result == 0)
-+ return 0;
-+ }
-+ }
-+ flags = (mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
-+ /*
-+ * find place in the tree where directory item should be located.
-+ */
-+ result = reiser4_object_lookup(dir, &entry->key, coord, lh, mode,
-+ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL,
-+ flags, NULL /*ra_info */ );
-+ if (result == CBK_COORD_FOUND) {
-+ struct entry_actor_args arg;
-+
-+ /* fast path: no hash collisions */
-+ result = check_entry(dir, coord, name);
-+ if (result == 0) {
-+ reiser4_seal_init(seal, coord, &entry->key);
-+ dec->pos = 0;
-+ } else if (result > 0) {
-+ /* Name mismatch under the same key: iterate through all units with the same keys. */
-+ arg.name = name->name;
-+ arg.key = &entry->key;
-+ arg.not_found = 0;
-+ arg.non_uniq = 0;
-+#if REISER4_USE_COLLISION_LIMIT
-+ arg.max_non_uniq = max_hash_collisions(dir);
-+ assert("nikita-2851", arg.max_non_uniq > 1);
-+#endif
-+ arg.mode = mode;
-+ arg.inode = dir;
-+ coord_init_zero(&arg.last_coord);
-+ init_lh(&arg.last_lh);
-+
-+ result = reiser4_iterate_tree
-+ (reiser4_tree_by_inode(dir),
-+ coord, lh,
-+ entry_actor, &arg, mode, 1);
-+ /* if end of the tree or extent was reached during
-+ scanning. */
-+ if (arg.not_found || (result == -E_NO_NEIGHBOR)) {
-+ /* step back to the last unit that still had our key */
-+ done_lh(lh);
-+
-+ result = zload(arg.last_coord.node);
-+ if (result == 0) {
-+ coord_clear_iplug(&arg.last_coord);
-+ coord_dup(coord, &arg.last_coord);
-+ move_lh(lh, &arg.last_lh);
-+ result = RETERR(-ENOENT);
-+ zrelse(arg.last_coord.node);
-+ --arg.non_uniq; /* the unit that ended the scan is not counted */
-+ }
-+ }
-+
-+ done_lh(&arg.last_lh);
-+ if (result == 0)
-+ reiser4_seal_init(seal, coord, &entry->key);
-+
-+ if (result == 0 || result == -ENOENT) {
-+ assert("nikita-2580", arg.non_uniq > 0);
-+ dec->pos = arg.non_uniq - 1; /* number of same-key units scanned, minus one */
-+ }
-+ }
-+ } else
-+ dec->pos = -1; /* key itself was not found in the tree */
-+ return result;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format40.c linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format40.c
---- linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format40.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format40.c 2008-01-25 11:39:06.964214902 +0300
-@@ -0,0 +1,655 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../key.h"
-+#include "../node/node.h"
-+#include "../space/space_allocator.h"
-+#include "disk_format40.h"
-+#include "../plugin.h"
-+#include "../../txnmgr.h"
-+#include "../../jnode.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../wander.h"
-+#include "../../inode.h"
-+#include "../../ktxnmgrd.h"
-+#include "../../status_flags.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/buffer_head.h>
-+
-+/* reiser 4.0 default disk layout */
-+
-+/* Amount of free blocks needed to perform release_format40 when fs gets
-+ mounted RW: 1 for SB, 1 for non-leaves in overwrite set, 2 for tx header
-+ & tx record. */
-+#define RELEASE_RESERVED 4
-+
-+/* The greatest supported format40 version number */
-+#define FORMAT40_VERSION PLUGIN_LIBRARY_VERSION
-+
-+/* This flag (top bit of the 32-bit on-disk version field) indicates that backup should be updated
-+ (the update is performed by fsck). Unsigned constant: shifting 1 into the sign bit of int is undefined. */
-+#define FORMAT40_UPDATE_BACKUP (1U << 31)
-+
-+/* functions to access fields of format40_disk_super_block: each reads a little-endian field via get_unaligned() and returns it in CPU byte order */
-+static __u64 get_format40_block_count(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->block_count)); /* number of blocks in the filesystem */
-+}
-+
-+static __u64 get_format40_free_blocks(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->free_blocks)); /* number of free blocks */
-+}
-+
-+static __u64 get_format40_root_block(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->root_block)); /* filesystem tree root block */
-+}
-+
-+static __u16 get_format40_tree_height(const format40_disk_super_block * sb)
-+{
-+ return le16_to_cpu(get_unaligned(&sb->tree_height)); /* height of filesystem tree */
-+}
-+
-+static __u64 get_format40_file_count(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->file_count)); /* number of files in the filesystem */
-+}
-+
-+static __u64 get_format40_oid(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->oid)); /* smallest free objectid */
-+}
-+
-+static __u32 get_format40_mkfs_id(const format40_disk_super_block * sb)
-+{
-+ return le32_to_cpu(get_unaligned(&sb->mkfs_id)); /* unique identifier of fs */
-+}
-+
-+static __u64 get_format40_flags(const format40_disk_super_block * sb)
-+{
-+ return le64_to_cpu(get_unaligned(&sb->flags)); /* bit mask; check_key_format() tests bit FORMAT40_LARGE_KEYS */
-+}
-+
-+static __u32 get_format40_version(const format40_disk_super_block * sb)
-+{
-+ return le32_to_cpu(get_unaligned(&sb->version)) &
-+ ~FORMAT40_UPDATE_BACKUP; /* version number proper: the backup-update flag bit is masked out */
-+}
-+
-+static int update_backup_version(const format40_disk_super_block * sb)
-+{
-+ return (le32_to_cpu(get_unaligned(&sb->version)) &
-+ FORMAT40_UPDATE_BACKUP); /* non-zero iff the on-disk version field carries the backup-update flag */
-+}
-+
-+static int update_disk_version(const format40_disk_super_block * sb)
-+{
-+ return (get_format40_version(sb) < FORMAT40_VERSION); /* true if the on-disk version is older than this code's */
-+}
-+
-+static int incomplete_compatibility(const format40_disk_super_block * sb)
-+{
-+ return (get_format40_version(sb) > FORMAT40_VERSION); /* true if the disk was written by a newer format40 version */
-+}
-+
-+static format40_super_info *get_sb_info(struct super_block *super)
-+{
-+ return &get_super_private(super)->u.format40; /* format40 part of the reiser4 private super block data */
-+}
-+
-+static int consult_diskmap(struct super_block *s) /* set locations of journal footer/header and super block */
-+{
-+ format40_super_info *info;
-+ journal_location *jloc;
-+
-+ info = get_sb_info(s);
-+ jloc = &get_super_private(s)->jloc;
-+ /* Default format-specific locations, used if there is nothing in
-+ * diskmap (or CONFIG_REISER4_BADBLOCKS is off) */
-+ jloc->footer = FORMAT40_JOURNAL_FOOTER_BLOCKNR;
-+ jloc->header = FORMAT40_JOURNAL_HEADER_BLOCKNR;
-+ info->loc.super = FORMAT40_OFFSET / s->s_blocksize; /* byte offset converted to a block number */
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JF,
-+ &jloc->footer);
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JH,
-+ &jloc->header);
-+ reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_SUPER,
-+ &info->loc.super);
-+#endif
-+ return 0; /* always 0: the diskmap lookup results are not checked */
-+}
-+
-+/* read the format40 super block from block info->loc.super and check its magic; no alternative copy is tried here
-+ and the journal header/footer (jf/jh) block numbers are not changed. On success seeds the in-memory block
-+ counters and returns the buffer head (callers brelse() it); ERR_PTR(-EIO) on read failure, ERR_PTR(-EINVAL) on bad magic */
-+static struct buffer_head *find_a_disk_format40_super_block(struct super_block
-+ *s)
-+{
-+ struct buffer_head *super_bh;
-+ format40_disk_super_block *disk_sb;
-+ format40_super_info *info;
-+
-+ assert("umka-487", s != NULL);
-+
-+ info = get_sb_info(s);
-+
-+ super_bh = sb_bread(s, info->loc.super);
-+ if (super_bh == NULL)
-+ return ERR_PTR(RETERR(-EIO));
-+
-+ disk_sb = (format40_disk_super_block *) super_bh->b_data;
-+ if (strncmp(disk_sb->magic, FORMAT40_MAGIC, sizeof(FORMAT40_MAGIC))) {
-+ brelse(super_bh);
-+ return ERR_PTR(RETERR(-EINVAL));
-+ }
-+
-+ reiser4_set_block_count(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)));
-+ reiser4_set_data_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)) -
-+ le64_to_cpu(get_unaligned(&disk_sb->free_blocks))); /* used = total - free */
-+ reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
-+
-+ return super_bh;
-+}
-+
-+/* find the most recent version of super block. This is called after journal is
-+ replayed. Note: @s is marked UNUSED_ARG but is in fact passed on. */
-+static struct buffer_head *read_super_block(struct super_block *s UNUSED_ARG)
-+{
-+ /* Here the most recent superblock copy has to be read. However, as
-+ journal replay isn't complete, we are using
-+ find_a_disk_format40_super_block() function. */
-+ return find_a_disk_format40_super_block(s);
-+}
-+
-+static int get_super_jnode(struct super_block *s) /* load and pin a jnode for the super block; 0 or jload() error */
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ jnode *sb_jnode;
-+ int ret;
-+
-+ sb_jnode = reiser4_alloc_io_head(&get_sb_info(s)->loc.super); /* NOTE(review): result is not NULL-checked - confirm it cannot fail */
-+
-+ ret = jload(sb_jnode);
-+
-+ if (ret) {
-+ reiser4_drop_io_head(sb_jnode); /* load failed: give the jnode back */
-+ return ret;
-+ }
-+
-+ pin_jnode_data(sb_jnode); /* undone by unpin_jnode_data() in done_super_jnode() */
-+ jrelse(sb_jnode);
-+
-+ sbinfo->u.format40.sb_jnode = sb_jnode; /* read back by log_super_format40() and done_super_jnode() */
-+
-+ return 0;
-+}
-+
-+static void done_super_jnode(struct super_block *s) /* undo get_super_jnode(); no-op if no jnode was stored */
-+{
-+ jnode *sb_jnode = get_super_private(s)->u.format40.sb_jnode;
-+
-+ if (sb_jnode) {
-+ unpin_jnode_data(sb_jnode);
-+ reiser4_drop_io_head(sb_jnode); /* the stored sb_jnode pointer itself is not cleared here */
-+ }
-+}
-+
-+typedef enum format40_init_stage { /* last step try_init_format40() completed; drives cleanup in init_format_format40() */
-+ NONE_DONE = 0,
-+ CONSULT_DISKMAP, /* consult_diskmap() done */
-+ FIND_A_SUPER, /* super block with format40 magic was read */
-+ INIT_JOURNAL_INFO, /* reiser4_init_journal_info() done */
-+ INIT_STATUS, /* reiser4_status_init() done */
-+ JOURNAL_REPLAY, /* reiser4_journal_replay() done */
-+ READ_SUPER, /* read_super_block() done */
-+ KEY_CHECK, /* check_key_format() passed */
-+ INIT_OID, /* oid_init_allocator() done */
-+ INIT_TREE, /* reiser4_init_tree() done */
-+ JOURNAL_RECOVER, /* reiser4_journal_recover_sb_data() done */
-+ INIT_SA, /* sa_init_allocator() done */
-+ INIT_JNODE, /* never assigned by try_init_format40(): it goes from INIT_SA straight to ALL_DONE */
-+ ALL_DONE /* get_super_jnode() done, initialization complete */
-+} format40_init_stage;
-+
-+static format40_disk_super_block *copy_sb(const struct buffer_head *super_bh) /* kmalloc'ed copy; caller kfree()s it */
-+{
-+ format40_disk_super_block *sb_copy;
-+
-+ sb_copy = kmalloc(sizeof(format40_disk_super_block),
-+ reiser4_ctx_gfp_mask_get());
-+ if (sb_copy == NULL)
-+ return ERR_PTR(RETERR(-ENOMEM)); /* failure is reported as ERR_PTR, never NULL */
-+ memcpy(sb_copy, ((format40_disk_super_block *) super_bh->b_data),
-+ sizeof(format40_disk_super_block));
-+ return sb_copy;
-+}
-+
-+static int check_key_format(const format40_disk_super_block *sb_copy) /* 0 if on-disk key size matches the build, else -EINVAL */
-+{
-+ if (!equi(REISER4_LARGE_KEY,
-+ get_format40_flags(sb_copy) & (1 << FORMAT40_LARGE_KEYS))) { /* compile-time setting vs. on-disk flag bit */
-+ warning("nikita-3228", "Key format mismatch. "
-+ "Only %s keys are supported.",
-+ REISER4_LARGE_KEY ? "large" : "small");
-+ return RETERR(-EINVAL);
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * try_init_format40 - run the format40 mount-time initialization steps in order
-+ * @super: super block being mounted
-+ * @stage: out: last step that completed (NONE_DONE if none), used by the caller to undo the work
-+ * Returns 0 on success (then *@stage == ALL_DONE), the failing step's error code otherwise.
-+ */
-+static int try_init_format40(struct super_block *super,
-+ format40_init_stage *stage)
-+{
-+ int result;
-+ struct buffer_head *super_bh;
-+ reiser4_super_info_data *sbinfo;
-+ format40_disk_super_block *sb_copy;
-+ tree_level height;
-+ reiser4_block_nr root_block;
-+ node_plugin *nplug;
-+
-+ assert("vs-475", super != NULL);
-+ assert("vs-474", get_super_private(super));
-+
-+ *stage = NONE_DONE;
-+
-+ result = consult_diskmap(super);
-+ if (result)
-+ return result;
-+ *stage = CONSULT_DISKMAP;
-+
-+ super_bh = find_a_disk_format40_super_block(super);
-+ if (IS_ERR(super_bh))
-+ return PTR_ERR(super_bh);
-+ brelse(super_bh); /* this first read only validates the magic and seeds the block counters */
-+ *stage = FIND_A_SUPER;
-+
-+ /* ok, we are sure that filesystem format is a format40 format */
-+
-+ /* map jnodes for journal control blocks (header, footer) to disk */
-+ result = reiser4_init_journal_info(super);
-+ if (result)
-+ return result;
-+ *stage = INIT_JOURNAL_INFO;
-+
-+ /* ok, we are sure that filesystem format is a format40 format */
-+ /* Now check its state */
-+ result = reiser4_status_init(FORMAT40_STATUS_BLOCKNR);
-+ if (result != 0 && result != -EINVAL)
-+ /* -EINVAL means there is no magic, so probably just old
-+ * fs. */
-+ return result;
-+ *stage = INIT_STATUS;
-+
-+ result = reiser4_status_query(NULL, NULL);
-+ if (result == REISER4_STATUS_MOUNT_WARN)
-+ notice("vpf-1363", "Warning: mounting %s with errors.",
-+ super->s_id);
-+ if (result == REISER4_STATUS_MOUNT_RO)
-+ notice("vpf-1364", "Warning: mounting %s with fatal errors,"
-+ " forcing read-only mount.", super->s_id);
-+ result = reiser4_journal_replay(super);
-+ if (result)
-+ return result;
-+ *stage = JOURNAL_REPLAY;
-+
-+ super_bh = read_super_block(super); /* re-read: journal replay may have rewritten the super block */
-+ if (IS_ERR(super_bh))
-+ return PTR_ERR(super_bh);
-+ *stage = READ_SUPER;
-+
-+ /* allocate and make a copy of format40_disk_super_block */
-+ sb_copy = copy_sb(super_bh);
-+ brelse(super_bh);
-+
-+ if (IS_ERR(sb_copy))
-+ return PTR_ERR(sb_copy);
-+ printk("reiser4: %s: found disk format 4.0.%u.\n",
-+ super->s_id,
-+ get_format40_version(sb_copy));
-+ if (incomplete_compatibility(sb_copy))
-+ printk("reiser4: Warning: The last completely supported "
-+ "version of disk format40 is %u. Some objects of "
-+ "the semantic tree can be unaccessible.\n",
-+ FORMAT40_VERSION);
-+ /* make sure that key format of kernel and filesystem match */
-+ result = check_key_format(sb_copy);
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = KEY_CHECK;
-+
-+ result = oid_init_allocator(super, get_format40_file_count(sb_copy),
-+ get_format40_oid(sb_copy));
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = INIT_OID;
-+
-+ /* get things necessary to init reiser4_tree */
-+ root_block = get_format40_root_block(sb_copy);
-+ height = get_format40_tree_height(sb_copy);
-+ nplug = node_plugin_by_id(NODE40_ID);
-+
-+ /* initialize reiser4_super_info_data */
-+ sbinfo = get_super_private(super);
-+ assert("", sbinfo->tree.super == super);
-+ /* init reiser4_tree for the filesystem */
-+ result = reiser4_init_tree(&sbinfo->tree, &root_block, height, nplug);
-+ if (result) {
-+ kfree(sb_copy);
-+ return result;
-+ }
-+ *stage = INIT_TREE;
-+
-+ /*
-+ * initialize reiser4_super_info_data with data from format40 super
-+ * block
-+ */
-+ sbinfo->default_uid = 0;
-+ sbinfo->default_gid = 0;
-+ sbinfo->mkfs_id = get_format40_mkfs_id(sb_copy);
-+ /* number of blocks in filesystem and reserved space */
-+ reiser4_set_block_count(super, get_format40_block_count(sb_copy));
-+ sbinfo->blocks_free = get_format40_free_blocks(sb_copy);
-+ sbinfo->version = get_format40_version(sb_copy);
-+ /* last use of sb_copy: the backup flag must be tested before the copy is freed */
-+ if (update_backup_version(sb_copy))
-+ printk("reiser4: Warning: metadata backup is not updated. "
-+ "Please run 'fsck.reiser4 --fix' on %s.\n",
-+ super->s_id);
-+ kfree(sb_copy);
-+
-+ sbinfo->fsuid = 0;
-+ sbinfo->fs_flags |= (1 << REISER4_ADG); /* hard links for directories
-+ * are not supported */
-+ sbinfo->fs_flags |= (1 << REISER4_ONE_NODE_PLUGIN); /* all nodes in
-+ * layout 40 are
-+ * of one
-+ * plugin */
-+ /* sbinfo->tmgr is initialized already */
-+
-+ /* recover sb data which were logged separately from sb block */
-+
-+ /* NOTE-NIKITA: reiser4_journal_recover_sb_data() calls
-+ * oid_init_allocator() and reiser4_set_free_blocks() with new
-+ * data. What's the reason to call them above? */
-+ result = reiser4_journal_recover_sb_data(super);
-+ if (result != 0)
-+ return result;
-+ *stage = JOURNAL_RECOVER;
-+
-+ /*
-+ * Set number of used blocks. The number of used blocks is stored
-+ * neither in on-disk super block nor in the journal footer blocks. At
-+ * this moment actual values of total blocks and free block counters
-+ * are set in the reiser4 super block (in-memory structure) and we can
-+ * calculate number of used blocks from them.
-+ */
-+ reiser4_set_data_blocks(super,
-+ reiser4_block_count(super) -
-+ reiser4_free_blocks(super));
-+
-+#if REISER4_DEBUG
-+ sbinfo->min_blocks_used = 16 /* reserved area */ +
-+ 2 /* super blocks */ +
-+ 2 /* journal footer and header */ ;
-+#endif
-+
-+ /* init disk space allocator */
-+ result = sa_init_allocator(reiser4_get_space_allocator(super),
-+ super, NULL);
-+ if (result)
-+ return result;
-+ *stage = INIT_SA;
-+
-+ result = get_super_jnode(super);
-+ if (result == 0)
-+ *stage = ALL_DONE;
-+ return result;
-+}
-+
-+/* plugin->u.format.init_format: run try_init_format40() and, on failure, undo the steps that had completed */
-+int init_format_format40(struct super_block *s, void *data UNUSED_ARG)
-+{
-+ int result;
-+ format40_init_stage stage;
-+
-+ result = try_init_format40(s, &stage);
-+ switch (stage) { /* cases are ordered last step first and fall through on purpose */
-+ case ALL_DONE:
-+ assert("nikita-3458", result == 0);
-+ break;
-+ case INIT_JNODE:
-+ done_super_jnode(s); /* fall through */
-+ case INIT_SA:
-+ sa_destroy_allocator(reiser4_get_space_allocator(s), s); /* fall through */
-+ case JOURNAL_RECOVER:
-+ case INIT_TREE:
-+ reiser4_done_tree(&get_super_private(s)->tree); /* fall through */
-+ case INIT_OID:
-+ case KEY_CHECK:
-+ case READ_SUPER:
-+ case JOURNAL_REPLAY:
-+ case INIT_STATUS:
-+ reiser4_status_finish(); /* fall through */
-+ case INIT_JOURNAL_INFO:
-+ reiser4_done_journal_info(s); /* fall through */
-+ case FIND_A_SUPER:
-+ case CONSULT_DISKMAP:
-+ case NONE_DONE:
-+ break;
-+ default:
-+ impossible("nikita-3457", "init stage: %i", stage);
-+ }
-+
-+ /* keep the real init error; only a successful rw mount is checked for the release reserve */
-+ if (result == 0 && !rofs_super(s) && reiser4_free_blocks(s) < RELEASE_RESERVED)
-+ result = RETERR(-ENOSPC); /* NOTE(review): nothing set up above is torn down on this path - confirm the caller does it */
-+ return result;
-+}
-+
-+static void pack_format40_super(const struct super_block *s, char *data) /* refresh the mutable fields of the on-disk super block image in @data */
-+{
-+ format40_disk_super_block *super_data =
-+ (format40_disk_super_block *) data;
-+
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("zam-591", data != NULL);
-+
-+ put_unaligned(cpu_to_le64(reiser4_free_committed_blocks(s)),
-+ &super_data->free_blocks);
-+
-+ put_unaligned(cpu_to_le64(sbinfo->tree.root_block),
-+ &super_data->root_block);
-+
-+ put_unaligned(cpu_to_le64(oid_next(s)),
-+ &super_data->oid);
-+
-+ put_unaligned(cpu_to_le64(oids_used(s)),
-+ &super_data->file_count);
-+
-+ put_unaligned(cpu_to_le16(sbinfo->tree.height),
-+ &super_data->tree_height);
-+
-+ if (update_disk_version(super_data)) { /* image is older than FORMAT40_VERSION */
-+ __u32 version = FORMAT40_VERSION | FORMAT40_UPDATE_BACKUP; /* new version, flagged so that fsck updates the backup */
-+
-+ put_unaligned(cpu_to_le32(version), &super_data->version);
-+ }
-+}
-+
-+/* plugin->u.format.log_super
-+ return a jnode which should be added to transaction when the super block
-+ gets logged; the jnode's data is refreshed by pack_format40_super() first */
-+jnode *log_super_format40(struct super_block *s)
-+{
-+ jnode *sb_jnode;
-+
-+ sb_jnode = get_super_private(s)->u.format40.sb_jnode;
-+
-+ jload(sb_jnode); /* NOTE(review): return value ignored; presumably cannot fail because get_super_jnode() pinned the data - confirm */
-+
-+ pack_format40_super(s, jdata(sb_jnode));
-+
-+ jrelse(sb_jnode);
-+
-+ return sb_jnode;
-+}
-+
-+/* plugin->u.format.release: commit pending changes (rw mounts only) and free what init_format_format40() set up; always returns 0 */
-+int release_format40(struct super_block *s)
-+{
-+ int ret;
-+ reiser4_super_info_data *sbinfo;
-+
-+ sbinfo = get_super_private(s);
-+ assert("zam-579", sbinfo != NULL);
-+
-+ if (!rofs_super(s)) {
-+ ret = reiser4_capture_super_block(s);
-+ if (ret != 0)
-+ warning("vs-898",
-+ "reiser4_capture_super_block failed: %d",
-+ ret);
-+
-+ ret = txnmgr_force_commit_all(s, 1);
-+ if (ret != 0)
-+ warning("jmacd-74438", "txn_force failed: %d", ret);
-+
-+ all_grabbed2free(); /* both failures above are only logged; teardown continues */
-+ }
-+
-+ sa_destroy_allocator(&sbinfo->space_allocator, s);
-+ reiser4_done_journal_info(s);
-+ done_super_jnode(s);
-+
-+ rcu_barrier();
-+ reiser4_done_tree(&sbinfo->tree);
-+ /* call finish_rcu(), because some znode were "released" in
-+ * reiser4_done_tree(). */
-+ rcu_barrier();
-+
-+ return 0;
-+}
-+
-+#define FORMAT40_ROOT_LOCALITY 41 /* locality of the root directory key built below */
-+#define FORMAT40_ROOT_OBJECTID 42 /* objectid of the root directory key built below */
-+
-+/* plugin->u.format.root_dir_key: returns a pointer to a static constant key; @super is not used */
-+const reiser4_key *root_dir_key_format40(const struct super_block *super
-+ UNUSED_ARG)
-+{
-+ static const reiser4_key FORMAT40_ROOT_DIR_KEY = {
-+ .el = {
-+ __constant_cpu_to_le64((FORMAT40_ROOT_LOCALITY << 4) | KEY_SD_MINOR),
-+#if REISER4_LARGE_KEY
-+ ON_LARGE_KEY(0ull,) /* extra zero element present only with large keys */
-+#endif
-+ __constant_cpu_to_le64(FORMAT40_ROOT_OBJECTID),
-+ 0ull
-+ }
-+ };
-+
-+ return &FORMAT40_ROOT_DIR_KEY;
-+}
-+
-+/* plugin->u.format.check_open.
-+ Check the opened object for validity. For now it checks only that the oid and the
-+ locality do not exceed the largest oid in use; this can be improved later and may
-+ depend on the mount options. Returns 0 if both are in range, -EIO otherwise. */
-+int check_open_format40(const struct inode *object)
-+{
-+ oid_t max, oid;
-+
-+ max = oid_next(object->i_sb) - 1; /* largest oid handed out so far */
-+
-+ /* Check the oid. */
-+ oid = get_inode_oid(object);
-+ if (oid > max) {
-+ warning("vpf-1360", "The object with the oid %llu "
-+ "greater then the max used oid %llu found.",
-+ (unsigned long long)oid, (unsigned long long)max);
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ /* Check the locality. */
-+ oid = reiser4_inode_data(object)->locality_id; /* the same variable is reused for the locality id */
-+ if (oid > max) {
-+ warning("vpf-1361", "The object with the locality %llu "
-+ "greater then the max used oid %llu found.",
-+ (unsigned long long)oid, (unsigned long long)max);
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.format.version_update.
-+ Perform all version update operations from the on-disk
-+ format40_disk_super_block.version on disk to FORMAT40_VERSION.
-+ */
-+int version_update_format40(struct super_block *super) {
-+ txn_handle * trans;
-+ lock_handle lh;
-+ txn_atom *atom;
-+ int ret;
-+
-+ /* Nothing to do if RO mount or the on-disk version is not less. */
-+ if (super->s_flags & MS_RDONLY)
-+ return 0;
-+
-+ if (get_super_private(super)->version >= FORMAT40_VERSION)
-+ return 0;
-+
-+ printk("reiser4: Updating disk format to 4.0.%u. The reiser4 metadata "
-+ "backup is left unchanged. Please run 'fsck.reiser4 --fix' "
-+ "on %s to update it too.\n", FORMAT40_VERSION, super->s_id);
-+
-+ /* Mark the uber znode dirty to call log_super on write_logs. */
-+ init_lh(&lh);
-+ ret = get_uber_znode(reiser4_get_tree(super), ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_HIPRI, &lh);
-+ if (ret != 0)
-+ return ret;
-+
-+ znode_make_dirty(lh.node);
-+ done_lh(&lh);
-+
-+ /* The backup blocks are not updated here (see the message printed above). */
-+
-+ /* Force write_logs immediately. */
-+ trans = get_current_context()->trans;
-+ atom = get_current_atom_locked(); /* used only by the assertion below */
-+ assert("vpf-1906", atom != NULL);
-+
-+ spin_lock_txnh(trans); /* NOTE(review): not unlocked here; presumably force_commit_atom() releases it - confirm */
-+ return force_commit_atom(trans);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format40.h linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format40.h
---- linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format40.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format40.h 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,109 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* this file contains:
-+ - definition of ondisk super block of standard disk layout for
-+ reiser 4.0 (layout 40)
-+ - definition of layout 40 specific portion of in-core super block
-+ - declarations of functions implementing methods of layout plugin
-+ for layout 40
-+ - declarations of functions used to get/set fields in layout 40 super block
-+*/
-+
-+#ifndef __DISK_FORMAT40_H__
-+#define __DISK_FORMAT40_H__
-+
-+/* magic for default reiser4 layout, and the byte offset of the format40 super block */
-+#define FORMAT40_MAGIC "ReIsEr40FoRmAt"
-+#define FORMAT40_OFFSET (REISER4_MASTER_OFFSET + PAGE_CACHE_SIZE)
-+
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+typedef enum { /* bit numbers within format40_disk_super_block.flags */
-+ FORMAT40_LARGE_KEYS
-+} format40_flags;
-+
-+/* ondisk super block for format 40. It is 512 bytes long; the leading comments give each field's byte offset */
-+typedef struct format40_disk_super_block {
-+ /* 0 */ d64 block_count;
-+ /* number of blocks in a filesystem */
-+ /* 8 */ d64 free_blocks;
-+ /* number of free blocks */
-+ /* 16 */ d64 root_block;
-+ /* filesystem tree root block */
-+ /* 24 */ d64 oid;
-+ /* smallest free objectid */
-+ /* 32 */ d64 file_count;
-+ /* number of files in a filesystem */
-+ /* 40 */ d64 flushes;
-+ /* number of times super block was
-+ flushed. Needed if format 40
-+ will have few super blocks */
-+ /* 48 */ d32 mkfs_id;
-+ /* unique identifier of fs */
-+ /* 52 */ char magic[16];
-+ /* magic string ReIsEr40FoRmAt */
-+ /* 68 */ d16 tree_height;
-+ /* height of filesystem tree */
-+ /* 70 */ d16 formatting_policy;
-+ /* not used anymore */
-+ /* 72 */ d64 flags; /* bit mask, bits numbered by format40_flags */
-+ /* 80 */ d32 version;
-+ /* on-disk format version number
-+ initially assigned by mkfs as the greatest format40
-+ version number supported by reiser4progs and updated
-+ at mount time in accordance with the greatest format40
-+ version number supported by kernel.
-+ Is used by fsck to catch possible corruption and
-+ for various compatibility issues */
-+ /* 84 */ char not_used[428];
-+} format40_disk_super_block;
-+
-+/* format 40 specific part of reiser4_super_info_data */
-+typedef struct format40_super_info {
-+/* format40_disk_super_block actual_sb; */
-+ jnode *sb_jnode; /* jnode of the super block, set by get_super_jnode() */
-+ struct {
-+ reiser4_block_nr super; /* block number of the super block, set by consult_diskmap() */
-+ } loc;
-+} format40_super_info;
-+
-+/* Block numbers of journal header, journal footer and status block: 3, 4 and 5 past REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE. */
-+#define FORMAT40_JOURNAL_HEADER_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 3)
-+
-+#define FORMAT40_JOURNAL_FOOTER_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 4)
-+
-+#define FORMAT40_STATUS_BLOCKNR \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 5)
-+
-+/* Diskmap declarations: plugin id and value labels passed to reiser4_get_diskmap_value() by consult_diskmap() */
-+#define FORMAT40_PLUGIN_DISKMAP_ID ((REISER4_FORMAT_PLUGIN_TYPE<<16) | (FORMAT40_ID))
-+#define FORMAT40_SUPER 1 /* super block location */
-+#define FORMAT40_JH 2 /* journal header location */
-+#define FORMAT40_JF 3 /* journal footer location */
-+
-+/* declarations of functions implementing methods of layout plugin for
-+ format 40. The functions themselves are in disk_format40.c */
-+extern int init_format_format40(struct super_block *, void *data);
-+extern const reiser4_key *root_dir_key_format40(const struct super_block *);
-+extern int release_format40(struct super_block *s);
-+extern jnode *log_super_format40(struct super_block *s);
-+extern int check_open_format40(const struct inode *object);
-+extern int version_update_format40(struct super_block *super);
-+
-+/* __DISK_FORMAT40_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format.c linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format.c
---- linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format.c 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,38 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../plugin_header.h"
-+#include "disk_format40.h"
-+#include "disk_format.h"
-+#include "../plugin.h"
-+
-+/* table of disk layout plugins, indexed by disk_format_id; format40 is the only entry */
-+disk_format_plugin format_plugins[LAST_FORMAT_ID] = {
-+ [FORMAT40_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMAT_PLUGIN_TYPE,
-+ .id = FORMAT40_ID,
-+ .pops = NULL,
-+ .label = "reiser40",
-+ .desc = "standard disk layout for reiser40",
-+ .linkage = {NULL, NULL}
-+ },
-+ .init_format = init_format_format40, /* the methods below are implemented in disk_format40.c */
-+ .root_dir_key = root_dir_key_format40,
-+ .release = release_format40,
-+ .log_super = log_super_format40,
-+ .check_open = check_open_format40,
-+ .version_update = version_update_format40
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format.h linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format.h
---- linux-2.6.24.orig/fs/reiser4/plugin/disk_format/disk_format.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/disk_format/disk_format.h 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,27 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* identifiers for disk layouts, they are also used as indexes in array of disk
-+ plugins */
-+
-+#if !defined( __REISER4_DISK_FORMAT_H__ )
-+#define __REISER4_DISK_FORMAT_H__
-+
-+typedef enum {
-+ /* standard reiser4 disk layout plugin id */
-+ FORMAT40_ID,
-+ LAST_FORMAT_ID /* number of ids; used as the size of format_plugins[] */
-+} disk_format_id;
-+
-+/* __REISER4_DISK_FORMAT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/disk_format/Makefile linux-2.6.24/fs/reiser4/plugin/disk_format/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/disk_format/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/disk_format/Makefile 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += df_plugins.o
-+
-+# objects that make up the composite df_plugins.o
-+df_plugins-objs := disk_format40.o
-+df_plugins-objs += disk_format.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/fibration.c linux-2.6.24/fs/reiser4/plugin/fibration.c
---- linux-2.6.24.orig/fs/reiser4/plugin/fibration.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/fibration.c 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,175 @@
-+/* Copyright 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Directory fibrations */
-+
-+/*
-+ * Suppose we have a directory tree with sources of some project. During
-+ * compilation .o files are created within this tree. This makes access
-+ * to the original source files less efficient, because source files are
-+ * now "diluted" by object files: default directory plugin uses prefix
-+ * of a file name as a part of the key for directory entry (and this
-+ * part is also inherited by the key of file body). This means that
-+ * foo.o will be located close to foo.c and foo.h in the tree.
-+ *
-+ * To avoid this effect directory plugin fills highest 7 (unused
-+ * originally) bits of the second component of the directory entry key
-+ * by bit-pattern depending on the file name (see
-+ * fs/reiser4/kassign.c:build_entry_key_common()). These bits are called
-+ * "fibre". Fibre of the file name key is inherited by key of stat data
-+ * and keys of file body (in the case of REISER4_LARGE_KEY).
-+ *
-+ * Fibre for a given file is chosen by per-directory fibration
-+ * plugin. Names within given fibre are ordered lexicographically.
-+ */
-+
-+#include "../debug.h"
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../super.h"
-+#include "../inode.h"
-+
-+#include <linux/types.h>
-+
-+/* Bit position at which a fibre number is placed inside a 64-bit value;
-+ * with a shift of 57 only the top 7 bits of the __u64 can hold it. */
-+static const int fibre_shift = 57;
-+
-+/* Convert fibre number @n to its in-key representation: @n is widened to
-+ * __u64 and shifted left by fibre_shift. */
-+#define FIBRE_NO(n) (((__u64)(n)) << fibre_shift)
-+
-+/*
-+ * Lexicographic ("trivial") fibration: every name is assigned fibre 0,
-+ * so @dir, @name and @len are not inspected at all.
-+ */
-+static __u64 fibre_trivial(const struct inode *dir, const char *name, int len)
-+{
-+	const __u64 fibre = FIBRE_NO(0);
-+
-+	return fibre;
-+}
-+
-+/*
-+ * dot-o fibration: a name of more than two characters that ends in ".o"
-+ * gets fibre 1; every other name gets fibre 0.
-+ */
-+static __u64 fibre_dot_o(const struct inode *dir, const char *name, int len)
-+{
-+	int is_object_file;
-+
-+	/* special treatment for .*\.o (at least one character before ".o") */
-+	is_object_file = len > 2 &&
-+			 name[len - 2] == '.' && name[len - 1] == 'o';
-+
-+	return is_object_file ? FIBRE_NO(1) : FIBRE_NO(0);
-+}
-+
-+/*
-+ * ext.1 fibration: a name of more than two characters whose last but one
-+ * character is '.' is put into the fibre numbered by its final character
-+ * (file "foo.h" goes into fibre "h"); all other names share fibre 0.
-+ */
-+static __u64 fibre_ext_1(const struct inode *dir, const char *name, int len)
-+{
-+	/* no single-character extension: default fibre */
-+	if (len <= 2 || name[len - 2] != '.')
-+		return FIBRE_NO(0);
-+
-+	return FIBRE_NO(name[len - 1]);
-+}
-+
-+/*
-+ * ext.3 fibration: for a name of more than four characters with a '.' four
-+ * positions from the end, the fibre number is the sum of the three
-+ * extension characters; all other names share fibre 0.
-+ */
-+static __u64 fibre_ext_3(const struct inode *dir, const char *name, int len)
-+{
-+	/* no three-character extension: default fibre */
-+	if (len <= 4 || name[len - 4] != '.')
-+		return FIBRE_NO(0);
-+
-+	return FIBRE_NO(name[len - 3] + name[len - 2] + name[len - 1]);
-+}
-+
-+/*
-+ * ->change() method of fibration plugins: install @plugin as the fibration
-+ * plugin of directory @inode.
-+ *
-+ * Returns 0 when @inode already uses a fibration plugin with the same id,
-+ * the result of aset_set_unsafe() when is_dir_empty() returns 0, and
-+ * -ENOTEMPTY otherwise. @memb is not used.
-+ */
-+static int change_fibration(struct inode *inode,
-+			    reiser4_plugin * plugin,
-+			    pset_member memb)
-+{
-+	assert("nikita-3503", inode != NULL);
-+	assert("nikita-3504", plugin != NULL);
-+
-+	assert("nikita-3505", is_reiser4_inode(inode));
-+	assert("nikita-3506", inode_dir_plugin(inode) != NULL);
-+	assert("nikita-3507",
-+	       plugin->h.type_id == REISER4_FIBRATION_PLUGIN_TYPE);
-+
-+	/* the requested plugin is already in effect: nothing to do */
-+	if (inode_fibration_plugin(inode) != NULL &&
-+	    inode_fibration_plugin(inode)->h.id == plugin->h.id)
-+		return 0;
-+
-+	if (is_dir_empty(inode) != 0)
-+		return RETERR(-ENOTEMPTY);
-+
-+	return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+			       PSET_FIBRATION, plugin);
-+}
-+
-+/* Generic plugin operations shared by all fibration plugins in this file:
-+ * only ->change is provided (change_fibration), the other methods are NULL. */
-+static reiser4_plugin_ops fibration_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_fibration
-+};
-+
-+/* Table of fibration plugins, indexed by fibration id. Each entry pairs the
-+ * common plugin header (type, id, shared operations, label, description)
-+ * with the ->fibre function implementing that fibration. */
-+fibration_plugin fibration_plugins[LAST_FIBRATION_ID] = {
-+ [FIBRATION_LEXICOGRAPHIC] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_LEXICOGRAPHIC,
-+ .pops = &fibration_plugin_ops,
-+ .label = "lexicographic",
-+ .desc = "no fibration",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_trivial
-+ },
-+ [FIBRATION_DOT_O] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_DOT_O,
-+ .pops = &fibration_plugin_ops,
-+ .label = "dot-o",
-+ .desc = "fibrate .o files separately",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_dot_o
-+ },
-+ [FIBRATION_EXT_1] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_EXT_1,
-+ .pops = &fibration_plugin_ops,
-+ .label = "ext-1",
-+ .desc = "fibrate file by single character extension",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_ext_1
-+ },
-+ [FIBRATION_EXT_3] = {
-+ .h = {
-+ .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .id = FIBRATION_EXT_3,
-+ .pops = &fibration_plugin_ops,
-+ .label = "ext-3",
-+ .desc = "fibrate file by three character extension",
-+ .linkage = {NULL, NULL}
-+ },
-+ .fibre = fibre_ext_3
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/fibration.h linux-2.6.24/fs/reiser4/plugin/fibration.h
---- linux-2.6.24.orig/fs/reiser4/plugin/fibration.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/fibration.h 2008-01-25 11:39:06.968215932 +0300
-@@ -0,0 +1,37 @@
-+/* Copyright 2004 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Fibration plugin used by hashed directory plugin to segment content
-+ * of directory. See fs/reiser4/plugin/fibration.c for more on this. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_FIBRATION_H__ )
-+#define __FS_REISER4_PLUGIN_FIBRATION_H__
-+
-+#include "plugin_header.h"
-+
-+/* A fibration plugin: the generic plugin header followed by the single
-+ * fibration-specific method. */
-+typedef struct fibration_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+
-+ /* compute the fibre value for the entry @name (of length @len) in @dir */
-+ __u64(*fibre) (const struct inode * dir, const char *name, int len);
-+} fibration_plugin;
-+
-+/* Ids of the fibration plugins. LAST_FIBRATION_ID is the terminating
-+ * enumerator, so its value equals the number of ids listed before it. */
-+typedef enum {
-+ FIBRATION_LEXICOGRAPHIC,
-+ FIBRATION_DOT_O,
-+ FIBRATION_EXT_1,
-+ FIBRATION_EXT_3,
-+ LAST_FIBRATION_ID
-+} reiser4_fibration_id;
-+
-+/* __FS_REISER4_PLUGIN_FIBRATION_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/cryptcompress.c linux-2.6.24/fs/reiser4/plugin/file/cryptcompress.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/cryptcompress.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/cryptcompress.c 2008-01-25 11:40:16.690167725 +0300
-@@ -0,0 +1,3776 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+/*
-+ * Written by Edward Shishkin.
-+ *
-+ * Implementations of inode/file/address_space operations
-+ * specific for cryptcompress file plugin which manages
-+ * regular files built of compressed and(or) encrypted bodies.
-+ * See http://dev.namesys.com/CryptcompressPlugin for details.
-+ */
-+
-+#include "../../inode.h"
-+#include "../cluster.h"
-+#include "../object.h"
-+#include "../../tree_walk.h"
-+#include "cryptcompress.h"
-+
-+#include <linux/pagevec.h>
-+#include <asm/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/writeback.h>
-+#include <linux/random.h>
-+#include <linux/scatterlist.h>
-+
-+/*
-+ Managing primary and secondary caches by Reiser4
-+ cryptcompress file plugin. Synchronization scheme.
-+
-+
-+ +------------------+
-+ +------------------->| tfm stream |
-+ | | (compressed data)|
-+ flush | +------------------+
-+ +-----------------+ |
-+ |(->)longterm lock| V
-+--+ writepages() | | +-***-+ reiser4 +---+
-+ | | +--+ | *** | storage tree | |
-+ | | | +-***-+ (primary cache)| |
-+u | write() (secondary| cache) V / | \ | |
-+s | ----> +----+ +----+ +----+ +----+ +-***** ******* **----+ ----> | d |
-+e | | | |page cluster | | | **disk cluster** | | i |
-+r | <---- +----+ +----+ +----+ +----+ +-***** **********----+ <---- | s |
-+ | read() ^ ^ | | k |
-+ | | (->)longterm lock| | page_io()| |
-+ | | +------+ | |
-+--+ readpages() | | +---+
-+ | V
-+ | +------------------+
-+ +--------------------| tfm stream |
-+ | (plain text) |
-+ +------------------+
-+*/
-+
-+/* Return the cryptcompress-specific part of the reiser4 data of @inode */
-+struct cryptcompress_info *cryptcompress_inode_data(const struct inode *inode)
-+{
-+	reiser4_inode *r4_inode = reiser4_inode_data(inode);
-+
-+	return &r4_inode->file_plugin_data.cryptcompress_info;
-+}
-+
-+/* plugin->u.file.init_inode_data
-+ *
-+ * Reset the cryptcompress part of @inode to its initial state: zero the
-+ * structure, initialize the checkin mutex, set trunc_index to ULONG_MAX,
-+ * turn compression on and set the lattice factor to MIN_LATTICE_FACTOR.
-+ * @crd and @create are only forwarded to init_inode_ordering().
-+ */
-+void init_inode_data_cryptcompress(struct inode *inode,
-+				   reiser4_object_create_data * crd,
-+				   int create)
-+{
-+	struct cryptcompress_info *cc_info = cryptcompress_inode_data(inode);
-+
-+	assert("edward-685", cc_info != NULL);
-+
-+	/* zero everything first, then set the non-zero defaults */
-+	memset(cc_info, 0, sizeof(*cc_info));
-+	mutex_init(&cc_info->checkin_mutex);
-+	cc_info->trunc_index = ULONG_MAX;
-+	turn_on_compression(cc_info);
-+	set_lattice_factor(cc_info, MIN_LATTICE_FACTOR);
-+
-+	init_inode_ordering(inode, crd, create);
-+}
-+
-+/* The following is a part of reiser4 cipher key manager
-+ which is called when opening/creating a cryptcompress file */
-+
-+/* Return the cipher key info attached to @inode (the ->crypt field of its
-+   cryptcompress data) */
-+struct reiser4_crypto_info * inode_crypto_info (struct inode * inode)
-+{
-+	struct cryptcompress_info *cc_info;
-+
-+	assert("edward-90", inode != NULL);
-+	assert("edward-91", reiser4_inode_data(inode) != NULL);
-+
-+	cc_info = cryptcompress_inode_data(inode);
-+	return cc_info->crypt;
-+}
-+
-+/* Store @info (may be NULL) as the cipher key info of @inode. Only the
-+   pointer is written; no reference counting is done here. */
-+static void set_inode_crypto_info (struct inode * inode,
-+ struct reiser4_crypto_info * info)
-+{
-+ cryptcompress_inode_data(inode)->crypt = info;
-+}
-+
-+/* Allocate a zeroed cipher key info for @inode together with a buffer for
-+   the key fingerprint (fipsize bytes of the inode's digest plugin), and
-+   record @inode as its host.
-+   Returns the new info, or ERR_PTR(-ENOMEM) if either allocation fails. */
-+struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode)
-+{
-+	struct reiser4_crypto_info *new_info;
-+	int keyid_len;
-+
-+	new_info = kzalloc(sizeof(*new_info), reiser4_ctx_gfp_mask_get());
-+	if (new_info == NULL)
-+		return ERR_PTR(-ENOMEM);
-+
-+	keyid_len = inode_digest_plugin(inode)->fipsize;
-+	new_info->keyid = kmalloc(keyid_len, reiser4_ctx_gfp_mask_get());
-+	if (new_info->keyid != NULL) {
-+		new_info->host = inode;
-+		return new_info;
-+	}
-+	/* no memory for the fingerprint: undo the first allocation */
-+	kfree(new_info);
-+	return ERR_PTR(-ENOMEM);
-+}
-+
-+#if 0
-+/* allocate/free low-level info for cipher and digest
-+ transforms */
-+/* Disabled code. Allocates the cipher and digest transforms through the
-+ ->alloc methods of the host inode's cipher and digest plugins (a plugin
-+ without ->alloc leaves the respective transform NULL) and stores them
-+ in @info. Returns 0 on success or the error carried by the failed
-+ allocation.
-+ NOTE(review): on digest failure ->free(ctfm) is called whenever the cipher
-+ plugin has ->free, even if ctfm is still NULL because the plugin has no
-+ ->alloc -- confirm this is safe before re-enabling. */
-+static int alloc_crypto_tfms(struct reiser4_crypto_info * info)
-+{
-+ struct crypto_blkcipher * ctfm = NULL;
-+ struct crypto_hash * dtfm = NULL;
-+ cipher_plugin * cplug = inode_cipher_plugin(info->host);
-+ digest_plugin * dplug = inode_digest_plugin(info->host);
-+
-+ if (cplug->alloc) {
-+ ctfm = cplug->alloc();
-+ if (IS_ERR(ctfm)) {
-+ warning("edward-1364",
-+ "Can not allocate info for %s\n",
-+ cplug->h.desc);
-+ return RETERR(PTR_ERR(ctfm));
-+ }
-+ }
-+ info_set_cipher(info, ctfm);
-+ if (dplug->alloc) {
-+ dtfm = dplug->alloc();
-+ if (IS_ERR(dtfm)) {
-+ warning("edward-1365",
-+ "Can not allocate info for %s\n",
-+ dplug->h.desc);
-+ goto unhappy_with_digest;
-+ }
-+ }
-+ info_set_digest(info, dtfm);
-+ return 0;
-+ unhappy_with_digest:
-+ /* roll back the cipher transform that was already installed */
-+ if (cplug->free) {
-+ cplug->free(ctfm);
-+ info_set_cipher(info, NULL);
-+ }
-+ return RETERR(PTR_ERR(dtfm));
-+}
-+#endif
-+
-+/* Release the cipher and digest transforms held by @info through the ->free
-+   methods of the host inode's plugins and clear both slots in @info.
-+   Does nothing when no cipher transform is set. */
-+static void
-+free_crypto_tfms(struct reiser4_crypto_info * info)
-+{
-+	cipher_plugin *cplug;
-+	digest_plugin *dplug;
-+
-+	assert("edward-1366", info != NULL);
-+
-+	if (info_get_cipher(info) == NULL) {
-+		/* without a cipher there must be no digest either */
-+		assert("edward-1601", !info_get_digest(info));
-+		return;
-+	}
-+
-+	cplug = inode_cipher_plugin(info->host);
-+	cplug->free(info_get_cipher(info));
-+	info_set_cipher(info, NULL);
-+
-+	dplug = inode_digest_plugin(info->host);
-+	dplug->free(info_get_digest(info));
-+	info_set_digest(info, NULL);
-+}
-+
-+#if 0
-+/* create a key fingerprint for disk stat-data */
-+/* Disabled code. Copies data->keyid into a buffer zero-padded up to a
-+ multiple of the cipher block size, encrypts that buffer in place, hashes
-+ the encrypted bytes and copies the first fipsize bytes of the digest into
-+ info->keyid. Returns 0 on success, -ENOMEM if a temporary buffer cannot
-+ be allocated, or the error returned by the crypto calls. */
-+static int create_keyid (struct reiser4_crypto_info * info,
-+ struct reiser4_crypto_data * data)
-+{
-+ int ret = -ENOMEM;
-+ size_t blk, pad;
-+ __u8 * dmem;
-+ __u8 * cmem;
-+ struct hash_desc ddesc;
-+ struct blkcipher_desc cdesc;
-+ struct scatterlist sg;
-+
-+ assert("edward-1367", info != NULL);
-+ assert("edward-1368", info->keyid != NULL);
-+
-+ ddesc.tfm = info_get_digest(info);
-+ ddesc.flags = 0;
-+ cdesc.tfm = info_get_cipher(info);
-+ cdesc.flags = 0;
-+
-+ /* dmem receives the digest */
-+ dmem = kmalloc((size_t)crypto_hash_digestsize(ddesc.tfm),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!dmem)
-+ goto exit1;
-+
-+ blk = crypto_blkcipher_blocksize(cdesc.tfm);
-+
-+ /* number of bytes needed to round keyid_size up to a block multiple */
-+ pad = data->keyid_size % blk;
-+ pad = (pad ? blk - pad : 0);
-+
-+ /* cmem holds the zero-padded key id and is encrypted in place */
-+ cmem = kmalloc((size_t)data->keyid_size + pad,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!cmem)
-+ goto exit2;
-+ memcpy(cmem, data->keyid, data->keyid_size);
-+ memset(cmem + data->keyid_size, 0, pad);
-+
-+ sg_init_one(&sg, cmem, data->keyid_size + pad);
-+
-+ ret = crypto_blkcipher_encrypt(&cdesc, &sg, &sg,
-+ data->keyid_size + pad);
-+ if (ret) {
-+ warning("edward-1369",
-+ "encryption failed flags=%x\n", cdesc.flags);
-+ goto exit3;
-+ }
-+ ret = crypto_hash_digest(&ddesc, &sg, sg.length, dmem);
-+ if (ret) {
-+ warning("edward-1602",
-+ "digest failed flags=%x\n", ddesc.flags);
-+ goto exit3;
-+ }
-+ memcpy(info->keyid, dmem, inode_digest_plugin(info->host)->fipsize);
-+ /* the labels below free the temporaries in reverse allocation order */
-+ exit3:
-+ kfree(cmem);
-+ exit2:
-+ kfree(dmem);
-+ exit1:
-+ return ret;
-+}
-+#endif
-+
-+/* Free the key fingerprint buffer of @info. The keyid pointer itself is
-+   left untouched (not reset to NULL). */
-+static void destroy_keyid(struct reiser4_crypto_info * info)
-+{
-+	assert("edward-1370", info != NULL);
-+	assert("edward-1371", info->keyid != NULL);
-+
-+	kfree(info->keyid);
-+}
-+
-+/* Destroy the cipher key info currently attached to @inode: release its
-+   transforms, its fingerprint buffer and finally the info itself. The
-+   pointer stored in the inode is not cleared here. */
-+static void __free_crypto_info (struct inode * inode)
-+{
-+	struct reiser4_crypto_info *victim;
-+
-+	victim = inode_crypto_info(inode);
-+	assert("edward-1372", victim != NULL);
-+
-+	free_crypto_tfms(victim);
-+	destroy_keyid(victim);
-+	kfree(victim);
-+}
-+
-+#if 0
-+/* Disabled code. Mark @info as instantiated by setting ->inst to 1; the
-+ info must not be instantiated already. */
-+static void instantiate_crypto_info(struct reiser4_crypto_info * info)
-+{
-+ assert("edward-1373", info != NULL);
-+ assert("edward-1374", info->inst == 0);
-+ info->inst = 1;
-+}
-+#endif
-+
-+/* Clear the "instantiated" flag (->inst) of @info */
-+static void uninstantiate_crypto_info(struct reiser4_crypto_info * info)
-+{
-+ assert("edward-1375", info != NULL);
-+ info->inst = 0;
-+}
-+
-+#if 0
-+/* Disabled code. Return the "instantiated" flag (->inst) of @info */
-+static int is_crypto_info_instantiated(struct reiser4_crypto_info * info)
-+{
-+ return info->inst;
-+}
-+
-+/* Disabled code. True if @inode has a cipher key info attached and that
-+ info is flagged as instantiated */
-+static int inode_has_cipher_key(struct inode * inode)
-+{
-+ assert("edward-1376", inode != NULL);
-+ return inode_crypto_info(inode) &&
-+ is_crypto_info_instantiated(inode_crypto_info(inode));
-+}
-+#endif
-+
-+/* Final release of the cipher key info attached to @inode: clear its
-+   "instantiated" flag, then destroy it */
-+static void free_crypto_info (struct inode * inode)
-+{
-+	struct reiser4_crypto_info *info = inode_crypto_info(inode);
-+
-+	uninstantiate_crypto_info(info);
-+	__free_crypto_info(inode);
-+}
-+
-+/* Returns 1 if the cipher plugin of @inode is anything other than the
-+   plugin registered under NONE_CIPHER_ID, and 0 otherwise */
-+static int need_cipher(struct inode * inode)
-+{
-+	if (inode_cipher_plugin(inode) == cipher_plugin_by_id(NONE_CIPHER_ID))
-+		return 0;
-+	return 1;
-+}
-+
-+/* Parse @data which contains a (uninstantiated) cipher key imported
-+ from user space, create a low-level cipher info and attach it to
-+ the @object. If success, then info contains an instantiated key */
-+/* Disabled code. Returns the new info on success and an ERR_PTR otherwise
-+ (-EINVAL when the file plugin of @object is not the one registered under
-+ DIRECTORY_FILE_PLUGIN_ID).
-+ NOTE(review): the error path calls __free_crypto_info(object), which frees
-+ whatever inode_crypto_info(object) returns rather than the local @info;
-+ nothing in this function attaches @info to @object first -- verify before
-+ re-enabling. */
-+#if 0
-+struct reiser4_crypto_info * create_crypto_info(struct inode * object,
-+ struct reiser4_crypto_data * data)
-+{
-+ int ret;
-+ struct reiser4_crypto_info * info;
-+
-+ assert("edward-1377", data != NULL);
-+ assert("edward-1378", need_cipher(object));
-+
-+ if (inode_file_plugin(object) !=
-+ file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID))
-+ return ERR_PTR(-EINVAL);
-+
-+ info = reiser4_alloc_crypto_info(object);
-+ if (IS_ERR(info))
-+ return info;
-+ ret = alloc_crypto_tfms(info);
-+ if (ret)
-+ goto err;
-+ /* instantiating a key */
-+ ret = crypto_blkcipher_setkey(info_get_cipher(info),
-+ data->key,
-+ data->keysize);
-+ if (ret) {
-+ warning("edward-1379",
-+ "setkey failed flags=%x",
-+ crypto_blkcipher_get_flags(info_get_cipher(info)));
-+ goto err;
-+ }
-+ info->keysize = data->keysize;
-+ /* compute the fingerprint that identifies this key */
-+ ret = create_keyid(info, data);
-+ if (ret)
-+ goto err;
-+ instantiate_crypto_info(info);
-+ return info;
-+ err:
-+ __free_crypto_info(object);
-+ return ERR_PTR(ret);
-+}
-+#endif
-+
-+/* increment/decrement a load counter when
-+ attaching/detaching the crypto-stat to any object */
-+/* Take one more reference on @info by incrementing its key load counter */
-+static void load_crypto_info(struct reiser4_crypto_info * info)
-+{
-+ assert("edward-1380", info != NULL);
-+ inc_keyload_count(info);
-+}
-+
-+/* Drop one reference on the cipher key info attached to @inode and free
-+   the info once its key load counter reaches zero. The caller must make
-+   sure an info is attached: it is dereferenced unconditionally. */
-+static void unload_crypto_info(struct inode * inode)
-+{
-+	struct reiser4_crypto_info *info = inode_crypto_info(inode);
-+
-+	assert("edward-1381", info->keyload_count > 0);
-+
-+	dec_keyload_count(info);
-+	if (info->keyload_count != 0)
-+		return;
-+
-+	/* final release */
-+	free_crypto_info(inode);
-+}
-+
-+/* attach/detach an existing crypto-stat */
-+/* Attach @info to @inode, which must not have a crypto info yet, and
-+ increment the key load counter of @info. */
-+void reiser4_attach_crypto_info(struct inode * inode,
-+ struct reiser4_crypto_info * info)
-+{
-+ assert("edward-1382", inode != NULL);
-+ assert("edward-1383", info != NULL);
-+ assert("edward-1384", inode_crypto_info(inode) == NULL);
-+
-+ set_inode_crypto_info(inode, info);
-+ load_crypto_info(info);
-+}
-+
-+/* Debug-only helper: returns 1 if a crypto stat can be attached to @host,
-+   i.e. if its file plugin id is CRYPTCOMPRESS_FILE_PLUGIN_ID, else 0 */
-+#if REISER4_DEBUG
-+static int host_allows_crypto_info(struct inode * host)
-+{
-+	file_plugin *fplug = inode_file_plugin(host);
-+
-+	return fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID ? 1 : 0;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* Detach the cipher key info (if any) from @inode: drop this inode's
-+   reference on it and reset the inode's pointer to NULL */
-+static void reiser4_detach_crypto_info(struct inode * inode)
-+{
-+	assert("edward-1385", inode != NULL);
-+	assert("edward-1386", host_allows_crypto_info(inode));
-+
-+	if (inode_crypto_info(inode) != NULL)
-+		unload_crypto_info(inode);
-+
-+	set_inode_crypto_info(inode, NULL);
-+}
-+
-+#if 0
-+
-+/* compare fingerprints of @child and @parent */
-+/* Disabled code. Returns non-zero if the first fipsize bytes (as given by
-+ the digest plugin of @parent) of both key ids are equal. */
-+static int keyid_eq(struct reiser4_crypto_info * child,
-+ struct reiser4_crypto_info * parent)
-+{
-+ return !memcmp(child->keyid,
-+ parent->keyid,
-+ info_digest_plugin(parent)->fipsize);
-+}
-+
-+/* check if a crypto-stat (which is bound to @parent) can be inherited */
-+/* Disabled code. Returns 0 if @child needs no cipher, 1 if @child has no
-+ crypto info yet, 0 if @parent has none; otherwise non-zero only when
-+ cipher plugin, digest plugin, key size and key fingerprint all match. */
-+int can_inherit_crypto_cryptcompress(struct inode *child, struct inode *parent)
-+{
-+ if (!need_cipher(child))
-+ return 0;
-+ /* the child is created */
-+ if (!inode_crypto_info(child))
-+ return 1;
-+ /* the child is looked up */
-+ if (!inode_crypto_info(parent))
-+ return 0;
-+ return (inode_cipher_plugin(child) == inode_cipher_plugin(parent) &&
-+ inode_digest_plugin(child) == inode_digest_plugin(parent) &&
-+ inode_crypto_info(child)->keysize ==
-+ inode_crypto_info(parent)->keysize &&
-+ keyid_eq(inode_crypto_info(child), inode_crypto_info(parent)));
-+}
-+#endif
-+
-+/* helper functions for ->create() method of the cryptcompress plugin */
-+
-+/* If @object has a cipher key info attached, set the CRYPTO_STAT bit in
-+   the extmask of its reiser4 inode data and return 0. Without a key info
-+   return -EINVAL when the inode needs a cipher, and 0 otherwise. */
-+static int inode_set_crypto(struct inode * object)
-+{
-+	if (inode_crypto_info(object)) {
-+		reiser4_inode *info = reiser4_inode_data(object);
-+
-+		info->extmask |= (1 << CRYPTO_STAT);
-+		return 0;
-+	}
-+	if (need_cipher(object))
-+		return RETERR(-EINVAL);
-+
-+	/* the file is not to be encrypted */
-+	return 0;
-+}
-+
-+/* Call the ->init method of the compression plugin of @object, if the
-+   plugin has one, and return its result; return 0 when there is none */
-+static int inode_init_compression(struct inode * object)
-+{
-+	assert("edward-1461", object != NULL);
-+
-+	if (!inode_compression_plugin(object)->init)
-+		return 0;
-+
-+	return inode_compression_plugin(object)->init();
-+}
-+
-+/* Validate the logical cluster size configured for @object.
-+
-+   Returns -EINVAL (after logging a warning) if the cluster size is smaller
-+   than a page, or if the cluster shift is not smaller than the number of
-+   bits in an int; returns 0 otherwise. */
-+static int inode_check_cluster(struct inode * object)
-+{
-+	assert("edward-696", object != NULL);
-+
-+	if (unlikely(inode_cluster_size(object) < PAGE_CACHE_SIZE)) {
-+		warning("edward-1320", "Can not support '%s' "
-+			"logical clusters (less then page size)",
-+			inode_cluster_plugin(object)->h.label);
-+		return RETERR(-EINVAL);
-+	}
-+	/* The comparison must be inside unlikely(): the macro evaluates to
-+	   0 or 1, so comparing its result with the bit width is never true
-+	   and would leave this check permanently disabled. */
-+	if (unlikely(inode_cluster_shift(object) >=
-+		     BITS_PER_BYTE * sizeof(int))) {
-+		warning("edward-1463", "Can not support '%s' "
-+			"logical clusters (too big for transform)",
-+			inode_cluster_plugin(object)->h.label);
-+		return RETERR(-EINVAL);
-+	}
-+	return 0;
-+}
-+
-+/* plugin->destroy_inode()
-+   Detach the cipher key info from @inode; the inode is expected to have
-+   no pages left (INODE_PGCOUNT() == 0). */
-+void destroy_inode_cryptcompress(struct inode * inode)
-+{
-+	assert("edward-1464", INODE_PGCOUNT(inode) == 0);
-+
-+	reiser4_detach_crypto_info(inode);
-+}
-+
-+/* plugin->create_object():
-+. install plugins
-+. attach crypto info if specified
-+. attach compression info if specified
-+. attach cluster info
-+
-+ Returns 0 on success. On any failure the crypto info is detached from
-+ @object and the error code of the failed step is returned. @parent is
-+ only checked by an assertion.
-+*/
-+int create_object_cryptcompress(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data * data)
-+{
-+ int result;
-+ reiser4_inode *info;
-+
-+ assert("edward-23", object != NULL);
-+ assert("edward-24", parent != NULL);
-+ assert("edward-30", data != NULL);
-+ assert("edward-26", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+ assert("edward-27", data->id == CRYPTCOMPRESS_FILE_PLUGIN_ID);
-+
-+ info = reiser4_inode_data(object);
-+
-+ assert("edward-29", info != NULL);
-+
-+ /* set file bit */
-+ info->plugin_mask |= (1 << PSET_FILE);
-+
-+ /* set crypto */
-+ result = inode_set_crypto(object);
-+ if (result)
-+ goto error;
-+ /* set compression */
-+ result = inode_init_compression(object);
-+ if (result)
-+ goto error;
-+ /* set cluster */
-+ result = inode_check_cluster(object);
-+ if (result)
-+ goto error;
-+
-+ /* save everything in disk stat-data */
-+ result = write_sd_by_inode_common(object);
-+ if (!result)
-+ return 0;
-+ /* a failed stat-data write falls through to the common error path */
-+ error:
-+ reiser4_detach_crypto_info(object);
-+ return result;
-+}
-+
-+/* plugin->open()
-+ Nothing is done at open time: always returns 0. */
-+int open_cryptcompress(struct inode * inode, struct file * file)
-+{
-+ return 0;
-+}
-+
-+/* Returns the block size of the cipher transform attached to @inode.
-+   The inode must need a cipher and have a crypto info attached. */
-+static unsigned int
-+cipher_blocksize(struct inode * inode)
-+{
-+	struct reiser4_crypto_info *info;
-+
-+	assert("edward-758", need_cipher(inode));
-+
-+	info = inode_crypto_info(inode);
-+	assert("edward-1400", info != NULL);
-+
-+	return crypto_blkcipher_blocksize(info_get_cipher(info));
-+}
-+
-+/* Returns @src_off translated by the ->scale method of the cipher plugin
-+   of @inode. The offset is returned unchanged when the inode needs no
-+   cipher, or when it equals the offset of the minimal or maximal key. */
-+static loff_t inode_scaled_offset (struct inode * inode,
-+				   const loff_t src_off /* input offset */)
-+{
-+	cipher_plugin *cplug;
-+
-+	assert("edward-97", inode != NULL);
-+
-+	if (!need_cipher(inode))
-+		return src_off;
-+	if (src_off == get_key_offset(reiser4_min_key()))
-+		return src_off;
-+	if (src_off == get_key_offset(reiser4_max_key()))
-+		return src_off;
-+
-+	cplug = inode_cipher_plugin(inode);
-+	return cplug->scale(inode, cipher_blocksize(inode), src_off);
-+}
-+
-+/* Returns the disk cluster size of @inode: its logical cluster size
-+   passed through inode_scaled_offset() */
-+size_t inode_scaled_cluster_size(struct inode * inode)
-+{
-+	loff_t scaled;
-+
-+	assert("edward-110", inode != NULL);
-+
-+	scaled = inode_scaled_offset(inode, inode_cluster_size(inode));
-+	return scaled;
-+}
-+
-+/* set number of cluster pages */
-+/* Fills clust->old_nrpages and clust->nr_pages for the logical cluster
-+ clust->index of @inode. old_nrpages is always the page count of
-+ lbytes(index, inode). nr_pages is the same value when no window is
-+ attached, 0 for a hole window covering the whole cluster, and otherwise
-+ the page count of the larger of the window end and lbytes(). */
-+static void set_cluster_nrpages(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ struct reiser4_slide * win;
-+
-+ assert("edward-180", clust != NULL);
-+ assert("edward-1040", inode != NULL);
-+
-+ clust->old_nrpages = size_in_pages(lbytes(clust->index, inode));
-+ win = clust->win;
-+ if (!win) {
-+ /* no window: the page count is unchanged */
-+ clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
-+ return;
-+ }
-+ assert("edward-1176", clust->op != LC_INVAL);
-+ assert("edward-1064", win->off + win->count + win->delta != 0);
-+
-+ if (win->stat == HOLE_WINDOW &&
-+ win->off == 0 && win->count == inode_cluster_size(inode)) {
-+ /* special case: writing a "fake" logical cluster */
-+ clust->nr_pages = 0;
-+ return;
-+ }
-+ clust->nr_pages = size_in_pages(max(win->off + win->count + win->delta,
-+ lbytes(clust->index, inode)));
-+ return;
-+}
-+
-+/* plugin->key_by_inode()
-+   Build in @key the key of the disk cluster of @inode for offset @off.
-+   Unless @off is the offset of the maximal key it is first passed through
-+   off_to_clust_to_off(); if a crypto info is attached, the offset is then
-+   scaled by inode_scaled_offset(). Always returns 0. */
-+int key_by_inode_cryptcompress(struct inode *inode, loff_t off,
-+			       reiser4_key * key)
-+{
-+	loff_t key_off = off;
-+
-+	assert("edward-64", inode != 0);
-+
-+	if (likely(key_off != get_key_offset(reiser4_max_key())))
-+		key_off = off_to_clust_to_off(key_off, inode);
-+	if (inode_crypto_info(inode))
-+		key_off = inode_scaled_offset(inode, key_off);
-+
-+	key_by_inode_and_offset_common(inode, 0, key);
-+	set_key_offset(key, (__u64)key_off);
-+	return 0;
-+}
-+
-+/* plugin->flow_by_inode() */
-+/* flow is used to read/write disk clusters */
-+/* Initializes @f from the arguments (length, data pointer, user flag,
-+ operation) and builds its key with key_by_inode_cryptcompress(), whose
-+ result is returned. */
-+int flow_by_inode_cryptcompress(struct inode *inode, const char __user * buf,
-+ int user, /* 1: @buf is of user space,
-+ 0: kernel space */
-+ loff_t size, /* @buf size */
-+ loff_t off, /* offset to start io from */
-+ rw_op op, /* READ or WRITE */
-+ flow_t * f /* resulting flow */)
-+{
-+ assert("edward-436", f != NULL);
-+ assert("edward-149", inode != NULL);
-+ assert("edward-150", inode_file_plugin(inode) != NULL);
-+ assert("edward-1465", user == 0); /* we use flow to read/write
-+ disk clusters located in
-+ kernel space */
-+ f->length = size;
-+ /* copies the pointer value @buf itself into f->data, not the bytes
-+ it points to */
-+ memcpy(&f->data, &buf, sizeof(buf));
-+ f->user = user;
-+ f->op = op;
-+
-+ return key_by_inode_cryptcompress(inode, off, &f->key);
-+}
-+
-+/* Try to re-use a lookup hint for @key.
-+
-+   Returns -E_REPEAT when the hint is missing, not set, was set with a lock
-+   mode other than @lock_mode, or was set for a different key offset;
-+   otherwise returns the result of reiser4_seal_validate() on the hint's
-+   seal and coord. On entry the hint must not be valid and its lock handle
-+   must have no owner. */
-+static int
-+cryptcompress_hint_validate(hint_t * hint, const reiser4_key * key,
-+			    znode_lock_mode lock_mode)
-+{
-+	assert("edward-704", hint != NULL);
-+	assert("edward-1089", !hint_is_valid(hint));
-+	assert("edward-706", hint->lh.owner == NULL);
-+
-+	/* The unused local that took the address of hint->ext_coord.coord
-+	   before this check has been dropped, so in builds without
-+	   assertions @hint is not touched before the NULL test. */
-+	if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
-+		/* hint either not set or set by different operation */
-+		return RETERR(-E_REPEAT);
-+
-+	if (get_key_offset(key) != hint->offset)
-+		/* hint is set for different key */
-+		return RETERR(-E_REPEAT);
-+
-+	assert("edward-707", reiser4_schedulable());
-+
-+	return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord,
-+				     key, &hint->lh, lock_mode,
-+				     ZNODE_LOCK_LOPRI);
-+}
-+
-+/* reserve disk space when writing a logical cluster */
-+/* Grabs estimate_insert_cluster() + estimate_update_cluster() blocks, moves
-+ them to the "cluster reserved" counter and marks @clust as reserved.
-+ Nothing is reserved for a cluster with nr_pages == 0. Returns 0 on
-+ success or the error from reiser4_grab_space_force(). */
-+static int reserve4cluster(struct inode *inode, struct cluster_handle *clust)
-+{
-+ int result = 0;
-+
-+ assert("edward-965", reiser4_schedulable());
-+ assert("edward-439", inode != NULL);
-+ assert("edward-440", clust != NULL);
-+ assert("edward-441", clust->pages != NULL);
-+
-+ if (clust->nr_pages == 0) {
-+ assert("edward-1152", clust->win != NULL);
-+ assert("edward-1153", clust->win->stat == HOLE_WINDOW);
-+ /* don't reserve disk space for fake logical cluster */
-+ return 0;
-+ }
-+ assert("edward-442", jprivate(clust->pages[0]) != NULL);
-+
-+ result = reiser4_grab_space_force(estimate_insert_cluster(inode) +
-+ estimate_update_cluster(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ return result;
-+ clust->reserved = 1;
-+ /* the estimates are evaluated again here rather than cached */
-+ grabbed2cluster_reserved(estimate_insert_cluster(inode) +
-+ estimate_update_cluster(inode));
-+#if REISER4_DEBUG
-+ /* debug-only bookkeeping of the two parts of the reservation */
-+ clust->reserved_prepped = estimate_update_cluster(inode);
-+ clust->reserved_unprepped = estimate_insert_cluster(inode);
-+#endif
-+ /* there can be space grabbed by txnmgr_force_commit_all */
-+ return 0;
-+}
-+
-+/* free reserved disk space if writing a logical cluster fails */
-+/* Returns @count blocks from the cluster reservation to free space and
-+ clears the reserved flag of @ch, which must be set on entry. @inode is
-+ not used. */
-+static void free_reserved4cluster(struct inode *inode,
-+ struct cluster_handle *ch, int count)
-+{
-+ assert("edward-967", ch->reserved == 1);
-+
-+ cluster_reserved2free(count);
-+ ch->reserved = 0;
-+}
-+
-+/* The core search procedure of the cryptcompress plugin.
-+   If returned value is not cbk_errored, then current znode is locked.
-+
-+   First tries an in-place lookup next to the position remembered in @hint;
-+   if the hint cannot be validated (-E_REPEAT) it falls back to a tree
-+   traversal with coord_by_key(). Returns CBK_COORD_FOUND,
-+   CBK_COORD_NOTFOUND or an error code. When the in-place lookup does not
-+   find the item, the coord saved on entry is restored. */
-+static int find_cluster_item(hint_t * hint,
-+			     const reiser4_key * key, /* key of the item we are
-+							  looking for */
-+			     znode_lock_mode lock_mode /* which lock */ ,
-+			     ra_info_t * ra_info, lookup_bias bias, __u32 flags)
-+{
-+	int result;
-+	reiser4_key ikey;
-+	int went_right = 0;
-+	coord_t *coord;
-+	coord_t orig;
-+
-+	/* check @hint before it is dereferenced for the first time */
-+	assert("edward-152", hint != NULL);
-+
-+	coord = &hint->ext_coord.coord;
-+	/* remember the starting position for the roll back below */
-+	orig = *coord;
-+
-+	if (!hint_is_valid(hint)) {
-+		result = cryptcompress_hint_validate(hint, key, lock_mode);
-+		if (result == -E_REPEAT)
-+			goto traverse_tree;
-+		else if (result) {
-+			assert("edward-1216", 0);
-+			return result;
-+		}
-+		hint_set_valid(hint);
-+	}
-+	assert("edward-709", znode_is_any_locked(coord->node));
-+
-+	/* In-place lookup: we only need to check whether the item next to
-+	   @coord matches @key */
-+
-+	if (equal_to_rdk(coord->node, key)) {
-+		result = goto_right_neighbor(coord, &hint->lh);
-+		if (result == -E_NO_NEIGHBOR) {
-+			assert("edward-1217", 0);
-+			return RETERR(-EIO);
-+		}
-+		if (result)
-+			return result;
-+		assert("edward-1218", equal_to_ldk(coord->node, key));
-+		went_right = 1;
-+	} else {
-+		coord->item_pos++;
-+		coord->unit_pos = 0;
-+		coord->between = AT_UNIT;
-+	}
-+	result = zload(coord->node);
-+	if (result)
-+		return result;
-+	assert("edward-1219", !node_is_empty(coord->node));
-+
-+	if (!coord_is_existing_item(coord)) {
-+		zrelse(coord->node);
-+		goto not_found;
-+	}
-+	item_key_by_coord(coord, &ikey);
-+	zrelse(coord->node);
-+	if (!keyeq(key, &ikey))
-+		goto not_found;
-+	/* Ok, item is found, update node counts */
-+	if (went_right)
-+		dclust_inc_extension_ncount(hint);
-+	return CBK_COORD_FOUND;
-+
-+ not_found:
-+	assert("edward-1220", coord->item_pos > 0);
-+	/* roll back */
-+	*coord = orig;
-+	ON_DEBUG(coord_update_v(coord));
-+	return CBK_COORD_NOTFOUND;
-+
-+ traverse_tree:
-+	assert("edward-713", hint->lh.owner == NULL);
-+	assert("edward-714", reiser4_schedulable());
-+
-+	reiser4_unset_hint(hint);
-+	dclust_init_extension(hint);
-+	coord_init_zero(coord);
-+	result = coord_by_key(current_tree, key, coord, &hint->lh,
-+			      lock_mode, bias, LEAF_LEVEL, LEAF_LEVEL,
-+			      CBK_UNIQUE | flags, ra_info);
-+	if (cbk_errored(result))
-+		return result;
-+	if(result == CBK_COORD_FOUND)
-+		dclust_inc_extension_ncount(hint);
-+	hint_set_valid(hint);
-+	return result;
-+}
-+
-+/* This function is called by deflate[inflate] manager when
-+ creating a transformed/plain stream to check if we should
-+ create/cut some overhead. If this returns true, then @oh
-+ contains the size of this overhead.
-+
-+ WRITE_OP: @oh is tc->len modulo the cipher block size; returns 1 at once
-+ if that remainder is non-zero.
-+ READ_OP: @oh is the value of the last byte of the output stream.
-+ Every other path returns whether tc->len differs from tc->lsize.
-+ */
-+static int need_cut_or_align(struct inode * inode,
-+ struct cluster_handle * ch, rw_op rw, int * oh)
-+{
-+ struct tfm_cluster * tc = &ch->tc;
-+ switch (rw) {
-+ case WRITE_OP: /* estimate align */
-+ *oh = tc->len % cipher_blocksize(inode);
-+ if (*oh != 0)
-+ return 1;
-+ break;
-+ case READ_OP: /* estimate cut */
-+ *oh = *(tfm_output_data(ch) + tc->len - 1);
-+ break;
-+ default:
-+ impossible("edward-1401", "bad option");
-+ }
-+ return (tc->len != tc->lsize);
-+}
-+
-+/* create/cut an overhead of transformed/plain stream */
-+/* Does nothing unless need_cut_or_align() reports an overhead.
-+ WRITE_OP: extends ch->tc.len by the value returned by ->align_stream of
-+ the cipher plugin and stores (cipher block size - remainder) in the last
-+ byte of the input stream.
-+ READ_OP: shrinks ch->tc.len by the overhead read from the stream. */
-+static void align_or_cut_overhead(struct inode * inode,
-+ struct cluster_handle * ch, rw_op rw)
-+{
-+ int oh;
-+ cipher_plugin * cplug = inode_cipher_plugin(inode);
-+
-+ assert("edward-1402", need_cipher(inode));
-+
-+ if (!need_cut_or_align(inode, ch, rw, &oh))
-+ return;
-+ switch (rw) {
-+ case WRITE_OP: /* do align */
-+ ch->tc.len +=
-+ cplug->align_stream(tfm_input_data(ch) +
-+ ch->tc.len, ch->tc.len,
-+ cipher_blocksize(inode));
-+ /* control byte: size of the overhead that was just appended */
-+ *(tfm_input_data(ch) + ch->tc.len - 1) =
-+ cipher_blocksize(inode) - oh;
-+ break;
-+ case READ_OP: /* do cut */
-+ assert("edward-1403", oh <= cipher_blocksize(inode));
-+ ch->tc.len -= oh;
-+ break;
-+ default:
-+ impossible("edward-1404", "bad option");
-+ }
-+ return;
-+}
-+
-+/* Upper bound of the overhead the cipher may add to a stream: the cipher
-+   block size when @inode needs a cipher and its cipher plugin has an
-+   ->align_stream method, 0 otherwise */
-+static unsigned max_cipher_overhead(struct inode * inode)
-+{
-+	if (need_cipher(inode) && inode_cipher_plugin(inode)->align_stream)
-+		return cipher_blocksize(inode);
-+
-+	return 0;
-+}
-+
-+/* Overhead added by compression: DC_CHECKSUM_SIZE when the compression
-+   plugin of @inode has a ->checksum method, 0 otherwise */
-+static int deflate_overhead(struct inode *inode)
-+{
-+	if (inode_compression_plugin(inode)->checksum)
-+		return DC_CHECKSUM_SIZE;
-+
-+	return 0;
-+}
-+
-+/* Returns coa_overrun() of the compression plugin of @inode for an input
-+   of @ilen bytes */
-+static unsigned deflate_overrun(struct inode * inode, int ilen)
-+{
-+	compression_plugin *cplug = inode_compression_plugin(inode);
-+
-+	return coa_overrun(cplug, ilen);
-+}
-+
-+/* Estimating compressibility of a logical cluster by various
-+   policies represented by compression mode plugin.
-+   If this returns false, then compressor won't be called for
-+   the cluster of index @index.
-+
-+   Two optional checks are applied in order: the stream must be at least
-+   ->min_size_deflate() bytes long (compression plugin), and
-+   ->should_deflate() of the compression mode plugin must agree. A check
-+   whose method is absent counts as passed.
-+*/
-+static int should_compress(struct tfm_cluster * tc, cloff_t index,
-+			   struct inode *inode)
-+{
-+	compression_plugin *cplug = inode_compression_plugin(inode);
-+	compression_mode_plugin *mplug = inode_compression_mode_plugin(inode);
-+
-+	assert("edward-1321", tc->len != 0);
-+	assert("edward-1322", cplug != NULL);
-+	assert("edward-1323", mplug != NULL);
-+
-+	/* estimate by size */
-+	if (cplug->min_size_deflate &&
-+	    tc->len < cplug->min_size_deflate())
-+		return 0;
-+
-+	/* estimate by compression mode plugin */
-+	if (mplug->should_deflate)
-+		return mplug->should_deflate(inode, index) ? 1 : 0;
-+
-+	return 1;
-+}
-+
-+/* Evaluating results of compression transform.
-+   Returns true, if we need to accept this results, i.e. if the compressed
-+   size plus checksum and maximal cipher overhead is still smaller than
-+   the size before compression */
-+static int save_compressed(int size_before, int size_after, struct inode *inode)
-+{
-+	/* unsigned, like the type of the original sum expression */
-+	unsigned int stored_size;
-+
-+	stored_size = size_after + deflate_overhead(inode) +
-+		      max_cipher_overhead(inode);
-+
-+	return (stored_size < size_before);
-+}
-+
-+/* Guess result of the evaluation above */
-+/* Returns true if the stream length tc->len is smaller than the logical
-+ size tc->lsize (for an encrypted cluster: than tc->lsize scaled by
-+ inode_scaled_offset()). */
-+static int need_inflate(struct cluster_handle * ch, struct inode * inode,
-+ int encrypted /* is cluster encrypted */ )
-+{
-+ struct tfm_cluster * tc = &ch->tc;
-+
-+ assert("edward-142", tc != 0);
-+ assert("edward-143", inode != NULL);
-+
-+ return tc->len <
-+ (encrypted ?
-+ inode_scaled_offset(inode, tc->lsize) :
-+ tc->lsize);
-+}
-+
-+/* If results of compression were accepted, then we add
-+ a checksum to catch possible disk cluster corruption.
-+ The following is a format of the data stored in disk clusters:
-+
-+ data This is (transformed) logical cluster.
-+ cipher_overhead This is created by ->align() method
-+ of cipher plugin. May be absent.
-+ checksum (4) This is created by ->checksum method
-+ of compression plugin to check
-+ integrity. May be absent.
-+
-+ Crypto overhead format:
-+
-+ data
-+ control_byte (1) contains aligned overhead size:
-+ 1 <= overhead <= cipher_blksize
-+*/
-+/* Append a checksum at the end of a transformed stream */
-+/* The checksum is computed by @cplug->checksum over the first tc->len bytes
-+ of the output stream, stored right after them as a little-endian 32-bit
-+ value, and tc->len grows by DC_CHECKSUM_SIZE. */
-+static void dc_set_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
-+{
-+ __u32 checksum;
-+
-+ assert("edward-1309", tc != NULL);
-+ assert("edward-1310", tc->len > 0);
-+ assert("edward-1311", cplug->checksum != NULL);
-+
-+ checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len);
-+ /* the destination need not be 4-byte aligned, hence put_unaligned */
-+ put_unaligned(cpu_to_le32(checksum),
-+ (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len));
-+ tc->len += (int)DC_CHECKSUM_SIZE;
-+}
-+
-+/*
-+ * Check a disk cluster checksum.
-+ *
-+ * The last DC_CHECKSUM_SIZE bytes of the input stream hold a little-endian
-+ * checksum of the preceding tc->len - DC_CHECKSUM_SIZE bytes.
-+ *
-+ * Returns 0 if the checksum is correct (tc->len is then reduced by
-+ * DC_CHECKSUM_SIZE so that it covers the payload only), otherwise emits
-+ * a warning, leaves tc->len untouched and returns 1.
-+ */
-+static int dc_check_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
-+{
-+	int payload_len;
-+	__u32 computed;
-+	__u32 stored;
-+
-+	assert("edward-1312", tc != NULL);
-+	assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE);
-+	assert("edward-1314", cplug->checksum != NULL);
-+
-+	payload_len = tc->len - (int)DC_CHECKSUM_SIZE;
-+
-+	/*
-+	 * Compute both values exactly once; they are reused for the
-+	 * diagnostics below instead of being recalculated.
-+	 */
-+	computed = cplug->checksum(tfm_stream_data(tc, INPUT_STREAM),
-+				   payload_len);
-+	stored = le32_to_cpu(get_unaligned((d32 *)
-+					   (tfm_stream_data(tc, INPUT_STREAM)
-+					    + payload_len)));
-+	if (computed != stored) {
-+		warning("edward-156",
-+			"Bad disk cluster checksum %d, (should be %d) Fsck?\n",
-+			(int)stored, (int)computed);
-+		return 1;
-+	}
-+	tc->len = payload_len;
-+	return 0;
-+}
-+
-+/*
-+ * Get the input or output stream (@id) for some transform action.
-+ *
-+ * The required size is the scaled cluster size of @inode, enlarged by
-+ * deflate_overrun() when the cluster is being written. If an input stream
-+ * is requested but absent, the streams are alternated first; a still
-+ * missing stream is allocated and a too small one is reallocated.
-+ * Returns 0 or the result of alloc_tfm_stream()/realloc_tfm_stream().
-+ */
-+int grab_tfm_stream(struct inode * inode, struct tfm_cluster * tc,
-+		    tfm_stream_id id)
-+{
-+	size_t size = inode_scaled_cluster_size(inode);
-+
-+	assert("edward-901", tc != NULL);
-+	assert("edward-1027", inode_compression_plugin(inode) != NULL);
-+
-+	if (cluster_get_tfm_act(tc) == TFMA_WRITE)
-+		size = size + deflate_overrun(inode, inode_cluster_size(inode));
-+
-+	if (!get_tfm_stream(tc, id)) {
-+		if (id == INPUT_STREAM)
-+			alternate_streams(tc);
-+	}
-+	if (!get_tfm_stream(tc, id))
-+		return alloc_tfm_stream(tc, size, id);
-+
-+	assert("edward-902", tfm_stream_is_set(tc, id));
-+
-+	return (tfm_stream_size(tc, id) < size) ?
-+		realloc_tfm_stream(tc, size, id) : 0;
-+}
-+
-+/* Common deflate manager.
-+
-+ Transforms the input stream of @clust for writing, in up to three steps:
-+ 1. compression, if should_compress() says so and the result is accepted
-+ by save_compressed();
-+ 2. encryption, if need_cipher(@inode);
-+ 3. appending a checksum, if the data were compressed and the compression
-+ plugin provides ->checksum.
-+ If neither compression nor encryption took place, the streams are
-+ alternated instead.
-+
-+ Returns the value left in @result: errors of the cipher stage are returned
-+ immediately. NOTE(review): a non-zero result of grab_coa(),
-+ grab_tfm_stream() or of the accept/discard hooks in the compression stage
-+ is only warned about, but it is not reset, so it is returned to the caller
-+ unless the cipher stage overwrites it - confirm this is intended. */
-+int reiser4_deflate_cluster(struct cluster_handle * clust, struct inode * inode)
-+{
-+ int result = 0;
-+ int compressed = 0;
-+ int encrypted = 0;
-+ struct tfm_cluster * tc = &clust->tc;
-+ compression_plugin * coplug;
-+
-+ assert("edward-401", inode != NULL);
-+ assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM));
-+ assert("edward-1348", cluster_get_tfm_act(tc) == TFMA_WRITE);
-+ assert("edward-498", !tfm_cluster_is_uptodate(tc));
-+
-+ coplug = inode_compression_plugin(inode);
-+ if (should_compress(tc, clust->index, inode)) {
-+ /* try to compress, discard bad results */
-+ __u32 dst_len;
-+ compression_mode_plugin * mplug =
-+ inode_compression_mode_plugin(inode);
-+ assert("edward-602", coplug != NULL);
-+ assert("edward-1423", coplug->compress != NULL);
-+
-+ result = grab_coa(tc, coplug);
-+ if (result) {
-+ warning("edward-1424",
-+ "alloc_coa failed with ret=%d, skipped compression",
-+ result);
-+ goto cipher;
-+ }
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result) {
-+ warning("edward-1425",
-+ "alloc stream failed with ret=%d, skipped compression",
-+ result);
-+ goto cipher;
-+ }
-+ /* on input dst_len is the capacity of the output stream */
-+ dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
-+ coplug->compress(get_coa(tc, coplug->h.id, tc->act),
-+ tfm_input_data(clust), tc->len,
-+ tfm_output_data(clust), &dst_len);
-+ /* make sure we didn't overwrite extra bytes */
-+ assert("edward-603",
-+ dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
-+
-+ /* evaluate results of compression transform */
-+ if (save_compressed(tc->len, dst_len, inode)) {
-+ /* good result, accept */
-+ tc->len = dst_len;
-+ if (mplug->accept_hook != NULL) {
-+ result = mplug->accept_hook(inode, clust->index);
-+ if (result)
-+ warning("edward-1426",
-+ "accept_hook failed with ret=%d",
-+ result);
-+ }
-+ compressed = 1;
-+ }
-+ else {
-+ /* bad result, discard */
-+#if 0
-+ if (cluster_is_complete(clust, inode))
-+ warning("edward-1496",
-+ "incompressible cluster %lu (inode %llu)",
-+ clust->index,
-+ (unsigned long long)get_inode_oid(inode));
-+#endif
-+ /* the discard hook is called for complete clusters only */
-+ if (mplug->discard_hook != NULL &&
-+ cluster_is_complete(clust, inode)) {
-+ result = mplug->discard_hook(inode,
-+ clust->index);
-+ if (result)
-+ warning("edward-1427",
-+ "discard_hook failed with ret=%d",
-+ result);
-+ }
-+ }
-+ }
-+ cipher:
-+ if (need_cipher(inode)) {
-+ /* NOTE(review): ciplug is assigned below but not used otherwise
-+ in this function */
-+ cipher_plugin * ciplug;
-+ struct blkcipher_desc desc;
-+ struct scatterlist src;
-+ struct scatterlist dst;
-+
-+ ciplug = inode_cipher_plugin(inode);
-+ desc.tfm = info_get_cipher(inode_crypto_info(inode));
-+ desc.flags = 0;
-+ /* presumably this makes the compressed output the input of the
-+ cipher - verify against alternate_streams() */
-+ if (compressed)
-+ alternate_streams(tc);
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+
-+ align_or_cut_overhead(inode, clust, WRITE_OP);
-+ sg_init_one(&src, tfm_input_data(clust), tc->len);
-+ sg_init_one(&dst, tfm_output_data(clust), tc->len);
-+
-+ result = crypto_blkcipher_encrypt(&desc, &dst, &src, tc->len);
-+ if (result) {
-+ warning("edward-1405",
-+ "encryption failed flags=%x\n", desc.flags);
-+ return result;
-+ }
-+ encrypted = 1;
-+ }
-+ /* a checksum is appended to compressed clusters only */
-+ if (compressed && coplug->checksum != NULL)
-+ dc_set_checksum(coplug, tc);
-+ /* nothing was transformed */
-+ if (!compressed && !encrypted)
-+ alternate_streams(tc);
-+ return result;
-+}
-+
-+/* Common inflate manager.
-+
-+ Reverses reiser4_deflate_cluster() for a disk cluster that is being read:
-+ 1. verifies and strips the checksum, if need_inflate() says that the
-+ cluster was compressed and the compression plugin provides ->checksum;
-+ 2. decrypts, if need_cipher(@inode);
-+ 3. decompresses, if need_inflate() still holds for the decrypted data.
-+ If no transform was applied, the streams are alternated instead.
-+
-+ Returns 0 on success, -EIO on a checksum mismatch, or the error returned
-+ by grab_tfm_stream() / crypto_blkcipher_decrypt(). */
-+int reiser4_inflate_cluster(struct cluster_handle * clust, struct inode * inode)
-+{
-+ int result = 0;
-+ int transformed = 0;
-+ struct tfm_cluster * tc = &clust->tc;
-+ compression_plugin * coplug;
-+
-+ assert("edward-905", inode != NULL);
-+ assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER);
-+ assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM));
-+ assert("edward-1349", tc->act == TFMA_READ);
-+ assert("edward-907", !tfm_cluster_is_uptodate(tc));
-+
-+ /* Handle a checksum (if any) */
-+ coplug = inode_compression_plugin(inode);
-+ if (need_inflate(clust, inode, need_cipher(inode)) &&
-+ coplug->checksum != NULL) {
-+ /* on success this also cuts the checksum off tc->len */
-+ result = dc_check_checksum(coplug, tc);
-+ if (unlikely(result)) {
-+ warning("edward-1460",
-+ "Inode %llu: disk cluster %lu looks corrupted",
-+ (unsigned long long)get_inode_oid(inode),
-+ clust->index);
-+ return RETERR(-EIO);
-+ }
-+ }
-+ if (need_cipher(inode)) {
-+ /* NOTE(review): ciplug is assigned below but not used otherwise
-+ in this function */
-+ cipher_plugin * ciplug;
-+ struct blkcipher_desc desc;
-+ struct scatterlist src;
-+ struct scatterlist dst;
-+
-+ ciplug = inode_cipher_plugin(inode);
-+ desc.tfm = info_get_cipher(inode_crypto_info(inode));
-+ desc.flags = 0;
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+ assert("edward-909", tfm_cluster_is_set(tc));
-+
-+ sg_init_one(&src, tfm_input_data(clust), tc->len);
-+ sg_init_one(&dst, tfm_output_data(clust), tc->len);
-+
-+ result = crypto_blkcipher_decrypt(&desc, &dst, &src, tc->len);
-+ if (result) {
-+ warning("edward-1600", "decrypt failed flags=%x\n",
-+ desc.flags);
-+ return result;
-+ }
-+ align_or_cut_overhead(inode, clust, READ_OP);
-+ transformed = 1;
-+ }
-+ if (need_inflate(clust, inode, 0)) {
-+ /* on input dst_len is the size of a whole logical cluster */
-+ unsigned dst_len = inode_cluster_size(inode);
-+ /* presumably this makes the decrypted output the input of the
-+ decompressor - verify against alternate_streams() */
-+ if(transformed)
-+ alternate_streams(tc);
-+
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ return result;
-+ assert("edward-1305", coplug->decompress != NULL);
-+ assert("edward-910", tfm_cluster_is_set(tc));
-+
-+ coplug->decompress(get_coa(tc, coplug->h.id, tc->act),
-+ tfm_input_data(clust), tc->len,
-+ tfm_output_data(clust), &dst_len);
-+ /* check length */
-+ /* NOTE(review): the length is verified by the assertion only,
-+ i.e. not at all when assertions are compiled out */
-+ tc->len = dst_len;
-+ assert("edward-157", dst_len == tc->lsize);
-+ transformed = 1;
-+ }
-+ if (!transformed)
-+ alternate_streams(tc);
-+ return result;
-+}
-+
-+/*
-+ * ->readpage() method of struct address_space_operations for the
-+ * cryptcompress plugin.
-+ *
-+ * Sets up a reiser4 context and a read cluster handle and delegates the
-+ * work to the ->readpage() method of the CTAIL item plugin. Returns that
-+ * method's result, the context initialization error, 0 if the page turned
-+ * out to be up to date already, or -EINVAL if the item plugin has no
-+ * ->readpage() method. On the paths that do not reach the item plugin the
-+ * page is unlocked here.
-+ */
-+int readpage_cryptcompress(struct file *file, struct page *page)
-+{
-+	int result;
-+	item_plugin *iplug;
-+	reiser4_context *ctx;
-+	struct cluster_handle clust;
-+
-+	assert("edward-88", PageLocked(page));
-+	assert("vs-976", !PageUptodate(page));
-+	assert("edward-89", page->mapping && page->mapping->host);
-+
-+	ctx = reiser4_init_context(page->mapping->host->i_sb);
-+	if (IS_ERR(ctx)) {
-+		unlock_page(page);
-+		return PTR_ERR(ctx);
-+	}
-+	assert("edward-113",
-+	       ergo(file != NULL,
-+		    page->mapping == file->f_dentry->d_inode->i_mapping));
-+
-+	if (PageUptodate(page)) {
-+		warning("edward-1338", "page is already uptodate\n");
-+		unlock_page(page);
-+		reiser4_exit_context(ctx);
-+		return 0;
-+	}
-+	cluster_init_read(&clust, NULL);
-+	clust.file = file;
-+	iplug = item_plugin_by_id(CTAIL_ID);
-+
-+	if (iplug->s.file.readpage) {
-+		result = iplug->s.file.readpage(&clust, page);
-+		put_cluster_handle(&clust);
-+		reiser4_txn_restart(ctx);
-+	} else {
-+		/* nobody is going to read the page */
-+		unlock_page(page);
-+		put_cluster_handle(&clust);
-+		result = -EINVAL;
-+	}
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * Number of pages to check in: all pages of the cluster handle for
-+ * LC_APPOV, or the pages covered by the window (off + count) for LC_TRUNC.
-+ */
-+static int get_new_nrpages(struct cluster_handle * clust)
-+{
-+	if (clust->op == LC_APPOV)
-+		return clust->nr_pages;
-+
-+	if (clust->op == LC_TRUNC) {
-+		assert("edward-1179", clust->win != NULL);
-+		return size_in_pages(clust->win->off + clust->win->count);
-+	}
-+	impossible("edward-1180", "bad page cluster option");
-+	return 0;
-+}
-+
-+/*
-+ * Mark the pages that are going to be checked in (see get_new_nrpages())
-+ * dirty and accessed. Each page is locked while it is being dirtied.
-+ */
-+static void set_cluster_pages_dirty(struct cluster_handle * clust,
-+				    struct inode * inode)
-+{
-+	struct page *pg;
-+	int nrpages = get_new_nrpages(clust);
-+	int i = 0;
-+
-+	while (i < nrpages) {
-+		pg = clust->pages[i];
-+		assert("edward-968", pg != NULL);
-+
-+		lock_page(pg);
-+		assert("edward-1065", PageUptodate(pg));
-+		reiser4_set_page_dirty_internal(pg);
-+		unlock_page(pg);
-+
-+		mark_page_accessed(pg);
-+		i++;
-+	}
-+}
-+
-+/* Grab a page cluster for read/write operations.
-+   Attach a jnode for write operations (when preparing for modifications, which
-+   are supposed to be committed).
-+
-+   We allocate only one jnode per page cluster; this jnode is bound to the
-+   first page of this cluster, so we have an extra reference (that will be put
-+   as soon as the jnode is evicted from memory); other references will be
-+   cleaned up in flush time (assuming that check in of the page cluster was
-+   successful).
-+
-+   Every page is found or created in @inode's mapping and is left unlocked.
-+   On success the jnode (NULL for read operations) is stored in clust->node
-+   and 0 is returned. On failure (-ENOMEM, or the error of jnode_of_page())
-+   all page references taken so far are dropped again.
-+   NOTE(review): the slots of clust->pages[] are not reset on failure, as in
-+   the original code - confirm that callers do not reuse them.
-+*/
-+int grab_page_cluster(struct inode * inode,
-+		      struct cluster_handle * clust, rw_op rw)
-+{
-+	int i;
-+	int result = 0;
-+	jnode *node = NULL;
-+
-+	assert("edward-182", clust != NULL);
-+	assert("edward-183", clust->pages != NULL);
-+	assert("edward-1466", clust->node == NULL);
-+	assert("edward-1428", inode != NULL);
-+	assert("edward-1429", inode->i_mapping != NULL);
-+	assert("edward-184", clust->nr_pages <= cluster_nrpages(inode));
-+
-+	if (clust->nr_pages == 0)
-+		return 0;
-+
-+	for (i = 0; i < clust->nr_pages; i++) {
-+
-+		assert("edward-1044", clust->pages[i] == NULL);
-+
-+		clust->pages[i] =
-+			find_or_create_page(inode->i_mapping,
-+					    clust_to_pg(clust->index, inode) + i,
-+					    reiser4_ctx_gfp_mask_get());
-+		if (!clust->pages[i]) {
-+			result = RETERR(-ENOMEM);
-+			break;
-+		}
-+		if (i == 0 && rw == WRITE_OP) {
-+			node = jnode_of_page(clust->pages[i]);
-+			if (IS_ERR(node)) {
-+				result = PTR_ERR(node);
-+				unlock_page(clust->pages[i]);
-+				/*
-+				 * The unwinding loop below releases only the
-+				 * pages [0, i), so the reference to this page
-+				 * has to be dropped here, otherwise it leaks.
-+				 * The page was not accounted by
-+				 * INODE_PGCOUNT_INC() yet.
-+				 */
-+				put_cluster_page(clust->pages[i]);
-+				break;
-+			}
-+			JF_SET(node, JNODE_CLUSTER_PAGE);
-+			assert("edward-920", jprivate(clust->pages[0]));
-+		}
-+		INODE_PGCOUNT_INC(inode);
-+		unlock_page(clust->pages[i]);
-+	}
-+	if (unlikely(result)) {
-+		/* release the pages that were grabbed completely */
-+		while (i) {
-+			put_cluster_page(clust->pages[--i]);
-+			INODE_PGCOUNT_DEC(inode);
-+		}
-+		if (node && !IS_ERR(node))
-+			jput(node);
-+		return result;
-+	}
-+	clust->node = node;
-+	return 0;
-+}
-+
-+/*
-+ * Invalidate @count pages of the page cluster with the index @index,
-+ * starting from the page number @from within the cluster.
-+ * @pages is not used by this function.
-+ */
-+static void truncate_page_cluster_range(struct inode * inode,
-+					struct page ** pages,
-+					cloff_t index,
-+					int from, int count,
-+					int even_cows)
-+{
-+	assert("edward-1467", count > 0);
-+
-+	reiser4_invalidate_pages(inode->i_mapping,
-+				 clust_to_pg(index, inode) + from, count,
-+				 even_cows);
-+}
-+
-+/*
-+ * Put @count pages of the array @pages, starting from the slot @from,
-+ * and decrement the page counter of @inode for each of them.
-+ */
-+void __put_page_cluster(int from, int count,
-+			struct page ** pages, struct inode * inode)
-+{
-+	int i = 0;
-+
-+	assert("edward-1468", pages != NULL);
-+	assert("edward-1469", inode != NULL);
-+	assert("edward-1470", from >= 0 && count >= 0);
-+
-+	while (i < count) {
-+		assert("edward-1471", pages[from + i] != NULL);
-+		/* the pages are expected to be contiguous */
-+		assert("edward-1472",
-+		       pages[from + i]->index == pages[from]->index + i);
-+
-+		put_cluster_page(pages[from + i]);
-+		INODE_PGCOUNT_DEC(inode);
-+		i++;
-+	}
-+}
-+
-+/*
-+ * The counterpart of grab_page_cluster(): puts all pages of the cluster
-+ * and, for @rw == WRITE_OP, the jnode attached to it (if any).
-+ * With @rw == WRITE_OP this function is called only if something failed
-+ * before the page cluster was checked in.
-+ */
-+void put_page_cluster(struct cluster_handle * clust,
-+		      struct inode * inode, rw_op rw)
-+{
-+	assert("edward-445", clust != NULL);
-+	assert("edward-922", clust->pages != NULL);
-+	assert("edward-446",
-+	       ergo(clust->nr_pages != 0, clust->pages[0] != NULL));
-+
-+	__put_page_cluster(0, clust->nr_pages, clust->pages, inode);
-+
-+	if (rw != WRITE_OP)
-+		return;
-+
-+	if (unlikely(clust->node)) {
-+		assert("edward-447",
-+		       clust->node == jprivate(clust->pages[0]));
-+		jput(clust->node);
-+		clust->node = NULL;
-+	}
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debugging check: returns 1 if the PSET_FILE bit is set in the plugin
-+ * mask of @inode and its cluster shift is valid, 0 otherwise.
-+ */
-+int cryptcompress_inode_ok(struct inode *inode)
-+{
-+	return (reiser4_inode_data(inode)->plugin_mask & (1 << PSET_FILE)) &&
-+		cluster_shift_ok(inode_cluster_shift(inode));
-+}
-+
-+/*
-+ * Debugging check of a window: it must not start at the very end of
-+ * the logical cluster, and off + count + delta must fit into the cluster.
-+ */
-+static int window_ok(struct reiser4_slide * win, struct inode *inode)
-+{
-+	assert("edward-1115", win != NULL);
-+	assert("edward-1116", ergo(win->delta, win->stat == HOLE_WINDOW));
-+
-+	if (win->off == inode_cluster_size(inode))
-+		return 0;
-+	return win->off + win->count + win->delta <= inode_cluster_size(inode);
-+}
-+
-+/*
-+ * Debugging check of a cluster handle: it must have a page array and,
-+ * if a window is attached, the window must pass window_ok().
-+ */
-+static int cluster_ok(struct cluster_handle * clust, struct inode *inode)
-+{
-+	assert("edward-279", clust != NULL);
-+
-+	if (!clust->pages)
-+		return 0;
-+	if (!clust->win)
-+		return 1;
-+	return window_ok(clust->win, inode);
-+}
-+/* Debugging check that no page is found in the mapping of @inode starting
-+ from the index @start. The real implementation below is compiled out
-+ (#if 0); the macro in the #else branch makes the check always succeed. */
-+#if 0
-+static int pages_truncate_ok(struct inode *inode, pgoff_t start)
-+{
-+ int found;
-+ struct page * page;
-+
-+ /* look for at most one page */
-+ found = find_get_pages(inode->i_mapping, start, 1, &page);
-+ if (found)
-+ put_cluster_page(page);
-+ return !found;
-+}
-+#else
-+#define pages_truncate_ok(inode, start) 1
-+#endif
-+
-+/*
-+ * Debugging check: returns 1 if no jnode is found for the first page of
-+ * the logical cluster @index of @inode, 0 otherwise.
-+ */
-+static int jnode_truncate_ok(struct inode *inode, cloff_t index)
-+{
-+	jnode *node = jlookup(current_tree, get_inode_oid(inode),
-+			      clust_to_pg(index, inode));
-+
-+	if (unlikely(node != NULL)) {
-+		/* drop the reference obtained by the lookup */
-+		jput(node);
-+		return 0;
-+	}
-+	return 1;
-+}
-+
-+static int find_fake_appended(struct inode *inode, cloff_t * index);
-+
-+/*
-+ * Debugging check: returns 1 if find_fake_appended() succeeds and
-+ * reports the same cluster index as @aidx, 0 otherwise.
-+ */
-+static int body_truncate_ok(struct inode *inode, cloff_t aidx)
-+{
-+	cloff_t raidx;
-+
-+	if (find_fake_appended(inode, &raidx))
-+		return 0;
-+	return aidx == raidx;
-+}
-+#endif
-+
-+/*
-+ * Guess the state of the next window: a hole window with nothing left
-+ * to write after it (delta == 0) is followed by another hole window,
-+ * everything else by a data window.
-+ */
-+static inline window_stat next_window_stat(struct reiser4_slide * win)
-+{
-+	assert("edward-1130", win != NULL);
-+
-+	if (win->stat == HOLE_WINDOW && win->delta == 0)
-+		return HOLE_WINDOW;
-+	return DATA_WINDOW;
-+}
-+
-+/* Guess and set next cluster index and window params.
-+
-+ Does nothing if no window is attached to @clust. Otherwise:
-+ . after a data window: go to the next cluster and start a data window
-+ at its beginning, of at most @to_file bytes;
-+ . after a hole window with delta == 0: go to the cluster containing
-+ @file_off and set up a hole window which ends at @file_off, with the
-+ delta limited by @to_file;
-+ . after a hole window with delta != 0: stay in the same cluster and turn
-+ the delta into a data window. */
-+static void move_update_window(struct inode * inode,
-+ struct cluster_handle * clust,
-+ loff_t file_off, loff_t to_file)
-+{
-+ struct reiser4_slide * win;
-+
-+ assert("edward-185", clust != NULL);
-+ assert("edward-438", clust->pages != NULL);
-+ assert("edward-281", cluster_ok(clust, inode));
-+
-+ win = clust->win;
-+ if (!win)
-+ return;
-+
-+ switch (win->stat) {
-+ case DATA_WINDOW:
-+ /* increment */
-+ clust->index++;
-+ win->stat = DATA_WINDOW;
-+ win->off = 0;
-+ /* not more than one logical cluster at a time */
-+ win->count = min((loff_t)inode_cluster_size(inode), to_file);
-+ break;
-+ case HOLE_WINDOW:
-+ switch (next_window_stat(win)) {
-+ case HOLE_WINDOW:
-+ /* skip */
-+ clust->index = off_to_clust(file_off, inode);
-+ win->stat = HOLE_WINDOW;
-+ win->off = 0;
-+ /* the hole ends at @file_off */
-+ win->count = off_to_cloff(file_off, inode);
-+ win->delta = min((loff_t)(inode_cluster_size(inode) -
-+ win->count), to_file);
-+ break;
-+ case DATA_WINDOW:
-+ /* stay */
-+ win->stat = DATA_WINDOW;
-+ /* off+count+delta=inv */
-+ win->off = win->off + win->count;
-+ win->count = win->delta;
-+ win->delta = 0;
-+ break;
-+ default:
-+ impossible("edward-282", "wrong next window state");
-+ }
-+ break;
-+ default:
-+ impossible("edward-283", "wrong current window state");
-+ }
-+ assert("edward-1068", cluster_ok(clust, inode));
-+}
-+
-+/*
-+ * Grab space for a stat data update, set ctime and mtime of @inode to
-+ * the current time and update the stat data.
-+ * Returns 0 on success, or the error of reiser4_grab_space_force() /
-+ * reiser4_update_sd().
-+ */
-+static int update_sd_cryptcompress(struct inode *inode)
-+{
-+	int result;
-+
-+	assert("edward-978", reiser4_schedulable());
-+
-+	/* one for stat data update */
-+	result = reiser4_grab_space_force(estimate_update_common(inode),
-+					  BA_CAN_COMMIT);
-+	if (result == 0) {
-+		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-+		result = reiser4_update_sd(inode);
-+	}
-+	return result;
-+}
-+
-+/*
-+ * Uncapture the jnode of a page cluster. Must be called with the jnode
-+ * spin lock held. If the jnode belongs to no atom, only the jnode is
-+ * unlocked; otherwise the block is uncaptured, the atom is unlocked and
-+ * a reference to the jnode is put.
-+ */
-+static void uncapture_cluster_jnode(jnode * node)
-+{
-+	txn_atom *atom;
-+
-+	assert_spin_locked(&(node->guard));
-+
-+	atom = jnode_get_atom(node);
-+	if (atom != NULL) {
-+		reiser4_uncapture_block(node);
-+		spin_unlock_atom(atom);
-+		jput(node);
-+		return;
-+	}
-+	/* not captured */
-+	assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+	spin_unlock_jnode(node);
-+}
-+
-+/* Put the first @nr pages of the array @pages */
-+static void put_found_pages(struct page **pages, int nr)
-+{
-+	int i = 0;
-+
-+	while (i < nr) {
-+		assert("edward-1045", pages[i] != NULL);
-+		put_cluster_page(pages[i]);
-+		i++;
-+	}
-+}
-+
-+/* Lifecycle of a logical cluster in the system.
-+ *
-+ *
-+ * Logical cluster of a cryptcompress file is represented in the system by
-+ * . page cluster (in memory, primary cache, contains plain text);
-+ * . disk cluster (in memory, secondary cache, contains transformed text).
-+ * Primary cache is to reduce number of transform operations (compression,
-+ * encryption), i.e. to implement transform-caching strategy.
-+ * Secondary cache is to reduce number of I/O operations, i.e. for usual
-+ * write-caching strategy. Page cluster is a set of pages, i.e. mapping of
-+ * a logical cluster to the primary cache. Disk cluster is a set of items
-+ * of the same type defined by some reiser4 item plugin id.
-+ *
-+ * 1. Performing modifications
-+ *
-+ * Every modification of a cryptcompress file is considered as a set of
-+ * operations performed on file's logical clusters. Every such "atomic"
-+ * modification is truncate, append and(or) overwrite some bytes of a
-+ * logical cluster performed in the primary cache with the following
-+ * synchronization with the secondary cache (in flush time). Disk clusters,
-+ * which live in the secondary cache, are supposed to be synchronized with
-+ * disk. The mechanism of synchronization of primary and secondary caches
-+ * includes so-called checkin/checkout technique described below.
-+ *
-+ * 2. Submitting modifications
-+ *
-+ * Each page cluster has associated jnode (a special in-memory header to
-+ * keep a track of transactions in reiser4), which is attached to its first
-+ * page when grabbing page cluster for modifications (see grab_page_cluster).
-+ * Submitting modifications (see checkin_logical_cluster) is going per logical
-+ * cluster and includes:
-+ * . checkin_cluster_size;
-+ * . checkin_page_cluster.
-+ * checkin_cluster_size() is resolved to a file size update, which completely
-+ * defines the new size of the logical cluster (the number of the file's
-+ * bytes in the logical cluster).
-+ * checkin_page_cluster() captures jnode of a page cluster and installs
-+ * jnode's dirty flag (if needed) to indicate that modifications are
-+ * successfully checked in.
-+ *
-+ * 3. Checking out modifications
-+ *
-+ * Is going per logical cluster in flush time (see checkout_logical_cluster).
-+ * This is the time of synchronizing primary and secondary caches.
-+ * checkout_logical_cluster() includes:
-+ * . checkout_page_cluster (retrieving checked in pages).
-+ * . uncapture jnode (including clear dirty flag and unlock)
-+ *
-+ * 4. Committing modifications
-+ *
-+ * This continues the synchronization of primary and secondary caches. When
-+ * a page cluster is checked out (the phase above), its pages are
-+ * locked/flushed/unlocked one by one, in ascending order of their indexes,
-+ * into a contiguous stream, which is supposed to be transformed (compressed,
-+ * encrypted), chopped up into items and committed to disk as a disk cluster.
-+ *
-+ * 5. Managing page references
-+ *
-+ * Every checked in page has a special additional "control" reference,
-+ * which is dropped at checkout. We need this to prevent pages from being
-+ * unexpectedly evicted from memory before checkout. Control references are
-+ * managed so that they do not accumulate with every checkin:
-+ *
-+ * 0
-+ * checkin -> 1
-+ * 0 -> checkout
-+ * checkin -> 1
-+ * checkin -> 1
-+ * checkin -> 1
-+ * 0 -> checkout
-+ * ...
-+ *
-+ * Every page cluster has its own unique "cluster lock". Update/drop
-+ * references are serialized via this lock. Number of checked in cluster
-+ * pages is calculated by i_size under cluster lock. File size is updated
-+ * at every checkin action also under cluster lock (except cases of
-+ * appending/truncating fake logical clusters).
-+ *
-+ * Proof of correctness:
-+ *
-+ * Since we update file size under cluster lock, in the case of non-fake
-+ * logical cluster with its lock held we do have expected number of checked
-+ * in pages. On the other hand, append/truncate of fake logical clusters
-+ * doesn't change number of checked in pages of any cluster.
-+ *
-+ * NOTE-EDWARD: As cluster lock we use guard (spinlock_t) of its jnode.
-+ * Currently, I don't see any reason to create a special lock for those
-+ * needs.
-+ */
-+
-+/* Take the cluster lock, i.e. the spin lock of the jnode @node */
-+static inline void lock_cluster(jnode * node)
-+{
-+ spin_lock_jnode(node);
-+}
-+
-+/* Release the cluster lock, i.e. the spin lock of the jnode @node */
-+static inline void unlock_cluster(jnode * node)
-+{
-+ spin_unlock_jnode(node);
-+}
-+
-+/* Uncapture the jnode @node of a locked cluster; the cluster lock is
-+ released by uncapture_cluster_jnode() */
-+static inline void unlock_cluster_uncapture(jnode * node)
-+{
-+ uncapture_cluster_jnode(node);
-+}
-+
-+/*
-+ * Set the new file size by the window of @clust. Cluster lock is required.
-+ *
-+ * For LC_APPOV the new size is the end of the window, and nothing is done
-+ * if that does not exceed the current file size (pure overwrite).
-+ * For LC_TRUNC the new size is the start of the window.
-+ */
-+static void checkin_file_size(struct cluster_handle * clust,
-+			      struct inode * inode)
-+{
-+	loff_t new_size;
-+	struct reiser4_slide * win;
-+
-+	assert("edward-1181", clust != NULL);
-+	assert("edward-1182", inode != NULL);
-+	assert("edward-1473", clust->pages != NULL);
-+	assert("edward-1474", clust->pages[0] != NULL);
-+	assert("edward-1475", jprivate(clust->pages[0]) != NULL);
-+	assert_spin_locked(&(jprivate(clust->pages[0])->guard));
-+
-+	win = clust->win;
-+	assert("edward-1183", win != NULL);
-+
-+	new_size = clust_to_off(clust->index, inode) + win->off;
-+
-+	if (clust->op == LC_APPOV) {
-+		new_size += win->count;
-+		if (new_size <= i_size_read(inode))
-+			/* overwrite only */
-+			return;
-+	} else if (clust->op != LC_TRUNC) {
-+		impossible("edward-1184", "bad page cluster option");
-+	}
-+	inode_check_scale_nolock(inode, i_size_read(inode), new_size);
-+	i_size_write(inode, new_size);
-+}
-+
-+/* Check in the new file size, if a window is attached to @clust */
-+static inline void checkin_cluster_size(struct cluster_handle * clust,
-+					struct inode * inode)
-+{
-+	if (!clust->win)
-+		return;
-+	checkin_file_size(clust, inode);
-+}
-+
-+/* Check in a page cluster: capture the jnode of the cluster and make it
-+ dirty, unless it is dirty already.
-+
-+ Every path through this function calls unlock_cluster() on clust->node;
-+ the caller in this file takes that lock beforehand.
-+ Returns 0 on success, or the error of capture_cluster_jnode(). */
-+static int checkin_page_cluster(struct cluster_handle * clust,
-+ struct inode * inode)
-+{
-+ int result;
-+ jnode * node;
-+ int old_nrpages = clust->old_nrpages;
-+ int new_nrpages = get_new_nrpages(clust);
-+
-+ node = clust->node;
-+
-+ assert("edward-221", node != NULL);
-+ assert("edward-971", clust->reserved == 1);
-+ assert("edward-1263",
-+ clust->reserved_prepped == estimate_update_cluster(inode));
-+ assert("edward-1264", clust->reserved_unprepped == 0);
-+
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /*
-+ * page cluster was checked in, but not yet
-+ * checked out, so release related resources
-+ */
-+ free_reserved4cluster(inode, clust,
-+ estimate_update_cluster(inode));
-+ /* drop the references to the pages of the previous checkin */
-+ __put_page_cluster(0, clust->old_nrpages,
-+ clust->pages, inode);
-+ } else {
-+ result = capture_cluster_jnode(node);
-+ if (unlikely(result)) {
-+ /* NOTE(review): this early return skips the
-+ reserved_prepped accounting done at the end
-+ of the function in debug builds */
-+ unlock_cluster(node);
-+ return result;
-+ }
-+ jnode_make_dirty_locked(node);
-+ clust->reserved = 0;
-+ }
-+ unlock_cluster(node);
-+
-+ if (new_nrpages < old_nrpages) {
-+ /* truncate >= 1 complete pages */
-+ __put_page_cluster(new_nrpages,
-+ old_nrpages - new_nrpages,
-+ clust->pages, inode);
-+ truncate_page_cluster_range(inode,
-+ clust->pages, clust->index,
-+ new_nrpages,
-+ old_nrpages - new_nrpages,
-+ 0);
-+ }
-+#if REISER4_DEBUG
-+ clust->reserved_prepped -= estimate_update_cluster(inode);
-+#endif
-+ return 0;
-+}
-+
-+/*
-+ * Submit modifications of a logical cluster: under the cluster lock check
-+ * in the new file size and the page cluster, then put the jnode reference
-+ * held by the cluster handle.
-+ * Returns the result of checkin_page_cluster().
-+ */
-+static int checkin_logical_cluster(struct cluster_handle * clust,
-+				   struct inode *inode)
-+{
-+	int result;
-+	jnode * node = clust->node;
-+
-+	assert("edward-1035", node != NULL);
-+	assert("edward-1029", clust != NULL);
-+	assert("edward-1030", clust->reserved == 1);
-+	assert("edward-1031", clust->nr_pages != 0);
-+	assert("edward-1032", clust->pages != NULL);
-+	assert("edward-1033", clust->pages[0] != NULL);
-+	assert("edward-1446", jnode_is_cluster_page(node));
-+	assert("edward-1476", node == jprivate(clust->pages[0]));
-+
-+	lock_cluster(node);
-+	checkin_cluster_size(clust, inode);
-+	/* the cluster lock is released by checkin_page_cluster() */
-+	result = checkin_page_cluster(clust, inode);
-+
-+	jput(node);
-+	clust->node = NULL;
-+	return result;
-+}
-+
-+/*
-+ * Retrieve the size of the logical cluster that was checked in at the
-+ * latest modifying session, and store it in the transform cluster
-+ * (cluster lock is required).
-+ */
-+static inline void checkout_cluster_size(struct cluster_handle * clust,
-+					 struct inode * inode)
-+{
-+	struct tfm_cluster *tc;
-+
-+	tc = &clust->tc;
-+	tc->len = lbytes(clust->index, inode);
-+
-+	assert("edward-1478", tc->len != 0);
-+}
-+
-+/*
-+ * Retrieve a page cluster with the latest submitted modifications
-+ * and flush its pages to previously allocated contiguous stream.
-+ *
-+ * The pages are looked up in the mapping of @inode and copied into the
-+ * input stream of the transform cluster; tc->len and tc->lsize are set to
-+ * the number of bytes of the logical cluster.
-+ * The cluster lock is dropped on the way by unlock_cluster_uncapture();
-+ * the caller in this file takes that lock beforehand.
-+ */
-+static void checkout_page_cluster(struct cluster_handle * clust,
-+ jnode * node, struct inode * inode)
-+{
-+ int i;
-+ int found;
-+ int to_put;
-+ struct tfm_cluster *tc = &clust->tc;
-+
-+ /* find and put checked in pages: cluster is locked,
-+ * so we must get expected number (to_put) of pages
-+ */
-+ to_put = size_in_pages(lbytes(clust->index, inode));
-+ found = find_get_pages(inode->i_mapping,
-+ clust_to_pg(clust->index, inode),
-+ to_put, clust->pages);
-+ BUG_ON(found != to_put);
-+
-+ /* presumably these are the "control" references taken at checkin;
-+ * the references obtained by find_get_pages() are put at the end
-+ * of this function
-+ */
-+ __put_page_cluster(0, to_put, clust->pages, inode);
-+ unlock_cluster_uncapture(node);
-+
-+ /* Flush found pages.
-+ *
-+ * Note, that we don't disable modifications while flushing,
-+ * moreover, some found pages can be truncated, as we have
-+ * released cluster lock.
-+ */
-+ for (i = 0; i < found; i++) {
-+ int in_page;
-+ char * data;
-+ assert("edward-1479",
-+ clust->pages[i]->index == clust->pages[0]->index + i);
-+
-+ lock_page(clust->pages[i]);
-+ if (!PageUptodate(clust->pages[i])) {
-+ /* page was truncated */
-+ assert("edward-1480",
-+ i_size_read(inode) <= page_offset(clust->pages[i]));
-+ assert("edward-1481",
-+ clust->pages[i]->mapping != inode->i_mapping);
-+ unlock_page(clust->pages[i]);
-+ break;
-+ }
-+ /* Update the number of bytes in the logical cluster,
-+ * as it could be partially truncated. Note, that only
-+ * partial truncate is possible (complete truncate can
-+ * not go here, as it is performed via ->kill_hook()
-+ * called by cut_file_items(), and the last one must
-+ * wait for znode locked with parent coord).
-+ */
-+ checkout_cluster_size(clust, inode);
-+
-+ /* this can be zero, as new file size is
-+ checked in before truncating pages */
-+ in_page = __mbp(tc->len, i);
-+
-+ /* copy the page contents to their place in the stream */
-+ data = kmap(clust->pages[i]);
-+ memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
-+ data, in_page);
-+ kunmap(clust->pages[i]);
-+
-+ if (PageDirty(clust->pages[i]))
-+ cancel_dirty_page(clust->pages[i], PAGE_CACHE_SIZE);
-+
-+ unlock_page(clust->pages[i]);
-+
-+ if (in_page < PAGE_CACHE_SIZE)
-+ /* end of the file */
-+ break;
-+ }
-+ put_found_pages(clust->pages, found); /* find_get_pages */
-+ tc->lsize = tc->len;
-+ return;
-+}
-+
-+/*
-+ * Check out modifications of a logical cluster.
-+ *
-+ * Grabs the input stream and, under the cluster lock, checks out the page
-+ * cluster into it. Returns 0 on success, or -E_REPEAT if the stream could
-+ * not be allocated or if the jnode is not dirty.
-+ */
-+int checkout_logical_cluster(struct cluster_handle * clust,
-+			     jnode * node, struct inode *inode)
-+{
-+	int result;
-+	struct tfm_cluster *tc = &clust->tc;
-+
-+	assert("edward-980", node != NULL);
-+	assert("edward-236", inode != NULL);
-+	assert("edward-237", clust != NULL);
-+	assert("edward-240", !clust->win);
-+	assert("edward-241", reiser4_schedulable());
-+	assert("edward-718", cryptcompress_inode_ok(inode));
-+
-+	result = grab_tfm_stream(inode, tc, INPUT_STREAM);
-+	if (result) {
-+		warning("edward-1430", "alloc stream failed with ret=%d",
-+			result);
-+		return RETERR(-E_REPEAT);
-+	}
-+	lock_cluster(node);
-+
-+	if (likely(JF_ISSET(node, JNODE_DIRTY))) {
-+		cluster_reserved2grabbed(estimate_update_cluster(inode));
-+
-+		/* the cluster lock is released by checkout_page_cluster() */
-+		checkout_page_cluster(clust, node, inode);
-+		return 0;
-+	}
-+	/* race with another flush */
-+	warning("edward-982",
-+		"checking out logical cluster %lu of inode %llu: "
-+		"jnode is not dirty", clust->index,
-+		(unsigned long long)get_inode_oid(inode));
-+	unlock_cluster(node);
-+	return RETERR(-E_REPEAT);
-+}
-+
-+/*
-+ * Set @hint for the cluster of the index @index: the seal is initialized
-+ * with the key of the first byte of that cluster, and the key offset and
-+ * the lock @mode are stored in the hint.
-+ */
-+static void set_hint_cluster(struct inode *inode, hint_t * hint,
-+			     cloff_t index, znode_lock_mode mode)
-+{
-+	reiser4_key key;
-+	file_plugin *fplug;
-+
-+	assert("edward-722", cryptcompress_inode_ok(inode));
-+	assert("edward-723",
-+	       inode_file_plugin(inode) ==
-+	       file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+
-+	fplug = inode_file_plugin(inode);
-+	fplug->key_by_inode(inode, clust_to_off(index, inode), &key);
-+
-+	reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, &key);
-+	hint->offset = get_key_offset(&key);
-+	hint->mode = mode;
-+}
-+
-+/* Release the lock handle of the hint of @clust and clear its valid flag */
-+void invalidate_hint_cluster(struct cluster_handle * clust)
-+{
-+	hint_t *hint;
-+
-+	assert("edward-1291", clust != NULL);
-+	assert("edward-1292", clust->hint != NULL);
-+
-+	hint = clust->hint;
-+	done_lh(&hint->lh);
-+	hint_clr_valid(hint);
-+}
-+
-+/* Set the hint of @clust for the next cluster (clust->index + 1) with the
-+ lock mode @mode, then invalidate it (see invalidate_hint_cluster()) */
-+static void put_hint_cluster(struct cluster_handle * clust,
-+ struct inode *inode, znode_lock_mode mode)
-+{
-+ assert("edward-1286", clust != NULL);
-+ assert("edward-1287", clust->hint != NULL);
-+
-+ set_hint_cluster(inode, clust->hint, clust->index + 1, mode);
-+ invalidate_hint_cluster(clust);
-+}
-+
-+/*
-+ * Move the window of @clust to its next position, update the stat data of
-+ * @inode and balance dirty pages of its mapping for @nr_dirtied pages.
-+ * The checkin mutex of the inode is dropped around the transaction restart
-+ * and the balancing, and is taken again before return.
-+ * Returns 0 on success, or the error of update_sd_cryptcompress(); in the
-+ * latter case the checkin mutex is not touched.
-+ */
-+static int balance_dirty_page_cluster(struct cluster_handle * clust,
-+				      struct inode *inode, loff_t off,
-+				      loff_t to_file,
-+				      int nr_dirtied)
-+{
-+	int result;
-+	struct cryptcompress_info * info;
-+
-+	assert("edward-724", inode != NULL);
-+	assert("edward-725", cryptcompress_inode_ok(inode));
-+	assert("edward-1547",
-+	       nr_dirtied != 0 && nr_dirtied <= cluster_nrpages(inode));
-+
-+	/* set next window params */
-+	move_update_window(inode, clust, off, to_file);
-+
-+	result = update_sd_cryptcompress(inode);
-+	if (result == 0) {
-+		assert("edward-726", clust->hint->lh.owner == NULL);
-+		info = cryptcompress_inode_data(inode);
-+
-+		mutex_unlock(&info->checkin_mutex);
-+		reiser4_txn_restart_current();
-+		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-+						   nr_dirtied);
-+		mutex_lock(&info->checkin_mutex);
-+	}
-+	return result;
-+}
-+
-+/* Set zeroes to the page cluster, process it, and maybe, try to capture
-+ its pages.
-+
-+ @clust must have a hole window. A window that covers a whole logical
-+ cluster is only skipped. Otherwise win->count bytes starting from win->off
-+ are zeroed in the pages of the cluster; then, if no data follow the hole
-+ in this cluster (win->delta == 0), the cluster is checked in and dirty
-+ pages are balanced, else the window is just moved to its next position.
-+ Returns 0 on success, or the error of checkin_logical_cluster() /
-+ balance_dirty_page_cluster(). */
-+static int write_hole(struct inode *inode, struct cluster_handle * clust,
-+ loff_t file_off, loff_t to_file)
-+{
-+ int result = 0;
-+ unsigned cl_off, cl_count = 0;
-+ unsigned to_pg, pg_off;
-+ struct reiser4_slide * win;
-+
-+ assert("edward-190", clust != NULL);
-+ assert("edward-1069", clust->win != NULL);
-+ assert("edward-191", inode != NULL);
-+ assert("edward-727", cryptcompress_inode_ok(inode));
-+ assert("edward-1171", clust->dstat != INVAL_DISK_CLUSTER);
-+ assert("edward-1154",
-+ ergo(clust->dstat != FAKE_DISK_CLUSTER, clust->reserved == 1));
-+
-+ win = clust->win;
-+
-+ assert("edward-1070", win != NULL);
-+ assert("edward-201", win->stat == HOLE_WINDOW);
-+ assert("edward-192", cluster_ok(clust, inode));
-+
-+ if (win->off == 0 && win->count == inode_cluster_size(inode)) {
-+ /* This part of the hole will be represented by "fake"
-+ * logical cluster, i.e. which doesn't have appropriate
-+ * disk cluster until someone modify this logical cluster
-+ * and make it dirty.
-+ * So go forward here..
-+ */
-+ move_update_window(inode, clust, file_off, to_file);
-+ return 0;
-+ }
-+ cl_count = win->count; /* number of zeroes to write */
-+ cl_off = win->off;
-+ pg_off = off_to_pgoff(win->off);
-+
-+ /* zero the hole page by page */
-+ while (cl_count) {
-+ struct page *page;
-+ page = clust->pages[off_to_pg(cl_off)];
-+
-+ assert("edward-284", page != NULL);
-+
-+ /* number of bytes to zero in this page */
-+ to_pg = min((typeof(pg_off))PAGE_CACHE_SIZE - pg_off, cl_count);
-+ lock_page(page);
-+ zero_user_page(page, pg_off, to_pg, KM_USER0);
-+ SetPageUptodate(page);
-+ reiser4_set_page_dirty_internal(page);
-+ mark_page_accessed(page);
-+ unlock_page(page);
-+
-+ cl_off += to_pg;
-+ cl_count -= to_pg;
-+ /* all the following pages are zeroed from their start */
-+ pg_off = 0;
-+ }
-+ if (!win->delta) {
-+ /* only zeroes in this window, try to capture
-+ */
-+ result = checkin_logical_cluster(clust, inode);
-+ if (result)
-+ return result;
-+ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
-+ result = balance_dirty_page_cluster(clust,
-+ inode, file_off, to_file,
-+ win_count_to_nrpages(win));
-+ } else
-+ move_update_window(inode, clust, file_off, to_file);
-+ return result;
-+}
-+
-+/*
-+ The main disk search procedure for cryptcompress plugin, which
-+ . scans all items of disk cluster with the lock mode @mode
-+ . maybe reads each one (if @read)
-+ . maybe makes its znode dirty (if write lock mode was specified)
-+
-+ On return clust->dstat is FAKE_DISK_CLUSTER when the first item of the
-+ disk cluster is not found; when the scan completes it is set to one of
-+ UNPR_DISK_CLUSTER, TRNC_DISK_CLUSTER or PREP_DISK_CLUSTER, and
-+ clust->tc.len / clust->tc.lsize are filled in. Blocks grabbed during
-+ the call are released before returning.
-+
-+ Returns 0 on success, otherwise the error of reiser4_grab_space_force(),
-+ find_cluster_item(), zload_ra() or the item plugin's read method.
-+
-+ NOTE-EDWARD: Callers should handle the case when disk cluster
-+ is incomplete (-EIO)
-+*/
-+int find_disk_cluster(struct cluster_handle * clust,
-+ struct inode *inode, int read, znode_lock_mode mode)
-+{
-+ flow_t f;
-+ hint_t *hint;
-+ int result = 0;
-+ int was_grabbed;
-+ ra_info_t ra_info;
-+ file_plugin *fplug;
-+ item_plugin *iplug;
-+ struct tfm_cluster *tc;
-+ struct cryptcompress_info * info;
-+
-+ assert("edward-138", clust != NULL);
-+ assert("edward-728", clust->hint != NULL);
-+ assert("edward-226", reiser4_schedulable());
-+ assert("edward-137", inode != NULL);
-+ assert("edward-729", cryptcompress_inode_ok(inode));
-+
-+ hint = clust->hint;
-+ fplug = inode_file_plugin(inode);
-+ was_grabbed = get_current_context()->grabbed_blocks; /* remembered so that only blocks grabbed here are freed at "out" */
-+ info = cryptcompress_inode_data(inode);
-+ tc = &clust->tc;
-+
-+ assert("edward-462", !tfm_cluster_is_uptodate(tc));
-+ assert("edward-461", ergo(read, tfm_stream_is_set(tc, INPUT_STREAM)));
-+
-+ dclust_init_extension(hint);
-+
-+ /* set key of the first disk cluster item */
-+ fplug->flow_by_inode(inode,
-+ (read ? (char __user *)tfm_stream_data(tc, INPUT_STREAM) : NULL),
-+ 0 /* kernel space */ ,
-+ inode_scaled_cluster_size(inode),
-+ clust_to_off(clust->index, inode), READ_OP, &f);
-+ if (mode == ZNODE_WRITE_LOCK) {
-+ /* reserve for flush to make dirty all the leaf nodes
-+ which contain disk cluster */
-+ result =
-+ reiser4_grab_space_force(estimate_dirty_cluster(inode),
-+ BA_CAN_COMMIT);
-+ if (result)
-+ goto out;
-+ }
-+
-+ ra_info.key_to_stop = f.key;
-+ set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
-+
-+ while (f.length) {
-+ result = find_cluster_item(hint, &f.key, mode,
-+ NULL, FIND_EXACT,
-+ (mode == ZNODE_WRITE_LOCK ?
-+ CBK_FOR_INSERT : 0));
-+ switch (result) {
-+ case CBK_COORD_NOTFOUND:
-+ result = 0;
-+ if (inode_scaled_offset
-+ (inode, clust_to_off(clust->index, inode)) ==
-+ get_key_offset(&f.key)) {
-+ /* first item not found, this is treated
-+ as disk cluster is absent */
-+ clust->dstat = FAKE_DISK_CLUSTER;
-+ goto out;
-+ }
-+ /* we are outside the cluster, stop search here */
-+ assert("edward-146",
-+ f.length != inode_scaled_cluster_size(inode));
-+ goto ok;
-+ case CBK_COORD_FOUND:
-+ assert("edward-148",
-+ hint->ext_coord.coord.between == AT_UNIT);
-+ assert("edward-460",
-+ hint->ext_coord.coord.unit_pos == 0);
-+
-+ coord_clear_iplug(&hint->ext_coord.coord);
-+ result = zload_ra(hint->ext_coord.coord.node, &ra_info);
-+ if (unlikely(result))
-+ goto out;
-+ iplug = item_plugin_by_coord(&hint->ext_coord.coord);
-+ assert("edward-147",
-+ item_id_by_coord(&hint->ext_coord.coord) ==
-+ CTAIL_ID);
-+
-+ result = iplug->s.file.read(NULL, &f, hint);
-+ if (result) {
-+ zrelse(hint->ext_coord.coord.node);
-+ goto out;
-+ }
-+ if (mode == ZNODE_WRITE_LOCK) {
-+ /* Don't make dirty more nodes then it was
-+ estimated (see comments before
-+ estimate_dirty_cluster). Missed nodes will be
-+ read up in flush time if they are evicted from
-+ memory */
-+ if (dclust_get_extension_ncount(hint) <=
-+ estimate_dirty_cluster(inode))
-+ znode_make_dirty(hint->ext_coord.coord.node);
-+
-+ znode_set_convertible(hint->ext_coord.coord.
-+ node);
-+ }
-+ zrelse(hint->ext_coord.coord.node);
-+ break;
-+ default:
-+ goto out; /* lookup failed: return its result unchanged */
-+ }
-+ }
-+ ok:
-+ /* at least one item was found */
-+ /* NOTE-EDWARD: Callers should handle the case
-+ when disk cluster is incomplete (-EIO) */
-+ tc->len = inode_scaled_cluster_size(inode) - f.length;
-+ tc->lsize = lbytes(clust->index, inode);
-+ assert("edward-1196", tc->len > 0);
-+ assert("edward-1406", tc->lsize > 0);
-+
-+ if (hint_is_unprepped_dclust(clust->hint)) {
-+ clust->dstat = UNPR_DISK_CLUSTER;
-+ } else if (clust->index == info->trunc_index) {
-+ clust->dstat = TRNC_DISK_CLUSTER;
-+ } else {
-+ clust->dstat = PREP_DISK_CLUSTER;
-+ dclust_set_extension_dsize(clust->hint, tc->len);
-+ }
-+ out:
-+ assert("edward-1339",
-+ get_current_context()->grabbed_blocks >= was_grabbed);
-+ grabbed2free(get_current_context(),
-+ get_current_super_private(),
-+ get_current_context()->grabbed_blocks - was_grabbed);
-+ return result;
-+}
-+
-+/* Get a locked tree position for the disk cluster of @clust.
-+ @clust: cluster handle; @clust->hint receives the lookup result
-+ @inode: cryptcompress file the cluster belongs to
-+ @lock_mode: znode lock mode passed to the tree lookup
-+ If @clust->hint is already valid no search is done and the result is
-+ derived from @clust->dstat: CBK_COORD_NOTFOUND for a fake disk cluster,
-+ CBK_COORD_FOUND otherwise. Otherwise returns the result of
-+ find_cluster_item() for the key of the cluster's first byte, looked up
-+ with FIND_EXACT and CBK_FOR_INSERT. */
-+int get_disk_cluster_locked(struct cluster_handle * clust, struct inode *inode,
-+ znode_lock_mode lock_mode)
-+{
-+ reiser4_key key;
-+
-+ assert("edward-730", reiser4_schedulable());
-+ assert("edward-731", clust != NULL);
-+ assert("edward-732", inode != NULL);
-+
-+ if (hint_is_valid(clust->hint)) {
-+ assert("edward-1293", clust->dstat != INVAL_DISK_CLUSTER);
-+ assert("edward-1294",
-+ znode_is_write_locked(clust->hint->lh.node));
-+ /* already have a valid locked position */
-+ return (clust->dstat ==
-+ FAKE_DISK_CLUSTER ? CBK_COORD_NOTFOUND :
-+ CBK_COORD_FOUND);
-+ }
-+ key_by_inode_cryptcompress(inode, clust_to_off(clust->index, inode),
-+ &key);
-+ /* No read-ahead info is passed to the lookup (NULL), so none is
-+ prepared here. */
-+ return find_cluster_item(clust->hint, &key, lock_mode, NULL, FIND_EXACT,
-+ CBK_FOR_INSERT);
-+}
-+
-+/* Read needed cluster pages before modifying.
-+ If success, @clust->hint contains locked position in the tree.
-+ Also:
-+ . find and set disk cluster state
-+ . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER.
-+
-+ For a new logical cluster nothing is read: the tail of the last page
-+ past the window is zeroed and the state is set to FAKE_DISK_CLUSTER.
-+ Otherwise every page that is not uptodate and will not be completely
-+ overwritten by the window is read with do_readpage_ctail().
-+ Returns 0 on success, or the error of do_readpage_ctail() or
-+ find_disk_cluster().
-+*/
-+static int read_some_cluster_pages(struct inode * inode,
-+ struct cluster_handle * clust)
-+{
-+ int i;
-+ int result = 0;
-+ item_plugin *iplug;
-+ struct reiser4_slide * win = clust->win;
-+ znode_lock_mode mode = ZNODE_WRITE_LOCK;
-+
-+ iplug = item_plugin_by_id(CTAIL_ID); /* NOTE(review): iplug is not used below */
-+
-+ assert("edward-924", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+#if REISER4_DEBUG
-+ if (clust->nr_pages == 0) {
-+ /* start write hole from fake disk cluster */
-+ assert("edward-1117", win != NULL);
-+ assert("edward-1118", win->stat == HOLE_WINDOW);
-+ assert("edward-1119", new_logical_cluster(clust, inode));
-+ }
-+#endif
-+ if (new_logical_cluster(clust, inode)) {
-+ /*
-+ new page cluster is about to be written, nothing to read,
-+ */
-+ assert("edward-734", reiser4_schedulable());
-+ assert("edward-735", clust->hint->lh.owner == NULL);
-+
-+ if (clust->nr_pages) {
-+ int off;
-+ struct page * pg;
-+ assert("edward-1419", clust->pages != NULL);
-+ pg = clust->pages[clust->nr_pages - 1];
-+ assert("edward-1420", pg != NULL);
-+ off = off_to_pgoff(win->off+win->count+win->delta); /* NOTE(review): win is dereferenced without a NULL check here - confirm callers always set clust->win */
-+ if (off) {
-+ lock_page(pg);
-+ zero_user_page(pg, off, PAGE_CACHE_SIZE - off,
-+ KM_USER0);
-+ unlock_page(pg);
-+ }
-+ }
-+ clust->dstat = FAKE_DISK_CLUSTER;
-+ return 0;
-+ }
-+ /*
-+ Here we should search for disk cluster to figure out its real state.
-+ Also there is one more important reason to do disk search: we need
-+ to make disk cluster _dirty_ if it exists
-+ */
-+
-+ /* if windows is specified, read the only pages
-+ that will be modified partially */
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ struct page *pg = clust->pages[i];
-+
-+ lock_page(pg);
-+ if (PageUptodate(pg)) {
-+ unlock_page(pg);
-+ continue;
-+ }
-+ unlock_page(pg);
-+
-+ if (win &&
-+ i >= size_in_pages(win->off) &&
-+ i < off_to_pg(win->off + win->count + win->delta))
-+ /* page will be completely overwritten */
-+ continue;
-+
-+ if (win && (i == clust->nr_pages - 1) &&
-+ /* the last page is
-+ partially modified,
-+ not uptodate .. */
-+ (size_in_pages(i_size_read(inode)) <= pg->index)) {
-+ /* .. and appended,
-+ so set zeroes to the rest */
-+ int offset;
-+ lock_page(pg);
-+ assert("edward-1260",
-+ size_in_pages(win->off + win->count +
-+ win->delta) - 1 == i);
-+
-+ offset =
-+ off_to_pgoff(win->off + win->count + win->delta);
-+ zero_user_page(pg, offset, PAGE_CACHE_SIZE - offset,
-+ KM_USER0);
-+ unlock_page(pg);
-+ /* still not uptodate */
-+ break;
-+ }
-+ lock_page(pg);
-+ result = do_readpage_ctail(inode, clust, pg, mode);
-+
-+ assert("edward-1526", ergo(!result, PageUptodate(pg)));
-+ unlock_page(pg);
-+ if (result) {
-+ warning("edward-219", "do_readpage_ctail failed");
-+ goto out;
-+ }
-+ }
-+ if (!tfm_cluster_is_uptodate(&clust->tc)) {
-+ /* disk cluster unclaimed, but we need to make its znodes dirty
-+ * to make flush update convert its content
-+ */
-+ result = find_disk_cluster(clust, inode,
-+ 0 /* do not read items */,
-+ mode);
-+ }
-+ out:
-+ tfm_cluster_clr_uptodate(&clust->tc); /* cleared on success and on failure alike */
-+ return result;
-+}
-+
-+/* Tell whether an unprepped disk cluster has to be inserted for @clust.
-+ Returns 1 only for a fake disk cluster which is not a page-less hole
-+ window; returns 0 for prepped and unprepped disk clusters and for any
-+ unexpected state (which is reported via impossible()). */
-+static int should_create_unprepped_cluster(struct cluster_handle * clust,
-+ struct inode * inode)
-+{
-+ int pageless_hole;
-+
-+ assert("edward-737", clust != NULL);
-+
-+ if (clust->dstat == PREP_DISK_CLUSTER ||
-+ clust->dstat == UNPR_DISK_CLUSTER)
-+ /* disk cluster already exists */
-+ return 0;
-+ if (clust->dstat != FAKE_DISK_CLUSTER) {
-+ impossible("edward-1173", "bad disk cluster state");
-+ return 0;
-+ }
-+ pageless_hole = clust->win &&
-+ clust->win->stat == HOLE_WINDOW && clust->nr_pages == 0;
-+ if (!pageless_hole)
-+ return 1;
-+ assert("edward-1172",
-+ new_logical_cluster(clust, inode));
-+ return 0;
-+}
-+/* Insert an unprepped disk cluster for @clust when one is needed (see
-+ should_create_unprepped_cluster()) and settle the disk space reserved
-+ for that insertion.
-+ . not needed: if @clust->reserved is set, the insert estimate is moved
-+ from reserved to free; returns 0.
-+ . needed: the insert estimate is moved from reserved to grabbed, the
-+ cluster is inserted with ctail_insert_unprepped_cluster(), the inode
-+ is charged with one cluster size of bytes and @clust->dstat becomes
-+ UNPR_DISK_CLUSTER.
-+ Returns 0 on success or the error of ctail_insert_unprepped_cluster(). */
-+static int cryptcompress_make_unprepped_cluster(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ int result;
-+
-+ assert("edward-1123", reiser4_schedulable());
-+ assert("edward-737", clust != NULL);
-+ assert("edward-738", inode != NULL);
-+ assert("edward-739", cryptcompress_inode_ok(inode));
-+ assert("edward-1053", clust->hint != NULL);
-+
-+ if (!should_create_unprepped_cluster(clust, inode)) {
-+ if (clust->reserved) {
-+ cluster_reserved2free(estimate_insert_cluster(inode));
-+#if REISER4_DEBUG
-+ assert("edward-1267",
-+ clust->reserved_unprepped ==
-+ estimate_insert_cluster(inode));
-+ clust->reserved_unprepped -=
-+ estimate_insert_cluster(inode);
-+#endif
-+ }
-+ return 0;
-+ }
-+ assert("edward-1268", clust->reserved);
-+ cluster_reserved2grabbed(estimate_insert_cluster(inode));
-+#if REISER4_DEBUG
-+ assert("edward-1441",
-+ clust->reserved_unprepped == estimate_insert_cluster(inode));
-+ clust->reserved_unprepped -= estimate_insert_cluster(inode);
-+#endif
-+ result = ctail_insert_unprepped_cluster(clust, inode);
-+ if (result)
-+ return result;
-+
-+ inode_add_bytes(inode, inode_cluster_size(inode));
-+
-+ assert("edward-743", cryptcompress_inode_ok(inode));
-+ assert("edward-744", znode_is_write_locked(clust->hint->lh.node));
-+
-+ clust->dstat = UNPR_DISK_CLUSTER;
-+ return 0;
-+}
-+
-+/* . Grab page cluster for read, write, setattr, etc. operations;
-+ * . Truncate its complete pages, if needed (NOTE(review): no truncation
-+ * is visible in this function - presumably done by grab_page_cluster();
-+ * confirm);
-+ *
-+ * @inode: cryptcompress file
-+ * @clust: cluster handle; clust->pages must already be allocated
-+ * @rw: operation the pages are grabbed for, passed to grab_page_cluster()
-+ *
-+ * Sets the number of pages of @clust, resets its page set, and returns
-+ * the result of grab_page_cluster().
-+ */
-+int prepare_page_cluster(struct inode * inode, struct cluster_handle * clust,
-+ rw_op rw)
-+{
-+ assert("edward-177", inode != NULL);
-+ assert("edward-741", cryptcompress_inode_ok(inode));
-+ assert("edward-740", clust->pages != NULL);
-+
-+ set_cluster_nrpages(clust, inode);
-+ reset_cluster_pgset(clust, cluster_nrpages(inode));
-+ return grab_page_cluster(inode, clust, rw);
-+}
-+
-+/* Truncate complete page cluster of index @index.
-+ * This is called by ->kill_hook() method of item
-+ * plugin when deleting a disk cluster of such index.
-+ *
-+ * @inode: cryptcompress file
-+ * @index: index of the logical cluster to truncate
-+ * @even_cows: passed through to truncate_page_cluster_range()
-+ *
-+ * If the cluster has a jnode which is dirty, the checked-in pages and the
-+ * disk space reserved for converting the cluster are released and the
-+ * jnode is uncaptured before the pages are truncated.
-+ */
-+void truncate_complete_page_cluster(struct inode *inode, cloff_t index,
-+ int even_cows)
-+{
-+ int found;
-+ int nr_pages;
-+ jnode *node;
-+ struct page *pages[MAX_CLUSTER_NRPAGES];
-+
-+ node = jlookup(current_tree, get_inode_oid(inode),
-+ clust_to_pg(index, inode));
-+ nr_pages = size_in_pages(lbytes(index, inode));
-+ assert("edward-1483", nr_pages != 0);
-+ if (!node)
-+ /* NOTE(review): @pages is not filled in on this path but is
-+ * still handed to truncate_page_cluster_range() below -
-+ * confirm that it is not read there */
-+ goto truncate;
-+ found = find_get_pages(inode->i_mapping,
-+ clust_to_pg(index, inode),
-+ cluster_nrpages(inode), pages);
-+ if (!found) {
-+ /* drop the reference taken by jlookup(); every other path
-+ * that got a jnode releases it with jput() below */
-+ jput(node);
-+ assert("edward-1484", jnode_truncate_ok(inode, index));
-+ return;
-+ }
-+ lock_cluster(node);
-+
-+ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS)
-+ && index == 0)
-+ /* converting to unix_file is in progress */
-+ JF_CLR(node, JNODE_CLUSTER_PAGE);
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /*
-+ * @nr_pages were checked in, but not yet checked out -
-+ * we need to release them. (also there can be pages
-+ * attached to page cache by read(), etc. - don't take
-+ * them into account).
-+ */
-+ assert("edward-1198", found >= nr_pages);
-+
-+ /* free disk space grabbed for disk cluster converting */
-+ cluster_reserved2grabbed(estimate_update_cluster(inode));
-+ grabbed2free(get_current_context(),
-+ get_current_super_private(),
-+ estimate_update_cluster(inode));
-+ __put_page_cluster(0, nr_pages, pages, inode);
-+
-+ /* This will clear dirty bit, uncapture and unlock jnode */
-+ unlock_cluster_uncapture(node);
-+ } else
-+ unlock_cluster(node);
-+ jput(node); /* jlookup */
-+ put_found_pages(pages, found); /* find_get_pages */
-+ truncate:
-+ if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) &&
-+ index == 0)
-+ return;
-+ truncate_page_cluster_range(inode, pages, index, 0,
-+ cluster_nrpages(inode),
-+ even_cows);
-+ assert("edward-1201",
-+ ergo(!reiser4_inode_get_flag(inode,
-+ REISER4_FILE_CONV_IN_PROGRESS),
-+ jnode_truncate_ok(inode, index)));
-+ return;
-+}
-+
-+/*
-+ * Set cluster handle @clust of a logical cluster before
-+ * modifications which are supposed to be committed.
-+ *
-+ * . grab cluster pages;
-+ * . reserve disk space;
-+ * . maybe read pages from disk and set the disk cluster dirty;
-+ * . create 'unprepped' disk cluster for new or fake logical one;
-+ * . maybe write hole and check in (partially zeroed) logical cluster.
-+ *
-+ * Returns 0 on success. On failure the grabbed pages are put back (except
-+ * when grabbing them failed in the first place), the reservation made
-+ * here is freed, and the error of the failed step is returned.
-+ */
-+static int prepare_logical_cluster(struct inode *inode,
-+ loff_t file_off, /* write position
-+ in the file */
-+ loff_t to_file, /* bytes of users data
-+ to write to the file */
-+ struct cluster_handle * clust,
-+ logical_cluster_op op)
-+{
-+ int result = 0;
-+ struct reiser4_slide * win = clust->win;
-+
-+ reset_cluster_params(clust);
-+ cluster_set_tfm_act(&clust->tc, TFMA_READ);
-+#if REISER4_DEBUG
-+ clust->ctx = get_current_context();
-+#endif
-+ assert("edward-1190", op != LC_INVAL);
-+
-+ clust->op = op;
-+
-+ result = prepare_page_cluster(inode, clust, WRITE_OP);
-+ if (result)
-+ return result;
-+ assert("edward-1447",
-+ ergo(clust->nr_pages != 0, jprivate(clust->pages[0])));
-+ assert("edward-1448",
-+ ergo(clust->nr_pages != 0,
-+ jnode_is_cluster_page(jprivate(clust->pages[0]))));
-+
-+ result = reserve4cluster(inode, clust);
-+ if (result)
-+ goto err1;
-+ result = read_some_cluster_pages(inode, clust);
-+ if (result) {
-+ free_reserved4cluster(inode,
-+ clust,
-+ estimate_update_cluster(inode) +
-+ estimate_insert_cluster(inode));
-+ goto err1;
-+ }
-+ assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER);
-+
-+ result = cryptcompress_make_unprepped_cluster(clust, inode);
-+ if (result)
-+ goto err2;
-+ if (win && win->stat == HOLE_WINDOW) {
-+ result = write_hole(inode, clust, file_off, to_file);
-+ if (result)
-+ goto err2;
-+ }
-+ return 0;
-+ err2:
-+ free_reserved4cluster(inode, clust,
-+ estimate_update_cluster(inode)); /* only the update estimate is freed here; the insert estimate was already handled by cryptcompress_make_unprepped_cluster() */
-+ err1:
-+ put_page_cluster(clust, inode, WRITE_OP);
-+ assert("edward-1125", result == -ENOSPC); /* NOTE(review): the steps above can also propagate other errors (e.g. from read_some_cluster_pages()) - confirm this assertion */
-+ return result;
-+}
-+
-+/* Set window by two file offsets: position @clust and @win on the logical
-+ cluster containing @o1; the window covers the bytes of [@o1, @o2) that
-+ fall into that cluster. The window's delta is reset to zero. */
-+static void set_window(struct cluster_handle * clust,
-+ struct reiser4_slide * win, struct inode *inode,
-+ loff_t o1, loff_t o2)
-+{
-+ loff_t room; /* bytes from the window start to the cluster end */
-+ loff_t span; /* length of the requested range */
-+
-+ assert("edward-295", clust != NULL);
-+ assert("edward-296", inode != NULL);
-+ assert("edward-1071", win != NULL);
-+ assert("edward-297", o1 <= o2);
-+
-+ clust->index = off_to_clust(o1, inode);
-+ clust->win = win;
-+
-+ win->off = off_to_cloff(o1, inode);
-+ room = (loff_t)(inode_cluster_size(inode) - win->off);
-+ span = o2 - o1;
-+ win->count = (room < span) ? room : span;
-+ win->delta = 0;
-+}
-+/* Allocate the page set of @clust and set up the first window @win for a
-+ write of @length bytes at file offset @file_off.
-+ . @file_off beyond the current file size: the window becomes a
-+ HOLE_WINDOW starting at the old file size; if the hole ends inside
-+ its logical cluster, win->delta is set to the amount of user data
-+ that follows the hole in that cluster.
-+ . otherwise: the window becomes a DATA_WINDOW starting at @file_off.
-+ Returns 0 on success or the error of alloc_cluster_pgset(). */
-+static int set_cluster_by_window(struct inode *inode,
-+ struct cluster_handle * clust,
-+ struct reiser4_slide * win, size_t length,
-+ loff_t file_off)
-+{
-+ int result;
-+
-+ assert("edward-197", clust != NULL);
-+ assert("edward-1072", win != NULL);
-+ assert("edward-198", inode != NULL);
-+
-+ result = alloc_cluster_pgset(clust, cluster_nrpages(inode));
-+ if (result)
-+ return result;
-+
-+ if (file_off > i_size_read(inode)) {
-+ /* Uhmm, hole in cryptcompress file... */
-+ loff_t hole_size;
-+ hole_size = file_off - inode->i_size;
-+
-+ set_window(clust, win, inode, inode->i_size, file_off);
-+ win->stat = HOLE_WINDOW;
-+ if (win->off + hole_size < inode_cluster_size(inode))
-+ /* there is also user's data to append to the hole */
-+ win->delta = min(inode_cluster_size(inode) -
-+ (win->off + win->count), length);
-+ return 0;
-+ }
-+ set_window(clust, win, inode, file_off, file_off + length);
-+ win->stat = DATA_WINDOW;
-+ return 0;
-+}
-+
-+/* Bind @clust to the logical cluster that contains @page.
-+ The page set of @clust is reset to @count entries when it already
-+ exists and allocated otherwise; the result of that call is returned.
-+ The cluster index is set in either case. */
-+int set_cluster_by_page(struct cluster_handle * clust, struct page * page,
-+ int count)
-+{
-+ int ret;
-+
-+ assert("edward-1358", clust != NULL);
-+ assert("edward-1359", page != NULL);
-+ assert("edward-1360", page->mapping != NULL);
-+ assert("edward-1361", page->mapping->host != NULL);
-+
-+ if (clust->pages)
-+ ret = reset_cluster_pgset(clust, count);
-+ else
-+ ret = alloc_cluster_pgset(clust, count);
-+
-+ clust->index = pg_to_clust(page->index, page->mapping->host);
-+ return ret;
-+}
-+
-+/* Reset the per-iteration state of @clust: the transform cluster is
-+ marked empty and not uptodate, and the disk cluster state becomes
-+ INVAL_DISK_CLUSTER. */
-+void reset_cluster_params(struct cluster_handle * clust)
-+{
-+ assert("edward-197", clust != NULL);
-+
-+ clust->tc.len = 0;
-+ clust->tc.uptodate = 0;
-+ clust->dstat = INVAL_DISK_CLUSTER;
-+}
-+
-+/* the heart of write_cryptcompress
-+ @file, @inode: file being written and its inode
-+ @buf: user-space source buffer
-+ @to_write: number of bytes to write
-+ @pos: file offset to start writing at
-+ @cont: plugin scheduling context, passed to write_pschedule_hook()
-+
-+ Writes logical cluster by logical cluster under info->checkin_mutex:
-+ a hole between the file size and @pos is prepared first, then for each
-+ window user data is copied into the cluster pages and the cluster is
-+ checked in.
-+ Returns the number of bytes written if it is not zero, otherwise the
-+ last result (0 or an error). */
-+static loff_t do_write_cryptcompress(struct file *file, struct inode *inode,
-+ const char __user *buf, size_t to_write,
-+ loff_t pos, struct psched_context *cont)
-+{
-+ int i;
-+ hint_t *hint;
-+ int result = 0;
-+ size_t count;
-+ struct reiser4_slide win;
-+ struct cluster_handle clust;
-+ struct cryptcompress_info * info;
-+
-+ assert("edward-154", buf != NULL);
-+ assert("edward-161", reiser4_schedulable());
-+ assert("edward-748", cryptcompress_inode_ok(inode));
-+ assert("edward-159", current_blocksize == PAGE_CACHE_SIZE);
-+ assert("edward-1274", get_current_context()->grabbed_blocks == 0);
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ result = load_file_hint(file, hint);
-+ if (result) {
-+ kfree(hint);
-+ return result;
-+ }
-+ count = to_write; /* bytes still to be written */
-+
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+ info = cryptcompress_inode_data(inode);
-+
-+ mutex_lock(&info->checkin_mutex);
-+
-+ result = set_cluster_by_window(inode, &clust, &win, to_write, pos);
-+ if (result)
-+ goto out;
-+
-+ if (next_window_stat(&win) == HOLE_WINDOW) {
-+ /* write hole in this iteration
-+ separated from the loop below */
-+ result = write_pschedule_hook(file, inode,
-+ pos,
-+ &clust,
-+ cont);
-+ if (result)
-+ goto out;
-+ result = prepare_logical_cluster(inode, pos, count, &clust,
-+ LC_APPOV);
-+ if (result)
-+ goto out;
-+ }
-+ do {
-+ const char __user * src;
-+ unsigned page_off, to_page;
-+
-+ assert("edward-750", reiser4_schedulable());
-+
-+ result = write_pschedule_hook(file, inode,
-+ pos + to_write - count,
-+ &clust,
-+ cont);
-+ if (result)
-+ goto out;
-+ if (cont->state == PSCHED_ASSIGNED_NEW)
-+ goto out_no_release; /* NOTE(review): this path skips done_lh(), mutex_unlock() and save_file_hint() - presumably already done by write_pschedule_hook(); confirm */
-+
-+ result = prepare_logical_cluster(inode, pos, count, &clust,
-+ LC_APPOV);
-+ if (result)
-+ goto out;
-+
-+ assert("edward-751", cryptcompress_inode_ok(inode));
-+ assert("edward-204", win.stat == DATA_WINDOW);
-+ assert("edward-1288", hint_is_valid(clust.hint));
-+ assert("edward-752",
-+ znode_is_write_locked(hint->ext_coord.coord.node));
-+ put_hint_cluster(&clust, inode, ZNODE_WRITE_LOCK);
-+
-+ /* set write position in page */
-+ page_off = off_to_pgoff(win.off);
-+
-+ /* copy user's data to cluster pages */
-+ for (i = off_to_pg(win.off), src = buf;
-+ i < size_in_pages(win.off + win.count);
-+ i++, src += to_page) {
-+ to_page = __mbp(win.off + win.count, i) - page_off;
-+ assert("edward-1039",
-+ page_off + to_page <= PAGE_CACHE_SIZE);
-+ assert("edward-287", clust.pages[i] != NULL);
-+
-+ fault_in_pages_readable(src, to_page);
-+
-+ lock_page(clust.pages[i]);
-+ result =
-+ __copy_from_user((char *)kmap(clust.pages[i]) +
-+ page_off, src, to_page);
-+ kunmap(clust.pages[i]);
-+ if (unlikely(result)) {
-+ unlock_page(clust.pages[i]);
-+ result = -EFAULT;
-+ goto err2;
-+ }
-+ SetPageUptodate(clust.pages[i]);
-+ reiser4_set_page_dirty_internal(clust.pages[i]);
-+ flush_dcache_page(clust.pages[i]);
-+ mark_page_accessed(clust.pages[i]);
-+ unlock_page(clust.pages[i]);
-+ page_off = 0; /* following pages are filled from their start */
-+ }
-+ assert("edward-753", cryptcompress_inode_ok(inode));
-+
-+ result = checkin_logical_cluster(&clust, inode);
-+ if (result)
-+ goto err2;
-+
-+ buf += win.count;
-+ count -= win.count;
-+
-+ result = balance_dirty_page_cluster(&clust, inode, 0, count,
-+ win_count_to_nrpages(&win));
-+ if (result)
-+ goto err1;
-+ assert("edward-755", hint->lh.owner == NULL);
-+ reset_cluster_params(&clust);
-+ continue; /* re-evaluates the loop condition, skipping the error labels below */
-+ err2:
-+ put_page_cluster(&clust, inode, WRITE_OP);
-+ err1:
-+ if (clust.reserved)
-+ free_reserved4cluster(inode,
-+ &clust,
-+ estimate_update_cluster(inode));
-+ break;
-+ } while (count);
-+ out:
-+ done_lh(&hint->lh);
-+ mutex_unlock(&info->checkin_mutex);
-+ save_file_hint(file, hint);
-+ out_no_release:
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ assert("edward-195",
-+ ergo((to_write == count),
-+ (result < 0 || cont->state == PSCHED_ASSIGNED_NEW)));
-+ return (to_write - count) ? (to_write - count) : result;
-+}
-+
-+/**
-+ * plugin->write()
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @off: position in file to write to; advanced by the number of bytes
-+ * written on success
-+ * @cont: plugin scheduling context; must be in PSCHED_INVAL_STATE on
-+ * entry, passed to do_write_cryptcompress()
-+ *
-+ * Returns the number of bytes written, 0 when there is nothing to write,
-+ * or the error returned by generic_write_checks(), remove_suid() or
-+ * do_write_cryptcompress().
-+ */
-+ssize_t write_cryptcompress(struct file *file, const char __user *buf,
-+ size_t count, loff_t *off,
-+ struct psched_context *cont)
-+{
-+ ssize_t result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+ loff_t pos = *off;
-+ struct cryptcompress_info *info;
-+
-+ assert("edward-1449", cont->state == PSCHED_INVAL_STATE);
-+
-+ inode = file->f_dentry->d_inode;
-+ assert("edward-196", cryptcompress_inode_ok(inode));
-+
-+ info = cryptcompress_inode_data(inode); /* NOTE(review): info is not used below */
-+ ctx = get_current_context();
-+
-+ result = generic_write_checks(file, &pos, &count, 0);
-+ if (unlikely(result != 0)) {
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+ if (unlikely(count == 0))
-+ return 0;
-+ result = remove_suid(file->f_dentry);
-+ if (unlikely(result != 0)) {
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+ /* remove_suid might create a transaction */
-+ reiser4_txn_restart(ctx);
-+
-+ result = do_write_cryptcompress(file, inode, buf, count, pos, cont);
-+
-+ if (unlikely(result < 0)) {
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+ /* update position in a file */
-+ *off = pos + result;
-+ return result;
-+}
-+
-+/* plugin->readpages
-+ Reads @pages of @mapping through readpages_ctail() inside a fresh
-+ reiser4 context. On any failure (including failure to set up the
-+ context) the page list is released with put_pages_list().
-+ Returns 0 on success or the error code. */
-+int readpages_cryptcompress(struct file *file, struct address_space *mapping,
-+ struct list_head *pages, unsigned nr_pages)
-+{
-+ reiser4_context * ctx;
-+ int ret;
-+
-+ ctx = reiser4_init_context(mapping->host->i_sb);
-+ if (IS_ERR(ctx)) {
-+ ret = PTR_ERR(ctx);
-+ put_pages_list(pages);
-+ return ret;
-+ }
-+ /* cryptcompress file can be built of ctail items only */
-+ ret = readpages_ctail(file, mapping, pages);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ if (ret)
-+ put_pages_list(pages);
-+ return ret;
-+}
-+
-+/* Estimate the number of blocks to reserve for a read: the result of
-+ estimate_update_common(), i.e. the space to update the stat data item. */
-+static reiser4_block_nr cryptcompress_estimate_read(struct inode *inode)
-+{
-+ reiser4_block_nr blocks;
-+
-+ assert("edward-1193",
-+ inode_file_plugin(inode)->estimate.update ==
-+ estimate_update_common);
-+ /* reserve one block to update stat data item */
-+ blocks = estimate_update_common(inode);
-+ return blocks;
-+}
-+
-+/**
-+ * plugin->read
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @size: number of bytes to read
-+ * @off: position in file to read from
-+ *
-+ * Sets up a reiser4 context, grabs the space estimated by
-+ * cryptcompress_estimate_read() and delegates the read to do_sync_read().
-+ * Returns the result of do_sync_read(), or the error of
-+ * reiser4_init_context() or reiser4_grab_space().
-+ */
-+ssize_t read_cryptcompress(struct file * file, char __user *buf, size_t size,
-+ loff_t * off)
-+{
-+ ssize_t result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+ struct cryptcompress_info *info;
-+ reiser4_block_nr needed;
-+
-+ inode = file->f_dentry->d_inode;
-+ assert("edward-1194", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ info = cryptcompress_inode_data(inode); /* NOTE(review): info is not used below */
-+ needed = cryptcompress_estimate_read(inode);
-+
-+ result = reiser4_grab_space(needed, BA_CAN_COMMIT);
-+ if (result != 0) {
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ result = do_sync_read(file, buf, size, off);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return result;
-+}
-+
-+/* Look up a disk cluster of @inode and report it through @found.
-+ * @index > 0: look for the disk cluster of index (@index - 1);
-+ * @index == 0: look for the rightmost disk cluster of the file.
-+ * On success @found holds the index of the found disk cluster plus one,
-+ * or 0 when nothing was found (for @index == 0 this means that the file
-+ * has no disk clusters at all).
-+ * Returns 0 on success, -ENOMEM if the hint cannot be allocated, or the
-+ * error of the tree lookup / node load.
-+ */
-+static int lookup_disk_cluster(struct inode *inode, cloff_t * found,
-+ cloff_t index)
-+{
-+ int result;
-+ reiser4_key key;
-+ loff_t offset;
-+ hint_t *hint;
-+ lookup_bias bias;
-+ coord_t *coord;
-+ item_plugin *iplug;
-+
-+ assert("edward-1131", inode != NULL);
-+ assert("edward-95", cryptcompress_inode_ok(inode));
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+
-+ if (index) {
-+ /* last byte of the cluster preceding @index */
-+ bias = FIND_EXACT;
-+ offset = clust_to_off(index, inode) - 1;
-+ } else {
-+ /* the last item of this object */
-+ bias = FIND_MAX_NOT_MORE_THAN;
-+ offset = get_key_offset(reiser4_max_key());
-+ }
-+ key_by_inode_cryptcompress(inode, offset, &key);
-+
-+ result = find_cluster_item(hint, &key, ZNODE_READ_LOCK,
-+ NULL /* ra_info */, bias, 0);
-+ if (cbk_errored(result))
-+ goto done;
-+ if (result == CBK_COORD_NOTFOUND) {
-+ /* no real disk clusters */
-+ *found = 0;
-+ result = 0;
-+ goto done;
-+ }
-+ /* disk cluster is found */
-+ coord = &hint->ext_coord.coord;
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (unlikely(result))
-+ goto done;
-+
-+ iplug = item_plugin_by_coord(coord);
-+ assert("edward-277", iplug == item_plugin_by_id(CTAIL_ID));
-+ assert("edward-1202", ctail_ok(coord));
-+
-+ item_key_by_coord(coord, &key);
-+ *found = off_to_clust(get_key_offset(&key), inode) + 1;
-+
-+ assert("edward-1132", ergo(index, index == *found));
-+
-+ zrelse(coord->node);
-+ done:
-+ /* common exit: drop the lock handle and the hint */
-+ done_lh(&hint->lh);
-+ kfree(hint);
-+ return result;
-+}
-+
-+/* Store in @index the incremented index of the rightmost real disk
-+ cluster of @inode (0 if there is none); returns the result of
-+ lookup_disk_cluster(). */
-+static int find_fake_appended(struct inode *inode, cloff_t * index)
-+{
-+ const cloff_t last_real = 0; /* find last real one */
-+
-+ return lookup_disk_cluster(inode, index, last_real);
-+}
-+
-+/* Adjust the left coord when the unit was not found by node_lookup().
-+ Since there can be holes in a sequence of disk clusters, a position
-+ after a unit is moved to after the whole item; positions after an item
-+ or before a unit are left as they are. Anything else is reported as
-+ impossible. */
-+
-+static void adjust_left_coord(coord_t * left_coord)
-+{
-+ if (left_coord->between == AFTER_UNIT) {
-+ left_coord->between = AFTER_ITEM;
-+ return;
-+ }
-+ if (left_coord->between != AFTER_ITEM &&
-+ left_coord->between != BEFORE_UNIT)
-+ impossible("edward-1204", "bad left coord to cut");
-+}
-+
-+#define CRC_CUT_TREE_MIN_ITERATIONS 64
-+
-+/* plugin->cut_tree_worker
-+ Cut items with keys in [@from_key, @to_key] node by node, moving from
-+ the node of @tap to the left.
-+ @tap: tap set to the rightmost node to cut from (write locked, leaf)
-+ @smallest_removed: receives the smallest key removed by the last cut
-+ @object, @truncate: passed through to kill_node_content()
-+ @progress: receives the number of nodes that were cut
-+ Returns 0 when done, -E_REPEAT when a long operation was interrupted
-+ because the atom should commit, or an error code. The lock on the left
-+ neighbor is released on every exit path. */
-+int cut_tree_worker_cryptcompress(tap_t * tap, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ struct inode *object, int truncate,
-+ int *progress)
-+{
-+ lock_handle next_node_lock;
-+ coord_t left_coord;
-+ int result;
-+
-+ assert("edward-1158", tap->coord->node != NULL);
-+ assert("edward-1159", znode_is_write_locked(tap->coord->node));
-+ assert("edward-1160", znode_get_level(tap->coord->node) == LEAF_LEVEL);
-+
-+ *progress = 0;
-+ init_lh(&next_node_lock);
-+
-+ while (1) {
-+ znode *node; /* node from which items are cut */
-+ node_plugin *nplug; /* node plugin for @node */
-+
-+ node = tap->coord->node;
-+
-+ /* Move next_node_lock to the next node on the left. */
-+ result =
-+ reiser4_get_left_neighbor(&next_node_lock, node,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result != 0 && result != -E_NO_NEIGHBOR)
-+ break;
-+ /* FIXME-EDWARD: Check can we delete the node as a whole. */
-+ result = reiser4_tap_load(tap);
-+ if (result)
-+ /* leave through the common exit so that the lock on the
-+ left neighbor taken above is released */
-+ break;
-+
-+ /* Prepare the second (right) point for cut_node() */
-+ if (*progress)
-+ coord_init_last_unit(tap->coord, node);
-+
-+ else if (item_plugin_by_coord(tap->coord)->b.lookup == NULL)
-+ /* set rightmost unit for the items without lookup method */
-+ tap->coord->unit_pos = coord_last_unit_pos(tap->coord);
-+
-+ nplug = node->nplug;
-+
-+ assert("edward-1161", nplug);
-+ assert("edward-1162", nplug->lookup);
-+
-+ /* left_coord is leftmost unit cut from @node */
-+ result = nplug->lookup(node, from_key, FIND_EXACT, &left_coord);
-+
-+ if (IS_CBKERR(result))
-+ break;
-+
-+ if (result == CBK_COORD_NOTFOUND)
-+ adjust_left_coord(&left_coord);
-+
-+ /* adjust coordinates so that they are set to existing units */
-+ if (coord_set_to_right(&left_coord)
-+ || coord_set_to_left(tap->coord)) {
-+ result = 0;
-+ break;
-+ }
-+
-+ if (coord_compare(&left_coord, tap->coord) ==
-+ COORD_CMP_ON_RIGHT) {
-+ /* keys from @from_key to @to_key are not in the tree */
-+ result = 0;
-+ break;
-+ }
-+
-+ /* cut data from one node */
-+ *smallest_removed = *reiser4_min_key();
-+ result = kill_node_content(&left_coord,
-+ tap->coord,
-+ from_key,
-+ to_key,
-+ smallest_removed,
-+ next_node_lock.node,
-+ object, truncate);
-+ reiser4_tap_relse(tap);
-+
-+ if (result)
-+ break;
-+
-+ ++(*progress);
-+
-+ /* Check whether all items with keys >= from_key were removed
-+ * from the tree. */
-+ if (keyle(smallest_removed, from_key))
-+ /* result = 0; */
-+ break;
-+
-+ if (next_node_lock.node == NULL)
-+ break;
-+
-+ result = reiser4_tap_move(tap, &next_node_lock);
-+ done_lh(&next_node_lock);
-+ if (result)
-+ break;
-+
-+ /* Break long cut_tree operation (deletion of a large file) if
-+ * atom requires commit. */
-+ if (*progress > CRC_CUT_TREE_MIN_ITERATIONS
-+ && current_atom_should_commit()) {
-+ result = -E_REPEAT;
-+ break;
-+ }
-+ }
-+ done_lh(&next_node_lock);
-+ return result;
-+}
-+
-+/* Append or expand hole in two steps:
-+ * 1) set zeroes to the rightmost page of the rightmost non-fake
-+ * logical cluster;
-+ * 2) expand hole via fake logical clusters (just increase i_size)
-+ *
-+ * @inode: file with the old size; @new_size must be greater than it
-+ * @new_size: file size to reach
-+ *
-+ * Returns 0 on success, -ENOMEM if the hint cannot be allocated, or the
-+ * error of alloc_cluster_pgset() / prepare_logical_cluster().
-+ */
-+static int cryptcompress_append_hole(struct inode *inode /* with old size */,
-+ loff_t new_size)
-+{
-+ int result = 0;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ loff_t hole_size;
-+ int nr_zeroes;
-+ struct reiser4_slide win;
-+ struct cluster_handle clust;
-+
-+ assert("edward-1133", inode->i_size < new_size);
-+ assert("edward-1134", reiser4_schedulable());
-+ assert("edward-1135", cryptcompress_inode_ok(inode));
-+ assert("edward-1136", current_blocksize == PAGE_CACHE_SIZE);
-+ assert("edward-1333", off_to_cloff(inode->i_size, inode) != 0);
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (result)
-+ goto out;
-+ if (off_to_cloff(inode->i_size, inode) == 0) /* NOTE(review): excluded by assertion edward-1333 above when assertions are enabled */
-+ goto append_fake;
-+ hole_size = new_size - inode->i_size;
-+ nr_zeroes =
-+ inode_cluster_size(inode) - off_to_cloff(inode->i_size, inode); /* bytes from the old size up to the end of its logical cluster */
-+ if (hole_size < nr_zeroes)
-+ nr_zeroes = hole_size;
-+ set_window(&clust, &win, inode, inode->i_size,
-+ inode->i_size + nr_zeroes);
-+ win.stat = HOLE_WINDOW;
-+
-+ assert("edward-1137",
-+ clust.index == off_to_clust(inode->i_size, inode));
-+
-+ result = prepare_logical_cluster(inode, 0, 0, &clust, LC_APPOV);
-+
-+ assert("edward-1271", !result || result == -ENOSPC);
-+ if (result)
-+ goto out;
-+ assert("edward-1139",
-+ clust.dstat == PREP_DISK_CLUSTER ||
-+ clust.dstat == UNPR_DISK_CLUSTER);
-+
-+ assert("edward-1431", hole_size >= nr_zeroes);
-+ if (hole_size == nr_zeroes)
-+ /* nothing to append anymore */
-+ goto out;
-+ append_fake:
-+ INODE_SET_SIZE(inode, new_size);
-+ out:
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+static int update_cryptcompress_size(struct inode *inode, loff_t new_size,
-+ int update_sd)
-+{
-+ return (new_size & ((loff_t) (inode_cluster_size(inode)) - 1)
-+ ? 0 : reiser4_update_file_size(inode, new_size, update_sd));
-+}
-+
-+/* Prune cryptcompress file in two steps:
-+ * 1) cut all nominated logical clusters except the leftmost one which
-+ * is to be partially truncated. Note, that there can be "holes"
-+ * represented by fake logical clusters.
-+ * 2) set zeroes and capture leftmost partially truncated logical
-+ * cluster, if it is not fake; otherwise prune fake logical cluster
-+ * (just decrease i_size).
-+ */
-+static int prune_cryptcompress(struct inode *inode, loff_t new_size,
-+ int update_sd, cloff_t aidx)
-+{
-+ int result = 0;
-+ unsigned nr_zeroes;
-+ loff_t to_prune;
-+ loff_t old_size;
-+ cloff_t ridx;
-+
-+ hint_t *hint;
-+ lock_handle *lh;
-+ struct reiser4_slide win;
-+ struct cluster_handle clust;
-+
-+ assert("edward-1140", inode->i_size >= new_size);
-+ assert("edward-1141", reiser4_schedulable());
-+ assert("edward-1142", cryptcompress_inode_ok(inode));
-+ assert("edward-1143", current_blocksize == PAGE_CACHE_SIZE);
-+
-+ old_size = inode->i_size;
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ reiser4_slide_init(&win);
-+ cluster_init_read(&clust, &win);
-+ clust.hint = hint;
-+
-+ /* calculate index of the rightmost logical cluster
-+ that will be completely truncated */
-+ ridx = size_in_lc(new_size, inode);
-+
-+ /* truncate all disk clusters starting from @ridx */
-+ assert("edward-1174", ridx <= aidx);
-+ old_size = inode->i_size;
-+ if (ridx != aidx) {
-+ struct cryptcompress_info * info;
-+ info = cryptcompress_inode_data(inode);
-+ result = cut_file_items(inode,
-+ clust_to_off(ridx, inode),
-+ update_sd,
-+ clust_to_off(aidx, inode),
-+ update_cryptcompress_size);
-+ info->trunc_index = ULONG_MAX;
-+ if (result)
-+ goto out;
-+ }
-+ /*
-+ * there can be pages of fake logical clusters, truncate them
-+ */
-+ truncate_inode_pages(inode->i_mapping, clust_to_off(ridx, inode));
-+ assert("edward-1524",
-+ pages_truncate_ok(inode, clust_to_pg(ridx, inode)));
-+ /*
-+ * now perform partial truncate of last logical cluster
-+ */
-+ if (!off_to_cloff(new_size, inode)) {
-+ /* no partial truncate is needed */
-+ assert("edward-1145", inode->i_size == new_size);
-+ goto truncate_fake;
-+ }
-+ assert("edward-1146", new_size < inode->i_size);
-+
-+ to_prune = inode->i_size - new_size;
-+
-+ /* check if the last logical cluster is fake */
-+ result = lookup_disk_cluster(inode, &aidx, ridx);
-+ if (result)
-+ goto out;
-+ if (!aidx)
-+ /* yup, this is fake one */
-+ goto truncate_fake;
-+
-+ assert("edward-1148", aidx == ridx);
-+
-+ /* do partial truncate of the last page cluster,
-+ and try to capture this one */
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (result)
-+ goto out;
-+ nr_zeroes = (off_to_pgoff(new_size) ?
-+ PAGE_CACHE_SIZE - off_to_pgoff(new_size) : 0);
-+ set_window(&clust, &win, inode, new_size, new_size + nr_zeroes);
-+ win.stat = HOLE_WINDOW;
-+
-+ assert("edward-1149", clust.index == ridx - 1);
-+
-+ result = prepare_logical_cluster(inode, 0, 0, &clust, LC_TRUNC);
-+ if (result)
-+ goto out;
-+ assert("edward-1151",
-+ clust.dstat == PREP_DISK_CLUSTER ||
-+ clust.dstat == UNPR_DISK_CLUSTER);
-+
-+ assert("edward-1191", inode->i_size == new_size);
-+ assert("edward-1206", body_truncate_ok(inode, ridx));
-+ truncate_fake:
-+ /* drop all the pages that don't have jnodes (i.e. pages
-+ which can not be truncated by cut_file_items() because
-+ of holes represented by fake disk clusters) including
-+ the pages of partially truncated cluster which was
-+ released by prepare_logical_cluster() */
-+ INODE_SET_SIZE(inode, new_size);
-+ truncate_inode_pages(inode->i_mapping, new_size);
-+ out:
-+ assert("edward-1334", !result || result == -ENOSPC);
-+ assert("edward-1497",
-+ pages_truncate_ok(inode, size_in_pages(new_size)));
-+
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+/* Prepare cryptcompress file for truncate:
-+ * prune or append rightmost fake logical clusters (if any)
-+ */
-+static int start_truncate_fake(struct inode *inode, cloff_t aidx,
-+ loff_t new_size, int update_sd)
-+{
-+ int result = 0;
-+ int bytes;
-+
-+ if (new_size > inode->i_size) {
-+ /* append */
-+ if (inode->i_size < clust_to_off(aidx, inode))
-+ /* no fake bytes */
-+ return 0;
-+ bytes = new_size - inode->i_size;
-+ INODE_SET_SIZE(inode, inode->i_size + bytes);
-+ } else {
-+ /* prune */
-+ if (inode->i_size <= clust_to_off(aidx, inode))
-+ /* no fake bytes */
-+ return 0;
-+ bytes = inode->i_size -
-+ max(new_size, clust_to_off(aidx, inode));
-+ if (!bytes)
-+ return 0;
-+ INODE_SET_SIZE(inode, inode->i_size - bytes);
-+ /* In the case of fake prune we need to drop page cluster.
-+ There are only 2 cases for partially truncated page:
-+ 1. If is is dirty, therefore it is anonymous
-+ (was dirtied via mmap), and will be captured
-+ later via ->capture().
-+ 2. If is clean, therefore it is filled by zeroes.
-+ In both cases we don't need to make it dirty and
-+ capture here.
-+ */
-+ truncate_inode_pages(inode->i_mapping, inode->i_size);
-+ }
-+ if (update_sd)
-+ result = update_sd_cryptcompress(inode);
-+ return result;
-+}
-+
-+/**
-+ * This is called in setattr_cryptcompress when it is used to truncate,
-+ * and in delete_object_cryptcompress
-+ */
-+static int cryptcompress_truncate(struct inode *inode, /* old size */
-+ loff_t new_size, /* new size */
-+ int update_sd)
-+{
-+ int result;
-+ cloff_t aidx;
-+
-+ result = find_fake_appended(inode, &aidx);
-+ if (result)
-+ return result;
-+ assert("edward-1208",
-+ ergo(aidx > 0, inode->i_size > clust_to_off(aidx - 1, inode)));
-+
-+ result = start_truncate_fake(inode, aidx, new_size, update_sd);
-+ if (result)
-+ return result;
-+ if (inode->i_size == new_size)
-+ /* nothing to truncate anymore */
-+ return 0;
-+ result = (inode->i_size < new_size ?
-+ cryptcompress_append_hole(inode, new_size) :
-+ prune_cryptcompress(inode, new_size, update_sd, aidx));
-+ if (!result && update_sd)
-+ result = update_sd_cryptcompress(inode);
-+ return result;
-+}
-+
-+/* Capture an anonymous pager cluster. (Page cluser is
-+ * anonymous if it contains at least one anonymous page
-+ */
-+static int capture_anon_page_cluster(struct cluster_handle * clust,
-+ struct inode * inode)
-+{
-+ int result;
-+
-+ assert("edward-1073", clust != NULL);
-+ assert("edward-1074", inode != NULL);
-+ assert("edward-1075", clust->dstat == INVAL_DISK_CLUSTER);
-+
-+ result = prepare_logical_cluster(inode, 0, 0, clust, LC_APPOV);
-+ if (result)
-+ return result;
-+ set_cluster_pages_dirty(clust, inode);
-+ result = checkin_logical_cluster(clust, inode);
-+ put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
-+ if (unlikely(result))
-+ put_page_cluster(clust, inode, WRITE_OP);
-+ return result;
-+}
-+
-+/* Starting from @index find tagged pages of the same page cluster.
-+ * Clear the tag for each of them. Return number of found pages.
-+ */
-+static int find_anon_page_cluster(struct address_space * mapping,
-+ pgoff_t * index, struct page ** pages)
-+{
-+ int i = 0;
-+ int found;
-+ write_lock_irq(&mapping->tree_lock);
-+ do {
-+ /* looking for one page */
-+ found = radix_tree_gang_lookup_tag(&mapping->page_tree,
-+ (void **)&pages[i],
-+ *index, 1,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ if (!found)
-+ break;
-+ if (!same_page_cluster(pages[0], pages[i]))
-+ break;
-+
-+ /* found */
-+ page_cache_get(pages[i]);
-+ *index = pages[i]->index + 1;
-+
-+ radix_tree_tag_clear(&mapping->page_tree,
-+ pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ if (last_page_in_cluster(pages[i++]))
-+ break;
-+ } while (1);
-+ write_unlock_irq(&mapping->tree_lock);
-+ return i;
-+}
-+
-+#define MAX_PAGES_TO_CAPTURE (1024)
-+
-+/* Capture anonymous page clusters */
-+static int capture_anon_pages(struct address_space * mapping, pgoff_t * index,
-+ int to_capture)
-+{
-+ int count = 0;
-+ int found = 0;
-+ int result = 0;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ struct inode * inode;
-+ struct cluster_handle clust;
-+ struct page * pages[MAX_CLUSTER_NRPAGES];
-+
-+ assert("edward-1127", mapping != NULL);
-+ assert("edward-1128", mapping->host != NULL);
-+ assert("edward-1440", mapping->host->i_mapping == mapping);
-+
-+ inode = mapping->host;
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL)
-+ return RETERR(-ENOMEM);
-+ hint_init_zero(hint);
-+ lh = &hint->lh;
-+
-+ cluster_init_read(&clust, NULL);
-+ clust.hint = hint;
-+
-+ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (result)
-+ goto out;
-+
-+ while (to_capture > 0) {
-+ found = find_anon_page_cluster(mapping, index, pages);
-+ if (!found) {
-+ *index = (pgoff_t) - 1;
-+ break;
-+ }
-+ move_cluster_forward(&clust, inode, pages[0]->index);
-+ result = capture_anon_page_cluster(&clust, inode);
-+
-+ put_found_pages(pages, found); /* find_anon_page_cluster */
-+ if (result)
-+ break;
-+ to_capture -= clust.nr_pages;
-+ count += clust.nr_pages;
-+ }
-+ if (result) {
-+ warning("edward-1077",
-+ "Capture failed (inode %llu, result=%i, captured=%d)\n",
-+ (unsigned long long)get_inode_oid(inode), result, count);
-+ } else {
-+ assert("edward-1078", ergo(found > 0, count > 0));
-+ if (to_capture <= 0)
-+ /* there may be left more pages */
-+ __mark_inode_dirty(inode, I_DIRTY_PAGES);
-+ result = count;
-+ }
-+ out:
-+ done_lh(lh);
-+ kfree(hint);
-+ put_cluster_handle(&clust);
-+ return result;
-+}
-+
-+/* Returns true if inode's mapping has dirty pages
-+ which do not belong to any atom */
-+static int cryptcompress_inode_has_anon_pages(struct inode *inode)
-+{
-+ int result;
-+ read_lock_irq(&inode->i_mapping->tree_lock);
-+ result = radix_tree_tagged(&inode->i_mapping->page_tree,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ read_unlock_irq(&inode->i_mapping->tree_lock);
-+ return result;
-+}
-+
-+/* plugin->writepages */
-+int writepages_cryptcompress(struct address_space *mapping,
-+ struct writeback_control *wbc)
-+{
-+ int result = 0;
-+ long to_capture;
-+ pgoff_t nrpages;
-+ pgoff_t index = 0;
-+ struct inode *inode;
-+ struct cryptcompress_info *info;
-+
-+ inode = mapping->host;
-+ if (!cryptcompress_inode_has_anon_pages(inode))
-+ goto end;
-+ info = cryptcompress_inode_data(inode);
-+ nrpages = size_in_pages(i_size_read(inode));
-+
-+ if (wbc->sync_mode != WB_SYNC_ALL)
-+ to_capture = min(wbc->nr_to_write, (long)MAX_PAGES_TO_CAPTURE);
-+ else
-+ to_capture = MAX_PAGES_TO_CAPTURE;
-+ do {
-+ reiser4_context *ctx;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ result = PTR_ERR(ctx);
-+ break;
-+ }
-+ /* avoid recursive calls to ->sync_inodes */
-+ ctx->nobalance = 1;
-+
-+ assert("edward-1079",
-+ lock_stack_isclean(get_current_lock_stack()));
-+
-+ reiser4_txn_restart_current();
-+
-+ if (get_current_context()->entd) {
-+ if (mutex_trylock(&info->checkin_mutex) == 0) {
-+ /* the mutex might be occupied by
-+ entd caller */
-+ result = RETERR(-EBUSY);
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ } else
-+ mutex_lock(&info->checkin_mutex);
-+
-+ result = capture_anon_pages(inode->i_mapping, &index,
-+ to_capture);
-+ mutex_unlock(&info->checkin_mutex);
-+
-+ if (result < 0) {
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ wbc->nr_to_write -= result;
-+ if (wbc->sync_mode != WB_SYNC_ALL) {
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ result = txnmgr_force_commit_all(inode->i_sb, 0);
-+ reiser4_exit_context(ctx);
-+ } while (result >= 0 && index < nrpages);
-+
-+ end:
-+ if (is_in_reiser4_context()) {
-+ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
-+ /* there are already pages to flush, flush them out,
-+ do not delay until end of reiser4_sync_inodes */
-+ reiser4_writeout(inode->i_sb, wbc);
-+ get_current_context()->nr_captured = 0;
-+ }
-+ }
-+ return result;
-+}
-+
-+/* plugin->ioctl */
-+int ioctl_cryptcompress(struct inode *inode, struct file *filp,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ return RETERR(-ENOSYS);
-+}
-+
-+/* plugin->mmap */
-+int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
-+{
-+ int result;
-+ struct inode *inode;
-+ reiser4_context *ctx;
-+
-+ inode = file->f_dentry->d_inode;
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ /*
-+ * generic_file_mmap will do update_atime. Grab space for stat data
-+ * update.
-+ */
-+ result = reiser4_grab_space_force
-+ (inode_file_plugin(inode)->estimate.update(inode),
-+ BA_CAN_COMMIT);
-+ if (result) {
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ result = generic_file_mmap(file, vma);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* plugin->delete_object */
-+int delete_object_cryptcompress(struct inode *inode)
-+{
-+ int result;
-+ struct cryptcompress_info * info;
-+
-+ assert("edward-429", inode->i_nlink == 0);
-+
-+ reiser4_txn_restart_current();
-+ info = cryptcompress_inode_data(inode);
-+
-+ mutex_lock(&info->checkin_mutex);
-+ result = cryptcompress_truncate(inode, 0, 0);
-+ mutex_unlock(&info->checkin_mutex);
-+
-+ if (result) {
-+ warning("edward-430",
-+ "cannot truncate cryptcompress file %lli: %i",
-+ (unsigned long long)get_inode_oid(inode),
-+ result);
-+ }
-+ truncate_inode_pages(inode->i_mapping, 0);
-+ assert("edward-1487", pages_truncate_ok(inode, 0));
-+ /* and remove stat data */
-+ return reiser4_delete_object_common(inode);
-+}
-+
-+/*
-+ * plugin->setattr
-+ * This implements actual truncate (see comments in reiser4/page_cache.c)
-+ */
-+int setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
-+{
-+ int result;
-+ struct inode *inode;
-+ struct cryptcompress_info * info;
-+
-+ inode = dentry->d_inode;
-+ info = cryptcompress_inode_data(inode);
-+
-+ if (attr->ia_valid & ATTR_SIZE) {
-+ if (i_size_read(inode) != attr->ia_size) {
-+ reiser4_context *ctx;
-+ loff_t old_size;
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ result = setattr_pschedule_hook(inode);
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ old_size = i_size_read(inode);
-+ inode_check_scale(inode, old_size, attr->ia_size);
-+
-+ mutex_lock(&info->checkin_mutex);
-+ result = cryptcompress_truncate(inode,
-+ attr->ia_size,
-+ 1/* update sd */);
-+ mutex_unlock(&info->checkin_mutex);
-+ if (result) {
-+ warning("edward-1192",
-+ "truncate_cryptcompress failed: oid %lli, "
-+ "old size %lld, new size %lld, retval %d",
-+ (unsigned long long)
-+ get_inode_oid(inode), old_size,
-+ attr->ia_size, result);
-+ }
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ } else
-+ result = 0;
-+ } else
-+ result = reiser4_setattr_common(dentry, attr);
-+ return result;
-+}
-+
-+/* plugin->release */
-+int release_cryptcompress(struct inode *inode, struct file *file)
-+{
-+ reiser4_context *ctx = reiser4_init_context(inode->i_sb);
-+
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ reiser4_free_file_fsdata(file);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+/* plugin->prepare_write */
-+int prepare_write_cryptcompress(struct file *file, struct page *page,
-+ unsigned from, unsigned to)
-+{
-+ return -EINVAL;
-+}
-+
-+/* plugin->commit_write */
-+int commit_write_cryptcompress(struct file *file, struct page *page,
-+ unsigned from, unsigned to)
-+{
-+ BUG();
-+ return 0;
-+}
-+
-+/* plugin->bmap */
-+sector_t bmap_cryptcompress(struct address_space *mapping, sector_t lblock)
-+{
-+ return -EINVAL;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/cryptcompress.h linux-2.6.24/fs/reiser4/plugin/file/cryptcompress.h
---- linux-2.6.24.orig/fs/reiser4/plugin/file/cryptcompress.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/cryptcompress.h 2008-01-25 11:39:06.980219023 +0300
-@@ -0,0 +1,616 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* See http://www.namesys.com/cryptcompress_design.html */
-+
-+#if !defined( __FS_REISER4_CRYPTCOMPRESS_H__ )
-+#define __FS_REISER4_CRYPTCOMPRESS_H__
-+
-+#include "../../page_cache.h"
-+#include "../compress/compress.h"
-+#include "../crypto/cipher.h"
-+
-+#include <linux/pagemap.h>
-+
-+#define MIN_CLUSTER_SHIFT PAGE_CACHE_SHIFT
-+#define MAX_CLUSTER_SHIFT 16
-+#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_CACHE_SHIFT)
-+#define DC_CHECKSUM_SIZE 4
-+
-+#define MIN_LATTICE_FACTOR 1
-+#define MAX_LATTICE_FACTOR 32
-+
-+/* this mask contains all non-standard plugins that might
-+ be present in reiser4-specific part of inode managed by
-+ cryptcompress file plugin */
-+#define cryptcompress_mask \
-+ ((1 << PSET_FILE) | \
-+ (1 << PSET_CLUSTER) | \
-+ (1 << PSET_CIPHER) | \
-+ (1 << PSET_DIGEST) | \
-+ (1 << PSET_COMPRESSION) | \
-+ (1 << PSET_COMPRESSION_MODE))
-+
-+#if REISER4_DEBUG
-+static inline int cluster_shift_ok(int shift)
-+{
-+ return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT);
-+}
-+#endif
-+
-+#if REISER4_DEBUG
-+#define INODE_PGCOUNT(inode) \
-+({ \
-+ assert("edward-1530", inode_file_plugin(inode) == \
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
-+ atomic_read(&cryptcompress_inode_data(inode)->pgcount); \
-+ })
-+#define INODE_PGCOUNT_INC(inode) \
-+do { \
-+ assert("edward-1531", inode_file_plugin(inode) == \
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
-+ atomic_inc(&cryptcompress_inode_data(inode)->pgcount); \
-+} while (0)
-+#define INODE_PGCOUNT_DEC(inode) \
-+do { \
-+ if (inode_file_plugin(inode) == \
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) \
-+ atomic_dec(&cryptcompress_inode_data(inode)->pgcount); \
-+} while (0)
-+#else
-+#define INODE_PGCOUNT(inode) (0)
-+#define INODE_PGCOUNT_INC(inode)
-+#define INODE_PGCOUNT_DEC(inode)
-+#endif /* REISER4_DEBUG */
-+
-+struct tfm_stream {
-+ __u8 *data;
-+ size_t size;
-+};
-+
-+typedef enum {
-+ INPUT_STREAM,
-+ OUTPUT_STREAM,
-+ LAST_STREAM
-+} tfm_stream_id;
-+
-+typedef struct tfm_stream * tfm_unit[LAST_STREAM];
-+
-+static inline __u8 *ts_data(struct tfm_stream * stm)
-+{
-+ assert("edward-928", stm != NULL);
-+ return stm->data;
-+}
-+
-+static inline size_t ts_size(struct tfm_stream * stm)
-+{
-+ assert("edward-929", stm != NULL);
-+ return stm->size;
-+}
-+
-+static inline void set_ts_size(struct tfm_stream * stm, size_t size)
-+{
-+ assert("edward-930", stm != NULL);
-+
-+ stm->size = size;
-+}
-+
-+static inline int alloc_ts(struct tfm_stream ** stm)
-+{
-+ assert("edward-931", stm);
-+ assert("edward-932", *stm == NULL);
-+
-+ *stm = kzalloc(sizeof(**stm), reiser4_ctx_gfp_mask_get());
-+ if (!*stm)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+static inline void free_ts(struct tfm_stream * stm)
-+{
-+ assert("edward-933", !ts_data(stm));
-+ assert("edward-934", !ts_size(stm));
-+
-+ kfree(stm);
-+}
-+
-+static inline int alloc_ts_data(struct tfm_stream * stm, size_t size)
-+{
-+ assert("edward-935", !ts_data(stm));
-+ assert("edward-936", !ts_size(stm));
-+ assert("edward-937", size != 0);
-+
-+ stm->data = reiser4_vmalloc(size);
-+ if (!stm->data)
-+ return -ENOMEM;
-+ set_ts_size(stm, size);
-+ return 0;
-+}
-+
-+static inline void free_ts_data(struct tfm_stream * stm)
-+{
-+ assert("edward-938", equi(ts_data(stm), ts_size(stm)));
-+
-+ if (ts_data(stm))
-+ vfree(ts_data(stm));
-+ memset(stm, 0, sizeof *stm);
-+}
-+
-+/* Write modes for item conversion in flush convert phase */
-+typedef enum {
-+ CRC_APPEND_ITEM = 1,
-+ CRC_OVERWRITE_ITEM = 2,
-+ CRC_CUT_ITEM = 3
-+} cryptcompress_write_mode_t;
-+
-+typedef enum {
-+ LC_INVAL = 0, /* invalid value */
-+ LC_APPOV = 1, /* append and/or overwrite */
-+ LC_TRUNC = 2 /* truncate */
-+} logical_cluster_op;
-+
-+/* Transform cluster.
-+ * Intermediate state between page cluster and disk cluster
-+ * Is used for data transform (compression/encryption)
-+ */
-+struct tfm_cluster {
-+ coa_set coa; /* compression algorithms info */
-+ tfm_unit tun; /* plain and transformed streams */
-+ tfm_action act;
-+ int uptodate;
-+ int lsize; /* number of bytes in logical cluster */
-+ int len; /* length of the transform stream */
-+};
-+
-+static inline coa_t get_coa(struct tfm_cluster * tc, reiser4_compression_id id,
-+ tfm_action act)
-+{
-+ return tc->coa[id][act];
-+}
-+
-+static inline void set_coa(struct tfm_cluster * tc, reiser4_compression_id id,
-+ tfm_action act, coa_t coa)
-+{
-+ tc->coa[id][act] = coa;
-+}
-+
-+static inline int alloc_coa(struct tfm_cluster * tc, compression_plugin * cplug)
-+{
-+ coa_t coa;
-+
-+ coa = cplug->alloc(tc->act);
-+ if (IS_ERR(coa))
-+ return PTR_ERR(coa);
-+ set_coa(tc, cplug->h.id, tc->act, coa);
-+ return 0;
-+}
-+
-+static inline int
-+grab_coa(struct tfm_cluster * tc, compression_plugin * cplug)
-+{
-+ return (cplug->alloc && !get_coa(tc, cplug->h.id, tc->act) ?
-+ alloc_coa(tc, cplug) : 0);
-+}
-+
-+static inline void free_coa_set(struct tfm_cluster * tc)
-+{
-+ tfm_action j;
-+ reiser4_compression_id i;
-+ compression_plugin *cplug;
-+
-+ assert("edward-810", tc != NULL);
-+
-+ for (j = 0; j < TFMA_LAST; j++)
-+ for (i = 0; i < LAST_COMPRESSION_ID; i++) {
-+ if (!get_coa(tc, i, j))
-+ continue;
-+ cplug = compression_plugin_by_id(i);
-+ assert("edward-812", cplug->free != NULL);
-+ cplug->free(get_coa(tc, i, j), j);
-+ set_coa(tc, i, j, 0);
-+ }
-+ return;
-+}
-+
-+static inline struct tfm_stream * get_tfm_stream(struct tfm_cluster * tc,
-+ tfm_stream_id id)
-+{
-+ return tc->tun[id];
-+}
-+
-+static inline void set_tfm_stream(struct tfm_cluster * tc,
-+ tfm_stream_id id, struct tfm_stream * ts)
-+{
-+ tc->tun[id] = ts;
-+}
-+
-+static inline __u8 *tfm_stream_data(struct tfm_cluster * tc, tfm_stream_id id)
-+{
-+ return ts_data(get_tfm_stream(tc, id));
-+}
-+
-+static inline void set_tfm_stream_data(struct tfm_cluster * tc,
-+ tfm_stream_id id, __u8 * data)
-+{
-+ get_tfm_stream(tc, id)->data = data;
-+}
-+
-+static inline size_t tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id)
-+{
-+ return ts_size(get_tfm_stream(tc, id));
-+}
-+
-+static inline void
-+set_tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id, size_t size)
-+{
-+ get_tfm_stream(tc, id)->size = size;
-+}
-+
-+static inline int
-+alloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
-+{
-+ assert("edward-939", tc != NULL);
-+ assert("edward-940", !get_tfm_stream(tc, id));
-+
-+ tc->tun[id] = kzalloc(sizeof(struct tfm_stream),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!tc->tun[id])
-+ return -ENOMEM;
-+ return alloc_ts_data(get_tfm_stream(tc, id), size);
-+}
-+
-+static inline int
-+realloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
-+{
-+ assert("edward-941", tfm_stream_size(tc, id) < size);
-+ free_ts_data(get_tfm_stream(tc, id));
-+ return alloc_ts_data(get_tfm_stream(tc, id), size);
-+}
-+
-+static inline void free_tfm_stream(struct tfm_cluster * tc, tfm_stream_id id)
-+{
-+ free_ts_data(get_tfm_stream(tc, id));
-+ free_ts(get_tfm_stream(tc, id));
-+ set_tfm_stream(tc, id, 0);
-+}
-+
-+static inline unsigned coa_overrun(compression_plugin * cplug, int ilen)
-+{
-+ return (cplug->overrun != NULL ? cplug->overrun(ilen) : 0);
-+}
-+
-+static inline void free_tfm_unit(struct tfm_cluster * tc)
-+{
-+ tfm_stream_id id;
-+ for (id = 0; id < LAST_STREAM; id++) {
-+ if (!get_tfm_stream(tc, id))
-+ continue;
-+ free_tfm_stream(tc, id);
-+ }
-+}
-+
-+static inline void put_tfm_cluster(struct tfm_cluster * tc)
-+{
-+ assert("edward-942", tc != NULL);
-+ free_coa_set(tc);
-+ free_tfm_unit(tc);
-+}
-+
-+static inline int tfm_cluster_is_uptodate(struct tfm_cluster * tc)
-+{
-+ assert("edward-943", tc != NULL);
-+ assert("edward-944", tc->uptodate == 0 || tc->uptodate == 1);
-+ return (tc->uptodate == 1);
-+}
-+
-+static inline void tfm_cluster_set_uptodate(struct tfm_cluster * tc)
-+{
-+ assert("edward-945", tc != NULL);
-+ assert("edward-946", tc->uptodate == 0 || tc->uptodate == 1);
-+ tc->uptodate = 1;
-+ return;
-+}
-+
-+static inline void tfm_cluster_clr_uptodate(struct tfm_cluster * tc)
-+{
-+ assert("edward-947", tc != NULL);
-+ assert("edward-948", tc->uptodate == 0 || tc->uptodate == 1);
-+ tc->uptodate = 0;
-+ return;
-+}
-+
-+static inline int tfm_stream_is_set(struct tfm_cluster * tc, tfm_stream_id id)
-+{
-+ return (get_tfm_stream(tc, id) &&
-+ tfm_stream_data(tc, id) && tfm_stream_size(tc, id));
-+}
-+
-+static inline int tfm_cluster_is_set(struct tfm_cluster * tc)
-+{
-+ int i;
-+ for (i = 0; i < LAST_STREAM; i++)
-+ if (!tfm_stream_is_set(tc, i))
-+ return 0;
-+ return 1;
-+}
-+
-+static inline void alternate_streams(struct tfm_cluster * tc)
-+{
-+ struct tfm_stream *tmp = get_tfm_stream(tc, INPUT_STREAM);
-+
-+ set_tfm_stream(tc, INPUT_STREAM, get_tfm_stream(tc, OUTPUT_STREAM));
-+ set_tfm_stream(tc, OUTPUT_STREAM, tmp);
-+}
-+
-+/* Set of states to indicate a kind of data
-+ * that will be written to the window */
-+typedef enum {
-+ DATA_WINDOW, /* user's data */
-+ HOLE_WINDOW /* zeroes (such kind of data can be written
-+ * if we start to write from offset > i_size) */
-+} window_stat;
-+
-+/* Window (of logical cluster size) discretely sliding along a file.
-+ * Is used to locate hole region in a logical cluster to be properly
-+ * represented on disk.
-+ * We split a write to cryptcompress file into writes to its logical
-+ * clusters. Before writing to a logical cluster we set a window, i.e.
-+ * calculate values of the following fields:
-+ */
-+struct reiser4_slide {
-+ unsigned off; /* offset to write from */
-+ unsigned count; /* number of bytes to write */
-+ unsigned delta; /* number of bytes to append to the hole */
-+ window_stat stat; /* what kind of data will be written starting
-+ from @off */
-+};
-+
-+/* Possible states of a disk cluster */
-+typedef enum {
-+ INVAL_DISK_CLUSTER, /* unknown state */
-+ PREP_DISK_CLUSTER, /* disk cluster got converted by flush
-+ * at least 1 time */
-+ UNPR_DISK_CLUSTER, /* disk cluster just created and should be
-+ * converted by flush */
-+ FAKE_DISK_CLUSTER, /* disk cluster doesn't exist neither in memory
-+ * nor on disk */
-+ TRNC_DISK_CLUSTER /* disk cluster is partially truncated */
-+} disk_cluster_stat;
-+
-+/* The following structure represents various stages of the same logical
-+ * cluster of index @index:
-+ * . fixed slide
-+ * . page cluster (stage in primary cache)
-+ * . transform cluster (transition stage)
-+ * . disk cluster (stage in secondary cache)
-+ * This structure is used in transition and synchronizing operations, e.g.
-+ * transform cluster is a transition state when synchronizing page cluster
-+ * and disk cluster.
-+ * FIXME: Encapsulate page cluster, disk cluster.
-+ */
-+struct cluster_handle {
-+ cloff_t index; /* offset in a file (unit is a cluster size) */
-+ int index_valid; /* for validating the index above, if needed */
-+ struct file *file; /* host file */
-+
-+ /* logical cluster */
-+ struct reiser4_slide *win; /* sliding window to locate holes */
-+ logical_cluster_op op; /* logical cluster operation (truncate or
-+ append/overwrite) */
-+ /* transform cluster */
-+ struct tfm_cluster tc; /* contains all needed info to synchronize
-+ page cluster and disk cluster) */
-+ /* page cluster */
-+ int nr_pages; /* number of pages of current checkin action */
-+ int old_nrpages; /* number of pages of last checkin action */
-+ struct page **pages; /* attached pages */
-+ jnode * node; /* jnode for capture */
-+
-+ /* disk cluster */
-+ hint_t *hint; /* current position in the tree */
-+ disk_cluster_stat dstat; /* state of the current disk cluster */
-+ int reserved; /* is space for disk cluster reserved */
-+#if REISER4_DEBUG
-+ reiser4_context *ctx;
-+ int reserved_prepped;
-+ int reserved_unprepped;
-+#endif
-+
-+};
-+
-+static inline __u8 * tfm_input_data (struct cluster_handle * clust)
-+{
-+ return tfm_stream_data(&clust->tc, INPUT_STREAM);
-+}
-+
-+static inline __u8 * tfm_output_data (struct cluster_handle * clust)
-+{
-+ return tfm_stream_data(&clust->tc, OUTPUT_STREAM);
-+}
-+
-+static inline int reset_cluster_pgset(struct cluster_handle * clust,
-+ int nrpages)
-+{
-+ assert("edward-1057", clust->pages != NULL);
-+ memset(clust->pages, 0, sizeof(*clust->pages) * nrpages);
-+ return 0;
-+}
-+
-+static inline int alloc_cluster_pgset(struct cluster_handle * clust,
-+ int nrpages)
-+{
-+ assert("edward-949", clust != NULL);
-+ assert("edward-1362", clust->pages == NULL);
-+ assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES);
-+
-+ clust->pages = kzalloc(sizeof(*clust->pages) * nrpages,
-+ reiser4_ctx_gfp_mask_get());
-+ if (!clust->pages)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+static inline void move_cluster_pgset(struct cluster_handle *clust,
-+ struct page ***pages, int * nr_pages)
-+{
-+ assert("edward-1545", clust != NULL && clust->pages != NULL);
-+ assert("edward-1546", pages != NULL && *pages == NULL);
-+ *pages = clust->pages;
-+ *nr_pages = clust->nr_pages;
-+ clust->pages = NULL;
-+}
-+
-+static inline void free_cluster_pgset(struct cluster_handle * clust)
-+{
-+ assert("edward-951", clust->pages != NULL);
-+ kfree(clust->pages);
-+ clust->pages = NULL;
-+}
-+
-+static inline void put_cluster_handle(struct cluster_handle * clust)
-+{
-+ assert("edward-435", clust != NULL);
-+
-+ put_tfm_cluster(&clust->tc);
-+ if (clust->pages)
-+ free_cluster_pgset(clust);
-+ memset(clust, 0, sizeof *clust);
-+}
-+
-+static inline void inc_keyload_count(struct reiser4_crypto_info * data)
-+{
-+ assert("edward-1410", data != NULL);
-+ data->keyload_count++;
-+}
-+
-+static inline void dec_keyload_count(struct reiser4_crypto_info * data)
-+{
-+ assert("edward-1411", data != NULL);
-+ assert("edward-1412", data->keyload_count > 0);
-+ data->keyload_count--;
-+}
-+
-+static inline int capture_cluster_jnode(jnode * node)
-+{
-+ return reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+}
-+
-+/* cryptcompress specific part of reiser4_inode */
-+struct cryptcompress_info {
-+ struct mutex checkin_mutex; /* This is to serialize
-+ * checkin_logical_cluster operations */
-+ cloff_t trunc_index; /* Index of the leftmost truncated disk
-+ * cluster (to resolve races with read) */
-+ struct reiser4_crypto_info *crypt; /* crypto state attached to the inode */
-+ /*
-+ * the following 2 fields are controlled by compression mode plugin
-+ */
-+ int compress_toggle; /* Current status of compressibility; the
-+ * value 1 means "on", see
-+ * compression_is_on() */
-+ int lattice_factor; /* Factor of dynamic lattice. FIXME: Have
-+ * a compression_toggle to keep the factor
-+ */
-+#if REISER4_DEBUG
-+ atomic_t pgcount; /* number of grabbed pages (debug builds
-+ * only) */
-+#endif
-+};
-+
-+/* Store @val in the compress_toggle field of @info. */
-+static inline void set_compression_toggle (struct cryptcompress_info * info, int val)
-+{
-+ info->compress_toggle = val;
-+}
-+
-+/* Return the current value of the compress_toggle field of @info. */
-+static inline int get_compression_toggle (struct cryptcompress_info * info)
-+{
-+ return info->compress_toggle;
-+}
-+
-+/* Return non-zero iff the compression toggle of @info holds the value 1. */
-+static inline int compression_is_on(struct cryptcompress_info * info)
-+{
-+	return info->compress_toggle == 1;
-+}
-+
-+/* Switch the compression toggle of @info to 1 ("on"). */
-+static inline void turn_on_compression(struct cryptcompress_info * info)
-+{
-+	info->compress_toggle = 1;
-+}
-+
-+/* Switch the compression toggle of @info to 0 ("off"). */
-+static inline void turn_off_compression(struct cryptcompress_info * info)
-+{
-+	info->compress_toggle = 0;
-+}
-+
-+/* Store @val in the lattice_factor field of @info. */
-+static inline void set_lattice_factor(struct cryptcompress_info * info, int val)
-+{
-+ info->lattice_factor = val;
-+}
-+
-+/* Return the current value of the lattice_factor field of @info. */
-+static inline int get_lattice_factor(struct cryptcompress_info * info)
-+{
-+ return info->lattice_factor;
-+}
-+
-+/*
-+ * Prototypes of non-inline functions declared by this header. Their
-+ * definitions are not part of this header.
-+ */
-+struct cryptcompress_info *cryptcompress_inode_data(const struct inode *);
-+int equal_to_rdk(znode *, const reiser4_key *);
-+int goto_right_neighbor(coord_t *, lock_handle *);
-+int cryptcompress_inode_ok(struct inode *inode);
-+int coord_is_unprepped_ctail(const coord_t * coord);
-+extern int do_readpage_ctail(struct inode *, struct cluster_handle *,
-+ struct page * page, znode_lock_mode mode);
-+extern int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
-+ struct inode * inode);
-+extern int readpages_cryptcompress(struct file*, struct address_space*,
-+ struct list_head*, unsigned);
-+int bind_cryptcompress(struct inode *child, struct inode *parent);
-+void destroy_inode_cryptcompress(struct inode * inode);
-+int grab_page_cluster(struct inode *inode, struct cluster_handle * clust,
-+ rw_op rw);
-+int write_pschedule_hook(struct file *file, struct inode * inode,
-+ loff_t pos, struct cluster_handle * clust,
-+ struct psched_context * cont);
-+int setattr_pschedule_hook(struct inode * inode);
-+struct reiser4_crypto_info * inode_crypto_info(struct inode * inode);
-+void inherit_crypto_info_common(struct inode * parent, struct inode * object,
-+ int (*can_inherit)(struct inode * child,
-+ struct inode * parent));
-+void reiser4_attach_crypto_info(struct inode * inode,
-+ struct reiser4_crypto_info * info);
-+void change_crypto_info(struct inode * inode, struct reiser4_crypto_info * new);
-+struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode);
-+
-+/* Return the block cipher transform stored in @info. */
-+static inline struct crypto_blkcipher * info_get_cipher(struct reiser4_crypto_info * info)
-+{
-+ return info->cipher;
-+}
-+
-+/*
-+ * Store the block cipher transform @tfm in @info. A previously stored
-+ * transform is simply overwritten, not released.
-+ */
-+static inline void info_set_cipher(struct reiser4_crypto_info * info,
-+ struct crypto_blkcipher * tfm)
-+{
-+ info->cipher = tfm;
-+}
-+
-+/* Return the hash transform stored in @info. */
-+static inline struct crypto_hash * info_get_digest(struct reiser4_crypto_info * info)
-+{
-+ return info->digest;
-+}
-+
-+/*
-+ * Store the hash transform @tfm in @info. A previously stored transform is
-+ * simply overwritten, not released.
-+ */
-+static inline void info_set_digest(struct reiser4_crypto_info * info,
-+ struct crypto_hash * tfm)
-+{
-+ info->digest = tfm;
-+}
-+
-+/* Drop one page cache reference on @page (wrapper of page_cache_release()). */
-+static inline void put_cluster_page(struct page * page)
-+{
-+ page_cache_release(page);
-+}
-+
-+#endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/file.c linux-2.6.24/fs/reiser4/plugin/file/file.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/file.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/file.c 2008-01-25 11:40:16.694168755 +0300
-@@ -0,0 +1,2724 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * this file contains implementations of inode/file/address_space/file plugin
-+ * operations specific for "unix file plugin" (plugin id is
-+ * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
-+ * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have
-+ * no items but stat data)
-+ */
-+
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../tree_walk.h"
-+#include "../../carry.h"
-+#include "../../page_cache.h"
-+#include "../../ioctl.h"
-+#include "../object.h"
-+#include "../cluster.h"
-+#include "../../safe_link.h"
-+
-+#include <linux/writeback.h>
-+#include <linux/pagevec.h>
-+#include <linux/syscalls.h>
-+
-+
-+static int unpack(struct file *file, struct inode *inode, int forever);
-+static void drop_access(struct unix_file_info *);
-+static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
-+ znode_lock_mode lock_mode);
-+
-+/*
-+ * Take exclusive access to the file, but never keep it while the file is
-+ * partially converted (another process may be doing tail conversion): if the
-+ * inode carries REISER4_PART_IN_CONV, the access is dropped again, the CPU is
-+ * yielded and the attempt is repeated until the flag is found clear.
-+ */
-+static inline void get_exclusive_access_careful(struct unix_file_info * uf_info,
-+						struct inode *inode)
-+{
-+	for (;;) {
-+		get_exclusive_access(uf_info);
-+		if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
-+			return;
-+		/* conversion still in progress: back off and try again */
-+		drop_exclusive_access(uf_info);
-+		schedule();
-+	}
-+}
-+
-+/*
-+ * Get unix file plugin specific portion of inode: returns the address of the
-+ * unix_file_info member inside the file_plugin_data of @inode's
-+ * reiser4-specific inode data.
-+ */
-+struct unix_file_info *unix_file_inode_data(const struct inode *inode)
-+{
-+ return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
-+}
-+
-+/**
-+ * equal_to_rdk - test a key against the right delimiting key of a znode
-+ * @node: znode whose right delimiting key is examined
-+ * @key: key to compare against it
-+ *
-+ * The comparison is done under the tree's delimiting-key read lock. Returns
-+ * the result of keyeq(), i.e. true when both keys are equal.
-+ */
-+int equal_to_rdk(znode *node, const reiser4_key *key)
-+{
-+	int is_equal;
-+
-+	read_lock_dk(znode_get_tree(node));
-+	is_equal = keyeq(key, znode_get_rd_key(node));
-+	read_unlock_dk(znode_get_tree(node));
-+
-+	return is_equal;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * equal_to_ldk - test a key against the left delimiting key of a znode
-+ * @node: znode whose left delimiting key is examined
-+ * @key: key to compare against it
-+ *
-+ * The comparison is done under the tree's delimiting-key read lock. Returns
-+ * the result of keyeq(), i.e. true when both keys are equal.
-+ */
-+int equal_to_ldk(znode *node, const reiser4_key *key)
-+{
-+	int is_equal;
-+
-+	read_lock_dk(znode_get_tree(node));
-+	is_equal = keyeq(key, znode_get_ld_key(node));
-+	read_unlock_dk(znode_get_tree(node));
-+
-+	return is_equal;
-+}
-+
-+/**
-+ * check_coord - check whether coord corresponds to key
-+ * @coord: coord to check
-+ * @key: key @coord has to correspond to
-+ *
-+ * Returns true if @coord is set as if it was set as result of lookup with @key
-+ * in coord->node.
-+ */
-+static int check_coord(const coord_t *coord, const reiser4_key *key)
-+{
-+ coord_t twin;
-+
-+ /*
-+ * redo the lookup with the node plugin of coord->node into a scratch
-+ * coord; the return value of ->lookup() itself is not examined, only
-+ * the resulting position is compared
-+ */
-+ node_plugin_by_node(coord->node)->lookup(coord->node, key,
-+ FIND_MAX_NOT_MORE_THAN, &twin);
-+ return coords_equal(coord, &twin);
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/**
-+ * init_uf_coord - initialize extended coord
-+ * @uf_coord: extended coord to initialize
-+ * @lh: lock handle to attach to @uf_coord; it is initialized here as well
-+ *
-+ * Zeroes the embedded coord and clears its item plugin, binds @lh to
-+ * @uf_coord and initializes it with init_lh(), zeroes the extension and marks
-+ * the extended coord as not valid.
-+ */
-+void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
-+{
-+ coord_init_zero(&uf_coord->coord);
-+ coord_clear_iplug(&uf_coord->coord);
-+ uf_coord->lh = lh;
-+ init_lh(lh);
-+ memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
-+ uf_coord->valid = 0;
-+}
-+
-+/*
-+ * (Re)build the item-specific extension of @uf_coord for file offset @offset
-+ * by calling the init_coord_extension method of the item plugin found at the
-+ * coord. Must only be called while the extension is marked not valid. Nothing
-+ * is done when the coord is positioned between items.
-+ *
-+ * NOTE(review): uf_coord->valid is not set here; presumably
-+ * init_coord_extension() does that - confirm.
-+ */
-+static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
-+{
-+ assert("vs-1333", uf_coord->valid == 0);
-+
-+ if (coord_is_between_items(&uf_coord->coord))
-+ return;
-+
-+ assert("vs-1348",
-+ item_plugin_by_coord(&uf_coord->coord)->s.file.
-+ init_coord_extension);
-+
-+ /* result is discarded; the call is made only for its side effects */
-+ item_body_by_coord(&uf_coord->coord);
-+ item_plugin_by_coord(&uf_coord->coord)->s.file.
-+ init_coord_extension(uf_coord, offset);
-+}
-+
-+/**
-+ * goto_right_neighbor - lock right neighbor, drop current node lock
-+ * @coord: coord within the currently locked node; on success it is re-set to
-+ * the first unit of the right neighbor
-+ * @lh: lock handle of the current node; on success it holds the lock of the
-+ * right neighbor instead
-+ *
-+ * Obtain lock on right neighbor and drop lock on current node. The neighbor
-+ * is locked in the same mode (read or write) the current node is locked in.
-+ * Returns 0 on success. On failure the error of
-+ * reiser4_get_right_neighbor() is returned and @coord and @lh are left
-+ * untouched.
-+ */
-+int goto_right_neighbor(coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ lock_handle lh_right;
-+
-+ assert("vs-1100", znode_is_locked(coord->node));
-+
-+ init_lh(&lh_right);
-+ result = reiser4_get_right_neighbor(&lh_right, coord->node,
-+ znode_is_wlocked(coord->node) ?
-+ ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result) {
-+ done_lh(&lh_right);
-+ return result;
-+ }
-+
-+ /*
-+ * we hold two longterm locks on neighboring nodes. Unlock left of
-+ * them
-+ */
-+ done_lh(lh);
-+
-+ /* point @coord into the neighbor, then hand its lock over to @lh */
-+ coord_init_first_unit_nocheck(coord, lh_right.node);
-+ move_lh(lh, &lh_right);
-+
-+ return 0;
-+
-+}
-+
-+/**
-+ * set_file_state - record what kind of items a file is built of
-+ * @uf_info: unix file specific inode data whose ->container is updated
-+ * @cbk_result: result of the tree lookup that was just performed
-+ * @level: tree level of the node the lookup stopped at
-+ *
-+ * This is to be used by find_file_item and in find_file_state to
-+ * determine real state of file.
-+ *
-+ * Nothing is done if @cbk_result is an error. If the container is still
-+ * UF_CONTAINER_UNKNOWN it becomes EMPTY when nothing was found, TAILS when the
-+ * lookup ended on the leaf level and EXTENTS otherwise. If the container is
-+ * already known it is only cross-checked by assertions, and only while the
-+ * file is not flagged REISER4_PART_IN_CONV.
-+ */
-+static void set_file_state(struct unix_file_info *uf_info, int cbk_result,
-+ tree_level level)
-+{
-+ if (cbk_errored(cbk_result))
-+ /* error happened in find_file_item */
-+ return;
-+
-+ assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
-+
-+ if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+ if (cbk_result == CBK_COORD_NOTFOUND)
-+ uf_info->container = UF_CONTAINER_EMPTY;
-+ else if (level == LEAF_LEVEL)
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ else
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ } else {
-+ /*
-+ * file state is known, check whether it is set correctly if
-+ * file is not being tail converted
-+ */
-+ if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
-+ REISER4_PART_IN_CONV)) {
-+ assert("vs-1162",
-+ ergo(level == LEAF_LEVEL &&
-+ cbk_result == CBK_COORD_FOUND,
-+ uf_info->container == UF_CONTAINER_TAILS));
-+ assert("vs-1165",
-+ ergo(level == TWIG_LEVEL &&
-+ cbk_result == CBK_COORD_FOUND,
-+ uf_info->container == UF_CONTAINER_EXTENTS));
-+ }
-+ }
-+}
-+
-+/*
-+ * Look up @key in the tree without using a hint: plain wrapper around
-+ * reiser4_object_lookup() with FIND_MAX_NOT_MORE_THAN bias, the TWIG_LEVEL and
-+ * LEAF_LEVEL level arguments and no read-ahead info. CBK_UNIQUE is always
-+ * passed; CBK_FOR_INSERT is added unless @lock_mode is ZNODE_READ_LOCK.
-+ * The position is returned in @coord, the lock in @lh. Returns the result of
-+ * reiser4_object_lookup().
-+ */
-+int find_file_item_nohint(coord_t *coord, lock_handle *lh,
-+ const reiser4_key *key, znode_lock_mode lock_mode,
-+ struct inode *inode)
-+{
-+ return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
-+ FIND_MAX_NOT_MORE_THAN,
-+ TWIG_LEVEL, LEAF_LEVEL,
-+ (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
-+ (CBK_UNIQUE | CBK_FOR_INSERT),
-+ NULL /* ra_info */ );
-+}
-+
-+/**
-+ * find_file_item - look for file item in the tree
-+ * @hint: provides coordinate, lock handle, seal
-+ * @key: key for search
-+ * @lock_mode: mode of lock to put on returned node
-+ * @inode: inode of the file the item belongs to
-+ *
-+ * This finds position in the tree corresponding to @key. It first tries to use
-+ * @hint's seal if it is set; if the hint cannot be validated a full lookup is
-+ * done with find_file_item_nohint().
-+ *
-+ * Returns CBK_COORD_FOUND when the hint was usable, otherwise the result of
-+ * the tree lookup. -EIO is returned when the hinted position is at the end of
-+ * a node that has no right neighbor.
-+ */
-+int find_file_item(hint_t *hint, const reiser4_key *key,
-+		   znode_lock_mode lock_mode,
-+		   struct inode *inode)
-+{
-+	int result;
-+	coord_t *coord;
-+	lock_handle *lh;
-+
-+	assert("nikita-3030", reiser4_schedulable());
-+	assert("vs-1707", hint != NULL);
-+	assert("vs-47", inode != NULL);
-+
-+	coord = &hint->ext_coord.coord;
-+	lh = hint->ext_coord.lh;
-+	init_lh(lh);
-+
-+	result = hint_validate(hint, key, 1 /* check key */, lock_mode);
-+	if (!result) {
-+		if (coord->between == AFTER_UNIT &&
-+		    equal_to_rdk(coord->node, key)) {
-+			/*
-+			 * the hint points past the last unit of its node and
-+			 * @key is that node's right delimiting key: the item
-+			 * lives in the right neighbor
-+			 */
-+			result = goto_right_neighbor(coord, lh);
-+			if (result == -E_NO_NEIGHBOR)
-+				return RETERR(-EIO);
-+			if (result)
-+				return result;
-+			assert("vs-1152", equal_to_ldk(coord->node, key));
-+			/*
-+			 * we moved to different node. Invalidate coord
-+			 * extension, zload is necessary to init it again
-+			 */
-+			hint->ext_coord.valid = 0;
-+		}
-+
-+		set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
-+			       znode_get_level(coord->node));
-+
-+		return CBK_COORD_FOUND;
-+	}
-+
-+	coord_init_zero(coord);
-+	result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
-+	/*
-+	 * set_file_state() ignores errored lookups anyway. Skipping the call
-+	 * in that case avoids looking at coord->node, which was zeroed above
-+	 * and need not have been filled in by a failed lookup.
-+	 */
-+	if (!cbk_errored(result))
-+		set_file_state(unix_file_inode_data(inode), result,
-+			       znode_get_level(coord->node));
-+
-+	/* FIXME: we might already have coord extension initialized */
-+	hint->ext_coord.valid = 0;
-+	return result;
-+}
-+
-+/* plugin->u.file.write_flow = NULL
-+ plugin->u.file.read_flow = NULL */
-+
-+/*
-+ * Reset @hint to an empty state: zero the whole structure, initialize its
-+ * embedded lock handle and make the extended coord refer to that handle.
-+ */
-+void hint_init_zero(hint_t * hint)
-+{
-+ memset(hint, 0, sizeof(*hint));
-+ init_lh(&hint->lh);
-+ hint->ext_coord.lh = &hint->lh;
-+}
-+
-+/*
-+ * Make sure uf_info->container is known. If it is still
-+ * UF_CONTAINER_UNKNOWN, the item at offset 0 of @inode is looked up under a
-+ * read lock and set_file_state() derives the container from the outcome.
-+ * The caller must hold exclusive access (asserted).
-+ *
-+ * Returns 0 on success or the lookup error. The current transaction is
-+ * restarted before returning in either case.
-+ */
-+static int find_file_state(struct inode *inode, struct unix_file_info *uf_info)
-+{
-+	int result;
-+	reiser4_key key;
-+	coord_t coord;
-+	lock_handle lh;
-+
-+	assert("vs-1628", ea_obtained(uf_info));
-+
-+	if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+		key_by_inode_and_offset_common(inode, 0, &key);
-+		init_lh(&lh);
-+		/* same preparation find_file_item() does before a lookup */
-+		coord_init_zero(&coord);
-+		result = find_file_item_nohint(&coord, &lh, &key,
-+					       ZNODE_READ_LOCK, inode);
-+		/*
-+		 * set_file_state() ignores errored lookups anyway. Skipping
-+		 * the call avoids reading coord.node, which a failed lookup
-+		 * need not have set.
-+		 */
-+		if (!cbk_errored(result))
-+			set_file_state(uf_info, result,
-+				       znode_get_level(coord.node));
-+		done_lh(&lh);
-+		/* "not found" is a valid answer here, only errors are kept */
-+		if (!cbk_errored(result))
-+			result = 0;
-+	} else
-+		result = 0;
-+	assert("vs-1074",
-+	       ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
-+	reiser4_txn_restart_current();
-+	return result;
-+}
-+
-+/**
-+ * Estimate and reserve space needed to truncate page
-+ * which gets partially truncated: one block for page
-+ * itself, stat-data update (estimate_one_insert_into_item)
-+ * and one item insertion (estimate_one_insert_into_item)
-+ * which may happen if page corresponds to hole extent and
-+ * unallocated one will have to be created.
-+ *
-+ * Returns the result of reiser4_grab_reserved(). The reservation is made
-+ * with BA_CAN_COMMIT; callers in this file pair it with
-+ * reiser4_release_reserved().
-+ */
-+static int reserve_partial_page(reiser4_tree * tree)
-+{
-+ grab_space_enable();
-+ return reiser4_grab_reserved(reiser4_get_current_sb(),
-+ 1 +
-+ 2 * estimate_one_insert_into_item(tree),
-+ BA_CAN_COMMIT);
-+}
-+
-+/*
-+ * Estimate and reserve space needed to cut one item and update one stat data.
-+ * The lock stack of the current thread must be clean (asserted). Returns the
-+ * result of reiser4_grab_reserved(); the reservation is made with
-+ * BA_CAN_COMMIT.
-+ */
-+static int reserve_cut_iteration(reiser4_tree * tree)
-+{
-+ __u64 estimate = estimate_one_item_removal(tree)
-+ + estimate_one_insert_into_item(tree);
-+
-+ assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
-+
-+ grab_space_enable();
-+ /* We need to double our estimate now that we can delete more than one
-+ node. */
-+ return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
-+ BA_CAN_COMMIT);
-+}
-+
-+/*
-+ * Set the size of @inode to @new_size. When @update_sd is non-zero the change
-+ * and modification times are also set to the current time and the stat data
-+ * is written with reiser4_update_sd(), whose result is returned. Otherwise 0
-+ * is returned.
-+ */
-+int reiser4_update_file_size(struct inode *inode, loff_t new_size,
-+			     int update_sd)
-+{
-+	INODE_SET_SIZE(inode, new_size);
-+
-+	if (!update_sd)
-+		/* in-memory size only, stat data stays untouched */
-+		return 0;
-+
-+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-+	return reiser4_update_sd(inode);
-+}
-+
-+/**
-+ * cut_file_items - remove items of a file beyond a new size
-+ * @inode: inode of the file
-+ * @new_size: offset the file is cut down to
-+ * @update_sd: passed through to @update_actor
-+ * @cur_size: current size; items are cut up to offset @cur_size - 1
-+ * @update_actor: callback that records the new size (and optionally the stat
-+ * data) after each step
-+ *
-+ * Cut file items one by one starting from the last one until
-+ * new file size (inode->i_size) is reached. Reserve space
-+ * and update file stat data on every single cut from the tree.
-+ *
-+ * Returns 0 on success or the first error met. Reserved space is released
-+ * before returning.
-+ */
-+int cut_file_items(struct inode *inode, loff_t new_size,
-+ int update_sd, loff_t cur_size,
-+ int (*update_actor) (struct inode *, loff_t, int))
-+{
-+ reiser4_key from_key, to_key;
-+ reiser4_key smallest_removed;
-+ file_plugin *fplug = inode_file_plugin(inode);
-+ int result;
-+ int progress = 0;
-+
-+ assert("vs-1248",
-+ fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
-+ fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+
-+ /* key range to remove: [new_size, cur_size - 1] of this object */
-+ fplug->key_by_inode(inode, new_size, &from_key);
-+ to_key = from_key;
-+ set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
-+ /* this loop normally runs just once */
-+ while (1) {
-+ result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
-+ if (result)
-+ break;
-+
-+ result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
-+ &smallest_removed, inode, 1,
-+ &progress);
-+ if (result == -E_REPEAT) {
-+ /**
-+ * -E_REPEAT is a signal to interrupt a long
-+ * file truncation process
-+ */
-+ if (progress) {
-+ /* record how far the cut has got so far */
-+ result = update_actor(inode,
-+ get_key_offset(&smallest_removed),
-+ update_sd);
-+ if (result)
-+ break;
-+ }
-+ /* the below does up(sbinfo->delete_mutex).
-+ * Do not get fooled */
-+ reiser4_release_reserved(inode->i_sb);
-+ /**
-+ * reiser4_cut_tree_object() was interrupted probably
-+ * because current atom requires commit, we have to
-+ * release transaction handle to allow atom commit.
-+ */
-+ reiser4_txn_restart_current();
-+ continue;
-+ }
-+ /*
-+ * "not found" is tolerated only when an already empty file is
-+ * truncated to zero; any other non-zero result ends the loop
-+ */
-+ if (result
-+ && !(result == CBK_COORD_NOTFOUND && new_size == 0
-+ && inode->i_size == 0))
-+ break;
-+
-+ set_key_offset(&smallest_removed, new_size);
-+ /* Final sd update after the file gets its correct size */
-+ result = update_actor(inode, get_key_offset(&smallest_removed),
-+ update_sd);
-+ break;
-+ }
-+
-+ /* the below does up(sbinfo->delete_mutex). Do not get fooled */
-+ reiser4_release_reserved(inode->i_sb);
-+
-+ return result;
-+}
-+
-+int find_or_create_extent(struct page *page);
-+
-+/*
-+ * Part of truncate_file_body: it is called when truncate is used to make file
-+ * shorter.
-+ *
-+ * Cuts all items beyond @new_size and, if the file is built of extents and
-+ * @new_size is not page aligned, zeroes the part of the last page that lies
-+ * beyond the new end of file. Returns 0 on success or a negative error code.
-+ */
-+static int shorten_file(struct inode *inode, loff_t new_size)
-+{
-+ int result;
-+ struct page *page;
-+ int padd_from;
-+ unsigned long index;
-+ struct unix_file_info *uf_info;
-+
-+ /*
-+ * all items of ordinary reiser4 file are grouped together. That is why
-+ * we can use reiser4_cut_tree. Plan B files (for instance) can not be
-+ * truncated that simply
-+ */
-+ result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
-+ get_key_offset(reiser4_max_key()),
-+ reiser4_update_file_size);
-+ if (result)
-+ return result;
-+
-+ uf_info = unix_file_inode_data(inode);
-+ assert("vs-1105", new_size == inode->i_size);
-+ if (new_size == 0) {
-+ uf_info->container = UF_CONTAINER_EMPTY;
-+ return 0;
-+ }
-+
-+ result = find_file_state(inode, uf_info);
-+ if (result)
-+ return result;
-+ if (uf_info->container == UF_CONTAINER_TAILS)
-+ /*
-+ * No need to worry about zeroing last page after new file
-+ * end
-+ */
-+ return 0;
-+
-+ /* offset of the new end of file within its page */
-+ padd_from = inode->i_size & (PAGE_CACHE_SIZE - 1);
-+ if (!padd_from)
-+ /* file is truncated to page boundary */
-+ return 0;
-+
-+ result = reserve_partial_page(reiser4_tree_by_inode(inode));
-+ if (result) {
-+ reiser4_release_reserved(inode->i_sb);
-+ return result;
-+ }
-+
-+ /* last page is partially truncated - zero its content */
-+ index = (inode->i_size >> PAGE_CACHE_SHIFT);
-+ page = read_mapping_page(inode->i_mapping, index, NULL);
-+ if (IS_ERR(page)) {
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ if (likely(PTR_ERR(page) == -EINVAL)) {
-+ /* looks like file is built of tail items */
-+ return 0;
-+ }
-+ return PTR_ERR(page);
-+ }
-+ wait_on_page_locked(page);
-+ if (!PageUptodate(page)) {
-+ page_cache_release(page);
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ return RETERR(-EIO);
-+ }
-+
-+ /*
-+ * if page corresponds to hole extent unit - unallocated one will be
-+ * created here. This is not necessary
-+ */
-+ result = find_or_create_extent(page);
-+
-+ /*
-+ * FIXME: cut_file_items has already updated inode. Probably it would
-+ * be better to update it here when file is really truncated
-+ */
-+ if (result) {
-+ page_cache_release(page);
-+ /*
-+ * the below does up(sbinfo->delete_mutex). Do not get
-+ * confused
-+ */
-+ reiser4_release_reserved(inode->i_sb);
-+ return result;
-+ }
-+
-+ /* zero the tail of the page, from the new end of file onwards */
-+ lock_page(page);
-+ assert("vs-1066", PageLocked(page));
-+ zero_user_page(page, padd_from, PAGE_CACHE_SIZE - padd_from, KM_USER0);
-+ unlock_page(page);
-+ page_cache_release(page);
-+ /* the below does up(sbinfo->delete_mutex). Do not get confused */
-+ reiser4_release_reserved(inode->i_sb);
-+ return 0;
-+}
-+
-+/**
-+ * should_have_notail - decide between unformatted nodes and tail items
-+ * @uf_info: unix file specific inode data, provides the formatting plugin
-+ * @new_size: file size the decision is made for
-+ *
-+ * Asks the formatting plugin whether a file of size @new_size is to be kept
-+ * in tail items. Returns 1 when it is not (the file has to be stored in
-+ * unformatted nodes) or when no formatting plugin is set, 0 when tails are to
-+ * be used.
-+ */
-+static int should_have_notail(const struct unix_file_info *uf_info, loff_t new_size)
-+{
-+	/* without a formatting plugin the answer is always "no tails" */
-+	return uf_info->tplug == NULL ||
-+		!uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
-+					   new_size);
-+}
-+
-+/**
-+ * truncate_file_body - change length of file
-+ * @inode: inode of file
-+ * @attr: attributes to set; only attr->ia_size (new file length) is used
-+ *
-+ * Adjusts items file @inode is built of to match the new size. It may either
-+ * cut items or add them to represent a hole at the end of file. The caller
-+ * has to obtain exclusive access to the file.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ */
-+static int truncate_file_body(struct inode *inode, struct iattr *attr)
-+{
-+ int result;
-+ loff_t new_size = attr->ia_size;
-+
-+ if (inode->i_size < new_size) {
-+ /* expanding truncate */
-+ struct unix_file_info *uf_info = unix_file_inode_data(inode);
-+
-+ result = find_file_state(inode, uf_info);
-+ if (result)
-+ return result;
-+
-+ if (should_have_notail(uf_info, new_size)) {
-+ /*
-+ * file of size @new_size has to be built of
-+ * extents. If it is built of tails - convert to
-+ * extents
-+ */
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ /*
-+ * if file is being converted by another process
-+ * - wait until it completes
-+ */
-+ while (1) {
-+ if (reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV)) {
-+ drop_exclusive_access(uf_info);
-+ schedule();
-+ get_exclusive_access(uf_info);
-+ continue;
-+ }
-+ break;
-+ }
-+
-+ /*
-+ * exclusive access was dropped while waiting,
-+ * so the container is checked once more
-+ */
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ result = tail2extent(uf_info);
-+ if (result)
-+ return result;
-+ }
-+ }
-+ result = reiser4_write_extent(NULL, inode, NULL,
-+ 0, &new_size);
-+ if (result)
-+ return result;
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ } else {
-+ if (uf_info->container == UF_CONTAINER_EXTENTS) {
-+ result = reiser4_write_extent(NULL, inode, NULL,
-+ 0, &new_size);
-+ if (result)
-+ return result;
-+ } else {
-+ result = reiser4_write_tail(NULL, inode, NULL,
-+ 0, &new_size);
-+ if (result)
-+ return result;
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ }
-+ }
-+ BUG_ON(result > 0);
-+ result = reiser4_update_file_size(inode, new_size, 1);
-+ /*
-+ * NOTE(review): a failed stat data update takes the kernel
-+ * down here instead of being returned to the caller - confirm
-+ * that this is intended
-+ */
-+ BUG_ON(result != 0);
-+ } else
-+ result = shorten_file(inode, new_size);
-+ return result;
-+}
-+
-+/* plugin->u.write_sd_by_inode = write_sd_by_inode_common */
-+
-+/**
-+ * load_file_hint - copy hint from struct file to local variable
-+ * @file: file to get hint from (may be NULL)
-+ * @hint: structure to fill
-+ *
-+ * Reiser4 specific portion of struct file may contain information (hint)
-+ * stored on exiting from previous read or write. That information includes
-+ * seal of znode and coord within that znode where previous read or write
-+ * stopped. This function copies that information to @hint if it was stored or
-+ * initializes @hint by 0s otherwise.
-+ *
-+ * Returns 0 on success, or the error of reiser4_get_file_fsdata().
-+ */
-+int load_file_hint(struct file *file, hint_t *hint)
-+{
-+ reiser4_file_fsdata *fsdata;
-+
-+ if (file) {
-+ fsdata = reiser4_get_file_fsdata(file);
-+ if (IS_ERR(fsdata))
-+ return PTR_ERR(fsdata);
-+
-+ spin_lock_inode(file->f_dentry->d_inode);
-+ if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
-+ *hint = fsdata->reg.hint;
-+ /*
-+ * the copy must use its own lock handle, not the one
-+ * embedded in the saved hint
-+ */
-+ init_lh(&hint->lh);
-+ hint->ext_coord.lh = &hint->lh;
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+ /*
-+ * force re-validation of the coord on the first
-+ * iteration of the read/write loop.
-+ */
-+ hint->ext_coord.valid = 0;
-+ assert("nikita-19892", coords_equal(&hint->seal.coord1,
-+ &hint->ext_coord.
-+ coord));
-+ return 0;
-+ }
-+ /* no usable hint stored: wipe the saved one */
-+ memset(&fsdata->reg.hint, 0, sizeof(hint_t));
-+ spin_unlock_inode(file->f_dentry->d_inode);
-+ }
-+ hint_init_zero(hint);
-+ return 0;
-+}
-+
-+/**
-+ * save_file_hint - copy hint to reiser4 private struct file's part
-+ * @file: file to save hint in (may be NULL, then nothing is saved)
-+ * @hint: hint to save; nothing is saved unless its seal is set
-+ *
-+ * This copies @hint to reiser4 private part of struct file. It can help
-+ * speedup future accesses to the file. The hint must not hold a lock when it
-+ * is saved (asserted).
-+ */
-+void save_file_hint(struct file *file, const hint_t *hint)
-+{
-+	reiser4_file_fsdata *fsdata;
-+
-+	assert("edward-1337", hint != NULL);
-+
-+	if (!file || !reiser4_seal_is_set(&hint->seal))
-+		return;
-+	fsdata = reiser4_get_file_fsdata(file);
-+	assert("vs-965", !IS_ERR(fsdata));
-+	/*
-+	 * Saving a hint is only an optimisation. Should the private data be
-+	 * unavailable while assertions are compiled out, give up quietly
-+	 * rather than dereference an error pointer below.
-+	 */
-+	if (IS_ERR(fsdata))
-+		return;
-+	assert("nikita-19891",
-+	       coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
-+	assert("vs-30", hint->lh.owner == NULL);
-+	spin_lock_inode(file->f_dentry->d_inode);
-+	fsdata->reg.hint = *hint;
-+	spin_unlock_inode(file->f_dentry->d_inode);
-+}
-+
-+/*
-+ * Invalidate @hint: mark its extended coord not valid, finish its seal with
-+ * reiser4_seal_done() and release its lock handle with done_lh().
-+ */
-+void reiser4_unset_hint(hint_t * hint)
-+{
-+ assert("vs-1315", hint);
-+ hint->ext_coord.valid = 0;
-+ reiser4_seal_done(&hint->seal);
-+ done_lh(&hint->lh);
-+}
-+
-+/*
-+ * Remember the current position in @hint: seal the coord stored in
-+ * hint->ext_coord for @key, record the key offset and the lock @mode, then
-+ * release the hint's lock handle.
-+ *
-+ * coord must be set properly. So, that reiser4_set_hint
-+ * has nothing to do
-+ */
-+void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
-+ znode_lock_mode mode)
-+{
-+ /* local is declared through ON_DEBUG(); it is used only by the assert */
-+ ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
-+ assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
-+
-+ reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
-+ hint->offset = get_key_offset(key);
-+ hint->mode = mode;
-+ done_lh(&hint->lh);
-+}
-+
-+/* Return non-zero if the seal of @hint is set, i.e. the hint holds a position. */
-+int hint_is_set(const hint_t * hint)
-+{
-+ return reiser4_seal_is_set(&hint->seal);
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Return true if @k1 and @k2 agree in locality, type, band, ordering and
-+ * objectid; the offset is deliberately left out of the comparison.
-+ */
-+static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
-+{
-+ return (get_key_locality(k1) == get_key_locality(k2) &&
-+ get_key_type(k1) == get_key_type(k2) &&
-+ get_key_band(k1) == get_key_band(k2) &&
-+ get_key_ordering(k1) == get_key_ordering(k2) &&
-+ get_key_objectid(k1) == get_key_objectid(k2));
-+}
-+#endif
-+
-+/*
-+ * Check whether @hint can be used to reach @key with a lock of @lock_mode.
-+ *
-+ * -E_REPEAT is returned when there is no hint, its seal is not set, it was
-+ * recorded for a different lock mode, or (only if @check_key is non-zero) for
-+ * a different key offset. Otherwise the seal is validated with
-+ * reiser4_seal_validate() and its result is returned.
-+ */
-+static int
-+hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
-+ znode_lock_mode lock_mode)
-+{
-+ if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
-+ /* hint either not set or set by different operation */
-+ return RETERR(-E_REPEAT);
-+
-+ assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
-+
-+ if (check_key && get_key_offset(key) != hint->offset)
-+ /* hint is set for different key */
-+ return RETERR(-E_REPEAT);
-+
-+ assert("vs-31", hint->ext_coord.lh == &hint->lh);
-+ return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
-+ hint->ext_coord.lh, lock_mode,
-+ ZNODE_LOCK_LOPRI);
-+}
-+
-+/**
-+ * Look for place at twig level for extent corresponding to page,
-+ * call extent's writepage method to create unallocated extent if
-+ * it does not exist yet, initialize jnode, capture page.
-+ *
-+ * @page must belong to a mapping with a host inode (asserted). The page is
-+ * locked only while its jnode is looked up; it is unlocked on return.
-+ * Returns 0 on success or a negative error code.
-+ */
-+int find_or_create_extent(struct page *page)
-+{
-+ int result;
-+ struct inode *inode;
-+ int plugged_hole;
-+
-+ jnode *node;
-+
-+ assert("vs-1065", page->mapping && page->mapping->host);
-+ inode = page->mapping->host;
-+
-+ lock_page(page);
-+ node = jnode_of_page(page);
-+ if (IS_ERR(node)) {
-+ unlock_page(page);
-+ return PTR_ERR(node);
-+ }
-+ JF_SET(node, JNODE_WRITE_PREPARED);
-+ unlock_page(page);
-+
-+ if (node->blocknr == 0) {
-+ /* jnode has no block number yet */
-+ plugged_hole = 0;
-+ result = reiser4_update_extent(inode, node, page_offset(page),
-+ &plugged_hole);
-+ if (result) {
-+ JF_CLR(node, JNODE_WRITE_PREPARED);
-+ jput(node);
-+ warning("edward-1549",
-+ "reiser4_update_extent failed: %d", result);
-+ return result;
-+ }
-+ if (plugged_hole)
-+ /*
-+ * NOTE(review): result of reiser4_update_sd() is
-+ * ignored here - confirm this is intended
-+ */
-+ reiser4_update_sd(inode);
-+ } else {
-+ /* block is already assigned: just capture and dirty the jnode */
-+ spin_lock_jnode(node);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+
-+ BUG_ON(node->atom == NULL);
-+ JF_CLR(node, JNODE_WRITE_PREPARED);
-+ jput(node);
-+
-+ /*
-+ * NOTE(review): @node is still dereferenced and stored below after
-+ * the jput() above; presumably some other reference keeps the jnode
-+ * alive - confirm
-+ */
-+ if (get_current_context()->entd) {
-+ entd_context *ent = get_entd_context(node->tree->super);
-+
-+ if (ent->cur_request->page == page)
-+ ent->cur_request->node = node;
-+ }
-+ return 0;
-+}
-+
-+/**
-+ * has_anonymous_pages - check whether inode has pages dirtied via mmap
-+ * @inode: inode to check
-+ *
-+ * Returns true if the page tree of inode's mapping contains at least one page
-+ * tagged PAGECACHE_TAG_REISER4_MOVED, i.e. a dirty page which does not belong
-+ * to any atom. The check is done under the mapping's tree_lock taken for
-+ * reading.
-+ *
-+ * NOTE(review): earlier documentation also mentioned eflushed jnodes tagged
-+ * EFLUSH_TAG_ANONYMOUS; this function does not look at jnodes.
-+ */
-+static int has_anonymous_pages(struct inode *inode)
-+{
-+ int result;
-+
-+ read_lock_irq(&inode->i_mapping->tree_lock);
-+ result = radix_tree_tagged(&inode->i_mapping->page_tree, PAGECACHE_TAG_REISER4_MOVED);
-+ read_unlock_irq(&inode->i_mapping->tree_lock);
-+ return result;
-+}
-+
-+/**
-+ * capture_page_and_create_extent - reserve space and capture a page
-+ * @page: page to be captured
-+ *
-+ * Grabs space for extent creation and stat data update and calls function to
-+ * do actual work. The file must be built of extents and @page must lie
-+ * within the file size (both asserted).
-+ *
-+ * Returns 0 on success. On failure the page is marked with SetPageError() and
-+ * the error code is returned.
-+ */
-+static int capture_page_and_create_extent(struct page *page)
-+{
-+ int result;
-+ struct inode *inode;
-+
-+ assert("vs-1084", page->mapping && page->mapping->host);
-+ inode = page->mapping->host;
-+ assert("vs-1139",
-+ unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
-+ /* page belongs to file */
-+ assert("vs-1393",
-+ inode->i_size > page_offset(page));
-+
-+ /* page capture may require extent creation (if it does not exist yet)
-+ and stat data's update (number of blocks changes on extent
-+ creation) */
-+ grab_space_enable();
-+ result = reiser4_grab_space(2 * estimate_one_insert_into_item
-+ (reiser4_tree_by_inode(inode)),
-+ BA_CAN_COMMIT);
-+ if (likely(!result))
-+ result = find_or_create_extent(page);
-+
-+ if (result != 0)
-+ SetPageError(page);
-+ return result;
-+}
-+
-+/*
-+ * This is implementation of method commit_write of struct
-+ * address_space_operations for unix file plugin.
-+ *
-+ * @page must be locked on entry (asserted) and is locked again on return.
-+ * In between it is unlocked, with an extra reference held, while it is being
-+ * captured. @from and @to are not used. Returns the result of
-+ * capture_page_and_create_extent(), or the error of reiser4_init_context().
-+ */
-+int
-+commit_write_unix_file(struct file *file, struct page *page,
-+ unsigned from, unsigned to)
-+{
-+ reiser4_context *ctx;
-+ struct inode *inode;
-+ int result;
-+
-+ assert("umka-3101", file != NULL);
-+ assert("umka-3102", page != NULL);
-+ assert("umka-3093", PageLocked(page));
-+
-+ SetPageUptodate(page);
-+
-+ /* NOTE(review): inode is assigned here but not read afterwards */
-+ inode = page->mapping->host;
-+ ctx = reiser4_init_context(page->mapping->host->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ /* keep the page alive while it is unlocked for the capture */
-+ page_cache_get(page);
-+ unlock_page(page);
-+ result = capture_page_and_create_extent(page);
-+ lock_page(page);
-+ page_cache_release(page);
-+
-+ /* don't commit transaction under inode semaphore */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/*
-+ * Support for "anonymous" pages and jnodes.
-+ *
-+ * When file is write-accessed through mmap pages can be dirtied from the user
-+ * level. In this case kernel is not notified until one of following happens:
-+ *
-+ * (1) msync()
-+ *
-+ * (2) truncate() (either explicit or through unlink)
-+ *
-+ * (3) VM scanner starts reclaiming mapped pages, dirtying them before
-+ * starting write-back.
-+ *
-+ * As a result of (3) ->writepage may be called on a dirty page without
-+ * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
-+ * (iozone) generate huge number of anonymous pages. Emergency flush handles
-+ * this situation by creating jnode for anonymous page, starting IO on the
-+ * page, and marking jnode with JNODE_KEEPME bit so that it's not thrown out of
-+ * memory. Such jnode is also called anonymous.
-+ *
-+ * reiser4_sync_sb() method tries to insert anonymous pages and jnodes into
-+ * tree. This is done by capture_anonymous_*() functions below.
-+ */
-+
-+/**
-+ * capture_anonymous_page - involve page into transaction
-+ * @page: page to deal with
-+ *
-+ * Makes sure @page has corresponding metadata in the tree, creates a jnode
-+ * for it and captures it. Returns 1 when the page was captured, 0 when the
-+ * page is under writeback and was left alone, and a negative error code (also
-+ * reported with a warning) when capturing failed.
-+ */
-+static int capture_anonymous_page(struct page *page)
-+{
-+	int err;
-+
-+	if (PageWriteback(page))
-+		/* FIXME: do nothing? */
-+		return 0;
-+
-+	err = capture_page_and_create_extent(page);
-+	if (err != 0) {
-+		warning("nikita-3329",
-+			"Cannot capture anon page: %i", err);
-+		return err;
-+	}
-+
-+	/* one page captured */
-+	return 1;
-+}
-+
-+/**
-+ * capture_anonymous_pages - find and capture pages dirtied via mmap
-+ * @mapping: address space where to look for pages
-+ * @index: start index
-+ * @to_capture: maximum number of pages to capture
-+ *
-+ * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
-+ * captures (involves into atom) them, returns number of captured pages,
-+ * updates @index to next page after the last captured one.
-+ *
-+ * At most one pagevec worth of pages is handled per call. When no tagged page
-+ * is found, *@index is set to (pgoff_t)-1 and 0 is returned. If capturing a
-+ * page fails, the MOVED tag is restored on that page and on all pages after
-+ * it, and the negative error code is returned.
-+ */
-+static int
-+capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
-+ unsigned int to_capture)
-+{
-+ int result;
-+ struct pagevec pvec;
-+ unsigned int i, count;
-+ int nr;
-+
-+ pagevec_init(&pvec, 0);
-+ count = min(pagevec_space(&pvec), to_capture);
-+ nr = 0;
-+
-+ /* find pages tagged MOVED */
-+ write_lock_irq(&mapping->tree_lock);
-+ pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree,
-+ (void **)pvec.pages, *index, count,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ if (pagevec_count(&pvec) == 0) {
-+ /*
-+ * there are no pages tagged MOVED in mapping->page_tree
-+ * starting from *index
-+ */
-+ write_unlock_irq(&mapping->tree_lock);
-+ *index = (pgoff_t)-1;
-+ return 0;
-+ }
-+
-+ /* clear MOVED tag for all found pages */
-+ for (i = 0; i < pagevec_count(&pvec); i++) {
-+ /* reference is dropped again by pagevec_release() below */
-+ page_cache_get(pvec.pages[i]);
-+ radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ }
-+ write_unlock_irq(&mapping->tree_lock);
-+
-+
-+ /* i == pagevec_count(&pvec) here: continue after the last page found */
-+ *index = pvec.pages[i - 1]->index + 1;
-+
-+ for (i = 0; i < pagevec_count(&pvec); i++) {
-+ /*
-+ * tag PAGECACHE_TAG_REISER4_MOVED will be cleared by
-+ * reiser4_set_page_dirty_internal which is called when jnode is
-+ * captured
-+ */
-+ result = capture_anonymous_page(pvec.pages[i]);
-+ if (result == 1)
-+ nr++;
-+ else {
-+ if (result < 0) {
-+ /*
-+ * NOTE(review): the message has an unbalanced
-+ * ')' and prints the unsigned loop index with
-+ * %d
-+ */
-+ warning("vs-1454",
-+ "failed to capture page: "
-+ "result=%d, captured=%d)\n",
-+ result, i);
-+
-+ /*
-+ * set MOVED tag to all pages which left not
-+ * captured
-+ */
-+ write_lock_irq(&mapping->tree_lock);
-+ for (; i < pagevec_count(&pvec); i ++) {
-+ radix_tree_tag_set(&mapping->page_tree,
-+ pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ }
-+ write_unlock_irq(&mapping->tree_lock);
-+
-+ pagevec_release(&pvec);
-+ return result;
-+ } else {
-+ /*
-+ * result == 0. capture_anonymous_page returns
-+ * 0 for Writeback-ed page. Set MOVED tag on
-+ * that page
-+ */
-+ write_lock_irq(&mapping->tree_lock);
-+ radix_tree_tag_set(&mapping->page_tree,
-+ pvec.pages[i]->index,
-+ PAGECACHE_TAG_REISER4_MOVED);
-+ write_unlock_irq(&mapping->tree_lock);
-+ /*
-+ * NOTE(review): the loop goes on with the
-+ * remaining pages after *index has been
-+ * rewound here - confirm this is intended
-+ */
-+ if (i == 0)
-+ *index = pvec.pages[0]->index;
-+ else
-+ *index = pvec.pages[i - 1]->index + 1;
-+ }
-+ }
-+ }
-+ pagevec_release(&pvec);
-+ return nr;
-+}
-+
-+/**
-+ * capture_anonymous_jnodes - find and capture anonymous jnodes
-+ * @mapping: address space where to look for jnodes (not used by this body)
-+ * @from: start index; set to @to on return
-+ * @to: end index
-+ * @to_capture: maximum number of jnodes to capture (not used by this body)
-+ *
-+ * The body below is a stub: it does not look up or capture anything. It only
-+ * advances *@from to @to and reports that 0 jnodes were captured.
-+ */
-+static int
-+capture_anonymous_jnodes(struct address_space *mapping,
-+ pgoff_t *from, pgoff_t to, int to_capture)
-+{
-+ /* nothing is captured; just move the caller's cursor to the end index */
-+ *from = to;
-+ return 0;
-+}
-+
-+/*
-+ * Commit atom of the jnode of a page.
-+ *
-+ * The atom is looked up under the page lock and the jnode spin lock, then
-+ * handed to reiser4_sync_atom(). The whole sequence is retried for as long as
-+ * reiser4_sync_atom() returns -E_REPEAT. A page without a jnode results in
-+ * reiser4_sync_atom(NULL).
-+ */
-+static int sync_page(struct page *page)
-+{
-+	int ret;
-+
-+	for (;;) {
-+		txn_atom *atom = NULL;
-+		jnode *node;
-+
-+		lock_page(page);
-+		node = jprivate(page);
-+		if (node != NULL) {
-+			spin_lock_jnode(node);
-+			atom = jnode_get_atom(node);
-+			spin_unlock_jnode(node);
-+		}
-+		unlock_page(page);
-+
-+		ret = reiser4_sync_atom(atom);
-+		if (ret != -E_REPEAT)
-+			break;
-+	}
-+	/*
-+	 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
-+	 * handle the case where more pages get added to the atom while we are
-+	 * syncing it?
-+	 */
-+	assert("nikita-3485", ergo(ret == 0,
-+				   get_current_context()->trans->atom == NULL));
-+	return ret;
-+}
-+
-+/*
-+ * Commit atoms of all pages of @inode's mapping.
-+ * Walks mapping->page_tree one page at a time and calls sync_page() for each
-+ * page found. Stops at the first non-zero result of sync_page() and returns
-+ * it; returns 0 when the whole tree has been walked.
-+ */
-+static int sync_page_list(struct inode *inode)
-+{
-+ int result;
-+ struct address_space *mapping;
-+ unsigned long from; /* start index for radix_tree_gang_lookup */
-+ unsigned int found; /* return value for radix_tree_gang_lookup */
-+
-+ mapping = inode->i_mapping;
-+ from = 0;
-+ result = 0;
-+ read_lock_irq(&mapping->tree_lock);
-+ while (result == 0) {
-+ struct page *page;
-+
-+ /* look up at most one page with index >= from */
-+ found =
-+ radix_tree_gang_lookup(&mapping->page_tree, (void **)&page,
-+ from, 1);
-+ assert("edward-1550", found < 2);
-+ if (found == 0)
-+ break;
-+ /**
-+ * page may not leave radix tree because it is protected from
-+ * truncating by inode->i_mutex locked by sys_fsync
-+ */
-+ page_cache_get(page);
-+ /* tree lock is dropped around sync_page() and re-taken afterwards */
-+ read_unlock_irq(&mapping->tree_lock);
-+
-+ from = page->index + 1;
-+
-+ result = sync_page(page);
-+
-+ page_cache_release(page);
-+ read_lock_irq(&mapping->tree_lock);
-+ }
-+
-+ read_unlock_irq(&mapping->tree_lock);
-+ return result;
-+}
-+
-+/*
-+ * Commit the atoms that hold data of @inode.
-+ *
-+ * First determines the file's container type with find_file_state() under
-+ * exclusive access, then commits according to that type: per-page commit via
-+ * sync_page_list() for extents, txnmgr_force_commit_all() for tails, nothing
-+ * for an empty file, -EIO for an unknown container. Returns the result of
-+ * find_file_state() if it fails, otherwise the result of the chosen commit.
-+ */
-+static int commit_file_atoms(struct inode *inode)
-+{
-+ int result;
-+ struct unix_file_info *uf_info;
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ get_exclusive_access(uf_info);
-+ /*
-+ * find what items file is made from
-+ */
-+ result = find_file_state(inode, uf_info);
-+ drop_exclusive_access(uf_info);
-+ if (result != 0)
-+ return result;
-+
-+ /*
-+ * file state cannot change because we are under ->i_mutex
-+ */
-+ switch (uf_info->container) {
-+ case UF_CONTAINER_EXTENTS:
-+ /* find_file_state might open or join an atom */
-+ reiser4_txn_restart_current();
-+ result =
-+ /*
-+ * when we are called by
-+ * filemap_fdatawrite->
-+ * do_writepages()->
-+ * reiser4_writepages()
-+ *
-+ * inode->i_mapping->dirty_pages are spliced into
-+ * ->io_pages, leaving ->dirty_pages dirty.
-+ *
-+ * When we are called from
-+ * reiser4_fsync()->sync_unix_file(), we have to
-+ * commit atoms of all pages on the ->dirty_list.
-+ *
-+ * So for simplicity we just commit ->io_pages and
-+ * ->dirty_pages.
-+ */
-+ sync_page_list(inode);
-+ break;
-+ case UF_CONTAINER_TAILS:
-+ /*
-+ * NOTE-NIKITA probably we can be smarter for tails. For now
-+ * just commit all existing atoms.
-+ */
-+ result = txnmgr_force_commit_all(inode->i_sb, 0);
-+ break;
-+ case UF_CONTAINER_EMPTY:
-+ result = 0;
-+ break;
-+ case UF_CONTAINER_UNKNOWN:
-+ default:
-+ result = -EIO;
-+ break;
-+ }
-+
-+ /*
-+ * commit current transaction: there can be captured nodes from
-+ * find_file_state() and finish_conversion().
-+ */
-+ reiser4_txn_restart_current();
-+ return result;
-+}
-+
-+/**
-+ * writepages_unix_file - writepages of struct address_space_operations
-+ * @mapping: address space to write back; its host inode is processed
-+ * @wbc: writeback control; ->range_start, ->sync_mode and ->nr_to_write are
-+ * read, and ->nr_to_write is decreased by the number of captured pages
-+ *
-+ * This captures anonymous pages and anonymous jnodes. Anonymous pages are
-+ * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
-+ * created by reiser4_writepage.
-+ *
-+ * Work is done in bursts of at most CAPTURE_APAGE_BURST pages, each burst in
-+ * its own reiser4 context and under nonexclusive access to the file. For
-+ * sync_mode other than WB_SYNC_ALL a single burst is done and 0 is returned;
-+ * for WB_SYNC_ALL the atoms are committed after every burst and bursts are
-+ * repeated until the page index passes the end of file.
-+ */
-+int writepages_unix_file(struct address_space *mapping,
-+ struct writeback_control *wbc)
-+{
-+ int result;
-+ struct unix_file_info *uf_info;
-+ pgoff_t pindex, jindex, nr_pages;
-+ long to_capture;
-+ struct inode *inode;
-+
-+ inode = mapping->host;
-+ if (!has_anonymous_pages(inode)) {
-+ result = 0;
-+ goto end;
-+ }
-+ /* pindex: cursor over anonymous pages; jindex: cursor over anonymous jnodes */
-+ jindex = pindex = wbc->range_start >> PAGE_CACHE_SHIFT;
-+ result = 0;
-+ nr_pages = size_in_pages(i_size_read(inode));
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ do {
-+ reiser4_context *ctx;
-+
-+ if (wbc->sync_mode != WB_SYNC_ALL)
-+ to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
-+ else
-+ to_capture = CAPTURE_APAGE_BURST;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ result = PTR_ERR(ctx);
-+ break;
-+ }
-+ /* avoid recursive calls to ->sync_inodes */
-+ ctx->nobalance = 1;
-+ assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
-+ assert("edward-1551", LOCK_CNT_NIL(inode_sem_w));
-+ assert("edward-1552", LOCK_CNT_NIL(inode_sem_r));
-+
-+ reiser4_txn_restart_current();
-+
-+ /* we have to get nonexclusive access to the file */
-+ if (get_current_context()->entd) {
-+ /*
-+ * use nonblocking version of nonexclusive_access to
-+ * avoid deadlock which might look like the following:
-+ * process P1 holds NEA on file F1 and called entd to
-+ * reclaim some memory. Entd works for P1 and is going
-+ * to capture pages of file F2. To do that entd has to
-+ * get NEA to F2. F2 is held by process P2 which also
-+ * called entd. But entd is serving P1 at the moment
-+ * and P2 has to wait. Process P3 trying to get EA to
-+ * file F2. Existence of pending EA request to file F2
-+ * makes impossible for entd to get NEA to file
-+ * F2. Neither of these process can continue. Using
-+ * nonblocking version of getting NEA is supposed to
-+ * avoid this deadlock.
-+ */
-+ if (try_to_get_nonexclusive_access(uf_info) == 0) {
-+ result = RETERR(-EBUSY);
-+ reiser4_exit_context(ctx);
-+ break;
-+ }
-+ } else
-+ get_nonexclusive_access(uf_info);
-+
-+ while (to_capture > 0) {
-+ pgoff_t start;
-+
-+ assert("vs-1727", jindex <= pindex);
-+ if (pindex == jindex) {
-+ start = pindex;
-+ result =
-+ capture_anonymous_pages(inode->i_mapping,
-+ &pindex,
-+ to_capture);
-+ if (result <= 0)
-+ break;
-+ to_capture -= result;
-+ wbc->nr_to_write -= result;
-+ /*
-+ * captured pages were contiguous starting at @start:
-+ * there is no gap to scan for jnodes
-+ */
-+ if (start + result == pindex) {
-+ jindex = pindex;
-+ continue;
-+ }
-+ if (to_capture <= 0)
-+ break;
-+ }
-+ /* deal with anonymous jnodes between jindex and pindex */
-+ result =
-+ capture_anonymous_jnodes(inode->i_mapping, &jindex,
-+ pindex, to_capture);
-+ if (result < 0)
-+ break;
-+ to_capture -= result;
-+ get_current_context()->nr_captured += result;
-+
-+ if (jindex == (pgoff_t) - 1) {
-+ assert("vs-1728", pindex == (pgoff_t) - 1);
-+ break;
-+ }
-+ }
-+ if (to_capture <= 0)
-+ /* there may be left more pages */
-+ __mark_inode_dirty(inode, I_DIRTY_PAGES);
-+
-+ drop_nonexclusive_access(uf_info);
-+ if (result < 0) {
-+ /* error happened */
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ if (wbc->sync_mode != WB_SYNC_ALL) {
-+ reiser4_exit_context(ctx);
-+ return 0;
-+ }
-+ result = commit_file_atoms(inode);
-+ reiser4_exit_context(ctx);
-+ if (pindex >= nr_pages && jindex == pindex)
-+ break;
-+ } while (1);
-+
-+ end:
-+ /*
-+ * reached when there was nothing to do, when a context could not be
-+ * set up or access could not be obtained, or when the WB_SYNC_ALL loop
-+ * has finished
-+ */
-+ if (is_in_reiser4_context()) {
-+ if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
-+ /*
-+ * there are already pages to flush, flush them out, do
-+ * not delay until end of reiser4_sync_inodes
-+ */
-+ reiser4_writeout(inode->i_sb, wbc);
-+ get_current_context()->nr_captured = 0;
-+ }
-+ }
-+ return result;
-+}
-+
-+/**
-+ * readpage_unix_file - readpage of struct address_space_operations
-+ * @file: file the page belongs to; used to load and save the search hint
-+ * @page: locked, not up-to-date page to fill
-+ *
-+ * Compose a key and search for item containing information about @page
-+ * data. If item is found - its readpage method is called.
-+ *
-+ * A page that lies entirely beyond i_size is zero-filled, marked up to date
-+ * and unlocked without any tree search. On every error path visible here
-+ * the page is unlocked before returning.
-+ */
-+int readpage_unix_file(struct file *file, struct page *page)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *inode;
-+ reiser4_key key;
-+ item_plugin *iplug;
-+ hint_t *hint;
-+ lock_handle *lh;
-+ coord_t *coord;
-+
-+ assert("vs-1062", PageLocked(page));
-+ assert("vs-976", !PageUptodate(page));
-+ assert("vs-1061", page->mapping && page->mapping->host);
-+
-+ if (page->mapping->host->i_size <= page_offset(page)) {
-+ /* page is out of file */
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ return 0;
-+ }
-+
-+ inode = page->mapping->host;
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ unlock_page(page);
-+ return PTR_ERR(ctx);
-+ }
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL) {
-+ unlock_page(page);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ result = load_file_hint(file, hint);
-+ if (result) {
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ lh = &hint->lh;
-+
-+ /* get key of first byte of the page */
-+ key_by_inode_and_offset_common(inode, page_offset(page), &key);
-+
-+ /* look for file metadata corresponding to first byte of page */
-+ /*
-+ * the page is unlocked for the duration of the tree search; an extra
-+ * reference is held across that window
-+ */
-+ page_cache_get(page);
-+ unlock_page(page);
-+ result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
-+ lock_page(page);
-+ page_cache_release(page);
-+
-+ if (page->mapping == NULL) {
-+ /*
-+ * readpage allows truncate to run concurrently. Page was
-+ * truncated while it was not locked
-+ */
-+ done_lh(lh);
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return -EINVAL;
-+ }
-+
-+ if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) {
-+ if (result == CBK_COORD_FOUND &&
-+ hint->ext_coord.coord.between != AT_UNIT)
-+ /* file is truncated */
-+ result = -EINVAL;
-+ done_lh(lh);
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ /*
-+ * item corresponding to page is found. It can not be removed because
-+ * znode lock is held
-+ */
-+ /* the page may have been filled while it was unlocked above */
-+ if (PageUptodate(page)) {
-+ done_lh(lh);
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+ }
-+
-+ coord = &hint->ext_coord.coord;
-+ result = zload(coord->node);
-+ if (result) {
-+ done_lh(lh);
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ validate_extended_coord(&hint->ext_coord, page_offset(page));
-+
-+ if (!coord_is_existing_unit(coord)) {
-+ /* this indicates corruption */
-+ /* NOTE(review): result is always 0 here (zload() succeeded) */
-+ warning("vs-280",
-+ "Looking for page %lu of file %llu (size %lli). "
-+ "No file items found (%d). File is corrupted?\n",
-+ page->index, (unsigned long long)get_inode_oid(inode),
-+ inode->i_size, result);
-+ zrelse(coord->node);
-+ done_lh(lh);
-+ kfree(hint);
-+ unlock_page(page);
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-EIO);
-+ }
-+
-+ /*
-+ * get plugin of the found item and call its readpage method; if the
-+ * plugin has no readpage method the read fails with -EINVAL
-+ */
-+ iplug = item_plugin_by_coord(coord);
-+ if (iplug->s.file.readpage)
-+ result = iplug->s.file.readpage(coord, page);
-+ else
-+ result = RETERR(-EINVAL);
-+
-+ if (!result) {
-+ /*
-+ * NOTE(review): key is not read again in this function after
-+ * this update; presumably it was meant for reiser4_set_hint()
-+ * (see FIXME below) - confirm
-+ */
-+ set_key_offset(&key,
-+ (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT);
-+ /* FIXME should call reiser4_set_hint() */
-+ reiser4_unset_hint(hint);
-+ } else {
-+ unlock_page(page);
-+ reiser4_unset_hint(hint);
-+ }
-+ assert("vs-979",
-+ ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
-+ assert("vs-9791", ergo(result != 0, !PageLocked(page)));
-+
-+ zrelse(coord->node);
-+ done_lh(lh);
-+
-+ save_file_hint(file, hint);
-+ kfree(hint);
-+
-+ /*
-+ * FIXME: explain why it is needed. HINT: page allocation in write can
-+ * not be done when atom is not NULL because reiser4_writepage can not
-+ * kick entd and have to eflush
-+ */
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/*
-+ * State carried between uf_readpages_filler() calls of one
-+ * readpages_unix_file() invocation.
-+ */
-+struct uf_readpages_context {
-+ lock_handle lh; /* lock handle filled by coord_by_key(); lh.node == 0 means no lock is held */
-+ coord_t coord; /* coord filled by the same coord_by_key() call */
-+};
-+
-+/* A callback function for readpages_unix_file/read_cache_pages.
-+ * If the item found for the page is not an existing extent item, -EIO is
-+ * returned.
-+ *
-+ * @data -- a pointer to struct uf_readpages_context object,
-+ * to save the twig lock and the coord between
-+ * read_cache_page iterations.
-+ * @page -- page to start read.
-+ *
-+ * Returns 0 if the page is already up to date or the read was started
-+ * successfully, otherwise the error code of the failing step. The page is
-+ * unlocked here on every path except a successful
-+ * reiser4_do_readpage_extent() call.
-+ */
-+static int uf_readpages_filler(void * data, struct page * page)
-+{
-+ struct uf_readpages_context *rc = data;
-+ jnode * node;
-+ int ret = 0;
-+ reiser4_extent *ext;
-+ __u64 ext_index;
-+ int cbk_done = 0;
-+ struct address_space * mapping = page->mapping;
-+
-+ if (PageUptodate(page)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ /* extra reference for the time the page may be unlocked below */
-+ page_cache_get(page);
-+
-+ if (rc->lh.node == 0) {
-+ /* no twig lock - have to do tree search. */
-+ reiser4_key key;
-+ repeat:
-+ /*
-+ * also entered by "goto repeat" below, after the cached coord
-+ * turned out not to cover this page; the page is unlocked
-+ * for the duration of the tree search
-+ */
-+ unlock_page(page);
-+ key_by_inode_and_offset_common(
-+ mapping->host, page_offset(page), &key);
-+ ret = coord_by_key(
-+ &get_super_private(mapping->host->i_sb)->tree,
-+ &key, &rc->coord, &rc->lh,
-+ ZNODE_READ_LOCK, FIND_EXACT,
-+ TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
-+ if (unlikely(ret))
-+ goto exit;
-+ lock_page(page);
-+ if (PageUptodate(page))
-+ goto unlock;
-+ cbk_done = 1;
-+ }
-+ ret = zload(rc->coord.node);
-+ if (unlikely(ret))
-+ goto unlock;
-+ if (!coord_is_existing_item(&rc->coord) ||
-+ !item_is_extent(&rc->coord)) {
-+ zrelse(rc->coord.node);
-+ ret = RETERR(-EIO);
-+ goto unlock;
-+ }
-+ ext = extent_by_coord(&rc->coord);
-+ ext_index = extent_unit_index(&rc->coord);
-+ if (page->index < ext_index ||
-+ page->index >= ext_index + extent_get_width(ext)) {
-+ /* the page index doesn't belong to the extent unit
-+ which the coord points to - release the lock and
-+ repeat with tree search. */
-+ zrelse(rc->coord.node);
-+ done_lh(&rc->lh);
-+ /* we can be here after a CBK call only in case of
-+ corruption of the tree or the tree lookup algorithm bug. */
-+ if (unlikely(cbk_done)) {
-+ ret = RETERR(-EIO);
-+ goto unlock;
-+ }
-+ goto repeat;
-+ }
-+ node = jnode_of_page(page);
-+ if (unlikely(IS_ERR(node))) {
-+ zrelse(rc->coord.node);
-+ ret = PTR_ERR(node);
-+ goto unlock;
-+ }
-+ ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
-+ jput(node);
-+ zrelse(rc->coord.node);
-+ /* on success the page is not unlocked here */
-+ if (likely(!ret))
-+ goto exit;
-+ unlock:
-+ unlock_page(page);
-+ exit:
-+ page_cache_release(page);
-+ return ret;
-+}
-+
-+/**
-+ * readpages_unix_file - called by the readahead code, starts reading for each
-+ * page of given list of pages
-+ * @file: not used by this function
-+ * @mapping: address space the pages are read into
-+ * @pages: list of pages to read
-+ * @nr_pages: not used by this function
-+ *
-+ * Runs read_cache_pages() with uf_readpages_filler() as the per-page
-+ * callback, sharing one lock handle across all pages. Returns the result of
-+ * read_cache_pages(), or the context initialization error, in which case the
-+ * pages are released with put_pages_list().
-+ */
-+int readpages_unix_file(
-+ struct file *file, struct address_space *mapping,
-+ struct list_head *pages, unsigned nr_pages)
-+{
-+ reiser4_context *ctx;
-+ struct uf_readpages_context rc;
-+ int ret;
-+
-+ ctx = reiser4_init_context(mapping->host->i_sb);
-+ if (IS_ERR(ctx)) {
-+ put_pages_list(pages);
-+ return PTR_ERR(ctx);
-+ }
-+ /* rc.coord is filled by the filler's first tree search */
-+ init_lh(&rc.lh);
-+ ret = read_cache_pages(mapping, pages, uf_readpages_filler, &rc);
-+ /* drop whatever lock the last filler call left in rc.lh */
-+ done_lh(&rc.lh);
-+ context_set_commit_async(ctx);
-+ /* close the transaction to protect further page allocation from deadlocks */
-+ reiser4_txn_restart(ctx);
-+ reiser4_exit_context(ctx);
-+ return ret;
-+}
-+
-+/*
-+ * Estimate the number of blocks to reserve for a read of @inode. @count is
-+ * not used: the estimate is whatever estimate_update_common() returns.
-+ */
-+static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
-+ loff_t count UNUSED_ARG)
-+{
-+ /* We should reserve one block, because of updating of the stat data
-+ item */
-+ assert("vs-1249",
-+ inode_file_plugin(inode)->estimate.update ==
-+ estimate_update_common);
-+ return estimate_update_common(inode);
-+}
-+
-+/* this is called with nonexclusive access obtained, file's container can not change */
-+/*
-+ * Builds a flow for @count bytes at *@off and repeatedly looks up the file
-+ * item for the flow's current key, calling that item's read method until the
-+ * flow is exhausted, an error occurs, or no item is found at the offset.
-+ * *@off is only read here, not updated.
-+ *
-+ * Returns the number of bytes consumed from the flow (count - flow.length)
-+ * if that is non-zero, otherwise the last result code.
-+ */
-+static ssize_t read_file(hint_t *hint, struct file *file, /* file to read from to */
-+ char __user *buf, /* address of user-space buffer */
-+ size_t count, /* number of bytes to read */
-+ loff_t *off)
-+{
-+ int result;
-+ struct inode *inode;
-+ flow_t flow;
-+ int (*read_f) (struct file *, flow_t *, hint_t *);
-+ coord_t *coord;
-+ znode *loaded;
-+
-+ inode = file->f_dentry->d_inode;
-+
-+ /* build flow */
-+ assert("vs-1250",
-+ inode_file_plugin(inode)->flow_by_inode ==
-+ flow_by_inode_unix_file);
-+ result =
-+ flow_by_inode_unix_file(inode, buf, 1 /* user space */ , count,
-+ *off, READ_OP, &flow);
-+ if (unlikely(result))
-+ return result;
-+
-+ /* get seal and coord sealed with it from reiser4 private data
-+ of struct file. The coord will tell us where our last read
-+ of this file finished, and the seal will help to determine
-+ if that location is still valid.
-+ */
-+ coord = &hint->ext_coord.coord;
-+ while (flow.length && result == 0) {
-+ result =
-+ find_file_item(hint, &flow.key, ZNODE_READ_LOCK, inode);
-+ if (cbk_errored(result))
-+ /* error happened */
-+ break;
-+
-+ if (coord->between != AT_UNIT) {
-+ /* there were no items corresponding to given offset */
-+ done_lh(hint->ext_coord.lh);
-+ break;
-+ }
-+
-+ /* remember the node so that the same one is released after the read */
-+ loaded = coord->node;
-+ result = zload(loaded);
-+ if (unlikely(result)) {
-+ done_lh(hint->ext_coord.lh);
-+ break;
-+ }
-+
-+ if (hint->ext_coord.valid == 0)
-+ validate_extended_coord(&hint->ext_coord,
-+ get_key_offset(&flow.key));
-+
-+ assert("vs-4", hint->ext_coord.valid == 1);
-+ assert("vs-33", hint->ext_coord.lh == &hint->lh);
-+ /* call item's read method */
-+ read_f = item_plugin_by_coord(coord)->s.file.read;
-+ result = read_f(file, &flow, hint);
-+ zrelse(loaded);
-+ done_lh(hint->ext_coord.lh);
-+ }
-+
-+ return (count - flow.length) ? (count - flow.length) : result;
-+}
-+
-+static ssize_t read_unix_file_container_tails(struct file*, char __user*, size_t, loff_t*);
-+
-+/**
-+ * read_unix_file - read of struct file_operations
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to read
-+ * @off: position in file to read from
-+ *
-+ * This is implementation of vfs's read method of struct file_operations for
-+ * unix file plugin.
-+ *
-+ * Dispatches on the file's container type: do_sync_read() for extents,
-+ * read_unix_file_container_tails() for tails or a file in (partial)
-+ * conversion, and 0 for an empty file. Returns the result of the chosen
-+ * reader, or the error code of the failing preparation step.
-+ */
-+ssize_t read_unix_file(struct file *file, char __user *buf, size_t read_amount,
-+ loff_t *off)
-+{
-+ reiser4_context *ctx;
-+ ssize_t result;
-+ struct inode *inode;
-+ struct unix_file_info *uf_info;
-+
-+ if (unlikely(read_amount == 0))
-+ return 0;
-+
-+ assert("umka-072", file != NULL);
-+ assert("umka-074", off != NULL);
-+ inode = file->f_dentry->d_inode;
-+ assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ uf_info = unix_file_inode_data(inode);
-+ if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+ /*
-+ * container type is determined under exclusive access, which
-+ * is then kept for the read itself
-+ */
-+ get_exclusive_access(uf_info);
-+ result = find_file_state(inode, uf_info);
-+ if (unlikely(result != 0))
-+ goto out;
-+ } else
-+ get_nonexclusive_access(uf_info);
-+ result = reiser4_grab_space_force(unix_file_estimate_read(inode, read_amount),
-+ BA_CAN_COMMIT);
-+ if (unlikely(result != 0))
-+ goto out;
-+ if (uf_info->container == UF_CONTAINER_EXTENTS){
-+ result = do_sync_read(file, buf, read_amount, off);
-+ } else if (uf_info->container == UF_CONTAINER_TAILS ||
-+ reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV) ||
-+ reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+ result = read_unix_file_container_tails(file, buf, read_amount, off);
-+ } else {
-+ assert("zam-1085", uf_info->container == UF_CONTAINER_EMPTY);
-+ result = 0;
-+ }
-+out:
-+ /* drop_access() releases whichever kind of access was taken above */
-+ drop_access(uf_info);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * read_unix_file_container_tails - read from a file not built of extents
-+ * @file: file to read from
-+ * @buf: address of user-space buffer
-+ * @read_amount: number of bytes to read
-+ * @off: position in file to read from; advanced by the number of bytes read
-+ *
-+ * Reads in chunks of at most PAGE_CACHE_SIZE bytes using read_file(). The
-+ * amount is clipped to the current file size on every iteration. The
-+ * destination chunk is faulted in before each read_file() call.
-+ *
-+ * Returns the number of bytes read, or an error code if nothing was read.
-+ * All exit paths after the hint has been loaded go through the common
-+ * cleanup (done_lh(), save_file_hint(), kfree()) so the hint is never
-+ * leaked.
-+ */
-+static ssize_t read_unix_file_container_tails(
-+	struct file *file, char __user *buf, size_t read_amount, loff_t *off)
-+{
-+	int result;
-+	struct inode *inode;
-+	hint_t *hint;
-+	struct unix_file_info *uf_info;
-+	size_t count, left, chunk;
-+	/* signed: read_file() reports errors as negative values */
-+	ssize_t read;
-+	loff_t size;
-+
-+	assert("umka-072", file != NULL);
-+	assert("umka-074", off != NULL);
-+	inode = file->f_dentry->d_inode;
-+	assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+	if (hint == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	result = load_file_hint(file, hint);
-+	if (result) {
-+		kfree(hint);
-+		return result;
-+	}
-+
-+	left = read_amount;
-+	count = 0;
-+	uf_info = unix_file_inode_data(inode);
-+	while (left > 0) {
-+		reiser4_txn_restart_current();
-+		size = i_size_read(inode);
-+		if (*off >= size)
-+			/* position to read from is past the end of file */
-+			break;
-+		if (*off + left > size)
-+			left = size - *off;
-+		/* never hand more than one page worth of data to read_file() */
-+		chunk = left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left;
-+		/* faultin user page */
-+		result = fault_in_pages_writeable(buf, chunk);
-+		if (result) {
-+			/*
-+			 * leave through the common cleanup below instead of
-+			 * returning directly, so that the hint is released
-+			 */
-+			result = RETERR(-EFAULT);
-+			break;
-+		}
-+
-+		read = read_file(hint, file, buf, chunk, off);
-+		if (read < 0) {
-+			result = read;
-+			break;
-+		}
-+		left -= read;
-+		buf += read;
-+
-+		/* update position in a file */
-+		*off += read;
-+		/* total number of read bytes */
-+		count += read;
-+	}
-+	done_lh(&hint->lh);
-+	save_file_hint(file, hint);
-+	kfree(hint);
-+	if (count)
-+		file_accessed(file);
-+	/* return number of read bytes or error code if nothing is read */
-+	return count ? count : result;
-+}
-+
-+/* This function takes care about @file's pages before the file is converted
-+ from tails to extents. It throws out all pages of the file, from index 0
-+ up to the page count derived from i_size rounded up to a whole page, and
-+ then calls unpack() (with "not forever") and returns its result.
-+
-+ The pages are thrown out in order to not manage few copies of the data
-+ (first in page cache and second one in tails them selves) for the case
-+ of mapping files consisting tails.
-+
-+ In the visible code this is called from mmap_unix_file() when the file
-+ is built of tails and is going to be mapped for write.
-+
-+ NOTE(review): no read-only check is done here; the caller visible in
-+ this file checks IS_RDONLY() before calling. */
-+static int check_pages_unix_file(struct file *file, struct inode *inode)
-+{
-+ reiser4_invalidate_pages(inode->i_mapping, 0,
-+ (inode->i_size + PAGE_CACHE_SIZE -
-+ 1) >> PAGE_CACHE_SHIFT, 0);
-+ return unpack(file, inode, 0 /* not forever */ );
-+}
-+
-+/**
-+ * mmap_unix_file - mmap of struct file_operations
-+ * @file: file to mmap
-+ * @vma: memory area describing the requested mapping; ->vm_flags is examined
-+ *
-+ * This is implementation of vfs's mmap method of struct file_operations for
-+ * unix file plugin. It converts file to extent if necessary. Sets
-+ * reiser4_inode's flag - REISER4_HAS_MMAP.
-+ *
-+ * Runs under exclusive access to the file. Returns the result of
-+ * generic_file_mmap(), or the error code of the failing preparation step.
-+ */
-+int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *inode;
-+ struct unix_file_info *uf_info;
-+ reiser4_block_nr needed;
-+
-+ inode = file->f_dentry->d_inode;
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ get_exclusive_access_careful(uf_info, inode);
-+
-+ /*
-+ * NOTE(review): the test is true when either VM_MAYWRITE or VM_SHARED
-+ * is set, not only when both are - confirm this is intended
-+ */
-+ if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
-+ /*
-+ * we need file built of extent items. If it is still built of
-+ * tail items we have to convert it. Find what items the file
-+ * is built of
-+ */
-+ result = find_file_state(inode, uf_info);
-+ if (result != 0) {
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
-+ uf_info->container == UF_CONTAINER_EXTENTS ||
-+ uf_info->container == UF_CONTAINER_EMPTY));
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ /*
-+ * invalidate all pages and convert file from tails to
-+ * extents
-+ */
-+ result = check_pages_unix_file(file, inode);
-+ if (result) {
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ }
-+ }
-+
-+ /*
-+ * generic_file_mmap will do update_atime. Grab space for stat data
-+ * update.
-+ */
-+ needed = inode_file_plugin(inode)->estimate.update(inode);
-+ result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
-+ if (result) {
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ result = generic_file_mmap(file, vma);
-+ if (result == 0) {
-+ /* mark file as having mapping. */
-+ reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
-+ }
-+
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * find_first_item
-+ * @inode: inode of the file to examine
-+ *
-+ * Finds file item which is responsible for first byte in the file.
-+ *
-+ * When the lookup finds an item exactly at offset 0, returns its item id
-+ * (EXTENT_POINTER_ID or FORMATTING_ID), -EIO if the id is neither of those
-+ * or the coord is not at a unit, or the zload() error. When the lookup does
-+ * not return CBK_COORD_FOUND, its result is returned unchanged.
-+ */
-+static int find_first_item(struct inode *inode)
-+{
-+	coord_t coord;
-+	lock_handle lh;
-+	reiser4_key key;
-+	int ret;
-+
-+	coord_init_zero(&coord);
-+	init_lh(&lh);
-+	inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
-+
-+	ret = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
-+				    inode);
-+	if (ret != CBK_COORD_FOUND)
-+		goto out;
-+
-+	if (coord.between != AT_UNIT) {
-+		ret = RETERR(-EIO);
-+		goto out;
-+	}
-+
-+	ret = zload(coord.node);
-+	if (ret != 0)
-+		goto out;
-+
-+	/* the node data is needed only to read the item id */
-+	ret = item_id_by_coord(&coord);
-+	zrelse(coord.node);
-+	if (ret != EXTENT_POINTER_ID && ret != FORMATTING_ID)
-+		ret = RETERR(-EIO);
-+out:
-+	done_lh(&lh);
-+	return ret;
-+}
-+
-+/**
-+ * open_unix_file
-+ * @inode: inode of the file being opened
-+ * @file: file object; passed on to extent2tail() when that conversion is
-+ * completed
-+ *
-+ * If filesystem is not readonly - complete uncompleted tail conversion if
-+ * there was one
-+ *
-+ * Returns 0 when there is nothing to do, otherwise the result of the
-+ * completed conversion, or -EIO if the first item of the file is neither an
-+ * extent nor a formatting item.
-+ */
-+int open_unix_file(struct inode *inode, struct file *file)
-+{
-+ int result;
-+ reiser4_context *ctx;
-+ struct unix_file_info *uf_info;
-+
-+ if (IS_RDONLY(inode))
-+ return 0;
-+
-+ /* check without taking any lock; re-checked under exclusive access below */
-+ if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
-+ return 0;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ get_exclusive_access_careful(uf_info, inode);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+ /*
-+ * other process completed the conversion
-+ */
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+ }
-+
-+ /*
-+ * file left in semi converted state after unclean shutdown or another
-+ * thread is doing conversion and dropped exclusive access which doing
-+ * balance dirty pages. Complete the conversion
-+ */
-+ result = find_first_item(inode);
-+ if (result == EXTENT_POINTER_ID)
-+ /*
-+ * first item is extent, therefore there was incomplete
-+ * tail2extent conversion. Complete it
-+ */
-+ result = tail2extent(unix_file_inode_data(inode));
-+ else if (result == FORMATTING_ID)
-+ /*
-+ * first item is formatting item, therefore there was
-+ * incomplete extent2tail conversion. Complete it
-+ */
-+ result = extent2tail(file, unix_file_inode_data(inode));
-+ else
-+ /* any other outcome of find_first_item() is reported as -EIO */
-+ result = -EIO;
-+
-+ assert("vs-1712",
-+ ergo(result == 0,
-+ (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
-+ !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
-+ drop_exclusive_access(uf_info);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/*
-+ * Values of the local "ea" variable of write_unix_file(), which tracks what
-+ * kind of access to the file is currently held.
-+ */
-+#define NEITHER_OBTAINED 0 /* no access is held */
-+#define EA_OBTAINED 1 /* exclusive access is held */
-+#define NEA_OBTAINED 2 /* nonexclusive access is held */
-+
-+/*
-+ * Release the access to the file that is currently held: exclusive if
-+ * @uf_info->exclusive_use is set, nonexclusive otherwise.
-+ */
-+static void drop_access(struct unix_file_info *uf_info)
-+{
-+	if (!uf_info->exclusive_use) {
-+		drop_nonexclusive_access(uf_info);
-+		return;
-+	}
-+	drop_exclusive_access(uf_info);
-+}
-+
-+/*
-+ * Debugging printk that prefixes the message with file name, line number and
-+ * function name and appends a newline.
-+ */
-+#define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
-+ __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
-+
-+/**
-+ * write_unix_file - private ->write() method of unix_file plugin.
-+ *
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @pos: position in file to write to
-+ * @cont: unused argument, as we don't perform plugin conversion when being
-+ * managed by unix_file plugin.
-+ */
-+ssize_t write_unix_file(struct file *file, const char __user *buf,
-+ size_t count, loff_t *pos, struct psched_context *cont)
-+{
-+ int result;
-+ reiser4_context *ctx;
-+ struct inode *inode;
-+ struct unix_file_info *uf_info;
-+ ssize_t written;
-+ int try_free_space;
-+ int to_write = PAGE_CACHE_SIZE * WRITE_GRANULARITY;
-+ size_t left;
-+ ssize_t (*write_op)(struct file *, struct inode *,
-+ const char __user *, size_t,
-+ loff_t *pos);
-+ int ea;
-+ loff_t new_size;
-+
-+ ctx = get_current_context();
-+ inode = file->f_dentry->d_inode;
-+
-+ assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+ assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
-+
-+ /* check amount of bytes to write and writing position */
-+ result = generic_write_checks(file, pos, &count, 0);
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+
-+ result = remove_suid(file->f_dentry);
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+ /* remove_suid might create a transaction */
-+ reiser4_txn_restart(ctx);
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ current->backing_dev_info = inode->i_mapping->backing_dev_info;
-+ written = 0;
-+ try_free_space = 0;
-+ left = count;
-+ ea = NEITHER_OBTAINED;
-+
-+ new_size = i_size_read(inode);
-+ if (*pos + count > new_size)
-+ new_size = *pos + count;
-+
-+ while (left) {
-+ if (left < to_write)
-+ to_write = left;
-+
-+ if (uf_info->container == UF_CONTAINER_EMPTY) {
-+ get_exclusive_access(uf_info);
-+ ea = EA_OBTAINED;
-+ if (uf_info->container != UF_CONTAINER_EMPTY) {
-+ /* file is made not empty by another process */
-+ drop_exclusive_access(uf_info);
-+ ea = NEITHER_OBTAINED;
-+ continue;
-+ }
-+ } else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
-+ /*
-+ * get exclusive access directly just to not have to
-+ * re-obtain it if file will appear empty
-+ */
-+ get_exclusive_access(uf_info);
-+ ea = EA_OBTAINED;
-+ result = find_file_state(inode, uf_info);
-+ if (result) {
-+ drop_exclusive_access(uf_info);
-+ ea = NEITHER_OBTAINED;
-+ break;
-+ }
-+ } else {
-+ get_nonexclusive_access(uf_info);
-+ ea = NEA_OBTAINED;
-+ }
-+
-+ /* either EA or NEA is obtained. Choose item write method */
-+ if (uf_info->container == UF_CONTAINER_EXTENTS) {
-+ /* file is built of extent items */
-+ write_op = reiser4_write_extent;
-+ } else if (uf_info->container == UF_CONTAINER_EMPTY) {
-+ /* file is empty */
-+ if (should_have_notail(uf_info, new_size))
-+ write_op = reiser4_write_extent;
-+ else
-+ write_op = reiser4_write_tail;
-+ } else {
-+ /* file is built of tail items */
-+ if (should_have_notail(uf_info, new_size)) {
-+ if (ea == NEA_OBTAINED) {
-+ drop_nonexclusive_access(uf_info);
-+ get_exclusive_access(uf_info);
-+ ea = EA_OBTAINED;
-+ }
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ /*
-+ * if file is being convered by another
-+ * process - wait until it completes
-+ */
-+ while (1) {
-+ if (reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV)) {
-+ drop_exclusive_access(uf_info);
-+ schedule();
-+ get_exclusive_access(uf_info);
-+ continue;
-+ }
-+ break;
-+ }
-+ if (uf_info->container == UF_CONTAINER_TAILS) {
-+ result = tail2extent(uf_info);
-+ if (result)
-+ break;
-+ }
-+ }
-+ drop_exclusive_access(uf_info);
-+ ea = NEITHER_OBTAINED;
-+ continue;
-+ }
-+ write_op = reiser4_write_tail;
-+ }
-+
-+ written = write_op(file, inode, buf, to_write, pos);
-+ if (written == -ENOSPC && try_free_space) {
-+ drop_access(uf_info);
-+ txnmgr_force_commit_all(inode->i_sb, 0);
-+ try_free_space = 0;
-+ continue;
-+ }
-+ if (written < 0) {
-+ drop_access(uf_info);
-+ result = written;
-+ break;
-+ }
-+ /* something is written. */
-+ if (uf_info->container == UF_CONTAINER_EMPTY) {
-+ assert("edward-1553", ea == EA_OBTAINED);
-+ uf_info->container =
-+ (write_op == reiser4_write_extent) ?
-+ UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
-+ } else {
-+ assert("edward-1554", ergo(uf_info->container == UF_CONTAINER_EXTENTS,
-+ write_op == reiser4_write_extent));
-+ assert("edward-1555", ergo(uf_info->container == UF_CONTAINER_TAILS,
-+ write_op == reiser4_write_tail));
-+ }
-+ if (*pos + written > inode->i_size)
-+ INODE_SET_FIELD(inode, i_size, *pos + written);
-+ file_update_time(file);
-+ result = reiser4_update_sd(inode);
-+ if (result) {
-+ current->backing_dev_info = NULL;
-+ drop_access(uf_info);
-+ context_set_commit_async(ctx);
-+ return result;
-+ }
-+ drop_access(uf_info);
-+ ea = NEITHER_OBTAINED;
-+ reiser4_txn_restart(ctx);
-+ current->journal_info = NULL;
-+ /*
-+ * tell VM how many pages were dirtied. Maybe number of pages
-+ * which were dirty already should not be counted
-+ */
-+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-+ (written + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE);
-+ current->journal_info = ctx;
-+
-+ left -= written;
-+ buf += written;
-+ *pos += written;
-+ }
-+ if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
-+ reiser4_txn_restart_current();
-+ grab_space_enable();
-+ result = reiser4_sync_file_common(file, file->f_dentry,
-+ 0 /* data and stat data */);
-+ if (result)
-+ warning("reiser4-7", "failed to sync file %llu",
-+ (unsigned long long)get_inode_oid(inode));
-+ }
-+
-+ current->backing_dev_info = NULL;
-+
-+ /*
-+ * return number of written bytes or error code if nothing is
-+ * written. Note, that it does not work correctly in case when
-+ * sync_unix_file returns error
-+ */
-+ return (count - left) ? (count - left) : result;
-+}
-+
-+/**
-+ * release_unix_file - release of struct file_operations
-+ * @inode: inode of released file
-+ * @file: file to release
-+ *
-+ * Implementation of release method of struct file_operations for unix file
-+ * plugin. If last reference to inode is released - convert all extent items
-+ * into tail items if necessary. Frees reiser4 specific file data.
-+ *
-+ * Returns the error of reiser4_init_context() if the context cannot be
-+ * created, the result of extent2tail() if a conversion was attempted, and 0
-+ * otherwise.
-+ */
-+int release_unix_file(struct inode *inode, struct file *file)
-+{
-+ reiser4_context *ctx;
-+ struct unix_file_info *uf_info;
-+ int result;
-+ int in_reiser4;
-+
-+ /* sampled before reiser4_init_context() is called below */
-+ in_reiser4 = is_in_reiser4_context();
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ result = 0;
-+ if (in_reiser4 == 0) {
-+ uf_info = unix_file_inode_data(inode);
-+
-+ get_exclusive_access_careful(uf_info, inode);
-+ /*
-+ * Convert only when all of the following hold: the dentry has a
-+ * single reference, the body is stored in extents,
-+ * should_have_notail() is false for the current size and
-+ * rofs_inode() is false.
-+ */
-+ if (atomic_read(&file->f_dentry->d_count) == 1 &&
-+ uf_info->container == UF_CONTAINER_EXTENTS &&
-+ !should_have_notail(uf_info, inode->i_size) &&
-+ !rofs_inode(inode)) {
-+ result = extent2tail(file, uf_info);
-+ /* failure is logged; result is still returned to the caller */
-+ if (result != 0) {
-+ warning("nikita-3233",
-+ "Failed (%d) to convert in %s (%llu)",
-+ result, __FUNCTION__,
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+ }
-+ }
-+ drop_exclusive_access(uf_info);
-+ } else {
-+ /*
-+ we are within reiser4 context already. How latter is
-+ possible? Simple:
-+
-+ (gdb) bt
-+ #0 get_exclusive_access ()
-+ #2 0xc01e56d3 in release_unix_file ()
-+ #3 0xc01c3643 in reiser4_release ()
-+ #4 0xc014cae0 in __fput ()
-+ #5 0xc013ffc3 in remove_vm_struct ()
-+ #6 0xc0141786 in exit_mmap ()
-+ #7 0xc0118480 in mmput ()
-+ #8 0xc0133205 in oom_kill ()
-+ #9 0xc01332d1 in out_of_memory ()
-+ #10 0xc013bc1d in try_to_free_pages ()
-+ #11 0xc013427b in __alloc_pages ()
-+ #12 0xc013f058 in do_anonymous_page ()
-+ #13 0xc013f19d in do_no_page ()
-+ #14 0xc013f60e in handle_mm_fault ()
-+ #15 0xc01131e5 in do_page_fault ()
-+ #16 0xc0104935 in error_code ()
-+ #17 0xc025c0c6 in __copy_to_user_ll ()
-+ #18 0xc01d496f in reiser4_read_tail ()
-+ #19 0xc01e4def in read_unix_file ()
-+ #20 0xc01c3504 in reiser4_read ()
-+ #21 0xc014bd4f in vfs_read ()
-+ #22 0xc014bf66 in sys_read ()
-+ */
-+ /* no access is taken and no conversion is attempted on this path */
-+ warning("vs-44", "out of memory?");
-+ }
-+
-+ /* executed on both paths above */
-+ reiser4_free_file_fsdata(file);
-+
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/*
-+ * Force the formatting plugin with id NEVER_TAILS_FORMATTING_ID into the
-+ * PSET_FORMATTING slot of @inode's plugin set.
-+ *
-+ * This function does not call reiser4_update_sd() itself.
-+ */
-+static void set_file_notail(struct inode *inode)
-+{
-+	formatting_plugin *tplug;
-+
-+	tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
-+	/*
-+	 * NOTE(review): force_plugin_pset() returns an int elsewhere in the
-+	 * tree; it is dropped here because this helper returns void.
-+	 */
-+	force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
-+}
-+
-+/*
-+ * unpack - if file is built of tails, convert it to extents
-+ * @filp: not used by this function
-+ * @inode: inode of the file to convert
-+ * @forever: when non-zero, also switch the file to the never-tails
-+ *	formatting plugin and write the stat data
-+ *
-+ * Must be called with exclusive access to the file held (asserted below).
-+ * While waiting for a concurrent conversion the exclusive access is dropped
-+ * and re-taken.
-+ *
-+ * Returns 0 on success or the first error reported by find_file_state(),
-+ * tail2extent(), reiser4_grab_space() or reiser4_update_sd().
-+ */
-+static int unpack(struct file *filp, struct inode *inode, int forever)
-+{
-+	int result = 0;
-+	struct unix_file_info *uf_info;
-+
-+	uf_info = unix_file_inode_data(inode);
-+	assert("vs-1628", ea_obtained(uf_info));
-+
-+	result = find_file_state(inode, uf_info);
-+	if (result)
-+		return result;
-+	assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
-+
-+	if (uf_info->container == UF_CONTAINER_TAILS) {
-+		/*
-+		 * if file is being converted by another process - wait until
-+		 * it completes
-+		 */
-+		while (reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)) {
-+			drop_exclusive_access(uf_info);
-+			schedule();
-+			get_exclusive_access(uf_info);
-+		}
-+		/* the container may have changed while access was dropped */
-+		if (uf_info->container == UF_CONTAINER_TAILS) {
-+			result = tail2extent(uf_info);
-+			if (result)
-+				return result;
-+		}
-+	}
-+	if (forever) {
-+		/* save new formatting plugin in stat data */
-+		__u64 tograb;
-+
-+		set_file_notail(inode);
-+
-+		grab_space_enable();
-+		tograb = inode_file_plugin(inode)->estimate.update(inode);
-+		result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
-+		/*
-+		 * do not update the stat data when the reservation failed;
-+		 * previously this error was silently overwritten
-+		 */
-+		if (result)
-+			return result;
-+		result = reiser4_update_sd(inode);
-+	}
-+
-+	return result;
-+}
-+
-+/*
-+ * Implementation of the vfs ioctl method of struct file_operations for the
-+ * unix file plugin.
-+ *
-+ * REISER4_IOC_UNPACK runs unpack() with "forever" set, under exclusive access
-+ * to the file. Every other command yields RETERR(-ENOSYS). An error from
-+ * reiser4_init_context() is returned as is.
-+ */
-+int
-+ioctl_unix_file(struct inode *inode, struct file *filp,
-+		unsigned int cmd, unsigned long arg UNUSED_ARG)
-+{
-+	reiser4_context *ctx;
-+	struct unix_file_info *uf_info;
-+	int result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	if (cmd == REISER4_IOC_UNPACK) {
-+		uf_info = unix_file_inode_data(inode);
-+		get_exclusive_access(uf_info);
-+		result = unpack(filp, inode, 1 /* forever */ );
-+		drop_exclusive_access(uf_info);
-+	} else {
-+		result = RETERR(-ENOSYS);
-+	}
-+
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * Implementation of the vfs bmap method of struct address_space_operations
-+ * for the unix file plugin.
-+ *
-+ * Looks up the item covering logical block @lblock of the file behind
-+ * @mapping and asks the item plugin's get_block method for the block number.
-+ * Returns that block number; every failure code is returned through the same
-+ * sector_t value.
-+ */
-+sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
-+{
-+	struct inode *inode = mapping->host;
-+	reiser4_context *ctx;
-+	reiser4_key key;
-+	coord_t coord;
-+	lock_handle lh;
-+	item_plugin *iplug;
-+	sector_t block;
-+	sector_t result;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	/* key of the first byte of the requested block */
-+	key_by_inode_and_offset_common(inode,
-+				       (loff_t) lblock * current_blocksize,
-+				       &key);
-+
-+	init_lh(&lh);
-+	result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
-+				       inode);
-+	if (cbk_errored(result))
-+		goto out;
-+
-+	result = zload(coord.node);
-+	if (result)
-+		goto out;
-+
-+	iplug = item_plugin_by_coord(&coord);
-+	if (!iplug->s.file.get_block) {
-+		result = RETERR(-EINVAL);
-+	} else {
-+		result = iplug->s.file.get_block(&coord, lblock, &block);
-+		if (result == 0)
-+			result = block;
-+	}
-+	zrelse(coord.node);
-+ out:
-+	/* common exit: release the lock handle and leave the context */
-+	done_lh(&lh);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/**
-+ * flow_by_inode_unix_file - initialize structure flow
-+ * @inode: inode of the file the read or write is for
-+ * @buf: buffer to perform read to or write from
-+ * @user: flag showing whether @buf is user space or kernel space
-+ * @size: size of buffer @buf
-+ * @off: start offset for read or write
-+ * @op: READ or WRITE
-+ * @flow: flow to initialize
-+ *
-+ * Stores length, data pointer, user flag and operation in @flow and builds
-+ * the key of position @off into @flow->key. Returns whatever
-+ * key_by_inode_and_offset_common() returns.
-+ */
-+int flow_by_inode_unix_file(struct inode *inode,
-+			    const char __user *buf, int user,
-+			    loff_t size, loff_t off,
-+			    rw_op op, flow_t *flow)
-+{
-+	assert("nikita-1100", inode != NULL);
-+
-+	flow->op = op;
-+	flow->user = user;
-+	flow->length = size;
-+	/* copies the pointer value itself, not the bytes it points to */
-+	memcpy(&flow->data, &buf, sizeof(buf));
-+
-+	assert("nikita-1931", inode_file_plugin(inode) != NULL);
-+	assert("nikita-1932",
-+	       key_by_inode_and_offset_common ==
-+	       inode_file_plugin(inode)->key_by_inode);
-+
-+	/* calculate key of the i/o position and put it into flow->key */
-+	return key_by_inode_and_offset_common(inode, off, &flow->key);
-+}
-+
-+/* plugin->u.file.set_plug_in_sd = NULL
-+ plugin->u.file.set_plug_in_inode = NULL
-+ plugin->u.file.create_blank_sd = NULL */
-+/* plugin->u.file.delete */
-+/*
-+ plugin->u.file.add_link = reiser4_add_link_common
-+ plugin->u.file.rem_link = NULL */
-+
-+/*
-+ * plugin->u.file.owns_item
-+ *
-+ * owns_item_common() plus a check that the item plugin at @coord belongs to
-+ * the UNIX_FILE_METADATA_ITEM_TYPE group. Returns 1 when both checks pass and
-+ * 0 otherwise; the item id is additionally asserted to be an extent pointer
-+ * or a formatting item.
-+ */
-+int
-+owns_item_unix_file(const struct inode *inode /* object to check against */ ,
-+		    const coord_t * coord /* coord to check */ )
-+{
-+	if (!owns_item_common(inode, coord))
-+		return 0;
-+
-+	if (!plugin_of_group(item_plugin_by_coord(coord),
-+			     UNIX_FILE_METADATA_ITEM_TYPE))
-+		return 0;
-+
-+	assert("vs-547",
-+	       item_id_by_coord(coord) == EXTENT_POINTER_ID ||
-+	       item_id_by_coord(coord) == FORMATTING_ID);
-+	return 1;
-+}
-+
-+/*
-+ * Truncate the body of @inode to attr->ia_size, bracketed by a SAFE_TRUNCATE
-+ * safe link: the link is added before truncate_file_body() runs and deleted
-+ * afterwards.
-+ *
-+ * Returns the result of the grab / add-link / truncate sequence. A failure to
-+ * delete the safe link is only logged and does not change the return value.
-+ */
-+static int setattr_truncate(struct inode *inode, struct iattr *attr)
-+{
-+ int result;
-+ int s_result;
-+ loff_t old_size;
-+ reiser4_tree *tree;
-+
-+ inode_check_scale(inode, inode->i_size, attr->ia_size);
-+
-+ /* remembered only for the warning below */
-+ old_size = inode->i_size;
-+ tree = reiser4_tree_by_inode(inode);
-+
-+ /* each step runs only if the previous one succeeded */
-+ result = safe_link_grab(tree, BA_CAN_COMMIT);
-+ if (result == 0)
-+ result = safe_link_add(inode, SAFE_TRUNCATE);
-+ if (result == 0)
-+ result = truncate_file_body(inode, attr);
-+ if (result)
-+ warning("vs-1588", "truncate_file failed: oid %lli, "
-+ "old size %lld, new size %lld, retval %d",
-+ (unsigned long long)get_inode_oid(inode),
-+ old_size, attr->ia_size, result);
-+
-+ /* safe link removal is attempted regardless of the result above */
-+ s_result = safe_link_grab(tree, BA_CAN_COMMIT);
-+ if (s_result == 0)
-+ s_result =
-+ safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE);
-+ if (s_result != 0) {
-+ warning("nikita-3417", "Cannot kill safelink %lli: %i",
-+ (unsigned long long)get_inode_oid(inode), s_result);
-+ }
-+ safe_link_release(tree);
-+ return result;
-+}
-+
-+/*
-+ * plugin->u.file.setattr method
-+ *
-+ * A request without ATTR_SIZE is passed straight to reiser4_setattr_common().
-+ * A size change is done by setattr_truncate() inside its own reiser4 context
-+ * and under exclusive access to the file, to avoid races.
-+ */
-+int setattr_unix_file(struct dentry *dentry,	/* Object to change attributes */
-+		      struct iattr *attr /* change description */ )
-+{
-+	struct inode *inode;
-+	struct unix_file_info *uf_info;
-+	reiser4_context *ctx;
-+	int result;
-+
-+	if (!(attr->ia_valid & ATTR_SIZE))
-+		return reiser4_setattr_common(dentry, attr);
-+
-+	/*
-+	 * truncate does reservation itself and requires exclusive access
-+	 * obtained
-+	 */
-+	inode = dentry->d_inode;
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access_careful(uf_info, inode);
-+	result = setattr_truncate(inode, attr);
-+	drop_exclusive_access(uf_info);
-+
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return result;
-+}
-+
-+/*
-+ * plugin->u.file.init_inode_data
-+ *
-+ * Initializes the unix-file part of @inode: the container is EMPTY for a
-+ * newly created file (@create != 0) and UNKNOWN otherwise; latch, formatting
-+ * plugin and exclusive_use are reset, then inode ordering is initialized.
-+ */
-+void
-+init_inode_data_unix_file(struct inode *inode,
-+			  reiser4_object_create_data * crd, int create)
-+{
-+	struct unix_file_info *uf = unix_file_inode_data(inode);
-+
-+	if (create)
-+		uf->container = UF_CONTAINER_EMPTY;
-+	else
-+		uf->container = UF_CONTAINER_UNKNOWN;
-+
-+	init_rwsem(&uf->latch);
-+	uf->tplug = inode_formatting_plugin(inode);
-+	uf->exclusive_use = 0;
-+#if REISER4_DEBUG
-+	/* debug-only bookkeeping of access holders */
-+	uf->ea_owner = NULL;
-+	atomic_set(&uf->nr_neas, 0);
-+#endif
-+
-+	init_inode_ordering(inode, crd, create);
-+}
-+
-+/**
-+ * delete_object_unix_file - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * Truncates file to length 0, removes stat data and safe link.
-+ *
-+ * Returns 0 immediately when the inode has REISER4_NO_SD set. Otherwise
-+ * returns the result of reiser4_delete_object_common(); a failure to truncate
-+ * the body is only logged.
-+ */
-+int delete_object_unix_file(struct inode *inode)
-+{
-+	struct unix_file_info *uf_info;
-+	int result;
-+
-+	if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
-+		return 0;
-+
-+	/* truncate file body first */
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access(uf_info);
-+	result = shorten_file(inode, 0 /* size */ );
-+	drop_exclusive_access(uf_info);
-+
-+	if (result)
-+		/*
-+		 * cast so that the argument matches %llu regardless of how
-+		 * the oid type is defined, as the other warnings here do
-+		 */
-+		warning("edward-1556",
-+			"failed to truncate file (%llu) on removal: %d",
-+			(unsigned long long)get_inode_oid(inode), result);
-+
-+	/* remove stat data and safe link */
-+	return reiser4_delete_object_common(inode);
-+}
-+
-+/*
-+ * prepare_write for the unix file plugin.
-+ *
-+ * Under exclusive access: determine the file state, refuse with -EINVAL when
-+ * the file is built of tails, otherwise delegate to do_prepare_write().
-+ * Returns the error of reiser4_init_context() or find_file_state(), -EINVAL,
-+ * or the result of do_prepare_write().
-+ */
-+int
-+prepare_write_unix_file(struct file *file, struct page *page,
-+			unsigned from, unsigned to)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	struct unix_file_info *uf_info;
-+	reiser4_context *ctx;
-+	int ret;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	uf_info = unix_file_inode_data(inode);
-+	get_exclusive_access(uf_info);
-+
-+	ret = find_file_state(inode, uf_info);
-+	if (ret == 0 && uf_info->container == UF_CONTAINER_TAILS)
-+		ret = -EINVAL;
-+	else if (ret == 0)
-+		ret = do_prepare_write(file, page, from, to);
-+
-+	drop_exclusive_access(uf_info);
-+
-+	/* don't commit transaction under inode semaphore */
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+	return ret;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/file_conversion.c linux-2.6.24/fs/reiser4/plugin/file/file_conversion.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/file_conversion.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/file_conversion.c 2008-01-25 11:39:06.988221084 +0300
-@@ -0,0 +1,689 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser,
-+ licensing governed by reiser4/README */
-+
-+/**
-+ * This file contains plugin schedule hooks, and plugin conversion methods.
-+ *
-+ * A plugin schedule hook makes a decision (at a plugin schedule point) about
-+ * the most reasonable plugin for managing a regular file. Usually such a
-+ * decision is made by some O(1)-heuristic.
-+ *
-+ * By default we assign a unix_file plugin id when writing incompressible file
-+ * managed by cryptcompress plugin id. Currently used heuristic for estimating
-+ * compressibility is very simple: if first complete logical cluster (64K by
-+ * default) of a file is incompressible, then we make a decision, that the whole
-+ * file is incompressible (*).
-+ *
-+ * To enable a conversion we install a special "magic" compression mode plugin
-+ * (CONVX_COMPRESSION_MODE_ID, see plugin/compress/compress_mode.c for details)
-+ * at file creation time (**).
-+ *
-+ * Note, that we don't perform back conversion (unix_file->cryptcompress)
-+ * because of compatibility reasons (see http://dev.namesys.com/Version4.X.Y
-+ * for details).
-+ *
-+ * The conversion is accompanied by rebuilding disk structures of a file, so it
-+ * is important to protect them from being interacted with other plugins which
-+ * don't expect them to be in such inconsistent state. For this to be protected
-+ * we serialize readers and writers of a file's conversion set (FCS).
-+ *
-+ * We define FCS as a file plugin installed in inode's pset plus file's data
-+ * and metadata that this file plugin manipulates with (items, etc).
-+ * Note, that FCS is defined per file.
-+ * FCS reader is defined as a set of instruction of the following type:
-+ * {inode_file_plugin(inode)->method()} (I.e. retrieving a file plugin id
-+ * conjoined with all method's instructions should be atomic).
-+ * FCS writer is a set of instructions that perform file plugin conversion
-+ * (convert items, update pset, etc).
-+ * Example:
-+ * reiser4_write_careful() supplied to VFS as a ->write() file operation is
-+ * composed of the following (optional) instructions:
-+ * 1 2 3
-+ * *********************** ####### -------------------------------------------->
-+ *
-+ * 1) "****" are instructions performed on behalf of cryptcompress file plugin;
-+ * 2) "####" is a FCS writer (performing a conversion cryptcompress->unix_file);
-+ * 3) "----" are instructions performed on behalf of unix_file plugin;
-+ * Here (1) and (3) are FCS readers.
-+ *
-+ * In this example FCS readers and writers are already serialized (by design),
-+ * however there can be readers and writers executing at the same time in
-+ * different contexts, so we need a common mechanism of serialization.
-+ *
-+ * Currently serialization of FCS readers and writers is performed via acquiring
-+ * a special per-inode rw-semaphore (conv_sem). And yes, {down, up}_read is for
-+ * FCS readers, and {down, up}_write is for FCS writers, see the macros below
-+ * for passive/active protection.
-+ *
-+ * ---
-+ * (*) This heuristic can be changed to a better one (benchmarking is needed).
-+ * (**) Such technique allows to keep enable/disable state on disk.
-+ */
-+
-+#include "../../inode.h"
-+#include "../cluster.h"
-+#include "file.h"
-+
-+/*
-+ * Non-zero when the compression mode plugin installed in @inode is the one
-+ * with id CONVX_COMPRESSION_MODE_ID.
-+ */
-+#define conversion_enabled(inode) \
-+ (inode_compression_mode_plugin(inode) == \
-+ compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID))
-+
-+/**
-+ * Located sections (readers and writers of @pset) are not permanently
-+ * critical: cryptcompress file can be converted only if the conversion
-+ * is enabled (see the macro above). Also we don't perform back
-+ * conversion. The following helper macro is a sanity check to decide
-+ * if we need the protection (locks are always additional overheads).
-+ *
-+ * Evaluates to non-zero only when @inode is currently managed by the
-+ * cryptcompress file plugin and conversion_enabled(@inode) is true.
-+ */
-+#define should_protect(inode) \
-+ (inode_file_plugin(inode) == \
-+ file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) && \
-+ conversion_enabled(inode))
-+/**
-+ * To avoid confusion with read/write file operations, we'll speak about
-+ * "passive" protection for FCS readers and "active" protection for FCS
-+ * writers. All methods with active or passive protection have suffix
-+ * "careful".
-+ */
-+/**
-+ * Macros for passive protection.
-+ *
-+ * Construct invariant operation to be supplied to VFS.
-+ * The macro accepts the following lexemes:
-+ * @type - type of the value represented by the compound statement;
-+ * @method - name of an operation to be supplied to VFS (reiser4 file
-+ * plugin also should contain a method with such name).
-+ * @args - parenthesized argument list that is pasted after @method.
-+ *
-+ * The macro body refers to a variable named "inode" that is not a macro
-+ * parameter, so it must be in scope where the macro is expanded.
-+ *
-+ * conv_sem is taken for read only if should_protect() is true, and dropped
-+ * again at once if should_protect() turned false while waiting for it.
-+ *
-+ * NOTE(review): the final up_read() is guarded by a fresh should_protect()
-+ * evaluation made after the method ran; this is balanced only if the answer
-+ * cannot change while the semaphore is held for read - confirm.
-+ */
-+#define PROT_PASSIVE(type, method, args) \
-+({ \
-+ type _result; \
-+ struct rw_semaphore * guard = \
-+ &reiser4_inode_data(inode)->conv_sem; \
-+ \
-+ if (should_protect(inode)) { \
-+ down_read(guard); \
-+ if (!should_protect(inode)) \
-+ up_read(guard); \
-+ } \
-+ _result = inode_file_plugin(inode)->method args; \
-+ if (should_protect(inode)) \
-+ up_read(guard); \
-+ _result; \
-+})
-+
-+/*
-+ * Passive protection for a file plugin method whose result is not used:
-+ * calls inode_file_plugin(inode)->@method with the parenthesized list @args,
-+ * holding conv_sem for read around the call when should_protect() is true.
-+ * Like its value-returning counterpart it relies on a variable named "inode"
-+ * being in scope at the point of expansion.
-+ */
-+#define PROT_PASSIVE_VOID(method, args) \
-+({ \
-+ struct rw_semaphore * guard = \
-+ &reiser4_inode_data(inode)->conv_sem; \
-+ \
-+ if (should_protect(inode)) { \
-+ down_read(guard); \
-+ if (!should_protect(inode)) \
-+ up_read(guard); \
-+ } \
-+ inode_file_plugin(inode)->method args; \
-+ \
-+ if (should_protect(inode)) \
-+ up_read(guard); \
-+})
-+
-+/*
-+ * Pass management to the unix-file plugin with "notail" policy.
-+ *
-+ * Replaces the file and formatting plugins in the inode's plugin set, clears
-+ * the cryptcompress plugin mask bits and the PLUGIN_STAT extension bit, and
-+ * initializes the unix-file specific part of the inode. @file is not used.
-+ *
-+ * Returns 0 on success or the error of aset_set_unsafe(). If the second
-+ * aset_set_unsafe() fails, the file plugin set by the first one is not
-+ * reverted here.
-+ */
-+static int __cryptcompress2unixfile(struct file *file, struct inode * inode)
-+{
-+ int result;
-+ reiser4_inode *info;
-+ struct unix_file_info * uf;
-+ info = reiser4_inode_data(inode);
-+
-+ result = aset_set_unsafe(&info->pset,
-+ PSET_FILE,
-+ (reiser4_plugin *)
-+ file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
-+ if (result)
-+ return result;
-+ result = aset_set_unsafe(&info->pset,
-+ PSET_FORMATTING,
-+ (reiser4_plugin *)
-+ formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID));
-+ if (result)
-+ return result;
-+ /* get rid of non-standard plugins */
-+ info->plugin_mask &= ~cryptcompress_mask;
-+ /* get rid of plugin stat-data extension */
-+ info->extmask &= ~(1 << PLUGIN_STAT);
-+
-+ reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
-+
-+ /* FIXME use init_inode_data_unix_file() instead,
-+ but avoid init_inode_ordering() */
-+ /* Init unix-file specific part of inode */
-+ uf = unix_file_inode_data(inode);
-+ uf->container = UF_CONTAINER_UNKNOWN;
-+ init_rwsem(&uf->latch);
-+ uf->tplug = inode_formatting_plugin(inode);
-+ uf->exclusive_use = 0;
-+#if REISER4_DEBUG
-+ uf->ea_owner = NULL;
-+ atomic_set(&uf->nr_neas, 0);
-+#endif
-+ /**
-+ * we were careful for file_ops, inode_ops and as_ops
-+ * to be invariant for plugin conversion, so there is
-+ * no need to update ones already installed in the
-+ * vfs's residence.
-+ */
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debug-only check used after conversion has been disabled: returns 1 when
-+ * the inode's extension mask has the LIGHT_WEIGHT_STAT, UNIX_STAT,
-+ * LARGE_TIMES_STAT and PLUGIN_STAT bits set and its plugin mask has the
-+ * PSET_COMPRESSION_MODE bit set; returns 0 otherwise.
-+ */
-+static int disabled_conversion_inode_ok(struct inode * inode)
-+{
-+	__u64 extmask = reiser4_inode_data(inode)->extmask;
-+	__u16 plugin_mask = reiser4_inode_data(inode)->plugin_mask;
-+
-+	if (!(extmask & (1 << LIGHT_WEIGHT_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << UNIX_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << LARGE_TIMES_STAT)))
-+		return 0;
-+	if (!(extmask & (1 << PLUGIN_STAT)))
-+		return 0;
-+	return (plugin_mask & (1 << PSET_COMPRESSION_MODE)) != 0;
-+}
-+#endif
-+
-+/**
-+ * Disable future attempts to schedule/convert file plugin.
-+ * This function is called by plugin schedule hooks.
-+ *
-+ * Conversion is disabled by forcing a compression mode plugin whose id
-+ * differs from CONVX_COMPRESSION_MODE_ID (LATTD_COMPRESSION_MODE_ID is used)
-+ * into the inode's plugin set. Returns the result of force_plugin_pset().
-+ */
-+static int disable_conversion(struct inode * inode)
-+{
-+	reiser4_plugin *mode;
-+	int ret;
-+
-+	mode = (reiser4_plugin *)compression_mode_plugin_by_id
-+		(LATTD_COMPRESSION_MODE_ID);
-+	ret = force_plugin_pset(inode, PSET_COMPRESSION_MODE, mode);
-+	assert("edward-1500",
-+	       ergo(!ret, disabled_conversion_inode_ok(inode)));
-+	return ret;
-+}
-+
-+/**
-+ * Check if we really have achieved plugin scheduling point.
-+ *
-+ * - @pos beyond i_size: conversion is disabled and the result of
-+ *   disable_conversion() is returned;
-+ * - @pos inside the first logical cluster: nothing to do, returns 0;
-+ * - otherwise @cont->state is set to PSCHED_SCHED_POINT and 0 is returned.
-+ *
-+ * @clust is not used here.
-+ */
-+static int check_psched_point(struct inode * inode,
-+			      loff_t pos /* position in the
-+					    file to write from */,
-+			      struct cluster_handle * clust,
-+			      struct psched_context * cont)
-+{
-+	assert("edward-1505", conversion_enabled(inode));
-+	/*
-+	 * if file size is more than cluster size, then compressible
-+	 * status must be figured out (i.e. compression was disabled,
-+	 * or file plugin was converted to unix_file)
-+	 */
-+	assert("edward-1506", inode->i_size <= inode_cluster_size(inode));
-+
-+	if (pos > inode->i_size)
-+		/* first logical cluster will contain a (partial) hole */
-+		return disable_conversion(inode);
-+
-+	if (pos >= inode_cluster_size(inode)) {
-+		/*
-+		 * here we have:
-+		 * cluster_size <= pos <= i_size <= cluster_size,
-+		 * and, hence, pos == i_size == cluster_size
-+		 */
-+		assert("edward-1498",
-+		       pos == inode->i_size &&
-+		       pos == inode_cluster_size(inode));
-+		assert("edward-1539", cont != NULL);
-+		assert("edward-1540", cont->state == PSCHED_INVAL_STATE);
-+
-+		cont->state = PSCHED_SCHED_POINT;
-+	}
-+	/* else: still writing to the first logical cluster */
-+	return 0;
-+}
-+
-+/*
-+ * Prepare @clust for reading the previous logical cluster: install the
-+ * zeroed @hint, step the cluster index back by one and recompute nr_pages
-+ * for that cluster. The cluster handle must be at index 1 on entry.
-+ */
-+static void start_check_compressibility(struct inode * inode,
-+					struct cluster_handle * clust,
-+					hint_t * hint)
-+{
-+	assert("edward-1507", clust->index == 1);
-+	assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc));
-+	assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ);
-+
-+	hint_init_zero(hint);
-+	clust->hint = hint;
-+
-+	clust->index -= 1;
-+	clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
-+
-+	/* first logical cluster (of index #0) must be complete */
-+	assert("edward-1510",
-+	       lbytes(clust->index, inode) == inode_cluster_size(inode));
-+}
-+
-+/*
-+ * Undo start_check_compressibility(): unset the hint currently installed in
-+ * @clust, install @hint in its place and step the cluster index forward by
-+ * one. @inode is not used.
-+ */
-+static void finish_check_compressibility(struct inode * inode,
-+					 struct cluster_handle * clust,
-+					 hint_t * hint)
-+{
-+	reiser4_unset_hint(clust->hint);
-+	clust->hint = hint;
-+	clust->index += 1;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * Debug-only check of the item @hint points to: it must be a ctail item,
-+ * must not be an unprepped ctail, and its key offset plus its number of
-+ * units must equal the disk cluster size recorded in @hint.
-+ * Returns 1 when all three hold, 0 otherwise.
-+ */
-+static int prepped_dclust_ok(hint_t * hint)
-+{
-+	reiser4_key key;
-+	coord_t * coord = &hint->ext_coord.coord;
-+
-+	item_key_by_coord(coord, &key);
-+
-+	if (item_id_by_coord(coord) != CTAIL_ID)
-+		return 0;
-+	if (coord_is_unprepped_ctail(coord))
-+		return 0;
-+	return get_key_offset(&key) + nr_units_ctail(coord) ==
-+		dclust_get_extension_dsize(hint);
-+}
-+#endif
-+
-+/* half of @size; the argument is parenthesized so any expression is safe */
-+#define fifty_persent(size) ((size) >> 1)
-+/*
-+ * evaluation of data compressibility: true when the compressed size @osize
-+ * is strictly less than half of the input size @isize
-+ */
-+#define data_is_compressible(osize, isize) \
-+	((osize) < fifty_persent(isize))
-+
-+/**
-+ * A simple O(1)-heuristic for compressibility.
-+ * This is called not more than one time per file's life.
-+ * Read first logical cluster (of index #0) and estimate its compressibility.
-+ * Save estimation result in @cont.
-+ *
-+ * On success returns 0 with @cont->state set to PSCHED_REMAINS_OLD (data is
-+ * compressible) or PSCHED_ASSIGNED_NEW (it is not); the page cluster grabbed
-+ * here is not released on this path. On failure returns the error code.
-+ *
-+ * NOTE(review): start_check_compressibility() points clust->hint at the local
-+ * tmp_hint and decrements clust->index; only the success path calls
-+ * finish_check_compressibility() to restore them. On the failure paths
-+ * @clust appears to be left referring to this function's stack - confirm
-+ * that callers do not use clust->hint after an error.
-+ */
-+static int read_check_compressibility(struct inode * inode,
-+ struct cluster_handle * clust,
-+ struct psched_context * cont)
-+{
-+ int i;
-+ int result;
-+ __u32 dst_len;
-+ hint_t tmp_hint;
-+ hint_t * cur_hint = clust->hint;
-+ assert("edward-1541", cont->state == PSCHED_SCHED_POINT);
-+
-+ start_check_compressibility(inode, clust, &tmp_hint);
-+
-+ reset_cluster_pgset(clust, cluster_nrpages(inode));
-+ result = grab_page_cluster(inode, clust, READ_OP);
-+ if (result)
-+ return result;
-+ /* Read page cluster here */
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ struct page *page = clust->pages[i];
-+ lock_page(page);
-+ result = do_readpage_ctail(inode, clust, page,
-+ ZNODE_READ_LOCK);
-+ unlock_page(page);
-+ if (result)
-+ goto error;
-+ }
-+ tfm_cluster_clr_uptodate(&clust->tc);
-+
-+ cluster_set_tfm_act(&clust->tc, TFMA_WRITE);
-+
-+ if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
-+ /* length of compressed data is known, no need to compress */
-+ assert("edward-1511",
-+ znode_is_any_locked(tmp_hint.lh.node));
-+ assert("edward-1512",
-+ WITH_DATA(tmp_hint.ext_coord.coord.node,
-+ prepped_dclust_ok(&tmp_hint)));
-+ dst_len = dclust_get_extension_dsize(&tmp_hint);
-+ }
-+ else {
-+ /* compress the cluster ourselves to learn the output size */
-+ struct tfm_cluster * tc = &clust->tc;
-+ compression_plugin * cplug = inode_compression_plugin(inode);
-+ result = grab_tfm_stream(inode, tc, INPUT_STREAM);
-+ if (result)
-+ goto error;
-+ /* copy every page of the cluster into the input stream */
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ char *data;
-+ lock_page(clust->pages[i]);
-+ BUG_ON(!PageUptodate(clust->pages[i]));
-+ data = kmap(clust->pages[i]);
-+ memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
-+ data, PAGE_CACHE_SIZE);
-+ kunmap(clust->pages[i]);
-+ unlock_page(clust->pages[i]);
-+ }
-+ result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
-+ if (result)
-+ goto error;
-+ result = grab_coa(tc, cplug);
-+ if (result)
-+ goto error;
-+ tc->len = tc->lsize = lbytes(clust->index, inode);
-+ assert("edward-1513", tc->len == inode_cluster_size(inode));
-+ /* dst_len is passed in as the output capacity and updated by compress() */
-+ dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
-+ cplug->compress(get_coa(tc, cplug->h.id, tc->act),
-+ tfm_input_data(clust), tc->len,
-+ tfm_output_data(clust), &dst_len);
-+ assert("edward-1514",
-+ dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
-+ }
-+ finish_check_compressibility(inode, clust, cur_hint);
-+ cont->state =
-+ (data_is_compressible(dst_len, inode_cluster_size(inode)) ?
-+ PSCHED_REMAINS_OLD :
-+ PSCHED_ASSIGNED_NEW);
-+ return 0;
-+ error:
-+ /* release the page cluster grabbed above */
-+ put_page_cluster(clust, inode, READ_OP);
-+ return result;
-+}
-+
-+/*
-+ * Cut disk cluster of index @idx: removes from the tree the key range that
-+ * starts at the cluster's first offset and ends at its last byte
-+ * (first offset + cluster size - 1). Returns the result of
-+ * reiser4_cut_tree().
-+ */
-+static int cut_disk_cluster(struct inode * inode, cloff_t idx)
-+{
-+	reiser4_key first;
-+	reiser4_key last;
-+
-+	assert("edward-1515", inode_file_plugin(inode) ==
-+	       file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+
-+	key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &first);
-+
-+	/* same key, offset moved to the last byte of the cluster */
-+	last = first;
-+	set_key_offset(&last,
-+		       get_key_offset(&first) + inode_cluster_size(inode) - 1);
-+
-+	return reiser4_cut_tree(reiser4_tree_by_inode(inode),
-+				&first, &last, inode, 0);
-+}
-+
-+/*
-+ * Reserve disk space for converting the first logical cluster of a
-+ * cryptcompress file into unix-file extents.
-+ *
-+ * With N = cluster_nrpages(inode), the amount grabbed covers:
-+ *  1. killing the ctail items;
-+ *  2. inserting N unformatted nodes;
-+ *  3. inserting N extent units (worst case: single-block extents);
-+ *  4. drilling to the leaf level by coord_by_key();
-+ *  5. a possible update of the stat-data.
-+ *
-+ * Returns the result of reiser4_grab_space().
-+ */
-+static int reserve_cryptcompress2unixfile(struct inode *inode)
-+{
-+	reiser4_tree *tree = reiser4_tree_by_inode(inode);
-+	/* number of unformatted nodes which will be created (N) */
-+	reiser4_block_nr nr_unfm = cluster_nrpages(inode);
-+	reiser4_block_nr needed;
-+
-+	grab_space_enable();
-+
-+	needed = 2 * tree->height;
-+	needed += nr_unfm;
-+	needed += nr_unfm * estimate_one_insert_into_item(tree);
-+	needed += 1 + estimate_one_insert_item(tree);
-+	needed += inode_file_plugin(inode)->estimate.update(inode);
-+
-+	return reiser4_grab_space(needed, BA_CAN_COMMIT);
-+}
-+
-+/**
-+ * Convert cryptcompress file plugin to unix_file plugin.
-+ *
-+ * Steps, each aborting the conversion on error: reserve space, cut disk
-+ * cluster #0, switch the inode to the unix-file plugin, create extents for
-+ * every page saved in @cont, mark the container as extents and update the
-+ * stat data. Space that is still grabbed is freed on every exit path.
-+ *
-+ * Returns 0 on success or the error code of the failed step.
-+ */
-+static int cryptcompress2unixfile(struct file * file, struct inode * inode,
-+ struct psched_context * cont)
-+{
-+ int i;
-+ int result = 0;
-+ struct cryptcompress_info *cr_info;
-+ struct unix_file_info *uf_info;
-+ assert("edward-1516", cont->pages[0]->index == 0);
-+
-+ /* release all cryptcompress-specific resources */
-+ /* NOTE(review): cr_info is assigned here but not read in this function */
-+ cr_info = cryptcompress_inode_data(inode);
-+ result = reserve_cryptcompress2unixfile(inode);
-+ if (result)
-+ goto out;
-+ /* tell kill_hook to not truncate pages */
-+ /* NOTE(review): this flag is not cleared anywhere in this function */
-+ reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
-+ result = cut_disk_cluster(inode, 0);
-+ if (result)
-+ goto out;
-+ /* captured jnode of cluster and associated resources (pages,
-+ reserved disk space) were released by ->kill_hook() method
-+ of the item plugin */
-+
-+ result = __cryptcompress2unixfile(file, inode);
-+ if (result)
-+ goto out;
-+ /* At this point file is managed by unix file plugin */
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ assert("edward-1518",
-+ ergo(jprivate(cont->pages[0]),
-+ !jnode_is_cluster_page(jprivate(cont->pages[0]))));
-+ /* re-insert every saved page as part of an extent */
-+ for(i = 0; i < cont->nr_pages; i++) {
-+ assert("edward-1519", cont->pages[i]);
-+ assert("edward-1520", PageUptodate(cont->pages[i]));
-+
-+ result = find_or_create_extent(cont->pages[i]);
-+ if (result)
-+ break;
-+ }
-+ if (unlikely(result))
-+ goto out;
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ result = reiser4_update_sd(inode);
-+ out:
-+ all_grabbed2free();
-+ return result;
-+}
-+
-+/* name under which the conversion routine above is also made available */
-+#define convert_file_plugin cryptcompress2unixfile
-+
-+/**
-+ * This is called by ->write() method of a cryptcompress file plugin.
-+ * Make a decision about the most reasonable file plugin id to manage
-+ * the file.
-+ *
-+ * Does nothing (returns 0) when conversion is not enabled for @inode or when
-+ * the scheduling point has not been reached. At the scheduling point the
-+ * first logical cluster is checked for compressibility:
-+ *  - compressible: the page cluster is released and conversion is disabled;
-+ *  - incompressible: the grabbed pages are handed over to @cont for the
-+ *    conversion that follows.
-+ * Returns 0 or the error of the failed step.
-+ */
-+int write_pschedule_hook(struct file * file, struct inode * inode,
-+			 loff_t pos, struct cluster_handle * clust,
-+			 struct psched_context * cont)
-+{
-+	int ret;
-+
-+	if (!conversion_enabled(inode))
-+		return 0;
-+
-+	ret = check_psched_point(inode, pos, clust, cont);
-+	if (ret != 0 || cont->state != PSCHED_SCHED_POINT)
-+		return ret;
-+
-+	ret = read_check_compressibility(inode, clust, cont);
-+	if (ret != 0)
-+		return ret;
-+
-+	if (cont->state != PSCHED_REMAINS_OLD) {
-+		assert("edward-1543", cont->state == PSCHED_ASSIGNED_NEW);
-+		/*
-+		 * page cluster is grabbed and uptodate. It will be
-+		 * released with a pgset after plugin conversion is
-+		 * finished, see put_psched_context().
-+		 */
-+		reiser4_unset_hint(clust->hint);
-+		move_cluster_pgset(clust, &cont->pages, &cont->nr_pages);
-+		return 0;
-+	}
-+
-+	/* the file stays with the old plugin */
-+	put_page_cluster(clust, inode, READ_OP);
-+	return disable_conversion(inode);
-+}
-+
-+/**
-+ * setattr_pschedule_hook - plugin scheduling hook of cryptcompress ->setattr()
-+ *
-+ * This is called by ->setattr() method of cryptcompress file plugin.
-+ * If conversion is still enabled for @inode, disable it and return the
-+ * result of that; otherwise there is nothing to do and 0 is returned.
-+ */
-+int setattr_pschedule_hook(struct inode * inode)
-+{
-+	if (!conversion_enabled(inode))
-+		return 0;
-+	return disable_conversion(inode);
-+}
-+
-+/*
-+ * Zero every byte of @cont, so that it carries no pages and its state is
-+ * the zero-valued enumerator (PSCHED_INVAL_STATE).
-+ */
-+static inline void init_psched_context(struct psched_context * cont)
-+{
-+ memset(cont, 0, sizeof(*cont));
-+}
-+
-+/*
-+ * Release what plugin scheduling context @cont still holds: hand the
-+ * @cont->nr_pages pages of @cont->pages to __put_page_cluster() and free
-+ * the array itself. A context without a page array is left untouched.
-+ */
-+static inline void done_psched_context(struct psched_context * cont,
-+				       struct inode * inode)
-+{
-+	if (cont->pages == NULL)
-+		return;
-+	__put_page_cluster(0, cont->nr_pages, cont->pages, inode);
-+	kfree(cont->pages);
-+}
-+/**
-+ * Here are wrappers with "protection", aka Reiser4 "careful" methods.
-+ * They are used by vfs (as methods of file_ops, inode_ops or as_ops),
-+ * which is not aware of plugin conversion performed by Reiser4.
-+ */
-+
-+/*
-+ * Wrappers with active protection for:
-+ *
-+ * ->write();
-+ */
-+
-+/*
-+ * ->write() file operation supplied to VFS.
-+ * Write a file in 3 steps (some of them can be optional).
-+ *
-+ * @file: file to write to
-+ * @buf: user-space buffer with the data
-+ * @count: number of bytes to write
-+ * @off: position in the file, passed through to the plugin ->write()
-+ *
-+ * Returns the total number of bytes written by the initial and by the
-+ * new file plugin. A negative error code is returned when nothing has
-+ * been written and either the first write, the plugin conversion or the
-+ * second write failed; if some bytes were written the short count is
-+ * reported instead of the error.
-+ */
-+ssize_t reiser4_write_careful(struct file *file, const char __user *buf,
-+			      size_t count, loff_t *off)
-+{
-+	int result = 0;
-+	reiser4_context *ctx;
-+	ssize_t written_old = 0; /* bytes written with initial plugin */
-+	ssize_t written_new = 0; /* bytes written with new plugin */
-+	struct psched_context cont;
-+	struct inode * inode = file->f_dentry->d_inode;
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+	init_psched_context(&cont);
-+	mutex_lock(&inode->i_mutex);
-+	/**
-+	 * First step.
-+	 * Start write with initial file plugin.
-+	 * Keep a plugin schedule status at @cont (if any).
-+	 */
-+	written_old = inode_file_plugin(inode)->write(file,
-+						      buf,
-+						      count,
-+						      off,
-+						      &cont);
-+	if (cont.state != PSCHED_ASSIGNED_NEW || written_old < 0)
-+		goto exit;
-+	/**
-+	 * Second step.
-+	 * New file plugin has been scheduled.
-+	 * Perform conversion to the new plugin.
-+	 */
-+	down_read(&reiser4_inode_data(inode)->conv_sem);
-+	result = convert_file_plugin(file, inode, &cont);
-+	up_read(&reiser4_inode_data(inode)->conv_sem);
-+	if (result) {
-+		warning("edward-1544",
-+			"Inode %llu: file plugin conversion failed (%d)",
-+			(unsigned long long)get_inode_oid(inode),
-+			result);
-+		context_set_commit_async(ctx);
-+		goto exit;
-+	}
-+	reiser4_txn_restart(ctx);
-+	/**
-+	 * Third step:
-+	 * Finish write with the new file plugin.
-+	 */
-+	assert("edward-1536",
-+	       inode_file_plugin(inode) ==
-+	       file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
-+
-+	written_new = inode_file_plugin(inode)->write(file,
-+						      buf + written_old,
-+						      count - written_old,
-+						      off,
-+						      NULL);
-+ exit:
-+	mutex_unlock(&inode->i_mutex);
-+	done_psched_context(&cont, inode);
-+	reiser4_exit_context(ctx);
-+
-+	if (written_old < 0)
-+		/* the very first write failed */
-+		return written_old;
-+	if (written_old == 0) {
-+		/*
-+		 * Not a single byte has been written: do not hide a
-+		 * failure of conversion or of the second write behind
-+		 * a "0 bytes written" result.
-+		 */
-+		if (result)
-+			return result;
-+		if (written_new < 0)
-+			return written_new;
-+	}
-+	/* partial success: report the bytes that did get written */
-+	return written_old + (written_new < 0 ? 0 : written_new);
-+}
-+
-+/* Wrappers with passive protection for:
-+ *
-+ * ->open();
-+ * ->read();
-+ * ->ioctl();
-+ * ->mmap();
-+ * ->release();
-+ * ->bmap().
-+ */
-+
-+/* ->open(): passes (inode, file) to the "open" method via PROT_PASSIVE() */
-+int reiser4_open_careful(struct inode *inode, struct file *file)
-+{
-+ return PROT_PASSIVE(int, open, (inode, file));
-+}
-+
-+/* ->read(): passes (file, buf, size, off) to the "read" method via PROT_PASSIVE() */
-+ssize_t reiser4_read_careful(struct file * file, char __user * buf,
-+ size_t size, loff_t * off)
-+{
-+ /* NOTE(review): @inode is not named below; presumably PROT_PASSIVE()
-+ expands to code that uses it - confirm against the macro */
-+ struct inode * inode = file->f_dentry->d_inode;
-+ return PROT_PASSIVE(ssize_t, read, (file, buf, size, off));
-+}
-+
-+/* ->ioctl(): passes (inode, filp, cmd, arg) to the "ioctl" method via PROT_PASSIVE() */
-+int reiser4_ioctl_careful(struct inode *inode, struct file *filp,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ return PROT_PASSIVE(int, ioctl, (inode, filp, cmd, arg));
-+}
-+
-+/* ->mmap(): passes (file, vma) to the "mmap" method via PROT_PASSIVE() */
-+int reiser4_mmap_careful(struct file *file, struct vm_area_struct *vma)
-+{
-+ /* NOTE(review): @inode is not named below; presumably PROT_PASSIVE()
-+ expands to code that uses it - confirm against the macro */
-+ struct inode *inode = file->f_dentry->d_inode;
-+ return PROT_PASSIVE(int, mmap, (file, vma));
-+}
-+
-+/* ->release(): passes (inode, file) to the "release" method via PROT_PASSIVE() */
-+int reiser4_release_careful(struct inode *inode, struct file *file)
-+{
-+ return PROT_PASSIVE(int, release, (inode, file));
-+}
-+
-+/* ->bmap(): passes (mapping, lblock) to the "bmap" method via PROT_PASSIVE() */
-+sector_t reiser4_bmap_careful(struct address_space * mapping, sector_t lblock)
-+{
-+ /* NOTE(review): @inode is not named below; presumably PROT_PASSIVE()
-+ expands to code that uses it - confirm against the macro */
-+ struct inode *inode = mapping->host;
-+ return PROT_PASSIVE(sector_t, bmap, (mapping, lblock));
-+}
-+
-+/*
-+ * Wrappers without protection for:
-+ *
-+ * ->setattr()
-+ */
-+int reiser4_setattr(struct dentry *dentry, struct iattr *attr)
-+{
-+	struct inode *inode = dentry->d_inode;
-+
-+	/* dispatch to ->setattr() of the file plugin installed in the inode */
-+	return inode_file_plugin(inode)->setattr(dentry, attr);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/file.h linux-2.6.24/fs/reiser4/plugin/file/file.h
---- linux-2.6.24.orig/fs/reiser4/plugin/file/file.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/file.h 2008-01-25 11:40:16.694168755 +0300
-@@ -0,0 +1,331 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* this file contains declarations of methods implementing
-+ file plugins (UNIX_FILE_PLUGIN_ID, CRYPTCOMPRESS_FILE_PLUGIN_ID
-+ and SYMLINK_FILE_PLUGIN_ID) */
-+
-+#if !defined( __REISER4_FILE_H__ )
-+#define __REISER4_FILE_H__
-+
-+/* possible states when scheduling a new file plugin */
-+typedef enum {
-+ PSCHED_INVAL_STATE, /* invalid state */
-+ PSCHED_SCHED_POINT, /* scheduling point has been achieved */
-+ PSCHED_REMAINS_OLD, /* made a decision to be managed by old plugin */
-+ PSCHED_ASSIGNED_NEW /* new plugin has been scheduled */
-+} psched_state;
-+
-+/* context of scheduling a new file plugin, filled in while writing */
-+struct psched_context {
-+ int nr_pages; /* number of entries in @pages */
-+ struct page **pages; /* page array; NULL when the context holds no pages */
-+ psched_state state; /* one of the psched_state values above */
-+};
-+
-+/**
-+ * Declarations of common/careful/generic methods.
-+ * Suppose ->foo() is a vfs method (of f_ops, i_ops, or a_ops);
-+ * Then common reiser4 method for foo looks like reiser4_foo_common;
-+ * careful method looks like reiser4_foo_careful;
-+ * generic method looks like reiser4_foo.
-+ *
-+ * Common method is a simple instruction set eligible for more
-+ * than one plugin id.
-+ *
-+ * Generic method looks at the plugin installed in inode's
-+ * plugin set and calls its appropriate method.
-+ *
-+ * Careful method looks like generic method with protected pset
-+ * (see plugin/file/file_conversion.c for details).
-+ */
-+
-+/* inode operations */
-+int reiser4_setattr(struct dentry *, struct iattr *);
-+
-+/* file operations */
-+ssize_t reiser4_read_careful(struct file *, char __user *buf,
-+ size_t count, loff_t *off);
-+ssize_t reiser4_write_careful(struct file *, const char __user *buf,
-+ size_t count, loff_t * off);
-+int reiser4_ioctl_careful(struct inode *inode, struct file *filp,
-+ unsigned int cmd, unsigned long arg);
-+int reiser4_mmap_careful(struct file *, struct vm_area_struct *);
-+int reiser4_open_careful(struct inode *inode, struct file *file);
-+int reiser4_release_careful(struct inode *, struct file *);
-+int reiser4_sync_file_common(struct file *, struct dentry *, int datasync);
-+
-+/* address space operations */
-+int reiser4_readpage(struct file *, struct page *);
-+int reiser4_readpages(struct file*, struct address_space*, struct list_head*,
-+ unsigned);
-+int reiser4_writepages(struct address_space *, struct writeback_control *);
-+int reiser4_prepare_write(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+int reiser4_commit_write(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+sector_t reiser4_bmap_careful(struct address_space *, sector_t lblock);
-+
-+/*
-+ * Private methods of unix-file plugin
-+ * (UNIX_FILE_PLUGIN_ID)
-+ */
-+
-+/* private inode operations */
-+int setattr_unix_file(struct dentry *, struct iattr *);
-+
-+/* private file operations */
-+
-+ssize_t read_unix_file(struct file *, char __user *buf, size_t read_amount,
-+ loff_t *off);
-+ssize_t write_unix_file(struct file *, const char __user *buf, size_t write_amount,
-+ loff_t * off, struct psched_context * cont);
-+int ioctl_unix_file(struct inode *, struct file *, unsigned int cmd,
-+ unsigned long arg);
-+int mmap_unix_file(struct file *, struct vm_area_struct *);
-+int open_unix_file(struct inode *, struct file *);
-+int release_unix_file(struct inode *, struct file *);
-+
-+/* private address space operations */
-+int readpage_unix_file(struct file *, struct page *);
-+int readpages_unix_file(struct file*, struct address_space*, struct list_head*, unsigned);
-+int writepages_unix_file(struct address_space *, struct writeback_control *);
-+int prepare_write_unix_file(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+int commit_write_unix_file(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+sector_t bmap_unix_file(struct address_space *, sector_t lblock);
-+
-+/* other private methods */
-+int delete_object_unix_file(struct inode *);
-+int flow_by_inode_unix_file(struct inode *, const char __user *buf,
-+ int user, loff_t, loff_t, rw_op, flow_t *);
-+int owns_item_unix_file(const struct inode *, const coord_t *);
-+void init_inode_data_unix_file(struct inode *, reiser4_object_create_data *,
-+ int create);
-+
-+/*
-+ * Private methods of cryptcompress file plugin
-+ * (CRYPTCOMPRESS_FILE_PLUGIN_ID)
-+ */
-+
-+/* private inode operations */
-+int setattr_cryptcompress(struct dentry *, struct iattr *);
-+
-+/* private file operations */
-+ssize_t read_cryptcompress(struct file *, char __user *buf,
-+ size_t count, loff_t *off);
-+ssize_t write_cryptcompress(struct file *, const char __user *buf,
-+ size_t count, loff_t * off,
-+ struct psched_context *cont);
-+int ioctl_cryptcompress(struct inode *, struct file *, unsigned int cmd,
-+ unsigned long arg);
-+int mmap_cryptcompress(struct file *, struct vm_area_struct *);
-+int open_cryptcompress(struct inode *, struct file *);
-+int release_cryptcompress(struct inode *, struct file *);
-+
-+/* private address space operations */
-+int readpage_cryptcompress(struct file *, struct page *);
-+int readpages_cryptcompress(struct file*, struct address_space*,
-+ struct list_head*, unsigned);
-+int writepages_cryptcompress(struct address_space *,
-+ struct writeback_control *);
-+int prepare_write_cryptcompress(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+int commit_write_cryptcompress(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+sector_t bmap_cryptcompress(struct address_space *, sector_t lblock);
-+
-+/* other private methods */
-+int flow_by_inode_cryptcompress(struct inode *, const char __user *buf,
-+ int user, loff_t, loff_t, rw_op, flow_t *);
-+int key_by_inode_cryptcompress(struct inode *, loff_t off, reiser4_key *);
-+int create_object_cryptcompress(struct inode *, struct inode *,
-+ reiser4_object_create_data *);
-+int delete_object_cryptcompress(struct inode *);
-+void init_inode_data_cryptcompress(struct inode *, reiser4_object_create_data *,
-+ int create);
-+int cut_tree_worker_cryptcompress(tap_t *, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ struct inode *object, int truncate,
-+ int *progress);
-+void destroy_inode_cryptcompress(struct inode *);
-+
-+/*
-+ * Private methods of symlink file plugin
-+ * (SYMLINK_FILE_PLUGIN_ID)
-+ */
-+int reiser4_create_symlink(struct inode *symlink, struct inode *dir,
-+ reiser4_object_create_data *);
-+void destroy_inode_symlink(struct inode *);
-+
-+/*
-+ * all the write into unix file is performed by item write method. Write method
-+ * of unix file plugin only decides which item plugin (extent or tail) and in
-+ * which mode (one from the enum below) to call
-+ */
-+typedef enum {
-+ FIRST_ITEM = 1,
-+ APPEND_ITEM = 2,
-+ OVERWRITE_ITEM = 3
-+} write_mode_t;
-+
-+/* unix file may be in one of the following states */
-+typedef enum {
-+ UF_CONTAINER_UNKNOWN = 0,
-+ UF_CONTAINER_TAILS = 1,
-+ UF_CONTAINER_EXTENTS = 2,
-+ UF_CONTAINER_EMPTY = 3
-+} file_container_t;
-+
-+struct formatting_plugin;
-+struct inode;
-+
-+/* unix file plugin specific part of reiser4 inode */
-+struct unix_file_info {
-+ /*
-+ * this read-write lock protects file containerization change. Accesses
-+ * which do not change file containerization (see file_container_t)
-+ * (read, readpage, writepage, write (until tail conversion is
-+ * involved)) take read-lock. Accesses which modify file
-+ * containerization (truncate, conversion from tail to extent and back)
-+ * take write-lock.
-+ */
-+ struct rw_semaphore latch;
-+ /* this enum specifies which items are used to build the file */
-+ file_container_t container;
-+ /*
-+ * plugin which controls when file is to be converted to extents and
-+ * back to tail
-+ */
-+ struct formatting_plugin *tplug;
-+ /* if this is set, file is in exclusive use */
-+ int exclusive_use;
-+#if REISER4_DEBUG
-+ /* pointer to task struct of thread owning exclusive access to file */
-+ void *ea_owner;
-+ atomic_t nr_neas;
-+ void *last_reader;
-+#endif
-+};
-+
-+struct unix_file_info *unix_file_inode_data(const struct inode *inode);
-+void get_exclusive_access(struct unix_file_info *);
-+void drop_exclusive_access(struct unix_file_info *);
-+void get_nonexclusive_access(struct unix_file_info *);
-+void drop_nonexclusive_access(struct unix_file_info *);
-+int try_to_get_nonexclusive_access(struct unix_file_info *);
-+int find_file_item(hint_t *, const reiser4_key *, znode_lock_mode,
-+ struct inode *);
-+int find_file_item_nohint(coord_t *, lock_handle *,
-+ const reiser4_key *, znode_lock_mode,
-+ struct inode *);
-+
-+int load_file_hint(struct file *, hint_t *);
-+void save_file_hint(struct file *, const hint_t *);
-+
-+#include "../item/extent.h"
-+#include "../item/tail.h"
-+#include "../item/ctail.h"
-+
-+struct uf_coord {
-+ coord_t coord;
-+ lock_handle *lh;
-+ int valid;
-+ union {
-+ struct extent_coord_extension extent;
-+ struct tail_coord_extension tail;
-+ struct ctail_coord_extension ctail;
-+ } extension;
-+};
-+
-+#include "../../forward.h"
-+#include "../../seal.h"
-+#include "../../lock.h"
-+
-+/*
-+ * This structure is used to speed up file operations (reads and writes). A
-+ * hint is a suggestion about where a key resolved to last time. A seal
-+ * indicates whether a node has been modified since a hint was last recorded.
-+ * You check the seal, and if the seal is still valid, you can use the hint
-+ * without traversing the tree again.
-+ */
-+struct hint {
-+ seal_t seal; /* a seal over last file item accessed */
-+ uf_coord_t ext_coord;
-+ loff_t offset;
-+ znode_lock_mode mode;
-+ lock_handle lh;
-+};
-+
-+/* return the "valid" flag kept in the extended coord of @hint */
-+static inline int hint_is_valid(hint_t * hint)
-+{
-+ return hint->ext_coord.valid;
-+}
-+
-+/* mark the extended coord of @hint as valid */
-+static inline void hint_set_valid(hint_t * hint)
-+{
-+ hint->ext_coord.valid = 1;
-+}
-+
-+/* mark the extended coord of @hint as not valid */
-+static inline void hint_clr_valid(hint_t * hint)
-+{
-+ hint->ext_coord.valid = 0;
-+}
-+
-+int load_file_hint(struct file *, hint_t *);
-+void save_file_hint(struct file *, const hint_t *);
-+void hint_init_zero(hint_t *);
-+void reiser4_set_hint(hint_t *, const reiser4_key *, znode_lock_mode);
-+int hint_is_set(const hint_t *);
-+void reiser4_unset_hint(hint_t *);
-+
-+int reiser4_update_file_size(struct inode *, loff_t, int update_sd);
-+int cut_file_items(struct inode *, loff_t new_size,
-+ int update_sd, loff_t cur_size,
-+ int (*update_actor) (struct inode *, loff_t, int));
-+#if REISER4_DEBUG
-+
-+/* return 1 if exclusive access is obtained, 0 - otherwise */
-+static inline int ea_obtained(struct unix_file_info * uf_info)
-+{
-+	/* a read lock that can be taken means the latch is not write-held */
-+	if (down_read_trylock(&uf_info->latch)) {
-+		up_read(&uf_info->latch);
-+		return 0;
-+	}
-+	return 1;
-+}
-+
-+#endif
-+
-+#define WRITE_GRANULARITY 32
-+
-+int tail2extent(struct unix_file_info *);
-+int extent2tail(struct file *, struct unix_file_info *);
-+
-+int goto_right_neighbor(coord_t *, lock_handle *);
-+int find_or_create_extent(struct page *);
-+int equal_to_ldk(znode *, const reiser4_key *);
-+
-+void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh);
-+
-+/*
-+ * return 1 when @cbk_result is neither CBK_COORD_NOTFOUND nor
-+ * CBK_COORD_FOUND, 0 otherwise
-+ */
-+static inline int cbk_errored(int cbk_result)
-+{
-+	return !(cbk_result == CBK_COORD_NOTFOUND ||
-+		 cbk_result == CBK_COORD_FOUND);
-+}
-+
-+/* __REISER4_FILE_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/Makefile linux-2.6.24/fs/reiser4/plugin/file/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/file/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/Makefile 2008-01-25 11:39:06.988221084 +0300
-@@ -0,0 +1,7 @@
-+# Add file_plugins.o to the object list selected by CONFIG_REISER4_FS.
-+obj-$(CONFIG_REISER4_FS) += file_plugins.o
-+
-+# Objects the composite file_plugins.o is made of.
-+file_plugins-objs := \
-+ file.o \
-+ tail_conversion.o \
-+ symlink.o \
-+ cryptcompress.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/symfile.c linux-2.6.24/fs/reiser4/plugin/file/symfile.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/symfile.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/symfile.c 2008-01-25 11:39:06.992222114 +0300
-@@ -0,0 +1,87 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Symfiles are a generalization of Unix symlinks.
-+
-+ A symfile when read behaves as though you took its contents and
-+ substituted them into the reiser4 naming system as the right hand side
-+ of an assignment, and then read that which you had assigned to it.
-+
-+ A key issue for symfiles is how to implement writes through to
-+ subfiles. In general, one must have some method of determining what
-+ of that which is written to the symfile is written to what subfile.
-+ This can be done by use of custom plugin methods written by users, or
-+ by using a few general methods we provide for those willing to endure
-+ the insertion of delimiters into what is read.
-+
-+ Writing to symfiles without delimiters to denote what is written to
-+ what subfile is not supported by any plugins we provide in this
-+ release. Our most sophisticated support for writes is that embodied
-+ by the invert plugin (see invert.c).
-+
-+ A read only version of the /etc/passwd file might be
-+ constructed as a symfile whose contents are as follows:
-+
-+ /etc/passwd/userlines/*
-+
-+ or
-+
-+ /etc/passwd/userlines/demidov+/etc/passwd/userlines/edward+/etc/passwd/userlines/reiser+/etc/passwd/userlines/root
-+
-+ or
-+
-+ /etc/passwd/userlines/(demidov+edward+reiser+root)
-+
-+ A symfile with contents
-+
-+ /filenameA+"(some text stored in the uninvertable symfile)+/filenameB
-+
-+ will return when read
-+
-+ The contents of filenameAsome text stored in the uninvertable symfileThe contents of filenameB
-+
-+ and write of what has been read will not be possible to implement as
-+ an identity operation because there are no delimiters denoting the
-+ boundaries of what is to be written to what subfile.
-+
-+ Note that one could make this a read/write symfile if one specified
-+ delimiters, and the write method understood those delimiters delimited
-+ what was written to subfiles.
-+
-+ So, specifying the symfile in a manner that allows writes:
-+
-+ /etc/passwd/userlines/demidov+"(
-+ )+/etc/passwd/userlines/edward+"(
-+ )+/etc/passwd/userlines/reiser+"(
-+ )+/etc/passwd/userlines/root+"(
-+ )
-+
-+ or
-+
-+ /etc/passwd/userlines/(demidov+"(
-+ )+edward+"(
-+ )+reiser+"(
-+ )+root+"(
-+ ))
-+
-+ and the file demidov might be specified as:
-+
-+ /etc/passwd/userlines/demidov/username+"(:)+/etc/passwd/userlines/demidov/password+"(:)+/etc/passwd/userlines/demidov/userid+"(:)+/etc/passwd/userlines/demidov/groupid+"(:)+/etc/passwd/userlines/demidov/gecos+"(:)+/etc/passwd/userlines/demidov/home+"(:)+/etc/passwd/userlines/demidov/shell
-+
-+ or
-+
-+ /etc/passwd/userlines/demidov/(username+"(:)+password+"(:)+userid+"(:)+groupid+"(:)+gecos+"(:)+home+"(:)+shell)
-+
-+ Notice that if the file demidov has a carriage return in it, the
-+ parsing fails, but then if you put carriage returns in the wrong place
-+ in a normal /etc/passwd file it breaks things also.
-+
-+ Note that it is forbidden to have no text between two interpolations
-+ if one wants to be able to define what parts of a write go to what
-+ subfiles referenced in an interpolation.
-+
-+ If one wants to be able to add new lines by writing to the file, one
-+ must either write a custom plugin for /etc/passwd that knows how to
-+ name an added line, or one must use an invert, or one must use a more
-+ sophisticated symfile syntax that we are not planning to write for
-+ version 4.0.
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/symlink.c linux-2.6.24/fs/reiser4/plugin/file/symlink.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/symlink.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/symlink.c 2008-01-25 11:39:06.992222114 +0300
-@@ -0,0 +1,95 @@
-+/* Copyright 2002, 2003, 2005 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../inode.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+
-+/* file plugin methods specific for symlink files
-+ (SYMLINK_FILE_PLUGIN_ID) */
-+
-+/* this is implementation of create_object method of file plugin for
-+ SYMLINK_FILE_PLUGIN_ID
-+ */
-+
-+/**
-+ * reiser4_create_symlink - create_object of file plugin for SYMLINK_FILE_PLUGIN_ID
-+ * @symlink: inode of symlink object
-+ * @dir: inode of parent directory
-+ * @data: parameters of new object; @data->name is the symlink content
-+ *
-+ * Inserts stat data with symlink extension into the tree.
-+ * Returns the result of ->write_sd_by_inode(); on failure i_size of
-+ * @symlink is reset to 0.
-+ */
-+int reiser4_create_symlink(struct inode *symlink,
-+ struct inode *dir UNUSED_ARG,
-+ reiser4_object_create_data *data /* info passed to us
-+ * this is filled by
-+ * reiser4() syscall
-+ * in particular */)
-+{
-+ int result;
-+
-+ assert("nikita-680", symlink != NULL);
-+ assert("nikita-681", S_ISLNK(symlink->i_mode));
-+ assert("nikita-685", reiser4_inode_get_flag(symlink, REISER4_NO_SD));
-+ assert("nikita-682", dir != NULL);
-+ assert("nikita-684", data != NULL);
-+ assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);
-+
-+ /*
-+ * stat data of symlink has symlink extension in which we store
-+ * symlink content, that is, path symlink is pointing to.
-+ */
-+ reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);
-+
-+ assert("vs-838", symlink->i_private == NULL);
-+ /* temporarily point i_private at the caller's name string */
-+ symlink->i_private = (void *)data->name;
-+
-+ assert("vs-843", symlink->i_size == 0);
-+ INODE_SET_FIELD(symlink, i_size, strlen(data->name));
-+
-+ /* insert stat data appended with data->name */
-+ result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
-+ if (result) {
-+ /* FIXME-VS: Make sure that symlink->i_private is not attached
-+ to kmalloced data */
-+ INODE_SET_FIELD(symlink, i_size, 0);
-+ } else {
-+ /* on success i_private must hold a copy equal to data->name,
-+ including the terminating NUL (i_size + 1 bytes compared) */
-+ assert("vs-849", symlink->i_private
-+ && reiser4_inode_get_flag(symlink,
-+ REISER4_GENERIC_PTR_USED));
-+ assert("vs-850",
-+ !memcmp((char *)symlink->i_private, data->name,
-+ (size_t) symlink->i_size + 1));
-+ }
-+ return result;
-+}
-+
-+/* this is implementation of destroy_inode method of file plugin for
-+ SYMLINK_FILE_PLUGIN_ID: frees the data attached to inode->i_private
-+ and clears the REISER4_GENERIC_PTR_USED flag
-+ */
-+void destroy_inode_symlink(struct inode *inode)
-+{
-+ assert("edward-799",
-+ inode_file_plugin(inode) ==
-+ file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID));
-+ assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode));
-+ assert("edward-801", reiser4_inode_get_flag(inode,
-+ REISER4_GENERIC_PTR_USED));
-+ assert("vs-839", S_ISLNK(inode->i_mode));
-+
-+ kfree(inode->i_private);
-+ /* do not leave a dangling pointer behind */
-+ inode->i_private = NULL;
-+ reiser4_inode_clr_flag(inode, REISER4_GENERIC_PTR_USED);
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file/tail_conversion.c linux-2.6.24/fs/reiser4/plugin/file/tail_conversion.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file/tail_conversion.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file/tail_conversion.c 2008-01-25 11:40:16.694168755 +0300
-@@ -0,0 +1,726 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+#include "../../safe_link.h"
-+#include "../../vfs_ops.h"
-+
-+#include <linux/writeback.h>
-+
-+/* this file contains:
-+ tail2extent and extent2tail */
-+
-+/* exclusive access to a file is acquired when file state changes: tail2extent, empty2tail, extent2tail, etc.
-+ Takes uf_info->latch for write and sets uf_info->exclusive_use. */
-+void get_exclusive_access(struct unix_file_info * uf_info)
-+{
-+ assert("nikita-3028", reiser4_schedulable());
-+ assert("nikita-3047", LOCK_CNT_NIL(inode_sem_w));
-+ assert("nikita-3048", LOCK_CNT_NIL(inode_sem_r));
-+ /*
-+ * "deadlock avoidance": sometimes we commit a transaction under
-+ * rw-semaphore on a file. Such commit can deadlock with another
-+ * thread that captured some block (hence preventing atom from being
-+ * committed) and waits on rw-semaphore.
-+ */
-+ reiser4_txn_restart_current();
-+ LOCK_CNT_INC(inode_sem_w);
-+ down_write(&uf_info->latch);
-+ uf_info->exclusive_use = 1;
-+ /* NOTE(review): the two assertions below share the label "vs-1713",
-+ so a failure report does not tell which condition was violated */
-+ assert("vs-1713", uf_info->ea_owner == NULL);
-+ assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0);
-+ ON_DEBUG(uf_info->ea_owner = current);
-+}
-+
-+/* release exclusive access: clears uf_info->exclusive_use and drops the
-+ write lock on uf_info->latch */
-+void drop_exclusive_access(struct unix_file_info * uf_info)
-+{
-+ assert("vs-1714", uf_info->ea_owner == current);
-+ assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0);
-+ ON_DEBUG(uf_info->ea_owner = NULL);
-+ uf_info->exclusive_use = 0;
-+ up_write(&uf_info->latch);
-+ /* NOTE(review): the two assertions below share the label
-+ "nikita-3049" */
-+ assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r));
-+ assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w));
-+ LOCK_CNT_DEC(inode_sem_w);
-+ reiser4_txn_restart_current();
-+}
-+
-+/**
-+ * nea_grabbed - do something when file semaphore is down_read-ed
-+ * @uf_info: unix file specific part of inode the access was obtained on
-+ *
-+ * This is called when nonexclusive access is obtained on file. All it does is
-+ * for debugging purposes: the body is compiled only if REISER4_DEBUG is set.
-+ */
-+static void nea_grabbed(struct unix_file_info *uf_info)
-+{
-+#if REISER4_DEBUG
-+ LOCK_CNT_INC(inode_sem_r);
-+ assert("vs-1716", uf_info->ea_owner == NULL);
-+ atomic_inc(&uf_info->nr_neas);
-+ uf_info->last_reader = current;
-+#endif
-+}
-+
-+/**
-+ * get_nonexclusive_access - get nonexclusive access to a file
-+ * @uf_info: unix file specific part of inode to obtain access to
-+ *
-+ * Nonexclusive access is obtained on a file before read, write, readpage.
-+ * Takes uf_info->latch for read.
-+ */
-+void get_nonexclusive_access(struct unix_file_info *uf_info)
-+{
-+ assert("nikita-3029", reiser4_schedulable());
-+ /* the current context must not have an atom at this point */
-+ assert("nikita-3361", get_current_context()->trans->atom == NULL);
-+
-+ down_read(&uf_info->latch);
-+ nea_grabbed(uf_info);
-+}
-+
-+/**
-+ * try_to_get_nonexclusive_access - try to get nonexclusive access to a file
-+ * @uf_info: unix file specific part of inode to obtain access to
-+ *
-+ * Non-blocking counterpart of get_nonexclusive_access(). Returns what
-+ * down_read_trylock() returned for uf_info->latch; nea_grabbed() is
-+ * called only when that value is non-zero.
-+ */
-+int try_to_get_nonexclusive_access(struct unix_file_info *uf_info)
-+{
-+	int locked = down_read_trylock(&uf_info->latch);
-+
-+	if (locked != 0)
-+		nea_grabbed(uf_info);
-+	return locked;
-+}
-+
-+/* release nonexclusive access: drops the read lock on uf_info->latch */
-+void drop_nonexclusive_access(struct unix_file_info * uf_info)
-+{
-+ assert("vs-1718", uf_info->ea_owner == NULL);
-+ assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0);
-+ ON_DEBUG(atomic_dec(&uf_info->nr_neas));
-+
-+ up_read(&uf_info->latch);
-+
-+ /* NOTE(review): the matching LOCK_CNT_INC in nea_grabbed() is inside
-+ #if REISER4_DEBUG; presumably LOCK_CNT_DEC is a no-op otherwise -
-+ confirm */
-+ LOCK_CNT_DEC(inode_sem_r);
-+ reiser4_txn_restart_current();
-+}
-+
-+/* part of tail2extent. Remove from the tree all items covering @count
-+ bytes of @inode starting from @offset. Returns the result of
-+ reiser4_cut_tree() */
-+/* Audited by: green(2002.06.15) */
-+static int cut_formatting_items(struct inode *inode, loff_t offset, int count)
-+{
-+	reiser4_key first, last;
-+
-+	/* AUDIT: How about putting an assertion here, what would check
-+	   all provided range is covered by tail items only? */
-+	/* build the key of the first byte of the range */
-+	inode_file_plugin(inode)->key_by_inode(inode, offset, &first);
-+
-+	/* the key of the last byte differs in the offset only */
-+	last = first;
-+	set_key_offset(&last, (__u64) (offset + count - 1));
-+
-+	/* cut [first, last] out of the tree */
-+	return reiser4_cut_tree(reiser4_tree_by_inode(inode), &first, &last,
-+				inode, 0);
-+}
-+
-+/*
-+ * Release the leading non-NULL pages of @pages (at most @nr_pages of them)
-+ * and reset their slots to NULL. All slots after the first NULL one are
-+ * expected to be NULL as well, which is checked by assertion.
-+ */
-+static void release_all_pages(struct page **pages, unsigned nr_pages)
-+{
-+	unsigned idx = 0;
-+
-+	/* drop pages up to the first empty slot */
-+	while (idx < nr_pages && pages[idx] != NULL) {
-+		page_cache_release(pages[idx]);
-+		pages[idx] = NULL;
-+		idx++;
-+	}
-+	/* stopped at an empty slot: the rest of the array has to be empty */
-+	if (idx < nr_pages) {
-+		unsigned rest;
-+
-+		for (rest = idx + 1; rest < nr_pages; rest++)
-+			assert("vs-1620", pages[rest] == NULL);
-+	}
-+}
-+
-+/* part of tail2extent. replace tail items with extent one. Content of tail
-+ items (@count bytes) being cut are copied already into
-+ @pages. The items are cut first, then every page is added to the page
-+ cache of @inode and find_or_create_extent() is called for it.
-+ Returns 0 on success (also when @nr_pages is 0) or the first error met */
-+static int replace(struct inode *inode, struct page **pages, unsigned nr_pages, int count)
-+{
-+ int result;
-+ unsigned i;
-+ STORE_COUNTERS;
-+
-+ if (nr_pages == 0)
-+ return 0;
-+
-+ assert("vs-596", pages[0]);
-+
-+ /* cut copied items */
-+ result = cut_formatting_items(inode, page_offset(pages[0]), count);
-+ if (result)
-+ return result;
-+
-+ CHECK_COUNTERS;
-+
-+ /* put into tree replacement for just removed items: extent item, namely */
-+ for (i = 0; i < nr_pages; i++) {
-+ result = add_to_page_cache_lru(pages[i], inode->i_mapping,
-+ pages[i]->index,
-+ mapping_gfp_mask(inode->
-+ i_mapping));
-+ if (result)
-+ break;
-+ unlock_page(pages[i]);
-+ result = find_or_create_extent(pages[i]);
-+ if (result)
-+ break;
-+ /* the page is marked uptodate only after its extent exists */
-+ SetPageUptodate(pages[i]);
-+ }
-+ return result;
-+}
-+
-+#define TAIL2EXTENT_PAGE_NUM 3 /* number of pages to fill before cutting tail
-+ * items */
-+
-+/*
-+ * Reserve disk space for one iteration of tail2extent, in which up to
-+ * TAIL2EXTENT_PAGE_NUM pages are converted. Returns the result of
-+ * reiser4_grab_space().
-+ */
-+static int reserve_tail2extent_iteration(struct inode *inode)
-+{
-+	reiser4_tree *tree;
-+
-+	tree = reiser4_tree_by_inode(inode);
-+
-+	/*
-+	 * space required for one iteration of tail->extent conversion:
-+	 *
-+	 *     1. kill N tail items
-+	 *
-+	 *     2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes
-+	 *
-+	 *     3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block
-+	 *     extents) extent units.
-+	 *
-+	 *     4. drilling to the leaf level by coord_by_key()
-+	 *
-+	 *     5. possible update of stat-data
-+	 *
-+	 */
-+	grab_space_enable();
-+	return reiser4_grab_space
-+	    (2 * tree->height +
-+	     TAIL2EXTENT_PAGE_NUM +
-+	     TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) +
-+	     1 + estimate_one_insert_item(tree) +
-+	     inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
-+}
-+
-+/*
-+ * Clear the REISER4_PART_MIXED flag of @inode and update its stat data.
-+ * A failure is only reported by warning(); 0 is returned in any case.
-+ */
-+static int complete_conversion(struct inode *inode)
-+{
-+	int err;
-+
-+	grab_space_enable();
-+	err = reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
-+				 BA_CAN_COMMIT);
-+	if (!err) {
-+		/* space is reserved: drop the flag and store the change */
-+		reiser4_inode_clr_flag(inode, REISER4_PART_MIXED);
-+		err = reiser4_update_sd(inode);
-+	}
-+	if (err != 0)
-+		warning("vs-1696", "Failed to clear converting bit of %llu: %i",
-+			(unsigned long long)get_inode_oid(inode), err);
-+	return 0;
-+}
-+
-+/**
-+ * find_start - find where a previous uncompleted conversion stopped
-+ * @inode: inode of the file being converted
-+ * @id: id of item plugin to look for
-+ * @offset: on entry, file offset to start the search from; on return,
-+ *	offset of the key the search stopped at
-+ *
-+ * this is used by tail2extent and extent2tail to detect where previous
-+ * uncompleted conversion stopped.
-+ *
-+ * Starting from the key of @offset, file items are looked up one after
-+ * another; for every item found whose plugin id differs from @id the key
-+ * is advanced by the item's ->append_key() method. Returns 0 when an item
-+ * with plugin id @id is found, -ENOENT when a lookup succeeds but does not
-+ * land on a unit, or the error returned by the lookup or by zload().
-+ */
-+static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset)
-+{
-+	int result;
-+	lock_handle lh;
-+	coord_t coord;
-+	int found;
-+	reiser4_key key;
-+
-+	init_lh(&lh);
-+	result = 0;
-+	found = 0;
-+	inode_file_plugin(inode)->key_by_inode(inode, *offset, &key);
-+	do {
-+		init_lh(&lh);
-+		result = find_file_item_nohint(&coord, &lh, &key,
-+					       ZNODE_READ_LOCK, inode);
-+
-+		if (result == CBK_COORD_FOUND) {
-+			if (coord.between == AT_UNIT) {
-+				/*coord_clear_iplug(&coord); */
-+				result = zload(coord.node);
-+				if (result == 0) {
-+					if (item_id_by_coord(&coord) == id)
-+						found = 1;
-+					else
-+						/* skip this item: continue
-+						 * from the key following it */
-+						item_plugin_by_coord(&coord)->s.
-+						    file.append_key(&coord,
-+								    &key);
-+					zrelse(coord.node);
-+				}
-+			} else
-+				result = RETERR(-ENOENT);
-+		}
-+		done_lh(&lh);
-+	} while (result == 0 && !found);
-+	*offset = get_key_offset(&key);
-+	return result;
-+}
-+
-+/**
-+ * tail2extent - convert a file stored in tail items into extents
-+ * @uf_info: unix-file specific data of the file to convert; the caller must
-+ *           hold exclusive access to it
-+ *
-+ * Works in iterations: each one fills up to TAIL2EXTENT_PAGE_NUM freshly
-+ * allocated pages with the content of the file's items and passes them to
-+ * replace(). Between iterations exclusive access is dropped and re-taken.
-+ * If the inode is already marked REISER4_PART_MIXED the conversion resumes at
-+ * the offset reported by find_start().
-+ *
-+ * REISER4_PART_IN_CONV is set while the conversion runs and is cleared on
-+ * every exit path taken after it has been set.
-+ *
-+ * Returns 0 on success or the error that interrupted the conversion.
-+ */
-+int tail2extent(struct unix_file_info *uf_info)
-+{
-+	int result;
-+	reiser4_key key;	/* key of next byte to be moved to page */
-+	char *p_data;		/* data of page */
-+	unsigned page_off = 0,	/* offset within the page where to copy data */
-+	    count;		/* number of bytes of item which can be
-+				 * copied to page */
-+	struct page *pages[TAIL2EXTENT_PAGE_NUM];
-+	struct page *page;
-+	int done;		/* set to 1 when all file is read */
-+	char *item;
-+	int i;
-+	struct inode *inode;
-+	int first_iteration;
-+	int bytes;
-+	__u64 offset;
-+
-+	assert("nikita-3362", ea_obtained(uf_info));
-+	inode = unix_file_info_to_inode(uf_info);
-+	assert("nikita-3412", !IS_RDONLY(inode));
-+	assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS);
-+	assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
-+
-+	offset = 0;
-+	first_iteration = 1;
-+	result = 0;
-+	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+		/*
-+		 * file is marked on disk as there was a conversion which did
-+		 * not complete due to either crash or some error. Find which
-+		 * offset tail conversion stopped at
-+		 */
-+		result = find_start(inode, FORMATTING_ID, &offset);
-+		if (result == -ENOENT) {
-+			/* no tail items found, everything is converted */
-+			uf_info->container = UF_CONTAINER_EXTENTS;
-+			complete_conversion(inode);
-+			return 0;
-+		} else if (result != 0)
-+			/* some other error */
-+			return result;
-+		first_iteration = 0;
-+	}
-+
-+	reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
-+
-+	/* get key of first byte of a file */
-+	inode_file_plugin(inode)->key_by_inode(inode, offset, &key);
-+
-+	done = 0;
-+	while (done == 0) {
-+		memset(pages, 0, sizeof(pages));
-+		result = reserve_tail2extent_iteration(inode);
-+		if (result != 0)
-+			goto out;
-+		if (first_iteration) {
-+			/* mark the file as partially converted before the
-+			 * first item is touched */
-+			reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+			reiser4_update_sd(inode);
-+			first_iteration = 0;
-+		}
-+		bytes = 0;
-+		for (i = 0; i < sizeof_array(pages) && done == 0; i++) {
-+			assert("vs-598",
-+			       (get_key_offset(&key) & ~PAGE_CACHE_MASK) == 0);
-+			page = alloc_page(reiser4_ctx_gfp_mask_get());
-+			if (!page) {
-+				result = RETERR(-ENOMEM);
-+				goto error;
-+			}
-+
-+			page->index =
-+			    (unsigned long)(get_key_offset(&key) >>
-+					    PAGE_CACHE_SHIFT);
-+			/*
-+			 * usually when one is going to longterm lock znode (as
-+			 * find_file_item does, for instance) he must not hold
-+			 * locked pages. However, there is an exception for
-+			 * case tail2extent. Pages appearing here are not
-+			 * reachable to everyone else, they are clean, they do
-+			 * not have jnodes attached so keeping them locked do
-+			 * not risk deadlock appearance
-+			 */
-+			assert("vs-983", !PagePrivate(page));
-+			reiser4_invalidate_pages(inode->i_mapping, page->index,
-+						 1, 0);
-+
-+			for (page_off = 0; page_off < PAGE_CACHE_SIZE;) {
-+				coord_t coord;
-+				lock_handle lh;
-+
-+				/* get next item */
-+				/* FIXME: we might want to readahead here */
-+				init_lh(&lh);
-+				result =
-+				    find_file_item_nohint(&coord, &lh, &key,
-+							  ZNODE_READ_LOCK,
-+							  inode);
-+				if (result != CBK_COORD_FOUND) {
-+					/*
-+					 * an error happened or no items of
-+					 * the file were found
-+					 */
-+					done_lh(&lh);
-+					page_cache_release(page);
-+					goto error;
-+				}
-+
-+				if (coord.between == AFTER_UNIT) {
-+					/*
-+					 * end of file is reached. Pad page
-+					 * with zeros
-+					 */
-+					done_lh(&lh);
-+					done = 1;
-+					p_data = kmap_atomic(page, KM_USER0);
-+					memset(p_data + page_off, 0,
-+					       PAGE_CACHE_SIZE - page_off);
-+					kunmap_atomic(p_data, KM_USER0);
-+					break;
-+				}
-+
-+				result = zload(coord.node);
-+				if (result) {
-+					page_cache_release(page);
-+					done_lh(&lh);
-+					goto error;
-+				}
-+				assert("vs-856", coord.between == AT_UNIT);
-+				item = ((char *)item_body_by_coord(&coord)) +
-+				    coord.unit_pos;
-+
-+				/* how many bytes to copy */
-+				count =
-+				    item_length_by_coord(&coord) -
-+				    coord.unit_pos;
-+				/* limit length of copy to end of page */
-+				if (count > PAGE_CACHE_SIZE - page_off)
-+					count = PAGE_CACHE_SIZE - page_off;
-+
-+				/*
-+				 * copy item (as much as will fit starting from
-+				 * the beginning of the item) into the page
-+				 */
-+				p_data = kmap_atomic(page, KM_USER0);
-+				memcpy(p_data + page_off, item, count);
-+				kunmap_atomic(p_data, KM_USER0);
-+
-+				page_off += count;
-+				bytes += count;
-+				set_key_offset(&key,
-+					       get_key_offset(&key) + count);
-+
-+				zrelse(coord.node);
-+				done_lh(&lh);
-+			}	/* end of loop which fills one page by content of
-+				 * formatting items */
-+
-+			if (page_off) {
-+				/* something was copied into page */
-+				pages[i] = page;
-+			} else {
-+				page_cache_release(page);
-+				assert("vs-1648", done == 1);
-+				break;
-+			}
-+		}		/* end of loop through pages of one conversion iteration */
-+
-+		if (i > 0) {
-+			result = replace(inode, pages, i, bytes);
-+			release_all_pages(pages, sizeof_array(pages));
-+			if (result)
-+				goto error;
-+			/*
-+			 * We have to drop exclusive access to avoid deadlock
-+			 * which may happen because called by reiser4_writepages
-+			 * capture_unix_file requires to get non-exclusive
-+			 * access to a file. It is safe to drop EA in the middle
-+			 * of tail2extent conversion because write_unix_file,
-+			 * setattr_unix_file(truncate), mmap_unix_file,
-+			 * release_unix_file(extent2tail) checks if conversion
-+			 * is not in progress (see comments before
-+			 * get_exclusive_access_careful().
-+			 * Other processes that acquire non-exclusive access
-+			 * (read_unix_file, reiser4_writepages, etc) should work
-+			 * on partially converted files.
-+			 */
-+			drop_exclusive_access(uf_info);
-+			/* throttle the conversion */
-+			reiser4_throttle_write(inode);
-+			get_exclusive_access(uf_info);
-+
-+			/*
-+			 * nobody is allowed to complete conversion but a
-+			 * process which started it
-+			 */
-+			assert("", reiser4_inode_get_flag(inode,
-+							  REISER4_PART_MIXED));
-+		}
-+	}
-+
-+	reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+
-+	if (result == 0) {
-+		/* file is converted to extent items */
-+		assert("vs-1697", reiser4_inode_get_flag(inode,
-+							 REISER4_PART_MIXED));
-+
-+		uf_info->container = UF_CONTAINER_EXTENTS;
-+		complete_conversion(inode);
-+	} else {
-+		/*
-+		 * conversion is not complete. Inode was already marked as
-+		 * REISER4_PART_MIXED and stat-data were updated at the first
-+		 * iteration of the loop above.
-+		 */
-+	      error:
-+		release_all_pages(pages, sizeof_array(pages));
-+		warning("nikita-2282", "Partial conversion of %llu: %i",
-+			(unsigned long long)get_inode_oid(inode), result);
-+	}
-+
-+      out:
-+	/*
-+	 * The "goto error" and "goto out" paths skip the clearing done after
-+	 * the main loop, so clear the in-conversion flag here as well;
-+	 * otherwise the inode would stay marked as being converted after a
-+	 * failed conversion.
-+	 */
-+	reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+	return result;
-+}
-+
-+/*
-+ * Reserve disk space for one iteration of extent2tail conversion of @inode.
-+ * Returns the result of reiser4_grab_space().
-+ */
-+static int reserve_extent2tail_iteration(struct inode *inode)
-+{
-+	reiser4_tree *tree = reiser4_tree_by_inode(inode);
-+	reiser4_block_nr needed;
-+
-+	grab_space_enable();
-+
-+	/* 1. removal of extent item */
-+	needed = estimate_one_item_removal(tree);
-+	/* 2. insertion of tail by insert_flow() */
-+	needed += estimate_insert_flow(tree->height);
-+	/* 3. drilling to the leaf level by coord_by_key() */
-+	needed += 1 + estimate_one_insert_item(tree);
-+	/* 4. possible update of stat-data */
-+	needed += inode_file_plugin(inode)->estimate.update(inode);
-+
-+	return reiser4_grab_space(needed, BA_CAN_COMMIT);
-+}
-+
-+/**
-+ * extent2tail - convert a file stored in extents into tail items
-+ * @file: open file referring to the inode being converted; used for
-+ *        reiser4_write_tail()
-+ * @uf_info: unix-file specific data of the file to convert; the caller must
-+ *           hold exclusive access to it
-+ *
-+ * for every page of file: read page, cut part of extent pointing to this
-+ * page, put data of page into the tree as tail item.
-+ *
-+ * If the inode is already marked REISER4_PART_MIXED the conversion resumes at
-+ * the offset reported by find_start(). Exclusive access is dropped and
-+ * re-taken after every page to throttle the conversion.
-+ *
-+ * Returns 0 when all pages were converted, otherwise the result that
-+ * interrupted the conversion.
-+ */
-+int extent2tail(struct file * file, struct unix_file_info *uf_info)
-+{
-+	int result;
-+	struct inode *inode;
-+	struct page *page;
-+	unsigned long num_pages, i;
-+	unsigned long start_page;
-+	reiser4_key from;
-+	reiser4_key to;
-+	unsigned count;
-+	__u64 offset;
-+
-+	assert("nikita-3362", ea_obtained(uf_info));
-+	inode = unix_file_info_to_inode(uf_info);
-+	assert("nikita-3412", !IS_RDONLY(inode));
-+	assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS);
-+	assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
-+
-+	offset = 0;
-+	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
-+		/*
-+		 * file is marked on disk as there was a conversion which did
-+		 * not complete due to either crash or some error. Find which
-+		 * offset tail conversion stopped at
-+		 */
-+		result = find_start(inode, EXTENT_POINTER_ID, &offset);
-+		if (result == -ENOENT) {
-+			/* no extent found, everything is converted */
-+			uf_info->container = UF_CONTAINER_TAILS;
-+			complete_conversion(inode);
-+			return 0;
-+		} else if (result != 0)
-+			/* some other error */
-+			return result;
-+	}
-+
-+	reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
-+
-+	/* number of pages left to convert, counted from @offset */
-+	num_pages =
-+	    (inode->i_size - offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-+	start_page = offset >> PAGE_CACHE_SHIFT;
-+
-+	inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
-+	to = from;
-+
-+	result = 0;
-+	for (i = 0; i < num_pages; i++) {
-+		__u64 start_byte;
-+
-+		result = reserve_extent2tail_iteration(inode);
-+		if (result != 0)
-+			break;
-+		if (i == 0 && offset == 0) {
-+			reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+			reiser4_update_sd(inode);
-+		}
-+
-+		/* page index is passed at full width: a cast to unsigned
-+		 * would truncate it where unsigned long is wider */
-+		page = read_mapping_page(inode->i_mapping,
-+					 i + start_page, NULL);
-+		if (IS_ERR(page)) {
-+			result = PTR_ERR(page);
-+			break;
-+		}
-+
-+		wait_on_page_locked(page);
-+
-+		if (!PageUptodate(page)) {
-+			page_cache_release(page);
-+			result = RETERR(-EIO);
-+			break;
-+		}
-+
-+		/* cut part of file we have read */
-+		/* widen before shifting so that the byte offset does not
-+		 * overflow a 32-bit unsigned long */
-+		start_byte = ((__u64) (i + start_page)) << PAGE_CACHE_SHIFT;
-+		set_key_offset(&from, start_byte);
-+		set_key_offset(&to, start_byte + PAGE_CACHE_SIZE - 1);
-+		/*
-+		 * reiser4_cut_tree_object() returns -E_REPEAT to allow atom
-+		 * commits during over-long truncates. But
-+		 * extent->tail conversion should be performed in one
-+		 * transaction.
-+		 */
-+		result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from,
-+					  &to, inode, 0);
-+
-+		if (result) {
-+			page_cache_release(page);
-+			break;
-+		}
-+
-+		/* put page data into tree via tail_write */
-+		count = PAGE_CACHE_SIZE;
-+		if ((i == (num_pages - 1)) &&
-+		    (inode->i_size & ~PAGE_CACHE_MASK))
-+			/* last page can be incomplete */
-+			count = (inode->i_size & ~PAGE_CACHE_MASK);
-+		while (count) {
-+			/*
-+			 * NOTE(review): @pos and the source address are reset
-+			 * on every pass, so a short write would write the
-+			 * start of the page again - confirm whether
-+			 * reiser4_write_tail() can return less than @count.
-+			 */
-+			loff_t pos = start_byte;
-+
-+			assert("edward-1537",
-+			       file != NULL && file->f_dentry != NULL);
-+			assert("edward-1538",
-+			       file->f_dentry->d_inode == inode);
-+
-+			result = reiser4_write_tail(file, inode,
-+						    (char __user *)kmap(page),
-+						    count, &pos);
-+			/* every kmap() has to be paired with kunmap() */
-+			kunmap(page);
-+			reiser4_free_file_fsdata(file);
-+			if (result <= 0) {
-+				warning("", "reiser4_write_tail failed");
-+				page_cache_release(page);
-+				reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+				return result;
-+			}
-+			count -= result;
-+		}
-+
-+		/* release page */
-+		lock_page(page);
-+		/* page is already detached from jnode and mapping. */
-+		assert("vs-1086", page->mapping == NULL);
-+		assert("nikita-2690",
-+		       (!PagePrivate(page) && jprivate(page) == 0));
-+		/* waiting for writeback completion with page lock held is
-+		 * perfectly valid. */
-+		wait_on_page_writeback(page);
-+		reiser4_drop_page(page);
-+		/* release reference taken by read_mapping_page() above */
-+		page_cache_release(page);
-+
-+		drop_exclusive_access(uf_info);
-+		/* throttle the conversion */
-+		reiser4_throttle_write(inode);
-+		get_exclusive_access(uf_info);
-+		/*
-+		 * nobody is allowed to complete conversion but a process which
-+		 * started it
-+		 */
-+		assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED));
-+	}
-+
-+	reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
-+
-+	if (i == num_pages) {
-+		/* file is converted to formatted items */
-+		assert("vs-1698", reiser4_inode_get_flag(inode,
-+							 REISER4_PART_MIXED));
-+		assert("vs-1260",
-+		       inode_has_no_jnodes(reiser4_inode_data(inode)));
-+
-+		uf_info->container = UF_CONTAINER_TAILS;
-+		complete_conversion(inode);
-+		return 0;
-+	}
-+	/*
-+	 * conversion is not complete. Inode was already marked as
-+	 * REISER4_PART_MIXED and stat-data were updated at the first
-+	 * iteration of the loop above.
-+	 */
-+	warning("nikita-2282",
-+		"Partial conversion of %llu: %lu of %lu: %i",
-+		(unsigned long long)get_inode_oid(inode), i,
-+		num_pages, result);
-+
-+	return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file_ops.c linux-2.6.24/fs/reiser4/plugin/file_ops.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file_ops.c 2008-01-25 11:39:06.992222114 +0300
-@@ -0,0 +1,205 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for some of methods of
-+ struct file_operations and of struct address_space_operations
-+*/
-+
-+#include "../inode.h"
-+#include "object.h"
-+
-+/* file operations */
-+
-+/* implementation of vfs's llseek method of struct file_operations for
-+ typical directory can be found in readdir_common.c
-+*/
-+loff_t reiser4_llseek_dir_common(struct file *, loff_t, int origin);
-+
-+/* implementation of vfs's readdir method of struct file_operations for
-+ typical directory can be found in readdir_common.c
-+*/
-+int reiser4_readdir_common(struct file *, void *dirent, filldir_t);
-+
-+/**
-+ * reiser4_release_dir_common - release of struct file_operations
-+ * @inode: inode of released file
-+ * @file: file to release
-+ *
-+ * Common ->release() for directories: inside a reiser4 context, frees the
-+ * reiser4 specific data attached to @file.
-+ *
-+ * Returns 0, or the error encoded in the context pointer if the context
-+ * could not be initialized.
-+ */
-+int reiser4_release_dir_common(struct inode *inode, struct file *file)
-+{
-+	reiser4_context *ctx = reiser4_init_context(inode->i_sb);
-+
-+	if (!IS_ERR(ctx)) {
-+		reiser4_free_file_fsdata(file);
-+		reiser4_exit_context(ctx);
-+		return 0;
-+	}
-+	return PTR_ERR(ctx);
-+}
-+
-+/*
-+ * Common implementation of vfs's fsync method of struct file_operations:
-+ * forces commit of all transactions of the super block @dentry belongs to
-+ * and returns the result of that commit. @file and @datasync are not used.
-+ */
-+int reiser4_sync_common(struct file *file, struct dentry *dentry, int datasync)
-+{
-+	struct super_block *sb = dentry->d_inode->i_sb;
-+	reiser4_context *ctx;
-+	int ret;
-+
-+	ctx = reiser4_init_context(sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	ret = txnmgr_force_commit_all(sb, 0);
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+
-+	return ret;
-+}
-+
-+/*
-+ * common sync method for regular files.
-+ *
-+ * We are trying to be smart here. Instead of committing all atoms (original
-+ * solution), we scan dirty pages of this file and commit all atoms they are
-+ * part of.
-+ *
-+ * Situation is complicated by anonymous pages: i.e., extent-less pages
-+ * dirtied through mmap. Fortunately sys_fsync() first calls
-+ * filemap_fdatawrite() that will ultimately call reiser4_writepages(), insert
-+ * all missing extents and capture anonymous pages.
-+ *
-+ * NOTE(review): the body below does not scan pages itself; it writes the stat
-+ * data and forces commit of the current atom - confirm the description above
-+ * is still accurate.
-+ *
-+ * @file and @datasync are not used. Returns 0 on success, -ENOSPC if space
-+ * for the stat-data update could not be reserved, or the error encoded in
-+ * the context pointer if the reiser4 context could not be initialized.
-+ */
-+int reiser4_sync_file_common(struct file *file,
-+ struct dentry *dentry, int datasync)
-+{
-+ reiser4_context *ctx;
-+ txn_atom *atom;
-+ reiser4_block_nr reserve;
-+
-+ ctx = reiser4_init_context(dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ /* reserve space for the stat-data update done below */
-+ reserve = estimate_update_common(dentry->d_inode);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOSPC);
-+ }
-+ /* NOTE(review): the result of the stat-data write is not checked */
-+ write_sd_by_inode_common(dentry->d_inode);
-+
-+ /*
-+ * @atom is not used afterwards; presumably the call is made because it
-+ * returns with the atom locked, as its name suggests - confirm before
-+ * removing.
-+ */
-+ atom = get_current_atom_locked();
-+ spin_lock_txnh(ctx->trans);
-+ force_commit_atom(ctx->trans);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+/* this is common implementation of vfs's sendfile method of struct
-+ file_operations
-+
-+ Reads @count bytes from @file and calls @actor for every page read. This is
-+ needed for loop back devices support.
-+
-+ NOTE: the function is compiled out by the #if 0 below. When enabled it only
-+ wraps generic_file_sendfile() into a reiser4 context and returns its result.
-+*/
-+#if 0
-+ssize_t
-+sendfile_common(struct file *file, loff_t *ppos, size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ reiser4_context *ctx;
-+ ssize_t result;
-+
-+ ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ result = generic_file_sendfile(file, ppos, count, actor, target);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+#endif /* 0 */
-+
-+/* address space operations */
-+
-+/*
-+ * Common implementation of vfs's prepare_write method of struct
-+ * address_space_operations: runs do_prepare_write() for @page inside a
-+ * reiser4 context.
-+ *
-+ * Returns the result of do_prepare_write(), or the error encoded in the
-+ * context pointer if the reiser4 context could not be initialized.
-+ */
-+int
-+prepare_write_common(struct file *file, struct page *page, unsigned from,
-+		     unsigned to)
-+{
-+	reiser4_context *ctx;
-+	int result;
-+
-+	ctx = reiser4_init_context(page->mapping->host->i_sb);
-+	/* reiser4_init_context() reports failure through an error pointer,
-+	 * which must not be passed on to the context functions below */
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+	result = do_prepare_write(file, page, from, to);
-+
-+	/* don't commit transaction under inode semaphore */
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+
-+	return result;
-+}
-+
-+/*
-+ * Helper for prepare_write_common and prepare_write_unix_file.
-+ *
-+ * Makes sure the locked @page is up to date before a write to the byte range
-+ * [@from, @to): nothing is done if the whole page is going to be overwritten
-+ * or the page is already up to date; otherwise the page is read in through
-+ * the mapping's ->readpage().
-+ *
-+ * Returns 0 on success, -EINVAL if the mapping has no ->readpage(), the error
-+ * returned by ->readpage(), or -EIO if the page is not up to date after the
-+ * read. The page is locked on return.
-+ */
-+int
-+do_prepare_write(struct file *file, struct page *page, unsigned from,
-+		 unsigned to)
-+{
-+	int result;
-+
-+	assert("umka-3099", file != NULL);
-+	assert("umka-3100", page != NULL);
-+	assert("umka-3095", PageLocked(page));
-+
-+	if (to - from == PAGE_CACHE_SIZE || PageUptodate(page))
-+		return 0;
-+
-+	if (page->mapping->a_ops->readpage == NULL)
-+		return RETERR(-EINVAL);
-+
-+	result = page->mapping->a_ops->readpage(file, page);
-+	if (result != 0) {
-+		SetPageError(page);
-+		ClearPageUptodate(page);
-+		/* All reiser4 readpage() implementations should return the
-+		 * page locked in case of error. */
-+		assert("nikita-3472", PageLocked(page));
-+	} else {
-+		/*
-+		 * ->readpage() either:
-+		 *
-+		 *     1. starts IO against @page. @page is locked for IO in
-+		 *     this case.
-+		 *
-+		 *     2. doesn't start IO. @page is unlocked.
-+		 *
-+		 * In either case, page should be locked.
-+		 */
-+		lock_page(page);
-+		/*
-+		 * IO (if any) is completed at this point. Check for IO
-+		 * errors.
-+		 */
-+		if (!PageUptodate(page))
-+			result = RETERR(-EIO);
-+	}
-+	assert("umka-3098", PageLocked(page));
-+	return result;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file_ops_readdir.c linux-2.6.24/fs/reiser4/plugin/file_ops_readdir.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file_ops_readdir.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file_ops_readdir.c 2008-01-25 11:39:06.996223145 +0300
-@@ -0,0 +1,658 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../inode.h"
-+
-+/* Return 1 iff the item at @coord is a directory entry item and the file
-+ * plugin of @inode reports it as owned by @inode; return 0 otherwise. */
-+static int is_valid_dir_coord(struct inode *inode, coord_t * coord)
-+{
-+	if (!plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE))
-+		return 0;
-+	return inode_file_plugin(inode)->owns_item(inode, coord) != 0;
-+}
-+
-+/* Order two logical positions within the same directory: first by directory
-+ * entry key, then, for equal keys, by the position among the duplicates. */
-+static cmp_t dir_pos_cmp(const struct dir_pos * p1, const struct dir_pos * p2)
-+{
-+	cmp_t by_key;
-+	int diff;
-+
-+	assert("nikita-2534", p1 != NULL);
-+	assert("nikita-2535", p2 != NULL);
-+
-+	by_key = de_id_cmp(&p1->dir_entry_key, &p2->dir_entry_key);
-+	if (by_key != EQUAL_TO)
-+		return by_key;
-+
-+	/* same key: decide by ordinal within the sequence of duplicates */
-+	diff = p1->pos - p2->pos;
-+	if (diff < 0)
-+		return LESS_THAN;
-+	if (diff != 0)
-+		return GREATER_THAN;
-+	return EQUAL_TO;
-+}
-+
-+/*
-+ * Update the readdir position @readdir_spot of directory file descriptor
-+ * @dir after a directory entry was added (@adj == +1) or removed
-+ * (@adj == -1) at @mod_point, so that readdir stays stable. See comment
-+ * before reiser4_readdir_common() for overview of why "adjustment" is
-+ * necessary.
-+ */
-+static void
-+adjust_dir_pos(struct file *dir, struct readdir_pos * readdir_spot,
-+	       const struct dir_pos * mod_point, int adj)
-+{
-+	struct dir_pos *pos;
-+	cmp_t where;
-+
-+	/* directory is positioned to the beginning: nothing to adjust. */
-+	if (readdir_spot->entry_no == 0)
-+		return;
-+
-+	pos = &readdir_spot->position;
-+	where = dir_pos_cmp(mod_point, pos);
-+
-+	if (where == LESS_THAN) {
-+		/*
-+		 * entry was added/removed on the left (in key order) of the
-+		 * current position, so the logical number of the entry
-+		 * readdir is "looking" at changes.
-+		 */
-+		readdir_spot->entry_no += adj;
-+		assert("nikita-2577",
-+		       ergo(dir != NULL, reiser4_get_dir_fpos(dir) + adj >= 0));
-+		if (de_id_cmp(&pos->dir_entry_key,
-+			      &mod_point->dir_entry_key) == EQUAL_TO) {
-+			assert("nikita-2575", mod_point->pos < pos->pos);
-+			/*
-+			 * the modified entry has the same key as the current
-+			 * one: update the counter of duplicate keys as well.
-+			 */
-+			pos->pos += adj;
-+		}
-+	} else if (where == EQUAL_TO) {
-+		/* an entry readdir is looking at cannot be inserted, because
-+		 * it already exists. */
-+		assert("nikita-2576", adj < 0);
-+		/*
-+		 * the entry @pos points to is being removed.
-+		 *
-+		 * NOTE-NIKITA: Right thing to do is to update @pos to point
-+		 * to the next entry. This is complex (we are under spin-lock
-+		 * for one thing). Just rewind it to the beginning. Next
-+		 * readdir will have to scan the beginning of
-+		 * directory. Proper solution is to use semaphore in
-+		 * spin lock's stead and use rewind_right() here.
-+		 *
-+		 * NOTE-NIKITA: now, semaphore is used, so...
-+		 */
-+		memset(readdir_spot, 0, sizeof *readdir_spot);
-+	}
-+	/* GREATER_THAN: directory is modified after @pos, nothing to do. */
-+}
-+
-+/*
-+ * An entry named by @de was added to (@adj > 0) or removed from (@adj < 0)
-+ * directory @dir at position @offset among entries with the same key. Walk
-+ * all file descriptors on the readdir list of @dir and adjust their
-+ * positions accordingly. Should be used by implementations of add_entry and
-+ * rem_entry of dir plugin.
-+ */
-+void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
-+			     int offset, int adj)
-+{
-+	struct dir_pos changed;
-+	reiser4_file_fsdata *fsdata;
-+
-+	assert("nikita-2536", dir != NULL);
-+	assert("nikita-2538", de != NULL);
-+	assert("nikita-2539", adj != 0);
-+
-+	/* logical position of the modification */
-+	changed.pos = offset;
-+	build_de_id(dir, &de->d_name, &changed.dir_entry_key);
-+
-+	spin_lock_inode(dir);
-+	/* update every descriptor currently involved into readdir of @dir */
-+	list_for_each_entry(fsdata, get_readdir_list(dir), dir.linkage)
-+		adjust_dir_pos(fsdata->back, &fsdata->dir.readdir, &changed,
-+			       adj);
-+	spin_unlock_inode(dir);
-+}
-+
-+/*
-+ * Traverse the tree to start/continue readdir of @dir from the readdir
-+ * position @pos, leaving @tap at the found entry.
-+ *
-+ * Returns the result of rewind_right() when the lookup succeeds, the error
-+ * of build_readdir_key() if the key cannot be built, and -EIO if the lookup
-+ * does not find the key.
-+ */
-+static int dir_go_to(struct file *dir, struct readdir_pos * pos, tap_t * tap)
-+{
-+	struct inode *inode;
-+	reiser4_key key;
-+	int ret;
-+
-+	assert("nikita-2554", pos != NULL);
-+
-+	inode = dir->f_dentry->d_inode;
-+	ret = inode_dir_plugin(inode)->build_readdir_key(dir, &key);
-+	if (ret != 0)
-+		return ret;
-+
-+	ret = reiser4_object_lookup(inode, &key, tap->coord, tap->lh,
-+				    tap->mode, FIND_EXACT,
-+				    LEAF_LEVEL, LEAF_LEVEL,
-+				    0, &tap->ra_info);
-+	if (ret != CBK_COORD_FOUND) {
-+		/* lookup failed: invalidate the tap */
-+		tap->coord->node = NULL;
-+		done_lh(tap->lh);
-+		return RETERR(-EIO);
-+	}
-+	/* skip entries with the same key that were already returned */
-+	return rewind_right(tap, (int)pos->position.pos);
-+}
-+
-+/*
-+ * handling of non-unique keys: calculate at what ordinal position within
-+ * sequence of directory items with identical keys @pos is.
-+ *
-+ * A private copy of @tap is moved to the left unit by unit, counting in
-+ * @pos->position.pos the preceding entries that have the same key as the
-+ * entry @tap points to; @tap itself is not moved.
-+ *
-+ * Returns -ENOENT if @tap does not point to a valid directory entry of
-+ * @inode, -EINVAL if a non-directory item is met while scanning, the error
-+ * of reiser4_tap_load() if the node cannot be loaded, or the result of the
-+ * last go_prev_unit() call.
-+ */
-+static int set_pos(struct inode *inode, struct readdir_pos * pos, tap_t * tap)
-+{
-+	int result;
-+	coord_t coord;
-+	lock_handle lh;
-+	tap_t scan;
-+	de_id *did;
-+	reiser4_key de_key;
-+
-+	coord_init_zero(&coord);
-+	init_lh(&lh);
-+	reiser4_tap_init(&scan, &coord, &lh, ZNODE_READ_LOCK);
-+	reiser4_tap_copy(&scan, tap);
-+	result = reiser4_tap_load(&scan);
-+	if (result != 0) {
-+		/* node is not loaded: it must neither be inspected nor
-+		 * released with reiser4_tap_relse() */
-+		reiser4_tap_done(&scan);
-+		return result;
-+	}
-+	pos->position.pos = 0;
-+
-+	did = &pos->position.dir_entry_key;
-+
-+	if (is_valid_dir_coord(inode, scan.coord)) {
-+
-+		build_de_id_by_key(unit_key_by_coord(scan.coord, &de_key), did);
-+
-+		while (1) {
-+
-+			result = go_prev_unit(&scan);
-+			if (result != 0)
-+				break;
-+
-+			if (!is_valid_dir_coord(inode, scan.coord)) {
-+				result = -EINVAL;
-+				break;
-+			}
-+
-+			/* get key of directory entry */
-+			unit_key_by_coord(scan.coord, &de_key);
-+			if (de_id_key_cmp(did, &de_key) != EQUAL_TO) {
-+				/* duplicate-sequence is over */
-+				break;
-+			}
-+			pos->position.pos++;
-+		}
-+	} else
-+		result = RETERR(-ENOENT);
-+	reiser4_tap_relse(&scan);
-+	reiser4_tap_done(&scan);
-+	return result;
-+}
-+
-+/*
-+ * "rewind" directory to @offset, i.e., set @pos and @tap correspondingly.
-+ *
-+ * The offset to rewind to is the current directory file position of @dir as
-+ * returned by reiser4_get_dir_fpos(). @pos holds the readdir state reached
-+ * so far; on success its entry_no and fpos are updated to the new position.
-+ *
-+ * Returns 0 on success, -EINVAL if the file position is negative, -ENOENT if
-+ * the destination entry number is not below inode->i_size, or the error
-+ * returned by dir_go_to(), rewind_left(), rewind_right() or set_pos().
-+ */
-+static int dir_rewind(struct file *dir, struct readdir_pos * pos, tap_t * tap)
-+{
-+ __u64 destination;
-+ __s64 shift;
-+ int result;
-+ struct inode *inode;
-+ loff_t dirpos;
-+
-+ assert("nikita-2553", dir != NULL);
-+ assert("nikita-2548", pos != NULL);
-+ assert("nikita-2551", tap->coord != NULL);
-+ assert("nikita-2552", tap->lh != NULL);
-+
-+ dirpos = reiser4_get_dir_fpos(dir);
-+ /* signed distance between requested and last known file position */
-+ shift = dirpos - pos->fpos;
-+ /* this is logical directory entry within @dir which we are rewinding
-+ * to */
-+ destination = pos->entry_no + shift;
-+
-+ inode = dir->f_dentry->d_inode;
-+ if (dirpos < 0)
-+ return RETERR(-EINVAL);
-+ else if (destination == 0ll || dirpos == 0) {
-+ /* rewind to the beginning of directory */
-+ memset(pos, 0, sizeof *pos);
-+ return dir_go_to(dir, pos, tap);
-+ } else if (destination >= inode->i_size)
-+ return RETERR(-ENOENT);
-+
-+ if (shift < 0) {
-+ /* I am afraid of negative numbers */
-+ shift = -shift;
-+ /* rewinding to the left */
-+ if (shift <= (int)pos->position.pos) {
-+ /* destination is within sequence of entries with
-+ duplicate keys. */
-+ result = dir_go_to(dir, pos, tap);
-+ } else {
-+ /* the duplicates before @pos are covered by dir_go_to();
-+ only the rest has to be walked to the left */
-+ shift -= pos->position.pos;
-+ while (1) {
-+ /* repetitions: deadlock is possible when
-+ going to the left. */
-+ result = dir_go_to(dir, pos, tap);
-+ if (result == 0) {
-+ result = rewind_left(tap, shift);
-+ if (result == -E_DEADLOCK) {
-+ /* drop the tap and retry from scratch */
-+ reiser4_tap_done(tap);
-+ continue;
-+ }
-+ }
-+ break;
-+ }
-+ }
-+ } else {
-+ /* rewinding to the right */
-+ result = dir_go_to(dir, pos, tap);
-+ if (result == 0)
-+ result = rewind_right(tap, shift);
-+ }
-+ if (result == 0) {
-+ /* recompute position within the sequence of duplicate keys */
-+ result = set_pos(inode, pos, tap);
-+ if (result == 0) {
-+ /* update pos->position.pos */
-+ pos->entry_no = destination;
-+ pos->fpos = dirpos;
-+ }
-+ }
-+ return result;
-+}
-+
-+/*
-+ * Function that is called by common_readdir() on each directory entry while
-+ * doing readdir. ->filldir callback may block, so we had to release long term
-+ * lock while calling it. To avoid repeating tree traversal, seal is used. If
-+ * seal is broken, we return -E_REPEAT. Node is unlocked in this case.
-+ *
-+ * Whether node is unlocked in case of any other error is undefined. It is
-+ * guaranteed to be still locked if success (0) is returned.
-+ *
-+ * When ->filldir() wants no more, feed_entry() returns 1, and node is
-+ * unlocked.
-+ *
-+ * @f: directory being read; its f_pos is reported as the entry's offset
-+ * @pos: readdir position (not used here)
-+ * @tap: tap positioned at the entry to feed
-+ * @filldir: callback supplied by the caller of readdir
-+ * @dirent: opaque argument passed through to @filldir
-+ *
-+ * Also returns -EIO if the key of the object cannot be extracted from the
-+ * entry and -ENOMEM if a buffer for a long name cannot be allocated.
-+ */
-+static int
-+feed_entry(struct file *f, struct readdir_pos * pos, tap_t * tap,
-+	   filldir_t filldir, void *dirent)
-+{
-+	item_plugin *iplug;
-+	char *name;
-+	size_t name_len;
-+	reiser4_key sd_key;
-+	int result;
-+	char buf[DE_NAME_BUF_LEN];
-+	char name_buf[32];
-+	char *local_name;
-+	unsigned file_type;
-+	seal_t seal;
-+	coord_t *coord;
-+	reiser4_key entry_key;
-+
-+	coord = tap->coord;
-+	iplug = item_plugin_by_coord(coord);
-+
-+	/* pointer to name within the node */
-+	name = iplug->s.dir.extract_name(coord, buf);
-+	assert("nikita-1371", name != NULL);
-+
-+	/* key of object the entry points to */
-+	if (iplug->s.dir.extract_key(coord, &sd_key) != 0)
-+		return RETERR(-EIO);
-+
-+	/* we must release longterm znode lock before calling filldir to avoid
-+	   deadlock which may happen if filldir causes page fault. So, copy
-+	   name to intermediate buffer */
-+	name_len = strlen(name);
-+	if (name_len + 1 > sizeof(name_buf)) {
-+		local_name = kmalloc(name_len + 1,
-+				     reiser4_ctx_gfp_mask_get());
-+		if (local_name == NULL)
-+			return RETERR(-ENOMEM);
-+	} else
-+		local_name = name_buf;
-+
-+	/* copy includes the terminating NUL */
-+	memcpy(local_name, name, name_len + 1);
-+	file_type = iplug->s.dir.extract_file_type(coord);
-+
-+	unit_key_by_coord(coord, &entry_key);
-+	reiser4_seal_init(&seal, coord, &entry_key);
-+
-+	longterm_unlock_znode(tap->lh);
-+
-+	/*
-+	 * send information about directory entry to the ->filldir() filler
-+	 * supplied to us by caller (VFS).
-+	 *
-+	 * ->filldir is entitled to do weird things. For example, ->filldir
-+	 * supplied by knfsd re-enters file system. Make sure no locks are
-+	 * held.
-+	 */
-+	assert("nikita-3436", lock_stack_isclean(get_current_lock_stack()));
-+
-+	reiser4_txn_restart_current();
-+	/*
-+	 * pass the private copy: the node is unlocked by now, so @name is no
-+	 * longer protected by the lock.
-+	 */
-+	result = filldir(dirent, local_name, (int)name_len,
-+			 /* offset of this entry */
-+			 f->f_pos,
-+			 /* inode number of object bounden by this entry */
-+			 oid_to_uino(get_key_objectid(&sd_key)), file_type);
-+	if (local_name != name_buf)
-+		kfree(local_name);
-+	if (result < 0)
-+		/* ->filldir() is satisfied. (no space in buffer, IOW) */
-+		result = 1;
-+	else
-+		result = reiser4_seal_validate(&seal, coord, &entry_key,
-+					       tap->lh, tap->mode,
-+					       ZNODE_LOCK_HIPRI);
-+	return result;
-+}
-+
-+/*
-+ * Advance readdir position @pos by one entry; @coord is the entry moved to.
-+ * If that entry has the same key as the one recorded in @pos, the counter of
-+ * duplicates is incremented, otherwise the recorded key is replaced and the
-+ * counter is reset.
-+ */
-+static void move_entry(struct readdir_pos * pos, coord_t * coord)
-+{
-+	de_id *recorded = &pos->position.dir_entry_key;
-+	reiser4_key cur_key;
-+
-+	++pos->entry_no;
-+
-+	/* key of the directory entry at @coord */
-+	unit_key_by_coord(coord, &cur_key);
-+
-+	if (de_id_key_cmp(recorded, &cur_key) != EQUAL_TO) {
-+		/* a new key starts: restart counting of duplicates */
-+		pos->position.pos = 0;
-+		build_de_id_by_key(&cur_key, recorded);
-+	} else {
-+		/* still within a sequence of entries with duplicate keys */
-+		++pos->position.pos;
-+	}
-+	++pos->fpos;
-+}
-+
-+/*
-+ * STATELESS READDIR
-+ *
-+ * readdir support in reiser4 relies on ability to update readdir_pos embedded
-+ * into reiser4_file_fsdata on each directory modification (name insertion and
-+ * removal), see reiser4_readdir_common() function below. This obviously doesn't
-+ * work when reiser4 is accessed over NFS, because NFS doesn't keep any state
-+ * across client READDIR requests for the same directory.
-+ *
-+ * To address this we maintain a "pool" of detached reiser4_file_fsdata
-+ * (d_cursor). Whenever NFS readdir request comes, we detect this, and try to
-+ * find detached reiser4_file_fsdata corresponding to previous readdir
-+ * request. In other words, additional state is maintained on the
-+ * server. (This is somewhat contrary to the design goals of NFS protocol.)
-+ *
-+ * To efficiently detect when our ->readdir() method is called by NFS server,
-+ * dentry is marked as "stateless" in reiser4_decode_fh() (this is checked by
-+ * file_is_stateless() function).
-+ *
-+ * To find out d_cursor in the pool, we encode client id (cid) in the highest
-+ * bits of NFS readdir cookie: when first readdir request comes to the given
-+ * directory from the given client, cookie is set to 0. This situation is
-+ * detected, global cid_counter is incremented, and stored in highest bits of
-+ * all direntry offsets returned to the client, including last one. As the
-+ * only valid readdir cookie is one obtained as direntry->offset, we are
-+ * guaranteed that next readdir request (continuing current one) will have
-+ * current cid in the highest bits of starting readdir cookie. All d_cursors
-+ * are hashed into per-super-block hash table by (oid, cid) key.
-+ *
-+ * In addition d_cursors are placed into per-super-block radix tree where they
-+ * are keyed by oid alone. This is necessary to efficiently remove them during
-+ * rmdir.
-+ *
-+ * Finally, currently unused d_cursors are linked into a special list. This
-+ * list is used by d_cursor_shrink to reclaim d_cursors under memory pressure.
-+ *
-+ */
-+
-+/*
-+ * Prepare for readdir of directory @f: attach readdir state to @f, put the
-+ * file on the readdir list of its inode, store the address of the readdir
-+ * position in *@pos and move @tap to that position.
-+ *
-+ * Returns -ENOTDIR if @f is not a directory, the error of
-+ * reiser4_attach_fsdata() or reiser4_get_file_fsdata() on failure, and the
-+ * result of dir_rewind() otherwise.
-+ */
-+static int dir_readdir_init(struct file *f, tap_t * tap,
-+			    struct readdir_pos ** pos)
-+{
-+	reiser4_file_fsdata *fsdata;
-+	struct inode *inode;
-+	int err;
-+
-+	assert("nikita-1359", f != NULL);
-+	inode = f->f_dentry->d_inode;
-+	assert("nikita-1360", inode != NULL);
-+
-+	if (!S_ISDIR(inode->i_mode))
-+		return RETERR(-ENOTDIR);
-+
-+	/* try to find detached readdir state */
-+	err = reiser4_attach_fsdata(f, inode);
-+	if (err != 0)
-+		return err;
-+
-+	fsdata = reiser4_get_file_fsdata(f);
-+	assert("nikita-2571", fsdata != NULL);
-+	if (IS_ERR(fsdata))
-+		return PTR_ERR(fsdata);
-+
-+	/*
-+	 * The readdir list hanging off the directory inode is scanned for
-+	 * "readdirs-in-progress" whenever names are inserted or removed in
-+	 * the directory; make sure this file descriptor is on it.
-+	 */
-+	spin_lock_inode(inode);
-+	*pos = &fsdata->dir.readdir;
-+	if (list_empty_careful(&fsdata->dir.linkage))
-+		list_add(&fsdata->dir.linkage, get_readdir_list(inode));
-+	spin_unlock_inode(inode);
-+
-+	/* move @tap to the current position */
-+	return dir_rewind(f, *pos, tap);
-+}
-+
-+/* this is implementation of vfs's llseek method of struct file_operations for
-+ typical directory
-+ See comment before reiser4_readdir_common() for explanation.
-+
-+ Under inode->i_mutex, ->f_pos is first updated by default_llseek(). If that
-+ succeeds, dir_readdir_init() is run on a freshly initialised tap (presumably
-+ to bring the per-file readdir position in line with the new ->f_pos).
-+ Returns the value of default_llseek(), or the error of dir_readdir_init()
-+ when that failed, or the error of reiser4_init_context().
-+*/
-+loff_t reiser4_llseek_dir_common(struct file * file, loff_t off, int origin)
-+{
-+ reiser4_context *ctx;
-+ loff_t result;
-+ struct inode *inode;
-+
-+ inode = file->f_dentry->d_inode;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ mutex_lock(&inode->i_mutex);
-+
-+ /* update ->f_pos */
-+ result = default_llseek(file, off, origin);
-+ if (result >= 0) {
-+ int ff;
-+ coord_t coord;
-+ lock_handle lh;
-+ tap_t tap;
-+ struct readdir_pos *pos;
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+ ff = dir_readdir_init(file, &tap, &pos);
-+ /* NOTE(review): fsdata is detached here and once more after
-+ * this block; the second call looks redundant on this path.
-+ * Confirm reiser4_detach_fsdata() tolerates repeated calls. */
-+ reiser4_detach_fsdata(file);
-+ /* a failed re-positioning replaces the llseek result */
-+ if (ff != 0)
-+ result = (loff_t) ff;
-+ reiser4_tap_done(&tap);
-+ }
-+ /* also reached when default_llseek() failed and nothing was attached */
-+ reiser4_detach_fsdata(file);
-+ mutex_unlock(&inode->i_mutex);
-+
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* this is common implementation of vfs's readdir method of struct
-+ file_operations
-+
-+ readdir problems:
-+
-+ readdir(2)/getdents(2) interface is based on implicit assumption that
-+ readdir can be restarted from any particular point by supplying file system
-+ with off_t-full of data. That is, file system fills ->d_off field in struct
-+ dirent and later user passes ->d_off to the seekdir(3), which is, actually,
-+ implemented by glibc as lseek(2) on directory.
-+
-+ Reiser4 cannot restart readdir from 64 bits of data, because the last two
-+ components of the key of directory entry are unknown, which gives 128 bits:
-+ locality and type fields in the key of directory entry are always known, but
-+ to start readdir() from a given point the objectid and offset fields have to
-+ be filled.
-+
-+ Traditional UNIX API for scanning through directory
-+ (readdir/seekdir/telldir/opendir/closedir/rewinddir/getdents) is based on the
-+ assumption that directory is structured very much like regular file, in
-+ particular, it is implied that each name within given directory (directory
-+ entry) can be uniquely identified by scalar offset and that such offset is
-+ stable across the life-time of the name it identifies.
-+
-+ This is manifestly not so for reiser4. In reiser4 the only stable unique
-+ identifier for the directory entry is its key that doesn't fit into
-+ seekdir/telldir API.
-+
-+ solution:
-+
-+ Within each file descriptor participating in readdir-ing of directory
-+ plugin/dir/dir.h:readdir_pos is maintained. This structure keeps track of
-+ the "current" directory entry that file descriptor looks at. It contains a
-+ key of directory entry (plus some additional info to deal with non-unique
-+ keys that we wouldn't dwell onto here) and a logical position of this
-+ directory entry starting from the beginning of the directory, that is
-+ ordinal number of this entry in the readdir order.
-+
-+ Obviously this logical position is not stable in the face of directory
-+ modifications. To work around this, on each addition or removal of directory
-+ entry all file descriptors for directory inode are scanned and their
-+ readdir_pos are updated accordingly (adjust_dir_pos()).
-+*/
-+/*
-+ * Feed directory entries of @f to @filld one by one, starting at the readdir
-+ * position kept for @f, until @filld asks to stop, the directory ends or an
-+ * error occurs. ->f_pos is advanced for every entry consumed. After the scan
-+ * an atime update is attempted (best effort: failure to grab space for it is
-+ * only logged).
-+ *
-+ * Returns 0 on success (including "buffer full" and "end of directory"),
-+ * -ENOTDIR if @f is not a directory, or another negative error code.
-+ */
-+int reiser4_readdir_common(struct file *f /* directory file being read */,
-+			   void *dirent /* opaque data passed to us by VFS */,
-+			   filldir_t filld /* filler function passed to us
-+					    * by VFS */)
-+{
-+	reiser4_context *ctx;
-+	int result;
-+	struct inode *inode;
-+	coord_t coord;
-+	lock_handle lh;
-+	tap_t tap;
-+	struct readdir_pos *pos;
-+
-+	assert("nikita-1359", f != NULL);
-+	inode = f->f_dentry->d_inode;
-+	assert("nikita-1360", inode != NULL);
-+
-+	if (!S_ISDIR(inode->i_mode))
-+		return RETERR(-ENOTDIR);
-+
-+	ctx = reiser4_init_context(inode->i_sb);
-+	if (IS_ERR(ctx))
-+		return PTR_ERR(ctx);
-+
-+	coord_init_zero(&coord);
-+	init_lh(&lh);
-+	reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+	reiser4_readdir_readahead_init(inode, &tap);
-+
-+ repeat:
-+	result = dir_readdir_init(f, &tap, &pos);
-+	if (result == 0) {
-+		result = reiser4_tap_load(&tap);
-+		/* scan entries one by one feeding them to @filld */
-+		while (result == 0) {
-+			/* named differently from the function-level
-+			 * "coord" so that it no longer shadows it */
-+			coord_t *cur;
-+
-+			cur = tap.coord;
-+			assert("nikita-2572", coord_is_existing_unit(cur));
-+			assert("nikita-3227", is_valid_dir_coord(inode, cur));
-+
-+			result = feed_entry(f, pos, &tap, filld, dirent);
-+			if (result > 0) {
-+				/* positive result: stop scanning; it is
-+				 * converted to 0 on return */
-+				break;
-+			} else if (result == 0) {
-+				++f->f_pos;
-+				result = go_next_unit(&tap);
-+				if (result == -E_NO_NEIGHBOR ||
-+				    result == -ENOENT) {
-+					/* end of directory is not an error */
-+					result = 0;
-+					break;
-+				} else if (result == 0) {
-+					if (is_valid_dir_coord(inode, cur))
-+						move_entry(pos, cur);
-+					else
-+						break;
-+				}
-+			} else if (result == -E_REPEAT) {
-+				/* feed_entry() had to restart. */
-+				++f->f_pos;
-+				reiser4_tap_relse(&tap);
-+				goto repeat;
-+			} else
-+				/* loop terminates: result is non-zero */
-+				warning("vs-1617",
-+					"reiser4_readdir_common: unexpected error %d",
-+					result);
-+		}
-+		reiser4_tap_relse(&tap);
-+
-+		if (result >= 0)
-+			f->f_version = inode->i_version;
-+	} else if (result == -E_NO_NEIGHBOR || result == -ENOENT)
-+		result = 0;
-+	reiser4_tap_done(&tap);
-+	reiser4_detach_fsdata(f);
-+
-+	/* try to update directory's atime */
-+	if (reiser4_grab_space_force(inode_file_plugin(inode)->estimate.update(inode),
-+				     BA_CAN_COMMIT) != 0)
-+		/* cast keeps the argument in step with %llu, as done for
-+		 * the same value elsewhere in this code */
-+		warning("", "failed to update atime on readdir: %llu",
-+			(unsigned long long)get_inode_oid(inode));
-+	else
-+		file_accessed(f);
-+
-+	context_set_commit_async(ctx);
-+	reiser4_exit_context(ctx);
-+
-+	return (result <= 0) ? result : 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/file_plugin_common.c linux-2.6.24/fs/reiser4/plugin/file_plugin_common.c
---- linux-2.6.24.orig/fs/reiser4/plugin/file_plugin_common.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/file_plugin_common.c 2008-01-25 11:55:43.900543447 +0300
-@@ -0,0 +1,1009 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ reiser4/README */
-+
-+/* this file contains typical implementations for most of methods of
-+ file plugin
-+*/
-+
-+#include "../inode.h"
-+#include "object.h"
-+#include "../safe_link.h"
-+
-+#include <linux/quotaops.h>
-+
-+static int insert_new_sd(struct inode *inode);
-+static int update_sd(struct inode *inode);
-+
-+/* Common write_sd_by_inode method of the file plugin.
-+
-+   Inserts stat data when the inode is still flagged REISER4_NO_SD and updates
-+   the existing stat data otherwise. Failures are logged, except for
-+   -ENAMETOOLONG and -ENOMEM, which are passed back silently. Returns the
-+   result of the insert/update.
-+ */
-+int write_sd_by_inode_common(struct inode *inode /* object to save */ )
-+{
-+	int rc;
-+
-+	assert("nikita-730", inode != NULL);
-+
-+	/* object without stat-data yet gets one inserted */
-+	rc = reiser4_inode_get_flag(inode, REISER4_NO_SD) ?
-+		insert_new_sd(inode) : update_sd(inode);
-+
-+	switch (rc) {
-+	case 0:
-+	case -ENAMETOOLONG:
-+	case -ENOMEM:
-+		/* success, or an error we do not warn about */
-+		break;
-+	default:
-+		warning("nikita-2221", "Failed to save sd for %llu: %i",
-+			(unsigned long long)get_inode_oid(inode), rc);
-+		break;
-+	}
-+	return rc;
-+}
-+
-+/* this is common implementation of key_by_inode method of file plugin
-+
-+ Builds in @key the key of the file body at offset @off: locality, ordering
-+ and objectid are taken from @inode, the type is KEY_BODY_MINOR and the
-+ offset is @off. Always returns 0.
-+ */
-+int
-+key_by_inode_and_offset_common(struct inode *inode, loff_t off,
-+ reiser4_key * key)
-+{
-+ reiser4_key_init(key);
-+ set_key_locality(key, reiser4_inode_data(inode)->locality_id);
-+ set_key_ordering(key, get_inode_ordering(inode));
-+ set_key_objectid(key, get_inode_oid(inode)); /*FIXME: inode->i_ino */
-+ set_key_type(key, KEY_BODY_MINOR);
-+ /* @off is signed; it is stored in the key as an unsigned 64-bit value */
-+ set_key_offset(key, (__u64) off);
-+ return 0;
-+}
-+
-+/* this is common implementation of set_plug_in_inode method of file plugin
-+
-+ Initialises a new in-memory inode @object from @data and @parent: mode,
-+ owner, group, timestamps, link count (0), an empty stat-data seal and the
-+ mask of stat-data extensions. The inode is flagged REISER4_NO_SD. Always
-+ returns 0.
-+ */
-+int set_plug_in_inode_common(struct inode *object /* inode to set plugin on */ ,
-+ struct inode *parent /* parent object */ ,
-+ reiser4_object_create_data * data /* creational
-+ * data */ )
-+{
-+ __u64 mask;
-+
-+ object->i_mode = data->mode;
-+ /* this should be plugin decision */
-+ object->i_uid = current->fsuid;
-+ object->i_mtime = object->i_atime = object->i_ctime = CURRENT_TIME;
-+
-+ /* support for BSD style group-id assignment. See mount's manual page
-+ description of bsdgroups ext2 mount options for more details */
-+ if (reiser4_is_set(object->i_sb, REISER4_BSD_GID))
-+ object->i_gid = parent->i_gid;
-+ else if (parent->i_mode & S_ISGID) {
-+ /* parent directory has sguid bit */
-+ object->i_gid = parent->i_gid;
-+ if (S_ISDIR(object->i_mode))
-+ /* sguid is inherited by sub-directories */
-+ object->i_mode |= S_ISGID;
-+ } else
-+ object->i_gid = current->fsgid;
-+
-+ /* this object doesn't have stat-data yet */
-+ reiser4_inode_set_flag(object, REISER4_NO_SD);
-+#if 0
-+ /* this is now called after all inode plugins are initialized:
-+ do_create_vfs_child after adjust_to_parent */
-+ /* setup inode and file-operations for this inode */
-+ setup_inode_ops(object, data);
-+#endif
-+ object->i_nlink = 0;
-+ reiser4_seal_init(&reiser4_inode_data(object)->sd_seal, NULL, NULL);
-+ /* stat-data extensions every new object gets */
-+ mask = (1 << UNIX_STAT) | (1 << LIGHT_WEIGHT_STAT);
-+ /* large time stamps are added unless 32 bit times were requested */
-+ if (!reiser4_is_set(object->i_sb, REISER4_32_BIT_TIMES))
-+ mask |= (1 << LARGE_TIMES_STAT);
-+
-+ reiser4_inode_data(object)->extmask = mask;
-+ return 0;
-+}
-+
-+/* this is common implementation of adjust_to_parent method of file plugin for
-+   regular files
-+
-+   Inherits the file, stat-data, formatting and permission plugins that
-+   @object is missing from @parent (or from @root when @parent is NULL).
-+
-+   Returns 0 on success or the first non-zero result of grab_plugin_pset().
-+   Such failures used to be dropped silently; they are now reported to the
-+   caller, as adjust_to_parent_common_dir() already does.
-+ */
-+int adjust_to_parent_common(struct inode *object /* new object */ ,
-+			    struct inode *parent /* parent directory */ ,
-+			    struct inode *root /* root directory */ )
-+{
-+	int result;
-+
-+	assert("nikita-2165", object != NULL);
-+	if (parent == NULL)
-+		parent = root;
-+	assert("nikita-2069", parent != NULL);
-+
-+	/*
-+	 * inherit missing plugins from parent, stopping at the first failure
-+	 */
-+	result = grab_plugin_pset(object, parent, PSET_FILE);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_SD);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_FORMATTING);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_PERM);
-+	return result;
-+}
-+
-+/* Common adjust_to_parent method of the file plugin for typical directories.
-+
-+   Walks over every plugin set member below PSET_LAST and inherits it from
-+   @parent (or from @root when @parent is NULL). Stops at, and returns, the
-+   first non-zero result of grab_plugin_pset(); returns 0 otherwise.
-+ */
-+int adjust_to_parent_common_dir(struct inode *object /* new object */ ,
-+				struct inode *parent /* parent directory */ ,
-+				struct inode *root /* root directory */ )
-+{
-+	pset_member slot;
-+	int rc;
-+
-+	assert("nikita-2166", object != NULL);
-+	if (parent == NULL)
-+		parent = root;
-+	assert("nikita-2167", parent != NULL);
-+
-+	/* inherit missing plugins from parent until one of them fails */
-+	rc = 0;
-+	for (slot = 0; rc == 0 && slot < PSET_LAST; ++slot)
-+		rc = grab_plugin_pset(object, parent, slot);
-+	return rc;
-+}
-+
-+/* adjust_to_parent method for cryptcompress files.
-+
-+   Runs adjust_to_parent_common() first and then inherits the cluster, cipher,
-+   digest, compression and compression-mode plugins from @parent.
-+
-+   Returns 0 on success, the error of adjust_to_parent_common(), or the first
-+   non-zero result of grab_plugin_pset(). The latter used to be dropped
-+   silently; it is now reported to the caller.
-+ */
-+int adjust_to_parent_cryptcompress(struct inode *object /* new object */ ,
-+				   struct inode *parent /* parent directory */,
-+				   struct inode *root /* root directory */)
-+{
-+	int result;
-+
-+	result = adjust_to_parent_common(object, parent, root);
-+	if (result)
-+		return result;
-+	/* unlike the common method, no fall-back to @root is made here */
-+	assert("edward-1416", parent != NULL);
-+
-+	result = grab_plugin_pset(object, parent, PSET_CLUSTER);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_CIPHER);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_DIGEST);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent, PSET_COMPRESSION);
-+	if (result == 0)
-+		result = grab_plugin_pset(object, parent,
-+					  PSET_COMPRESSION_MODE);
-+	return result;
-+}
-+
-+/* Common create_object method of the file plugin.
-+
-+   Grabs the disk space estimated by estimate_create_common() and then writes
-+   the stat data of @object. Returns -ENOSPC when the space cannot be grabbed,
-+   otherwise the result of write_sd_by_inode_common().
-+ */
-+int reiser4_create_object_common(struct inode *object, struct inode *parent,
-+				 reiser4_object_create_data * data)
-+{
-+	assert("nikita-744", object != NULL);
-+	assert("nikita-745", parent != NULL);
-+	assert("nikita-747", data != NULL);
-+	assert("nikita-748", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+	if (reiser4_grab_space(estimate_create_common(object),
-+			       BA_CAN_COMMIT) != 0)
-+		return RETERR(-ENOSPC);
-+
-+	return write_sd_by_inode_common(object);
-+}
-+
-+static int common_object_delete_no_reserve(struct inode *inode);
-+
-+/**
-+ * reiser4_delete_object_common - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * Common implementation of the delete_object method of file_plugin. It suits
-+ * objects whose deletion consists of removing two items: stat data and
-+ * safe-link. Nothing is done for an inode that has no stat data yet.
-+ *
-+ * Returns 0, -ENOSPC when space for the removal cannot be grabbed, or the
-+ * result of common_object_delete_no_reserve().
-+ */
-+int reiser4_delete_object_common(struct inode *inode)
-+{
-+	reiser4_block_nr needed;
-+
-+	assert("nikita-1477", inode != NULL);
-+	/* FIXME: if file body deletion failed (i/o error, for instance),
-+	   inode->i_size can be != 0 here */
-+	assert("nikita-3420", inode->i_size == 0 || S_ISLNK(inode->i_mode));
-+	assert("nikita-3421", inode->i_nlink == 0);
-+
-+	/* no stat data on disk: nothing to remove */
-+	if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
-+		return 0;
-+
-+	/* space for removing 2 items from the tree: stat data and
-+	   safe-link */
-+	needed = 2 * estimate_one_item_removal(reiser4_tree_by_inode(inode));
-+	if (reiser4_grab_space_force(needed, BA_RESERVED | BA_CAN_COMMIT))
-+		return RETERR(-ENOSPC);
-+
-+	return common_object_delete_no_reserve(inode);
-+}
-+
-+/**
-+ * reiser4_delete_dir_common - delete_object of file_plugin
-+ * @inode: inode to be deleted
-+ *
-+ * Common implementation of the delete_object method of file_plugin for a
-+ * typical directory. Kills readdir cursors attached to @inode, grabs space
-+ * for removing two items, calls the done method of the dir_plugin (which
-+ * removes ".") and finally removes stat data and safe-link.
-+ *
-+ * Returns -ENOSPC when space cannot be grabbed, the error of ->done(), or
-+ * the result of common_object_delete_no_reserve().
-+ */
-+int reiser4_delete_dir_common(struct inode *inode)
-+{
-+	dir_plugin *dplug;
-+	reiser4_block_nr needed;
-+	int rc;
-+
-+	assert("", (get_current_context() &&
-+		    get_current_context()->trans->atom == NULL));
-+
-+	dplug = inode_dir_plugin(inode);
-+	assert("vs-1101", dplug && dplug->done);
-+
-+	/* kill cursors which might be attached to inode */
-+	reiser4_kill_cursors(inode);
-+
-+	/* grab space enough for removing two items */
-+	needed = 2 * estimate_one_item_removal(reiser4_tree_by_inode(inode));
-+	if (reiser4_grab_space(needed, BA_RESERVED | BA_CAN_COMMIT))
-+		return RETERR(-ENOSPC);
-+
-+	rc = dplug->done(inode);
-+	if (rc != 0)
-+		return rc;
-+	return common_object_delete_no_reserve(inode);
-+}
-+
-+/* this is common implementation of add_link method of file plugin
-+
-+ Increments the link count of @object and stamps its ctime with the current
-+ time. @parent is not used. Always returns 0.
-+ */
-+int reiser4_add_link_common(struct inode *object, struct inode *parent)
-+{
-+ /*
-+ * increment ->i_nlink and update ->i_ctime
-+ */
-+
-+ INODE_INC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* this is common implementation of rem_link method of file plugin
-+
-+ Decrements the link count of @object (asserted to be positive) and stamps
-+ its ctime with the current time. @parent is not used. Always returns 0.
-+ */
-+int reiser4_rem_link_common(struct inode *object, struct inode *parent)
-+{
-+ assert("nikita-2021", object != NULL);
-+ assert("nikita-2163", object->i_nlink > 0);
-+
-+ /*
-+ * decrement ->i_nlink and update ->i_ctime
-+ */
-+
-+ INODE_DEC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* this is common implementation of rem_link method of file plugin for typical
-+ directory
-+
-+ Decrements the link count of @object; when that leaves exactly one link,
-+ the count is decremented once more so that it ends at 0. ctime is set to
-+ the current time. @parent is not used. Always returns 0.
-+*/
-+int rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG)
-+{
-+ assert("nikita-20211", object != NULL);
-+ assert("nikita-21631", object->i_nlink > 0);
-+
-+ /*
-+ * decrement ->i_nlink and update ->i_ctime
-+ */
-+ INODE_DEC_FIELD(object, i_nlink);
-+ /* NOTE(review): presumably the last remaining link is the directory's
-+ * own "." entry, hence the extra decrement -- confirm */
-+ if (object->i_nlink == 1)
-+ INODE_DEC_FIELD(object, i_nlink);
-+ object->i_ctime = CURRENT_TIME;
-+ return 0;
-+}
-+
-+/* Common owns_item method of the file plugin.
-+
-+   Returns 1 when @coord points to an existing item whose key carries the same
-+   objectid as the stat-data key of @inode, and 0 otherwise. */
-+int owns_item_common(const struct inode *inode,	/* object to check
-+						 * against */
-+		     const coord_t * coord /* coord to check */ )
-+{
-+	reiser4_key file_key;
-+	reiser4_key item_key;
-+
-+	assert("nikita-760", inode != NULL);
-+	assert("nikita-761", coord != NULL);
-+
-+	/* nothing can be owned at a coord without an item */
-+	if (!coord_is_existing_item(coord))
-+		return 0;
-+
-+	return get_key_objectid(build_sd_key(inode, &file_key)) ==
-+	       get_key_objectid(item_key_by_coord(coord, &item_key));
-+}
-+
-+/* Common owns_item method of the file plugin for a typical directory.
-+
-+   An item of the directory-entry group belongs to @inode when the locality of
-+   its key equals the oid of @inode; any other item is checked with
-+   owns_item_common().
-+*/
-+int owns_item_common_dir(const struct inode *inode,	/* object to check against */
-+			 const coord_t * coord /* coord of item to check */ )
-+{
-+	reiser4_key item_key;
-+
-+	assert("nikita-1335", inode != NULL);
-+	assert("nikita-1334", coord != NULL);
-+
-+	/* not a directory entry: fall back to the objectid comparison */
-+	if (!plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE))
-+		return owns_item_common(inode, coord);
-+
-+	return get_key_locality(item_key_by_coord(coord, &item_key)) ==
-+	       get_inode_oid(inode);
-+}
-+
-+/* Common can_add_link method of the file plugin.
-+
-+   Tells whether one more hard link to @object can be added: returns 0 when
-+   incrementing ->i_nlink would wrap around to zero, and 1 otherwise.
-+*/
-+int can_add_link_common(const struct inode *object /* object to check */ )
-+{
-+	assert("nikita-732", object != NULL);
-+
-+	/* inode->i_nlink is unsigned int, so an overflow shows up as the
-+	   incremented value being zero */
-+	if (object->i_nlink + 1 == 0)
-+		return 0;
-+	return 1;
-+}
-+
-+/* Common can_rem_link method of the file plugin for a typical directory.
-+
-+   Returns 1 when is_dir_empty() returns 0 for @inode, and 0 otherwise.
-+*/
-+int can_rem_link_common_dir(const struct inode *inode)
-+{
-+	/* is_dir_empty() returns 0 if dir is empty */
-+	return is_dir_empty(inode) == 0;
-+}
-+
-+/* this is common implementation of detach method of file plugin for typical
-+ directory
-+
-+ Delegates to the detach method of the directory plugin of @child and
-+ returns its result.
-+*/
-+int reiser4_detach_common_dir(struct inode *child, struct inode *parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(child);
-+ assert("nikita-2883", dplug != NULL);
-+ assert("nikita-2884", dplug->detach != NULL);
-+ return dplug->detach(child, parent);
-+}
-+
-+/* this is common implementation of bind method of file plugin for typical
-+ directory
-+
-+ Delegates to the attach method of the directory plugin of @child and
-+ returns its result.
-+*/
-+int reiser4_bind_common_dir(struct inode *child, struct inode *parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(child);
-+ assert("nikita-2646", dplug != NULL);
-+ /* NOTE(review): unlike the detach counterpart, ->attach itself is not
-+ * asserted to be non-NULL before the call */
-+ return dplug->attach(child, parent);
-+}
-+
-+static int process_truncate(struct inode *, __u64 size);
-+
-+/* Common safelink method of the file plugin.
-+
-+   SAFE_UNLINK needs no work here and yields 0, SAFE_TRUNCATE is handed to
-+   process_truncate() with @value as the size, and any other @link type is
-+   logged and rejected with -EIO.
-+ */
-+int safelink_common(struct inode *object, reiser4_safe_link_t link, __u64 value)
-+{
-+	assert("vs-1705", get_current_context()->trans->atom == NULL);
-+
-+	if (link == SAFE_UNLINK)
-+		/* nothing to do. iput() in the caller (process_safelink) will
-+		 * finish with file */
-+		return 0;
-+
-+	if (link == SAFE_TRUNCATE)
-+		return process_truncate(object, value);
-+
-+	warning("nikita-3438", "Unrecognized safe-link type: %i", link);
-+	return RETERR(-EIO);
-+}
-+
-+/* this is common implementation of estimate.create method of file plugin
-+ can be used when object creation involves insertion of one item (usually stat
-+ data) into tree
-+
-+ Returns the estimate for inserting one item into the tree of @object.
-+*/
-+reiser4_block_nr estimate_create_common(const struct inode * object)
-+{
-+ return estimate_one_insert_item(reiser4_tree_by_inode(object));
-+}
-+
-+/* this is common implementation of estimate.create method of file plugin for
-+ typical directory
-+ can be used when directory creation involves insertion of two items (usually
-+ stat data and item containing "." and "..") into tree
-+
-+ Returns twice the estimate for inserting one item into the tree of @object.
-+*/
-+reiser4_block_nr estimate_create_common_dir(const struct inode * object)
-+{
-+ return 2 * estimate_one_insert_item(reiser4_tree_by_inode(object));
-+}
-+
-+/* this is common implementation of estimate.update method of file plugin
-+ can be used when stat data update does not do more than inserting a unit
-+ into a stat data item which is probably true for most cases
-+
-+ Returns the estimate for one insertion into an existing item in the tree of
-+ @inode.
-+*/
-+reiser4_block_nr estimate_update_common(const struct inode * inode)
-+{
-+ return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
-+}
-+
-+/* this is common implementation of estimate.unlink method of file plugin
-+
-+ Both arguments are ignored; the estimate is always 0.
-+ */
-+reiser4_block_nr
-+estimate_unlink_common(const struct inode * object UNUSED_ARG,
-+ const struct inode * parent UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* this is common implementation of estimate.unlink method of file plugin for
-+ typical directory
-+
-+ Delegates to the estimate.unlink method of the directory plugin of @object
-+ and returns its result.
-+*/
-+reiser4_block_nr
-+estimate_unlink_common_dir(const struct inode * object,
-+ const struct inode * parent)
-+{
-+ dir_plugin *dplug;
-+
-+ dplug = inode_dir_plugin(object);
-+ assert("nikita-2888", dplug != NULL);
-+ assert("nikita-2887", dplug->estimate.unlink != NULL);
-+ return dplug->estimate.unlink(object, parent);
-+}
-+
-+/* Thin wrapper: returns the result of build_inode_onwire(@inode, @start).
-+ * Presumably this serialises @inode into the buffer at @start -- the helper
-+ * is not visible here, TODO confirm. */
-+char *wire_write_common(struct inode *inode, char *start)
-+{
-+ return build_inode_onwire(inode, start);
-+}
-+
-+/* With a non-NULL @obj the key id found at @addr is extracted into
-+ * @obj->u.std.key_id; with a NULL @obj the key id is only located. In both
-+ * cases the pointer returned by the respective helper is passed back. */
-+char *wire_read_common(char *addr, reiser4_object_on_wire * obj)
-+{
-+	if (obj != NULL)
-+		return extract_obj_key_id_from_onwire(addr,
-+						      &obj->u.std.key_id);
-+	return locate_obj_key_id_onwire(addr);
-+}
-+
-+/* Turns the key id stored in @obj into a dentry on @sb.
-+ *
-+ * Returns an anonymous dentry for the inode found at that key, with ->d_op
-+ * set to the dentry operations of @sb. On failure an error pointer is
-+ * returned: -ESTALE when no inode exists at the key, -ENOMEM when the dentry
-+ * cannot be allocated, or the error of reiser4_iget(). */
-+struct dentry *wire_get_common(struct super_block *sb,
-+			       reiser4_object_on_wire * obj)
-+{
-+	struct inode *inode;
-+	struct dentry *dentry;
-+	reiser4_key key;
-+
-+	extract_key_from_id(&obj->u.std.key_id, &key);
-+	inode = reiser4_iget(sb, &key, 1);
-+	if (IS_ERR(inode)) {
-+		if (PTR_ERR(inode) == -ENOENT)
-+			/*
-+			 * inode wasn't found at the key encoded in the file
-+			 * handle. Hence, file handle is stale.
-+			 */
-+			return ERR_PTR(RETERR(-ESTALE));
-+		/* hand any other error pointer through unchanged */
-+		return (void *)inode;
-+	}
-+
-+	reiser4_iget_complete(inode);
-+	dentry = d_alloc_anon(inode);
-+	if (dentry == NULL) {
-+		/* drop the inode reference taken by reiser4_iget() */
-+		iput(inode);
-+		return ERR_PTR(-ENOMEM);
-+	}
-+	dentry->d_op = &get_super_private(sb)->ops.dentry;
-+	return dentry;
-+}
-+
-+/* Thin wrapper: returns the result of inode_onwire_size(@inode). */
-+int wire_size_common(struct inode *inode)
-+{
-+ return inode_onwire_size(inode);
-+}
-+
-+/* Intentionally empty: this implementation does nothing with @obj. */
-+void wire_done_common(reiser4_object_on_wire * obj)
-+{
-+ /* nothing to do */
-+}
-+
-+/* Helper that reports an error for a key: logs the objectid of @key together
-+   with @code and prints the key itself. -ENOMEM is not reported. @inode is
-+   not used. */
-+static void key_warning(const reiser4_key * key /* key to print */ ,
-+			const struct inode *inode,
-+			int code /* error code to print */ )
-+{
-+	assert("nikita-716", key != NULL);
-+
-+	/* out-of-memory is not worth a message */
-+	if (code == -ENOMEM)
-+		return;
-+
-+	warning("nikita-717", "Error for inode %llu (%i)",
-+		(unsigned long long)get_key_objectid(key), code);
-+	reiser4_print_key("for key", key);
-+}
-+
-+/* NIKITA-FIXME-HANS: perhaps this function belongs in another file? */
-+#if REISER4_DEBUG
-+/* Debugging check (compiled only with REISER4_DEBUG): asserts that the unit
-+ * at @coord has key @key and that @key carries the oid of @inode. */
-+static void
-+check_inode_seal(const struct inode *inode,
-+ const coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key unit_key;
-+
-+ unit_key_by_coord(coord, &unit_key);
-+ assert("nikita-2752",
-+ WITH_DATA_RET(coord->node, 1, keyeq(key, &unit_key)));
-+ assert("nikita-2753", get_inode_oid(inode) == get_key_objectid(key));
-+}
-+
-+/* Debugging check (compiled only with REISER4_DEBUG): verifies that @coord
-+ * points to an existing stat-data unit on the leaf level whose key is @key.
-+ * On mismatch the key and coord are printed and impossible() is raised. The
-+ * check is skipped silently when the node cannot be loaded. */
-+static void check_sd_coord(coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key ukey;
-+
-+ coord_clear_iplug(coord);
-+ /* could not load the node: nothing can be checked */
-+ if (zload(coord->node))
-+ return;
-+
-+ if (!coord_is_existing_unit(coord) ||
-+ !item_plugin_by_coord(coord) ||
-+ !keyeq(unit_key_by_coord(coord, &ukey), key) ||
-+ (znode_get_level(coord->node) != LEAF_LEVEL) ||
-+ !item_is_statdata(coord)) {
-+ warning("nikita-1901", "Conspicuous seal");
-+ reiser4_print_key("key", key);
-+ print_coord("coord", coord, 1);
-+ impossible("nikita-2877", "no way");
-+ }
-+ zrelse(coord->node);
-+}
-+
-+#else
-+#define check_inode_seal(inode, coord, key) noop
-+#define check_sd_coord(coord, key) noop
-+#endif
-+
-+/* insert new stat-data into tree. Called with inode state
-+ locked. Return inode state locked.
-+
-+ NOTE(review): the body takes and drops the inode spin lock itself, so the
-+ locking statement above looks stale -- confirm which lock is meant.
-+
-+ Steps: compute the stat-data length, allocate an oid for the inode, insert
-+ an item of that length at the stat-data key and let the stat-data plugin
-+ save the inode into it. On success REISER4_NO_SD is cleared,
-+ REISER4_SDLEN_KNOWN is set and the stat-data seal and coord of the inode
-+ are initialised.
-+
-+ Returns 0 on success, -ENAMETOOLONG when the stat data exceeds the maximal
-+ item size, -EOVERFLOW when no oid could be allocated, the error of
-+ insert_by_key()/zload(), or -ENOMEM/-EIO when saving failed. */
-+static int insert_new_sd(struct inode *inode /* inode to create sd for */ )
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t coord;
-+ reiser4_item_data data;
-+ char *area;
-+ reiser4_inode *ref;
-+ lock_handle lh;
-+ oid_t oid;
-+
-+ assert("nikita-723", inode != NULL);
-+ assert("nikita-3406", reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ ref = reiser4_inode_data(inode);
-+ spin_lock_inode(inode);
-+
-+ if (ref->plugin_mask != 0)
-+ /* inode has non-standard plugins */
-+ inode_set_extension(inode, PLUGIN_STAT);
-+ /*
-+ * prepare specification of new item to be inserted
-+ */
-+
-+ data.iplug = inode_sd_plugin(inode);
-+ data.length = data.iplug->s.sd.save_len(inode);
-+ spin_unlock_inode(inode);
-+
-+ /* no body is supplied with the item; it is filled in by ->save()
-+ * after the insertion */
-+ data.data = NULL;
-+ data.user = 0;
-+/* could be optimized for case where there is only one node format in
-+ * use in the filesystem, probably there are lots of such
-+ * places we could optimize for only one node layout.... -Hans */
-+ if (data.length > reiser4_tree_by_inode(inode)->nplug->max_item_size()){
-+ /* This is silly check, but we don't know actual node where
-+ insertion will go into. */
-+ return RETERR(-ENAMETOOLONG);
-+ }
-+ oid = oid_allocate(inode->i_sb);
-+/* NIKITA-FIXME-HANS: what is your opinion on whether this error check should be encapsulated into oid_allocate? */
-+ if (oid == ABSOLUTE_MAX_OID)
-+ return RETERR(-EOVERFLOW);
-+
-+ /* NOTE(review): the oid is not given back on the failure paths below
-+ * -- confirm whether that is intended */
-+ set_inode_oid(inode, oid);
-+
-+ coord_init_zero(&coord);
-+ init_lh(&lh);
-+
-+ result = insert_by_key(reiser4_tree_by_inode(inode),
-+ build_sd_key(inode, &key), &data, &coord, &lh,
-+ /* stat data lives on a leaf level */
-+ LEAF_LEVEL, CBK_UNIQUE);
-+
-+ /* we don't want to re-check that somebody didn't insert
-+ stat-data while we were doing io, because if it did,
-+ insert_by_key() returned error. */
-+ /* but what _is_ possible is that plugin for inode's stat-data,
-+ list of non-standard plugins or their state would change
-+ during io, so that stat-data wouldn't fit into sd. To avoid
-+ this race we keep inode_state lock. This lock has to be
-+ taken each time you access inode in a way that would cause
-+ changes in sd size: changing plugins etc.
-+ */
-+
-+ if (result == IBK_INSERT_OK) {
-+ coord_clear_iplug(&coord);
-+ result = zload(coord.node);
-+ if (result == 0) {
-+ /* have we really inserted stat data? */
-+ assert("nikita-725", item_is_statdata(&coord));
-+
-+ /* inode was just created. It is inserted into hash
-+ table, but no directory entry was yet inserted into
-+ parent. So, inode is inaccessible through
-+ ->lookup(). All places that directly grab inode
-+ from hash-table (like old knfsd), should check
-+ IMMUTABLE flag that is set by common_create_child.
-+ */
-+ assert("nikita-3240", data.iplug != NULL);
-+ assert("nikita-3241", data.iplug->s.sd.save != NULL);
-+ area = item_body_by_coord(&coord);
-+ result = data.iplug->s.sd.save(inode, &area);
-+ /* the node is marked dirty whether or not ->save() succeeded */
-+ znode_make_dirty(coord.node);
-+ if (result == 0) {
-+ /* object has stat-data now */
-+ reiser4_inode_clr_flag(inode, REISER4_NO_SD);
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+ /* initialise stat-data seal */
-+ reiser4_seal_init(&ref->sd_seal, &coord, &key);
-+ ref->sd_coord = coord;
-+ check_inode_seal(inode, &coord, &key);
-+ } else if (result != -ENOMEM)
-+ /*
-+ * convert any other error code to -EIO to
-+ * avoid confusing user level with unexpected
-+ * errors.
-+ */
-+ result = RETERR(-EIO);
-+ zrelse(coord.node);
-+ }
-+ }
-+ done_lh(&lh);
-+
-+ if (result != 0)
-+ key_warning(&key, inode, result);
-+ else
-+ oid_count_allocated();
-+
-+ return result;
-+}
-+
-+/* find sd of inode in a tree, deal with errors
-+
-+ Looks up @key exactly (FIND_EXACT) on the leaf level of the tree of @inode,
-+ taking a lock of mode @lock_mode. On success @coord and @lh describe the
-+ item found. Unless @silent is non-zero, a failure is reported through
-+ key_warning(). Returns the result of coord_by_key(). */
-+int lookup_sd(struct inode *inode /* inode to look sd for */ ,
-+ znode_lock_mode lock_mode /* lock mode */ ,
-+ coord_t * coord /* resulting coord */ ,
-+ lock_handle * lh /* resulting lock handle */ ,
-+ const reiser4_key * key /* resulting key */ ,
-+ int silent)
-+{
-+ int result;
-+ __u32 flags;
-+
-+ assert("nikita-1692", inode != NULL);
-+ assert("nikita-1693", coord != NULL);
-+ assert("nikita-1694", key != NULL);
-+
-+ /* look for the object's stat data in a tree.
-+ This returns in "node" pointer to a locked znode and in "pos"
-+ position of an item found in node. Both are only valid if
-+ coord_found is returned. */
-+ /* a write-locking lookup is additionally flagged CBK_FOR_INSERT */
-+ flags = (lock_mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
-+ flags |= CBK_UNIQUE;
-+ /*
-+ * traverse tree to find stat data. We cannot use vroot here, because
-+ * it only covers _body_ of the file, and stat data don't belong
-+ * there.
-+ */
-+ result = coord_by_key(reiser4_tree_by_inode(inode),
-+ key,
-+ coord,
-+ lh,
-+ lock_mode,
-+ FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, flags, NULL);
-+ if (REISER4_DEBUG && result == 0)
-+ check_sd_coord(coord, key);
-+
-+ if (result != 0 && !silent)
-+ key_warning(key, inode, result);
-+ return result;
-+}
-+
-+/* Find and write-lock the stat data of @inode.
-+ *
-+ * @key is filled with the stat-data key. The coord and seal cached in the
-+ * inode are tried first; when no seal is set or its validation fails, a
-+ * regular tree lookup (lookup_sd(), not silent) is done instead. On success
-+ * @coord and @lh describe the stat-data item. Returns 0 or an error code. */
-+static int
-+locate_inode_sd(struct inode *inode,
-+ reiser4_key * key, coord_t * coord, lock_handle * lh)
-+{
-+ reiser4_inode *state;
-+ seal_t seal;
-+ int result;
-+
-+ assert("nikita-3483", inode != NULL);
-+
-+ state = reiser4_inode_data(inode);
-+ /* take a private copy of the cached coord and seal under the lock */
-+ spin_lock_inode(inode);
-+ *coord = state->sd_coord;
-+ coord_clear_iplug(coord);
-+ seal = state->sd_seal;
-+ spin_unlock_inode(inode);
-+
-+ build_sd_key(inode, key);
-+ if (reiser4_seal_is_set(&seal)) {
-+ /* first, try to use seal */
-+ result = reiser4_seal_validate(&seal,
-+ coord,
-+ key,
-+ lh, ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (result == 0)
-+ check_sd_coord(coord, key);
-+ } else
-+ /* no seal: force the tree lookup below */
-+ result = -E_REPEAT;
-+
-+ if (result != 0) {
-+ coord_init_zero(coord);
-+ result = lookup_sd(inode, ZNODE_WRITE_LOCK, coord, lh, key, 0);
-+ }
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+/* Returns non-zero when @k1 and @k2 agree in locality, type, band, ordering
-+ * and objectid; the offset is deliberately not compared. */
-+static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
-+{
-+ return (get_key_locality(k1) == get_key_locality(k2) &&
-+ get_key_type(k1) == get_key_type(k2) &&
-+ get_key_band(k1) == get_key_band(k2) &&
-+ get_key_ordering(k1) == get_key_ordering(k2) &&
-+ get_key_objectid(k1) == get_key_objectid(k2));
-+}
-+
-+#include "../tree_walk.h"
-+
-+/* make some checks before and after stat-data resize operation
-+
-+   Debugging helper used inside assertions. It only acts for cryptcompress
-+   files, a non-zero @length and a stat-data item at position 0 of its node.
-+   In that case the last unit of the left neighbour is inspected: if its key
-+   equals the key at @coord in everything but the offset, corruption is
-+   reported.
-+
-+   Returns 0 when nothing suspicious was found or the check could not be
-+   made, 1 on detected corruption, or a negative error code. @progress is
-+   informational only (1 means after resize). */
-+static int check_sd_resize(struct inode * inode, coord_t * coord,
-+			   int length, int progress /* 1 means after resize */)
-+{
-+	int ret = 0;
-+	lock_handle left_lock;
-+	coord_t left_coord;
-+	reiser4_key left_key;
-+	reiser4_key key;
-+
-+	if (inode_file_plugin(inode) !=
-+	    file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
-+		return 0;
-+	if (!length)
-+		return 0;
-+	if (coord->item_pos != 0)
-+		return 0;
-+
-+	init_lh(&left_lock);
-+	ret = reiser4_get_left_neighbor(&left_lock,
-+					coord->node,
-+					ZNODE_WRITE_LOCK,
-+					GN_CAN_USE_UPPER_LEVELS);
-+	if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
-+	    ret == -ENOENT || ret == -EINVAL
-+	    || ret == -E_DEADLOCK) {
-+		/* neighbour is unavailable: the check cannot be made */
-+		ret = 0;
-+		goto exit;
-+	}
-+	if (ret != 0)
-+		/* any other failure: the neighbour is only guaranteed to be
-+		 * locked when 0 was returned, so left_lock.node must not be
-+		 * touched. Report the error like a zload() failure below. */
-+		goto exit;
-+	ret = zload(left_lock.node);
-+	if (ret)
-+		goto exit;
-+	coord_init_last_unit(&left_coord, left_lock.node);
-+	item_key_by_coord(&left_coord, &left_key);
-+	item_key_by_coord(coord, &key);
-+
-+	if (all_but_offset_key_eq(&key, &left_key))
-+		/* corruption occurred */
-+		ret = 1;
-+	zrelse(left_lock.node);
-+ exit:
-+	done_lh(&left_lock);
-+	return ret;
-+}
-+#endif
-+
-+/* update stat-data at @coord
-+
-+ @coord and @lh must describe the locked stat-data item of @inode and @key
-+ its key. The set of stat-data extensions is brought in line with the
-+ plugin and heir masks of the inode; if the stat-data length is not known
-+ yet it is recomputed and the item is resized as needed. Then the inode is
-+ saved into the item, the node is marked dirty and the stat-data seal and
-+ coord cached in the inode are refreshed.
-+
-+ Returns 0 on success, or the error of zload(), reiser4_resize_item() or
-+ the stat-data ->save() method. */
-+static int
-+update_sd_at(struct inode *inode, coord_t * coord, reiser4_key * key,
-+ lock_handle * lh)
-+{
-+ int result;
-+ reiser4_item_data data;
-+ char *area;
-+ reiser4_inode *state;
-+ znode *loaded;
-+
-+ state = reiser4_inode_data(inode);
-+
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ if (result != 0)
-+ return result;
-+ /* remember which node is loaded: resize may move @coord elsewhere */
-+ loaded = coord->node;
-+
-+ spin_lock_inode(inode);
-+ assert("nikita-728", inode_sd_plugin(inode) != NULL);
-+ data.iplug = inode_sd_plugin(inode);
-+
-+ /* if inode has non-standard plugins, add appropriate stat data
-+ * extension */
-+ if (state->extmask & (1 << PLUGIN_STAT)) {
-+ if (state->plugin_mask == 0)
-+ inode_clr_extension(inode, PLUGIN_STAT);
-+ } else if (state->plugin_mask != 0)
-+ inode_set_extension(inode, PLUGIN_STAT);
-+
-+ /* same treatment for the heir extension and heir_mask */
-+ if (state->extmask & (1 << HEIR_STAT)) {
-+ if (state->heir_mask == 0)
-+ inode_clr_extension(inode, HEIR_STAT);
-+ } else if (state->heir_mask != 0)
-+ inode_set_extension(inode, HEIR_STAT);
-+
-+ /* data.length is how much space to add to (or remove
-+ from if negative) sd */
-+ if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
-+ /* recalculate stat-data length */
-+ data.length =
-+ data.iplug->s.sd.save_len(inode) -
-+ item_length_by_coord(coord);
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+ } else
-+ data.length = 0;
-+ spin_unlock_inode(inode);
-+
-+ /* if on-disk stat data is of different length than required
-+ for this inode, resize it */
-+
-+ if (data.length != 0) {
-+ data.data = NULL;
-+ data.user = 0;
-+
-+ assert("edward-1441",
-+ !check_sd_resize(inode, coord,
-+ data.length, 0/* before resize */));
-+
-+ /* insertion code requires that insertion point (coord) was
-+ * between units. */
-+ coord->between = AFTER_UNIT;
-+ result = reiser4_resize_item(coord, &data, key, lh,
-+ COPI_DONT_SHIFT_LEFT);
-+ if (result != 0) {
-+ key_warning(key, inode, result);
-+ zrelse(loaded);
-+ return result;
-+ }
-+ if (loaded != coord->node) {
-+ /* reiser4_resize_item moved coord to another node.
-+ Zload it */
-+ zrelse(loaded);
-+ coord_clear_iplug(coord);
-+ result = zload(coord->node);
-+ /* nothing is left loaded on this failure path */
-+ if (result != 0)
-+ return result;
-+ loaded = coord->node;
-+ }
-+ assert("edward-1442",
-+ !check_sd_resize(inode, coord,
-+ data.length, 1/* after resize */));
-+ }
-+ area = item_body_by_coord(coord);
-+ spin_lock_inode(inode);
-+ result = data.iplug->s.sd.save(inode, &area);
-+ /* node is marked dirty and the seal refreshed whatever ->save()
-+ * returned; its result is passed back to the caller */
-+ znode_make_dirty(coord->node);
-+
-+ /* re-initialise stat-data seal */
-+
-+ /*
-+ * coord.between was possibly skewed from AT_UNIT when stat-data size
-+ * was changed and new extensions were pasted into item.
-+ */
-+ coord->between = AT_UNIT;
-+ reiser4_seal_init(&state->sd_seal, coord, key);
-+ state->sd_coord = *coord;
-+ spin_unlock_inode(inode);
-+ check_inode_seal(inode, coord, key);
-+ zrelse(loaded);
-+ return result;
-+}
-+
-+/* Update existing stat-data in a tree. Called with inode state locked. Return
-+ inode state locked. */
-+static int update_sd(struct inode *inode /* inode to update sd for */ )
-+{
-+ int result;
-+ reiser4_key key;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ assert("nikita-726", inode != NULL);
-+
-+ /* no stat-data, nothing to update?! */
-+ assert("nikita-3482", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
-+
-+ init_lh(&lh);
-+
-+ result = locate_inode_sd(inode, &key, &coord, &lh);
-+ if (result == 0)
-+ result = update_sd_at(inode, &coord, &key, &lh);
-+ done_lh(&lh);
-+
-+ return result;
-+}
-+
-+/* helper for reiser4_delete_object_common and reiser4_delete_dir_common.
-+ Remove object stat data. Space for that must be reserved by caller before
-+*/
-+static int
-+common_object_delete_no_reserve(struct inode *inode /* object to remove */ )
-+{
-+ int result;
-+
-+ assert("nikita-1477", inode != NULL);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
-+ reiser4_key sd_key;
-+
-+ DQUOT_FREE_INODE(inode);
-+ DQUOT_DROP(inode);
-+
-+ build_sd_key(inode, &sd_key);
-+ result =
-+ reiser4_cut_tree(reiser4_tree_by_inode(inode),
-+ &sd_key, &sd_key, NULL, 0);
-+ if (result == 0) {
-+ reiser4_inode_set_flag(inode, REISER4_NO_SD);
-+ result = oid_release(inode->i_sb, get_inode_oid(inode));
-+ if (result == 0) {
-+ oid_count_released();
-+
-+ result = safe_link_del(reiser4_tree_by_inode(inode),
-+ get_inode_oid(inode),
-+ SAFE_UNLINK);
-+ }
-+ }
-+ } else
-+ result = 0;
-+ return result;
-+}
-+
-+/* helper for safelink_common */
-+static int process_truncate(struct inode *inode, __u64 size)
-+{
-+ int result;
-+ struct iattr attr;
-+ file_plugin *fplug;
-+ reiser4_context *ctx;
-+ struct dentry dentry;
-+
-+ assert("vs-21", is_in_reiser4_context());
-+ ctx = reiser4_init_context(inode->i_sb);
-+ assert("vs-22", !IS_ERR(ctx));
-+
-+ attr.ia_size = size;
-+ attr.ia_valid = ATTR_SIZE | ATTR_CTIME;
-+ fplug = inode_file_plugin(inode);
-+
-+ mutex_lock(&inode->i_mutex);
-+ assert("vs-1704", get_current_context()->trans->atom == NULL);
-+ dentry.d_inode = inode;
-+ result = inode->i_op->setattr(&dentry, &attr);
-+ mutex_unlock(&inode->i_mutex);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+
-+ return result;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/hash.c linux-2.6.24/fs/reiser4/plugin/hash.c
---- linux-2.6.24.orig/fs/reiser4/plugin/hash.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/hash.c 2008-01-25 11:39:06.996223145 +0300
-@@ -0,0 +1,353 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Hash functions */
-+
-+#include "../debug.h"
-+#include "plugin_header.h"
-+#include "plugin.h"
-+#include "../super.h"
-+#include "../inode.h"
-+
-+#include <linux/types.h>
-+
-+/* old rupasov (yura) hash */
-+static __u64 hash_rupasov(const unsigned char *name /* name to hash */ ,
-+ int len /* @name's length */ )
-+{
-+ int i;
-+ int j;
-+ int pow;
-+ __u64 a;
-+ __u64 c;
-+
-+ assert("nikita-672", name != NULL);
-+ assert("nikita-673", len >= 0);
-+
-+ for (pow = 1, i = 1; i < len; ++i)
-+ pow = pow * 10;
-+
-+ if (len == 1)
-+ a = name[0] - 48;
-+ else
-+ a = (name[0] - 48) * pow;
-+
-+ for (i = 1; i < len; ++i) {
-+ c = name[i] - 48;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+ for (; i < 40; ++i) {
-+ c = '0' - 48;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+
-+ for (; i < 256; ++i) {
-+ c = i;
-+ for (pow = 1, j = i; j < len - 1; ++j)
-+ pow = pow * 10;
-+ a = a + c * pow;
-+ }
-+
-+ a = a << 7;
-+ return a;
-+}
-+
-+/* r5 hash */
-+static __u64 hash_r5(const unsigned char *name /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ __u64 a = 0;
-+
-+ assert("nikita-674", name != NULL);
-+ assert("nikita-675", len >= 0);
-+
-+ while (*name) {
-+ a += *name << 4;
-+ a += *name >> 4;
-+ a *= 11;
-+ name++;
-+ }
-+ return a;
-+}
-+
-+/* Keyed 32-bit hash function using TEA in a Davis-Meyer function
-+ H0 = Key
-+ Hi = E Mi(Hi-1) + Hi-1
-+
-+ (see Applied Cryptography, 2nd edition, p448).
-+
-+ Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
-+
-+ Jeremy has agreed to the contents of reiserfs/README. -Hans
-+
-+ This code was blindly upgraded to __u64 by s/__u32/__u64/g.
-+*/
-+static __u64 hash_tea(const unsigned char *name /* name to hash */ ,
-+ int len /* @name's length */ )
-+{
-+ __u64 k[] = { 0x9464a485u, 0x542e1a94u, 0x3e846bffu, 0xb75bcfc3u };
-+
-+ __u64 h0 = k[0], h1 = k[1];
-+ __u64 a, b, c, d;
-+ __u64 pad;
-+ int i;
-+
-+ assert("nikita-676", name != NULL);
-+ assert("nikita-677", len >= 0);
-+
-+#define DELTA 0x9E3779B9u
-+#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
-+#define PARTROUNDS 6 /* 6 gets complete mixing */
-+
-+/* a, b, c, d - data; h0, h1 - accumulated hash */
-+#define TEACORE(rounds) \
-+ do { \
-+ __u64 sum = 0; \
-+ int n = rounds; \
-+ __u64 b0, b1; \
-+ \
-+ b0 = h0; \
-+ b1 = h1; \
-+ \
-+ do \
-+ { \
-+ sum += DELTA; \
-+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \
-+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \
-+ } while(--n); \
-+ \
-+ h0 += b0; \
-+ h1 += b1; \
-+ } while(0)
-+
-+ pad = (__u64) len | ((__u64) len << 8);
-+ pad |= pad << 16;
-+
-+ while (len >= 16) {
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
-+ 16 | (__u64) name[11] << 24;
-+ d = (__u64) name[12] | (__u64) name[13] << 8 | (__u64) name[14]
-+ << 16 | (__u64) name[15] << 24;
-+
-+ TEACORE(PARTROUNDS);
-+
-+ len -= 16;
-+ name += 16;
-+ }
-+
-+ if (len >= 12) {
-+ //assert(len < 16);
-+ if (len >= 16)
-+ *(int *)0 = 0;
-+
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+ c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
-+ 16 | (__u64) name[11] << 24;
-+
-+ d = pad;
-+ for (i = 12; i < len; i++) {
-+ d <<= 8;
-+ d |= name[i];
-+ }
-+ } else if (len >= 8) {
-+ //assert(len < 12);
-+ if (len >= 12)
-+ *(int *)0 = 0;
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+ b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
-+ 16 | (__u64) name[7] << 24;
-+
-+ c = d = pad;
-+ for (i = 8; i < len; i++) {
-+ c <<= 8;
-+ c |= name[i];
-+ }
-+ } else if (len >= 4) {
-+ //assert(len < 8);
-+ if (len >= 8)
-+ *(int *)0 = 0;
-+ a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
-+ 16 | (__u64) name[3] << 24;
-+
-+ b = c = d = pad;
-+ for (i = 4; i < len; i++) {
-+ b <<= 8;
-+ b |= name[i];
-+ }
-+ } else {
-+ //assert(len < 4);
-+ if (len >= 4)
-+ *(int *)0 = 0;
-+ a = b = c = d = pad;
-+ for (i = 0; i < len; i++) {
-+ a <<= 8;
-+ a |= name[i];
-+ }
-+ }
-+
-+ TEACORE(FULLROUNDS);
-+
-+/* return 0;*/
-+ return h0 ^ h1;
-+
-+}
-+
-+/* classical 64 bit Fowler/Noll/Vo-1 (FNV-1) hash.
-+
-+ See http://www.isthe.com/chongo/tech/comp/fnv/ for details.
-+
-+ Excerpts:
-+
-+ FNV hashes are designed to be fast while maintaining a low collision
-+ rate.
-+
-+ [This version also seems to preserve lexicographical order locally.]
-+
-+ FNV hash algorithms and source code have been released into the public
-+ domain.
-+
-+*/
-+static __u64 hash_fnv1(const unsigned char *name /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ unsigned long long a = 0xcbf29ce484222325ull;
-+ const unsigned long long fnv_64_prime = 0x100000001b3ull;
-+
-+ assert("nikita-678", name != NULL);
-+ assert("nikita-679", len >= 0);
-+
-+ /* FNV-1 hash each octet in the buffer */
-+ for (; *name; ++name) {
-+ /* multiply by the 32 bit FNV magic prime mod 2^64 */
-+ a *= fnv_64_prime;
-+ /* xor the bottom with the current octet */
-+ a ^= (unsigned long long)(*name);
-+ }
-+ /* return our new hash value */
-+ return a;
-+}
-+
-+/* degenerate hash function used to simplify testing of non-unique key
-+ handling */
-+static __u64 hash_deg(const unsigned char *name UNUSED_ARG /* name to hash */ ,
-+ int len UNUSED_ARG /* @name's length */ )
-+{
-+ return 0xc0c0c0c010101010ull;
-+}
-+
-+static int change_hash(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ int result;
-+
-+ assert("nikita-3503", inode != NULL);
-+ assert("nikita-3504", plugin != NULL);
-+
-+ assert("nikita-3505", is_reiser4_inode(inode));
-+ assert("nikita-3507", plugin->h.type_id == REISER4_HASH_PLUGIN_TYPE);
-+
-+ if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
-+ return RETERR(-EINVAL);
-+
-+ result = 0;
-+ if (inode_hash_plugin(inode) == NULL ||
-+ inode_hash_plugin(inode)->h.id != plugin->h.id) {
-+ if (is_dir_empty(inode) == 0)
-+ result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
-+ PSET_HASH, plugin);
-+ else
-+ result = RETERR(-ENOTEMPTY);
-+
-+ }
-+ return result;
-+}
-+
-+static reiser4_plugin_ops hash_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_hash
-+};
-+
-+/* hash plugins */
-+hash_plugin hash_plugins[LAST_HASH_ID] = {
-+ [RUPASOV_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = RUPASOV_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "rupasov",
-+ .desc = "Original Yura's hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_rupasov
-+ },
-+ [R5_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = R5_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "r5",
-+ .desc = "r5 hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_r5
-+ },
-+ [TEA_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = TEA_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "tea",
-+ .desc = "tea hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_tea
-+ },
-+ [FNV1_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = FNV1_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "fnv1",
-+ .desc = "fnv1 hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_fnv1
-+ },
-+ [DEGENERATE_HASH_ID] = {
-+ .h = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .id = DEGENERATE_HASH_ID,
-+ .pops = &hash_plugin_ops,
-+ .label = "degenerate hash",
-+ .desc = "Degenerate hash: only for testing",
-+ .linkage = {NULL, NULL}
-+ },
-+ .hash = hash_deg
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/inode_ops.c linux-2.6.24/fs/reiser4/plugin/inode_ops.c
---- linux-2.6.24.orig/fs/reiser4/plugin/inode_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/inode_ops.c 2008-01-25 11:39:07.000224175 +0300
-@@ -0,0 +1,897 @@
-+/*
-+ * Copyright 2005 by Hans Reiser, licensing governed by reiser4/README
-+ */
-+
-+/*
-+ * this file contains typical implementations for most of methods of struct
-+ * inode_operations
-+ */
-+
-+#include "../inode.h"
-+#include "../safe_link.h"
-+
-+#include <linux/quotaops.h>
-+#include <linux/namei.h>
-+
-+static int create_vfs_object(struct inode *parent, struct dentry *dentry,
-+ reiser4_object_create_data *data);
-+
-+/**
-+ * reiser4_create_common - create of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of new object to create
-+ * @mode: the permissions to use
-+ * @nameidata:
-+ *
-+ * This is common implementation of vfs's create method of struct
-+ * inode_operations.
-+ * Creates regular file using file plugin from parent directory plugin set.
-+ */
-+int reiser4_create_common(struct inode *parent, struct dentry *dentry,
-+ int mode, struct nameidata *nameidata)
-+{
-+ reiser4_object_create_data data;
-+ file_plugin *fplug;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = S_IFREG | mode;
-+ fplug = child_create_plugin(parent) ? : inode_create_plugin(parent);
-+ if (!plugin_of_group(fplug, REISER4_REGULAR_FILE)) {
-+ warning("vpf-1900", "'%s' is not a regular file plugin.",
-+ fplug->h.label);
-+ return RETERR(-EIO);
-+ }
-+ data.id = fplug->h.id;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+int reiser4_lookup_name(struct inode *dir, struct dentry *, reiser4_key *);
-+void check_light_weight(struct inode *inode, struct inode *parent);
-+
-+/**
-+ * reiser4_lookup_common - lookup of inode operations
-+ * @parent: inode of directory to lookup into
-+ * @dentry: name to look for
-+ * @nameidata:
-+ *
-+ * This is common implementation of vfs's lookup method of struct
-+ * inode_operations.
-+ */
-+struct dentry *reiser4_lookup_common(struct inode *parent,
-+ struct dentry *dentry,
-+ struct nameidata *nameidata)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct dentry *new;
-+ struct inode *inode;
-+ reiser4_dir_entry_desc entry;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return (struct dentry *)ctx;
-+
-+ /* set up operations on dentry. */
-+ dentry->d_op = &get_super_private(parent->i_sb)->ops.dentry;
-+
-+ result = reiser4_lookup_name(parent, dentry, &entry.key);
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ if (result == -ENOENT) {
-+ /* object not found */
-+ if (!IS_DEADDIR(parent))
-+ d_add(dentry, NULL);
-+ return NULL;
-+ }
-+ return ERR_PTR(result);
-+ }
-+
-+ inode = reiser4_iget(parent->i_sb, &entry.key, 0);
-+ if (IS_ERR(inode)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return ERR_PTR(PTR_ERR(inode));
-+ }
-+
-+ /* success */
-+ check_light_weight(inode, parent);
-+ new = d_splice_alias(inode, dentry);
-+ reiser4_iget_complete(inode);
-+
-+ /* prevent balance_dirty_pages() from being called: we don't want to
-+ * do this under directory i_mutex. */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return new;
-+}
-+
-+static reiser4_block_nr common_estimate_link(struct inode *parent,
-+ struct inode *object);
-+int reiser4_update_dir(struct inode *);
-+
-+/**
-+ * reiser4_link_common - link of inode operations
-+ * @existing: dentry of object which is to get new name
-+ * @parent: directory where new name is to be created
-+ * @newname: new name
-+ *
-+ * This is common implementation of vfs's link method of struct
-+ * inode_operations.
-+ */
-+int reiser4_link_common(struct dentry *existing, struct inode *parent,
-+ struct dentry *newname)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *object;
-+ dir_plugin *parent_dplug;
-+ reiser4_dir_entry_desc entry;
-+ reiser4_object_create_data data;
-+ reiser4_block_nr reserve;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ assert("nikita-1431", existing != NULL);
-+ assert("nikita-1432", parent != NULL);
-+ assert("nikita-1433", newname != NULL);
-+
-+ object = existing->d_inode;
-+ assert("nikita-1434", object != NULL);
-+
-+ /* check for race with create_object() */
-+ if (reiser4_inode_get_flag(object, REISER4_IMMUTABLE)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ parent_dplug = inode_dir_plugin(parent);
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = object;
-+
-+ data.mode = object->i_mode;
-+ data.id = inode_file_plugin(object)->h.id;
-+
-+ reserve = common_estimate_link(parent, existing->d_inode);
-+ if ((__s64) reserve < 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return reserve;
-+ }
-+
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOSPC);
-+ }
-+
-+ /*
-+ * Subtle race handling: sys_link() doesn't take i_mutex on @parent. It
-+ * means that link(2) can race against unlink(2) or rename(2), and
-+ * inode is dead (->i_nlink == 0) when reiser4_link() is entered.
-+ *
-+ * For such inode we have to undo special processing done in
-+ * reiser4_unlink() viz. creation of safe-link.
-+ */
-+ if (unlikely(object->i_nlink == 0)) {
-+ result = safe_link_del(reiser4_tree_by_inode(object),
-+ get_inode_oid(object), SAFE_UNLINK);
-+ if (result != 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ }
-+
-+ /* increment nlink of @existing and update its stat data */
-+ result = reiser4_add_nlink(object, parent, 1);
-+ if (result == 0) {
-+ /* add entry to the parent */
-+ result =
-+ parent_dplug->add_entry(parent, newname, &data, &entry);
-+ if (result != 0) {
-+ /* failed to add entry to the parent, decrement nlink
-+ of @existing */
-+ reiser4_del_nlink(object, parent, 1);
-+ /*
-+ * now, if that failed, we have a file with too big
-+ * nlink---space leak, much better than directory
-+ * entry pointing to nowhere
-+ */
-+ }
-+ }
-+ if (result == 0) {
-+ atomic_inc(&object->i_count);
-+ /*
-+ * Upon successful completion, link() shall mark for update
-+ * the st_ctime field of the file. Also, the st_ctime and
-+ * st_mtime fields of the directory that contains the new
-+ * entry shall be marked for update. --SUS
-+ */
-+ result = reiser4_update_dir(parent);
-+ }
-+ if (result == 0)
-+ d_instantiate(newname, existing->d_inode);
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+static int unlink_check_and_grab(struct inode *parent, struct dentry *victim);
-+
-+/**
-+ * reiser4_unlink_common - unlink of inode operations
-+ * @parent: inode of directory to remove name from
-+ * @victim: name to be removed
-+ *
-+ * This is common implementation of vfs's unlink method of struct
-+ * inode_operations.
-+ */
-+int reiser4_unlink_common(struct inode *parent, struct dentry *victim)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *object;
-+ file_plugin *fplug;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ object = victim->d_inode;
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-2882", fplug->detach != NULL);
-+
-+ result = unlink_check_and_grab(parent, victim);
-+ if (result != 0) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ result = fplug->detach(object, parent);
-+ if (result == 0) {
-+ dir_plugin *parent_dplug;
-+ reiser4_dir_entry_desc entry;
-+
-+ parent_dplug = inode_dir_plugin(parent);
-+ memset(&entry, 0, sizeof entry);
-+
-+ /* first, delete directory entry */
-+ result = parent_dplug->rem_entry(parent, victim, &entry);
-+ if (result == 0) {
-+ /*
-+ * if name was removed successfully, we _have_ to
-+ * return 0 from this function, because upper level
-+ * caller (vfs_{rmdir,unlink}) expect this.
-+ *
-+ * now that directory entry is removed, update
-+ * stat-data
-+ */
-+ reiser4_del_nlink(object, parent, 1);
-+ /*
-+ * Upon successful completion, unlink() shall mark for
-+ * update the st_ctime and st_mtime fields of the
-+ * parent directory. Also, if the file's link count is
-+ * not 0, the st_ctime field of the file shall be
-+ * marked for update. --SUS
-+ */
-+ reiser4_update_dir(parent);
-+ /* add safe-link for this file */
-+ if (object->i_nlink == 0)
-+ safe_link_add(object, SAFE_UNLINK);
-+ }
-+ }
-+
-+ if (unlikely(result != 0)) {
-+ if (result != -ENOMEM)
-+ warning("nikita-3398", "Cannot unlink %llu (%i)",
-+ (unsigned long long)get_inode_oid(object),
-+ result);
-+ /* if operation failed commit pending inode modifications to
-+ * the stat-data */
-+ reiser4_update_sd(object);
-+ reiser4_update_sd(parent);
-+ }
-+
-+ reiser4_release_reserved(object->i_sb);
-+
-+ /* @object's i_ctime was updated by ->rem_link() method(). */
-+
-+ /* @victim can be already removed from the disk by this time. Inode is
-+ then marked so that iput() wouldn't try to remove stat data. But
-+ inode itself is still there.
-+ */
-+
-+ /*
-+ * we cannot release directory semaphore here, because name has
-+ * already been deleted, but dentry (@victim) still exists. Prevent
-+ * balance_dirty_pages() from being called on exiting this context: we
-+ * don't want to do this under directory i_mutex.
-+ */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_symlink_common - symlink of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @linkname: string symlink is to contain
-+ *
-+ * This is common implementation of vfs's symlink method of struct
-+ * inode_operations.
-+ * Creates object using file plugin SYMLINK_FILE_PLUGIN_ID.
-+ */
-+int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
-+ const char *linkname)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.name = linkname;
-+ data.id = SYMLINK_FILE_PLUGIN_ID;
-+ data.mode = S_IFLNK | S_IRWXUGO;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/**
-+ * reiser4_mkdir_common - mkdir of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @mode: the permissions to use
-+ *
-+ * This is common implementation of vfs's mkdir method of struct
-+ * inode_operations.
-+ * Creates object using file plugin DIRECTORY_FILE_PLUGIN_ID.
-+ */
-+int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = S_IFDIR | mode;
-+ data.id = DIRECTORY_FILE_PLUGIN_ID;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/**
-+ * reiser4_mknod_common - mknod of inode operations
-+ * @parent: inode of parent directory
-+ * @dentry: dentry of object to be created
-+ * @mode: the permissions to use and file type
-+ * @rdev: minor and major of new device file
-+ *
-+ * This is common implementation of vfs's mknod method of struct
-+ * inode_operations.
-+ * Creates object using file plugin SPECIAL_FILE_PLUGIN_ID.
-+ */
-+int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
-+ int mode, dev_t rdev)
-+{
-+ reiser4_object_create_data data;
-+
-+ memset(&data, 0, sizeof data);
-+ data.mode = mode;
-+ data.rdev = rdev;
-+ data.id = SPECIAL_FILE_PLUGIN_ID;
-+ return create_vfs_object(parent, dentry, &data);
-+}
-+
-+/*
-+ * implementation of vfs's rename method of struct inode_operations for typical
-+ * directory is in inode_ops_rename.c
-+ */
-+
-+/**
-+ * reiser4_follow_link_common - follow_link of inode operations
-+ * @dentry: dentry of symlink
-+ * @data:
-+ *
-+ * This is common implementation of vfs's followlink method of struct
-+ * inode_operations.
-+ * Assumes that inode's i_private points to the content of symbolic link.
-+ */
-+void *reiser4_follow_link_common(struct dentry *dentry, struct nameidata *nd)
-+{
-+ assert("vs-851", S_ISLNK(dentry->d_inode->i_mode));
-+
-+ if (!dentry->d_inode->i_private
-+ || !reiser4_inode_get_flag(dentry->d_inode,
-+ REISER4_GENERIC_PTR_USED))
-+ return ERR_PTR(RETERR(-EINVAL));
-+ nd_set_link(nd, dentry->d_inode->i_private);
-+ return NULL;
-+}
-+
-+/**
-+ * reiser4_permission_common - permission of inode operations
-+ * @inode: inode to check permissions for
-+ * @mask: mode bits to check permissions for
-+ * @nameidata:
-+ *
-+ * Uses generic function to check for rwx permissions.
-+ */
-+int reiser4_permission_common(struct inode *inode, int mask,
-+ struct nameidata *nameidata)
-+{
-+ return generic_permission(inode, mask, NULL);
-+}
-+
-+static int setattr_reserve(reiser4_tree *);
-+
-+/* this is common implementation of vfs's setattr method of struct
-+ inode_operations
-+*/
-+int reiser4_setattr_common(struct dentry *dentry, struct iattr *attr)
-+{
-+ reiser4_context *ctx;
-+ struct inode *inode;
-+ int result;
-+
-+ inode = dentry->d_inode;
-+ result = inode_change_ok(inode, attr);
-+ if (result)
-+ return result;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ assert("nikita-3119", !(attr->ia_valid & ATTR_SIZE));
-+
-+ /*
-+ * grab disk space and call standard inode_setattr().
-+ */
-+ result = setattr_reserve(reiser4_tree_by_inode(inode));
-+ if (!result) {
-+ if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid)
-+ || (attr->ia_valid & ATTR_GID
-+ && attr->ia_gid != inode->i_gid)) {
-+ result = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
-+ if (result) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+ }
-+ result = inode_setattr(inode, attr);
-+ if (!result)
-+ reiser4_update_sd(inode);
-+ }
-+
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* this is common implementation of vfs's getattr method of struct
-+ inode_operations
-+*/
-+int reiser4_getattr_common(struct vfsmount *mnt UNUSED_ARG,
-+ struct dentry *dentry, struct kstat *stat)
-+{
-+ struct inode *obj;
-+
-+ assert("nikita-2298", dentry != NULL);
-+ assert("nikita-2299", stat != NULL);
-+ assert("nikita-2300", dentry->d_inode != NULL);
-+
-+ obj = dentry->d_inode;
-+
-+ stat->dev = obj->i_sb->s_dev;
-+ stat->ino = oid_to_uino(get_inode_oid(obj));
-+ stat->mode = obj->i_mode;
-+ /* don't confuse userland with huge nlink. This is not entirely
-+ * correct, because nlink_t is not necessary 16 bit signed. */
-+ stat->nlink = min(obj->i_nlink, (typeof(obj->i_nlink)) 0x7fff);
-+ stat->uid = obj->i_uid;
-+ stat->gid = obj->i_gid;
-+ stat->rdev = obj->i_rdev;
-+ stat->atime = obj->i_atime;
-+ stat->mtime = obj->i_mtime;
-+ stat->ctime = obj->i_ctime;
-+ stat->size = obj->i_size;
-+ stat->blocks =
-+ (inode_get_bytes(obj) + VFS_BLKSIZE - 1) >> VFS_BLKSIZE_BITS;
-+ /* "preferred" blocksize for efficient file system I/O */
-+ stat->blksize = get_super_private(obj->i_sb)->optimal_io_size;
-+
-+ return 0;
-+}
-+
-+/* Estimate the maximum amount of nodes which might be allocated or changed on
-+ typical new object creation. Typical creation consists of calling create
-+ method of file plugin, adding directory entry to parent and update parent
-+ directory's stat data.
-+*/
-+static reiser4_block_nr estimate_create_vfs_object(struct inode *parent, /* parent object */
-+ struct inode *object
-+ /* object */ )
-+{
-+ assert("vpf-309", parent != NULL);
-+ assert("vpf-307", object != NULL);
-+
-+ return
-+ /* object creation estimation */
-+ inode_file_plugin(object)->estimate.create(object) +
-+ /* stat data of parent directory estimation */
-+ inode_file_plugin(parent)->estimate.update(parent) +
-+ /* adding entry estimation */
-+ inode_dir_plugin(parent)->estimate.add_entry(parent) +
-+ /* to undo in the case of failure */
-+ inode_dir_plugin(parent)->estimate.rem_entry(parent);
-+}
-+
-+/* Create child in directory.
-+
-+ . get object's plugin
-+ . get fresh inode
-+ . initialize inode
-+ . add object's stat-data
-+ . initialize object's directory
-+ . add entry to the parent
-+ . instantiate dentry
-+
-+*/
-+static int do_create_vfs_child(reiser4_object_create_data * data, /* parameters of new
-+ object */
-+ struct inode **retobj)
-+{
-+ int result;
-+
-+ struct dentry *dentry; /* parent object */
-+ struct inode *parent; /* new name */
-+
-+ dir_plugin *par_dir; /* directory plugin on the parent */
-+ dir_plugin *obj_dir; /* directory plugin on the new object */
-+ file_plugin *obj_plug; /* object plugin on the new object */
-+ struct inode *object; /* new object */
-+ reiser4_block_nr reserve;
-+
-+ reiser4_dir_entry_desc entry; /* new directory entry */
-+
-+ assert("nikita-1420", data != NULL);
-+ parent = data->parent;
-+ dentry = data->dentry;
-+
-+ assert("nikita-1418", parent != NULL);
-+ assert("nikita-1419", dentry != NULL);
-+
-+ /* check, that name is acceptable for parent */
-+ par_dir = inode_dir_plugin(parent);
-+ if (par_dir->is_name_acceptable &&
-+ !par_dir->is_name_acceptable(parent,
-+ dentry->d_name.name,
-+ (int)dentry->d_name.len))
-+ return RETERR(-ENAMETOOLONG);
-+
-+ result = 0;
-+ obj_plug = file_plugin_by_id((int)data->id);
-+ if (obj_plug == NULL) {
-+ warning("nikita-430", "Cannot find plugin %i", data->id);
-+ return RETERR(-ENOENT);
-+ }
-+ object = new_inode(parent->i_sb);
-+ if (object == NULL)
-+ return RETERR(-ENOMEM);
-+ /* we'll update i_nlink below */
-+ object->i_nlink = 0;
-+ /* new_inode() initializes i_ino to "arbitrary" value. Reset it to 0,
-+ * to simplify error handling: if some error occurs before i_ino is
-+ * initialized with oid, i_ino should already be set to some
-+ * distinguished value. */
-+ object->i_ino = 0;
-+
-+ /* So that on error iput will be called. */
-+ *retobj = object;
-+
-+ if (DQUOT_ALLOC_INODE(object)) {
-+ DQUOT_DROP(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EDQUOT);
-+ }
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = object;
-+
-+ set_plugin(&reiser4_inode_data(object)->pset, PSET_FILE,
-+ file_plugin_to_plugin(obj_plug));
-+ result = obj_plug->set_plug_in_inode(object, parent, data);
-+ if (result) {
-+ warning("nikita-431", "Cannot install plugin %i on %llx",
-+ data->id, (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ /* reget plugin after installation */
-+ obj_plug = inode_file_plugin(object);
-+
-+ if (obj_plug->create_object == NULL) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EPERM);
-+ }
-+
-+ /* if any of hash, tail, sd or permission plugins for newly created
-+ object are not set yet set them here inheriting them from parent
-+ directory
-+ */
-+ assert("nikita-2070", obj_plug->adjust_to_parent != NULL);
-+ result = obj_plug->adjust_to_parent(object,
-+ parent,
-+ object->i_sb->s_root->d_inode);
-+ if (result == 0)
-+ result = finish_pset(object);
-+ if (result != 0) {
-+ warning("nikita-432", "Cannot inherit from %llx to %llx",
-+ (unsigned long long)get_inode_oid(parent),
-+ (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ /* setup inode and file-operations for this inode */
-+ setup_inode_ops(object, data);
-+
-+ /* call file plugin's method to initialize plugin specific part of
-+ * inode */
-+ if (obj_plug->init_inode_data)
-+ obj_plug->init_inode_data(object, data, 1 /*create */ );
-+
-+ /* obtain directory plugin (if any) for new object. */
-+ obj_dir = inode_dir_plugin(object);
-+ if (obj_dir != NULL && obj_dir->init == NULL) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-EPERM);
-+ }
-+
-+ reiser4_inode_data(object)->locality_id = get_inode_oid(parent);
-+
-+ reserve = estimate_create_vfs_object(parent, object);
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return RETERR(-ENOSPC);
-+ }
-+
-+ /* mark inode `immutable'. We disable changes to the file being
-+ created until valid directory entry for it is inserted. Otherwise,
-+ if file were expanded and insertion of directory entry fails, we
-+ have to remove file, but we only alloted enough space in
-+ transaction to remove _empty_ file. 3.x code used to remove stat
-+ data in different transaction thus possibly leaking disk space on
-+ crash. This all only matters if it's possible to access file
-+ without name, for example, by inode number
-+ */
-+ reiser4_inode_set_flag(object, REISER4_IMMUTABLE);
-+
-+ /* create empty object, this includes allocation of new objectid. For
-+ directories this implies creation of dot and dotdot */
-+ assert("nikita-2265", reiser4_inode_get_flag(object, REISER4_NO_SD));
-+
-+ /* mark inode as `loaded'. From this point onward
-+ reiser4_delete_inode() will try to remove its stat-data. */
-+ reiser4_inode_set_flag(object, REISER4_LOADED);
-+
-+ result = obj_plug->create_object(object, parent, data);
-+ if (result != 0) {
-+ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
-+ if (result != -ENAMETOOLONG && result != -ENOMEM)
-+ warning("nikita-2219",
-+ "Failed to create sd for %llu",
-+ (unsigned long long)get_inode_oid(object));
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ return result;
-+ }
-+
-+ if (obj_dir != NULL)
-+ result = obj_dir->init(object, parent, data);
-+ if (result == 0) {
-+ assert("nikita-434", !reiser4_inode_get_flag(object,
-+ REISER4_NO_SD));
-+ /* insert inode into VFS hash table */
-+ insert_inode_hash(object);
-+ /* create entry */
-+ result = par_dir->add_entry(parent, dentry, data, &entry);
-+ if (result == 0) {
-+ result = reiser4_add_nlink(object, parent, 0);
-+ /* If O_CREAT is set and the file did not previously
-+ exist, upon successful completion, open() shall
-+ mark for update the st_atime, st_ctime, and
-+ st_mtime fields of the file and the st_ctime and
-+ st_mtime fields of the parent directory. --SUS
-+ */
-+ /* @object times are already updated by
-+ reiser4_add_nlink() */
-+ if (result == 0)
-+ reiser4_update_dir(parent);
-+ if (result != 0)
-+ /* cleanup failure to add nlink */
-+ par_dir->rem_entry(parent, dentry, &entry);
-+ }
-+ if (result != 0)
-+ /* cleanup failure to add entry */
-+ obj_plug->detach(object, parent);
-+ } else if (result != -ENOMEM)
-+ warning("nikita-2219", "Failed to initialize dir for %llu: %i",
-+ (unsigned long long)get_inode_oid(object), result);
-+
-+ /*
-+ * update stat-data, committing all pending modifications to the inode
-+ * fields.
-+ */
-+ reiser4_update_sd(object);
-+ if (result != 0) {
-+ DQUOT_FREE_INODE(object);
-+ object->i_flags |= S_NOQUOTA;
-+ /* if everything was ok (result == 0), parent stat-data is
-+ * already updated above (update_parent_dir()) */
-+ reiser4_update_sd(parent);
-+ /* failure to create entry, remove object */
-+ obj_plug->delete_object(object);
-+ }
-+
-+ /* file has name now, clear immutable flag */
-+ reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
-+
-+ /* on error, iput() will call ->delete_inode(). We should keep track
-+ of the existence of stat-data for this inode and avoid attempt to
-+ remove it in reiser4_delete_inode(). This is accomplished through
-+ REISER4_NO_SD bit in inode.u.reiser4_i.plugin.flags
-+ */
-+ return result;
-+}
-+
-+/* this is helper for common implementations of reiser4_mkdir, reiser4_create,
-+ reiser4_mknod and reiser4_symlink
-+*/
-+static int
-+create_vfs_object(struct inode *parent,
-+ struct dentry *dentry, reiser4_object_create_data * data)
-+{
-+ reiser4_context *ctx;
-+ int result;
-+ struct inode *child;
-+
-+ ctx = reiser4_init_context(parent->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+ context_set_commit_async(ctx);
-+
-+ data->parent = parent;
-+ data->dentry = dentry;
-+ child = NULL;
-+ result = do_create_vfs_child(data, &child);
-+ if (unlikely(result != 0)) {
-+ if (child != NULL) {
-+ reiser4_make_bad_inode(child);
-+ iput(child);
-+ }
-+ } else
-+ d_instantiate(dentry, child);
-+
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+/* helper for link_common. Estimate disk space necessary to add a link
-+ from @parent to @object
-+*/
-+static reiser4_block_nr common_estimate_link(struct inode *parent, /* parent directory */
-+ struct inode *object
-+ /* object to which new link is being cerated */
-+ )
-+{
-+ reiser4_block_nr res = 0;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ assert("vpf-317", object != NULL);
-+ assert("vpf-318", parent != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ dplug = inode_dir_plugin(parent);
-+ /* VS-FIXME-HANS: why do we do fplug->estimate.update(object) twice instead of multiplying by 2? */
-+ /* reiser4_add_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* add_entry(parent) */
-+ res += dplug->estimate.add_entry(parent);
-+ /* reiser4_del_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* update_dir(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+ /* safe-link */
-+ res += estimate_one_item_removal(reiser4_tree_by_inode(object));
-+
-+ return res;
-+}
-+
-+/* Estimate disk space necessary to remove a link between @parent and
-+ @object.
-+*/
-+static reiser4_block_nr estimate_unlink(struct inode *parent, /* parent directory */
-+ struct inode *object
-+ /* object to which new link is being cerated */
-+ )
-+{
-+ reiser4_block_nr res = 0;
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ assert("vpf-317", object != NULL);
-+ assert("vpf-318", parent != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ dplug = inode_dir_plugin(parent);
-+
-+ /* rem_entry(parent) */
-+ res += dplug->estimate.rem_entry(parent);
-+ /* reiser4_del_nlink(object) */
-+ res += fplug->estimate.update(object);
-+ /* update_dir(parent) */
-+ res += inode_file_plugin(parent)->estimate.update(parent);
-+ /* fplug->unlink */
-+ res += fplug->estimate.unlink(object, parent);
-+ /* safe-link */
-+ res += estimate_one_insert_item(reiser4_tree_by_inode(object));
-+
-+ return res;
-+}
-+
-+/* helper for reiser4_unlink_common. Estimate and grab space for unlink. */
-+static int unlink_check_and_grab(struct inode *parent, struct dentry *victim)
-+{
-+ file_plugin *fplug;
-+ struct inode *child;
-+ int result;
-+
-+ result = 0;
-+ child = victim->d_inode;
-+ fplug = inode_file_plugin(child);
-+
-+ /* check for race with create_object() */
-+ if (reiser4_inode_get_flag(child, REISER4_IMMUTABLE))
-+ return RETERR(-E_REPEAT);
-+ /* object being deleted should have stat data */
-+ assert("vs-949", !reiser4_inode_get_flag(child, REISER4_NO_SD));
-+
-+ /* ask object plugin */
-+ if (fplug->can_rem_link != NULL && !fplug->can_rem_link(child))
-+ return RETERR(-ENOTEMPTY);
-+
-+ result = (int)estimate_unlink(parent, child);
-+ if (result < 0)
-+ return result;
-+
-+ return reiser4_grab_reserved(child->i_sb, result, BA_CAN_COMMIT);
-+}
-+
-+/* helper for reiser4_setattr_common */
-+static int setattr_reserve(reiser4_tree * tree)
-+{
-+ assert("vs-1096", is_grab_enabled(get_current_context()));
-+ return reiser4_grab_space(estimate_one_insert_into_item(tree),
-+ BA_CAN_COMMIT);
-+}
-+
-+/* helper function. Standards require that for many file-system operations
-+ on success ctime and mtime of parent directory is to be updated. */
-+int reiser4_update_dir(struct inode *dir)
-+{
-+ assert("nikita-2525", dir != NULL);
-+
-+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-+ return reiser4_update_sd(dir);
-+}
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/inode_ops_rename.c linux-2.6.24/fs/reiser4/plugin/inode_ops_rename.c
---- linux-2.6.24.orig/fs/reiser4/plugin/inode_ops_rename.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/inode_ops_rename.c 2008-01-25 11:39:07.000224175 +0300
-@@ -0,0 +1,912 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "../inode.h"
-+#include "../safe_link.h"
-+
-+static const char *possible_leak = "Possible disk space leak.";
-+
-+/* re-bind existing name at @from_coord in @from_dir to point to @to_inode.
-+
-+ Helper function called from hashed_rename() */
-+static int replace_name(struct inode *to_inode, /* inode where @from_coord is
-+ * to be re-targeted at */
-+ struct inode *from_dir, /* directory where @from_coord
-+ * lives */
-+ struct inode *from_inode, /* inode @from_coord
-+ * originally point to */
-+ coord_t * from_coord, /* where directory entry is in
-+ * the tree */
-+ lock_handle * from_lh /* lock handle on @from_coord */ )
-+{
-+ item_plugin *from_item;
-+ int result;
-+ znode *node;
-+
-+ coord_clear_iplug(from_coord);
-+ node = from_coord->node;
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+ from_item = item_plugin_by_coord(from_coord);
-+ if (plugin_of_group(item_plugin_by_coord(from_coord),
-+ DIR_ENTRY_ITEM_TYPE))
-+ {
-+ reiser4_key to_key;
-+
-+ build_sd_key(to_inode, &to_key);
-+
-+ /* everything is found and prepared to change directory entry
-+ at @from_coord to point to @to_inode.
-+
-+ @to_inode is just about to get new name, so bump its link
-+ counter.
-+
-+ */
-+ result = reiser4_add_nlink(to_inode, from_dir, 0);
-+ if (result != 0) {
-+ /* Don't issue warning: this may be plain -EMLINK */
-+ zrelse(node);
-+ return result;
-+ }
-+
-+ result =
-+ from_item->s.dir.update_key(from_coord, &to_key, from_lh);
-+ if (result != 0) {
-+ reiser4_del_nlink(to_inode, from_dir, 0);
-+ zrelse(node);
-+ return result;
-+ }
-+
-+ /* @from_inode just lost its name, he-he.
-+
-+ If @from_inode was directory, it contained dotdot pointing
-+ to @from_dir. @from_dir i_nlink will be decreased when
-+ iput() will be called on @from_inode.
-+
-+ If file-system is not ADG (hard-links are
-+ supported on directories), iput(from_inode) will not remove
-+ @from_inode, and thus above is incorrect, but hard-links on
-+ directories are problematic in many other respects.
-+ */
-+ result = reiser4_del_nlink(from_inode, from_dir, 0);
-+ if (result != 0) {
-+ warning("nikita-2330",
-+ "Cannot remove link from source: %i. %s",
-+ result, possible_leak);
-+ }
-+ /* Has to return success, because entry is already
-+ * modified. */
-+ result = 0;
-+
-+ /* NOTE-NIKITA consider calling plugin method in stead of
-+ accessing inode fields directly. */
-+ from_dir->i_mtime = CURRENT_TIME;
-+ } else {
-+ warning("nikita-2326", "Unexpected item type");
-+ result = RETERR(-EIO);
-+ }
-+ zrelse(node);
-+ return result;
-+}
-+
-+/* add new entry pointing to @inode into @dir at @coord, locked by @lh
-+
-+ Helper function used by hashed_rename(). */
-+static int add_name(struct inode *inode, /* inode where @coord is to be
-+ * re-targeted at */
-+ struct inode *dir, /* directory where @coord lives */
-+ struct dentry *name, /* new name */
-+ coord_t * coord, /* where directory entry is in the tree */
-+ lock_handle * lh, /* lock handle on @coord */
-+ int is_dir /* true, if @inode is directory */ )
-+{
-+ int result;
-+ reiser4_dir_entry_desc entry;
-+
-+ assert("nikita-2333", lh->node == coord->node);
-+ assert("nikita-2334", is_dir == S_ISDIR(inode->i_mode));
-+
-+ memset(&entry, 0, sizeof entry);
-+ entry.obj = inode;
-+ /* build key of directory entry description */
-+ inode_dir_plugin(dir)->build_entry_key(dir, &name->d_name, &entry.key);
-+
-+ /* ext2 does this in different order: first inserts new entry,
-+ then increases directory nlink. We don't want do this,
-+ because reiser4_add_nlink() calls ->add_link() plugin
-+ method that can fail for whatever reason, leaving as with
-+ cleanup problems.
-+ */
-+ /* @inode is getting new name */
-+ reiser4_add_nlink(inode, dir, 0);
-+ /* create @new_name in @new_dir pointing to
-+ @old_inode */
-+ result = WITH_COORD(coord,
-+ inode_dir_item_plugin(dir)->s.dir.add_entry(dir,
-+ coord,
-+ lh,
-+ name,
-+ &entry));
-+ if (result != 0) {
-+ int result2;
-+ result2 = reiser4_del_nlink(inode, dir, 0);
-+ if (result2 != 0) {
-+ warning("nikita-2327",
-+ "Cannot drop link on %lli %i. %s",
-+ (unsigned long long)get_inode_oid(inode),
-+ result2, possible_leak);
-+ }
-+ } else
-+ INODE_INC_FIELD(dir, i_size);
-+ return result;
-+}
-+
-+static reiser4_block_nr estimate_rename(struct inode *old_dir, /* directory where @old is located */
-+ struct dentry *old_name, /* old name */
-+ struct inode *new_dir, /* directory where @new is located */
-+ struct dentry *new_name /* new name */ )
-+{
-+ reiser4_block_nr res1, res2;
-+ dir_plugin *p_parent_old, *p_parent_new;
-+ file_plugin *p_child_old, *p_child_new;
-+
-+ assert("vpf-311", old_dir != NULL);
-+ assert("vpf-312", new_dir != NULL);
-+ assert("vpf-313", old_name != NULL);
-+ assert("vpf-314", new_name != NULL);
-+
-+ p_parent_old = inode_dir_plugin(old_dir);
-+ p_parent_new = inode_dir_plugin(new_dir);
-+ p_child_old = inode_file_plugin(old_name->d_inode);
-+ if (new_name->d_inode)
-+ p_child_new = inode_file_plugin(new_name->d_inode);
-+ else
-+ p_child_new = NULL;
-+
-+ /* find_entry - can insert one leaf. */
-+ res1 = res2 = 1;
-+
-+ /* replace_name */
-+ {
-+ /* reiser4_add_nlink(p_child_old) and reiser4_del_nlink(p_child_old) */
-+ res1 += 2 * p_child_old->estimate.update(old_name->d_inode);
-+ /* update key */
-+ res1 += 1;
-+ /* reiser4_del_nlink(p_child_new) */
-+ if (p_child_new)
-+ res1 += p_child_new->estimate.update(new_name->d_inode);
-+ }
-+
-+ /* else add_name */
-+ {
-+ /* reiser4_add_nlink(p_parent_new) and reiser4_del_nlink(p_parent_new) */
-+ res2 +=
-+ 2 * inode_file_plugin(new_dir)->estimate.update(new_dir);
-+ /* reiser4_add_nlink(p_parent_old) */
-+ res2 += p_child_old->estimate.update(old_name->d_inode);
-+ /* add_entry(p_parent_new) */
-+ res2 += p_parent_new->estimate.add_entry(new_dir);
-+ /* reiser4_del_nlink(p_parent_old) */
-+ res2 += p_child_old->estimate.update(old_name->d_inode);
-+ }
-+
-+ res1 = res1 < res2 ? res2 : res1;
-+
-+ /* reiser4_write_sd(p_parent_new) */
-+ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+
-+ /* reiser4_write_sd(p_child_new) */
-+ if (p_child_new)
-+ res1 += p_child_new->estimate.update(new_name->d_inode);
-+
-+ /* hashed_rem_entry(p_parent_old) */
-+ res1 += p_parent_old->estimate.rem_entry(old_dir);
-+
-+ /* reiser4_del_nlink(p_child_old) */
-+ res1 += p_child_old->estimate.update(old_name->d_inode);
-+
-+ /* replace_name */
-+ {
-+ /* reiser4_add_nlink(p_parent_dir_new) */
-+ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+ /* update_key */
-+ res1 += 1;
-+ /* reiser4_del_nlink(p_parent_new) */
-+ res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
-+ /* reiser4_del_nlink(p_parent_old) */
-+ res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);
-+ }
-+
-+ /* reiser4_write_sd(p_parent_old) */
-+ res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);
-+
-+ /* reiser4_write_sd(p_child_old) */
-+ res1 += p_child_old->estimate.update(old_name->d_inode);
-+
-+ return res1;
-+}
-+
-+static int hashed_rename_estimate_and_grab(struct inode *old_dir, /* directory where @old is located */
-+ struct dentry *old_name, /* old name */
-+ struct inode *new_dir, /* directory where @new is located */
-+ struct dentry *new_name
-+ /* new name */ )
-+{
-+ reiser4_block_nr reserve;
-+
-+ reserve = estimate_rename(old_dir, old_name, new_dir, new_name);
-+
-+ if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
-+ return RETERR(-ENOSPC);
-+
-+ return 0;
-+}
-+
-+/* check whether @old_inode and @new_inode can be moved within file system
-+ * tree. This singles out attempts to rename pseudo-files, for example. */
-+static int can_rename(struct inode *old_dir, struct inode *old_inode,
-+ struct inode *new_dir, struct inode *new_inode)
-+{
-+ file_plugin *fplug;
-+ dir_plugin *dplug;
-+
-+ assert("nikita-3370", old_inode != NULL);
-+
-+ dplug = inode_dir_plugin(new_dir);
-+ fplug = inode_file_plugin(old_inode);
-+
-+ if (dplug == NULL)
-+ return RETERR(-ENOTDIR);
-+ else if (new_dir->i_op->create == NULL)
-+ return RETERR(-EPERM);
-+ else if (!fplug->can_add_link(old_inode))
-+ return RETERR(-EMLINK);
-+ else if (new_inode != NULL) {
-+ fplug = inode_file_plugin(new_inode);
-+ if (fplug->can_rem_link != NULL &&
-+ !fplug->can_rem_link(new_inode))
-+ return RETERR(-EBUSY);
-+ }
-+ return 0;
-+}
-+
-+int reiser4_find_entry(struct inode *, struct dentry *, lock_handle *,
-+ znode_lock_mode, reiser4_dir_entry_desc *);
-+int reiser4_update_dir(struct inode *);
-+
-+/* this is common implementation of vfs's rename method of struct
-+ inode_operations
-+ See comments in the body.
-+
-+ It is arguable that this function can be made generic so, that it
-+ will be applicable to any kind of directory plugin that deals with
-+ directories composed out of directory entries. The only obstacle
-+ here is that we don't have any data-type to represent directory
-+ entry. This should be re-considered when more than one different
-+ directory plugin will be implemented.
-+*/
-+int reiser4_rename_common(struct inode *old_dir /* directory where @old
-+ * is located */ ,
-+ struct dentry *old_name /* old name */ ,
-+ struct inode *new_dir /* directory where @new
-+ * is located */ ,
-+ struct dentry *new_name /* new name */ )
-+{
-+ /* From `The Open Group Base Specifications Issue 6'
-+
-+ If either the old or new argument names a symbolic link, rename()
-+ shall operate on the symbolic link itself, and shall not resolve
-+ the last component of the argument. If the old argument and the new
-+ argument resolve to the same existing file, rename() shall return
-+ successfully and perform no other action.
-+
-+ [this is done by VFS: vfs_rename()]
-+
-+ If the old argument points to the pathname of a file that is not a
-+ directory, the new argument shall not point to the pathname of a
-+ directory.
-+
-+ [checked by VFS: vfs_rename->may_delete()]
-+
-+ If the link named by the new argument exists, it shall
-+ be removed and old renamed to new. In this case, a link named new
-+ shall remain visible to other processes throughout the renaming
-+ operation and refer either to the file referred to by new or old
-+ before the operation began.
-+
-+ [we should assure this]
-+
-+ Write access permission is required for
-+ both the directory containing old and the directory containing new.
-+
-+ [checked by VFS: vfs_rename->may_delete(), may_create()]
-+
-+ If the old argument points to the pathname of a directory, the new
-+ argument shall not point to the pathname of a file that is not a
-+ directory.
-+
-+ [checked by VFS: vfs_rename->may_delete()]
-+
-+ If the directory named by the new argument exists, it
-+ shall be removed and old renamed to new. In this case, a link named
-+ new shall exist throughout the renaming operation and shall refer
-+ either to the directory referred to by new or old before the
-+ operation began.
-+
-+ [we should assure this]
-+
-+ If new names an existing directory, it shall be
-+ required to be an empty directory.
-+
-+ [we should check this]
-+
-+ If the old argument points to a pathname of a symbolic link, the
-+ symbolic link shall be renamed. If the new argument points to a
-+ pathname of a symbolic link, the symbolic link shall be removed.
-+
-+ The new pathname shall not contain a path prefix that names
-+ old. Write access permission is required for the directory
-+ containing old and the directory containing new. If the old
-+ argument points to the pathname of a directory, write access
-+ permission may be required for the directory named by old, and, if
-+ it exists, the directory named by new.
-+
-+ [checked by VFS: vfs_rename(), vfs_rename_dir()]
-+
-+ If the link named by the new argument exists and the file's link
-+ count becomes 0 when it is removed and no process has the file
-+ open, the space occupied by the file shall be freed and the file
-+ shall no longer be accessible. If one or more processes have the
-+ file open when the last link is removed, the link shall be removed
-+ before rename() returns, but the removal of the file contents shall
-+ be postponed until all references to the file are closed.
-+
-+ [iput() handles this, but we can do this manually, a la
-+ reiser4_unlink()]
-+
-+ Upon successful completion, rename() shall mark for update the
-+ st_ctime and st_mtime fields of the parent directory of each file.
-+
-+ [N/A]
-+
-+ */
-+ reiser4_context *ctx;
-+ int result;
-+ int is_dir; /* is @old_name directory */
-+
-+ struct inode *old_inode;
-+ struct inode *new_inode;
-+ coord_t *new_coord;
-+
-+ struct reiser4_dentry_fsdata *new_fsdata;
-+ dir_plugin *dplug;
-+ file_plugin *fplug;
-+
-+ reiser4_dir_entry_desc *old_entry, *new_entry, *dotdot_entry;
-+ lock_handle *new_lh, *dotdot_lh;
-+ struct dentry *dotdot_name;
-+ struct reiser4_dentry_fsdata *dataonstack;
-+
-+ ctx = reiser4_init_context(old_dir->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ old_entry = kzalloc(3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) +
-+ sizeof(*dotdot_name) + sizeof(*dataonstack),
-+ reiser4_ctx_gfp_mask_get());
-+ if (!old_entry) {
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ new_entry = old_entry + 1;
-+ dotdot_entry = old_entry + 2;
-+ new_lh = (lock_handle *)(old_entry + 3);
-+ dotdot_lh = new_lh + 1;
-+ dotdot_name = (struct dentry *)(new_lh + 2);
-+ dataonstack = (struct reiser4_dentry_fsdata *)(dotdot_name + 1);
-+
-+ assert("nikita-2318", old_dir != NULL);
-+ assert("nikita-2319", new_dir != NULL);
-+ assert("nikita-2320", old_name != NULL);
-+ assert("nikita-2321", new_name != NULL);
-+
-+ old_inode = old_name->d_inode;
-+ new_inode = new_name->d_inode;
-+
-+ dplug = inode_dir_plugin(old_dir);
-+ fplug = NULL;
-+
-+ new_fsdata = reiser4_get_dentry_fsdata(new_name);
-+ if (IS_ERR(new_fsdata)) {
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return PTR_ERR(new_fsdata);
-+ }
-+
-+ new_coord = &new_fsdata->dec.entry_coord;
-+ coord_clear_iplug(new_coord);
-+
-+ is_dir = S_ISDIR(old_inode->i_mode);
-+
-+ assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
-+
-+ /* if target is existing directory and it's not empty---return error.
-+
-+ This check is done specifically, because is_dir_empty() requires
-+ tree traversal and have to be done before locks are taken.
-+ */
-+ if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) {
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return RETERR(-ENOTEMPTY);
-+ }
-+
-+ result = can_rename(old_dir, old_inode, new_dir, new_inode);
-+ if (result != 0) {
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ result = hashed_rename_estimate_and_grab(old_dir, old_name,
-+ new_dir, new_name);
-+ if (result != 0) {
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ init_lh(new_lh);
-+
-+ /* find entry for @new_name */
-+ result = reiser4_find_entry(new_dir, new_name, new_lh, ZNODE_WRITE_LOCK,
-+ new_entry);
-+
-+ if (IS_CBKERR(result)) {
-+ done_lh(new_lh);
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+ }
-+
-+ reiser4_seal_done(&new_fsdata->dec.entry_seal);
-+
-+ /* add or replace name for @old_inode as @new_name */
-+ if (new_inode != NULL) {
-+ /* target (@new_name) exists. */
-+ /* Not clear what to do with objects that are
-+ both directories and files at the same time. */
-+ if (result == CBK_COORD_FOUND) {
-+ result = replace_name(old_inode,
-+ new_dir,
-+ new_inode, new_coord, new_lh);
-+ if (result == 0)
-+ fplug = inode_file_plugin(new_inode);
-+ } else if (result == CBK_COORD_NOTFOUND) {
-+ /* VFS told us that @new_name is bound to existing
-+ inode, but we failed to find directory entry. */
-+ warning("nikita-2324", "Target not found");
-+ result = RETERR(-ENOENT);
-+ }
-+ } else {
-+ /* target (@new_name) doesn't exists. */
-+ if (result == CBK_COORD_NOTFOUND)
-+ result = add_name(old_inode,
-+ new_dir,
-+ new_name, new_coord, new_lh, is_dir);
-+ else if (result == CBK_COORD_FOUND) {
-+ /* VFS told us that @new_name is "negative" dentry,
-+ but we found directory entry. */
-+ warning("nikita-2331", "Target found unexpectedly");
-+ result = RETERR(-EIO);
-+ }
-+ }
-+
-+ assert("nikita-3462", ergo(result == 0,
-+ old_inode->i_nlink >= 2 + !!is_dir));
-+
-+ /* We are done with all modifications to the @new_dir, release lock on
-+ node. */
-+ done_lh(new_lh);
-+
-+ if (fplug != NULL) {
-+ /* detach @new_inode from name-space */
-+ result = fplug->detach(new_inode, new_dir);
-+ if (result != 0)
-+ warning("nikita-2330", "Cannot detach %lli: %i. %s",
-+ (unsigned long long)get_inode_oid(new_inode),
-+ result, possible_leak);
-+ }
-+
-+ if (new_inode != NULL)
-+ reiser4_update_sd(new_inode);
-+
-+ if (result == 0) {
-+ old_entry->obj = old_inode;
-+
-+ dplug->build_entry_key(old_dir,
-+ &old_name->d_name, &old_entry->key);
-+
-+ /* At this stage new name was introduced for
-+ @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
-+ counters were updated.
-+
-+ We want to remove @old_name now. If @old_inode wasn't
-+ directory this is simple.
-+ */
-+ result = dplug->rem_entry(old_dir, old_name, old_entry);
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("nikita-2335",
-+ "Cannot remove old name: %i", result);
-+ } else {
-+ result = reiser4_del_nlink(old_inode, old_dir, 0);
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("nikita-2337",
-+ "Cannot drop link on old: %i", result);
-+ }
-+ }
-+
-+ if (result == 0 && is_dir) {
-+ /* @old_inode is directory. We also have to update
-+ dotdot entry. */
-+ coord_t *dotdot_coord;
-+
-+ memset(dataonstack, 0, sizeof dataonstack);
-+ memset(dotdot_entry, 0, sizeof dotdot_entry);
-+ dotdot_entry->obj = old_dir;
-+ memset(dotdot_name, 0, sizeof dotdot_name);
-+ dotdot_name->d_name.name = "..";
-+ dotdot_name->d_name.len = 2;
-+ /*
-+ * allocate ->d_fsdata on the stack to avoid using
-+ * reiser4_get_dentry_fsdata(). Locking is not needed,
-+ * because dentry is private to the current thread.
-+ */
-+ dotdot_name->d_fsdata = dataonstack;
-+ init_lh(dotdot_lh);
-+
-+ dotdot_coord = &dataonstack->dec.entry_coord;
-+ coord_clear_iplug(dotdot_coord);
-+
-+ result = reiser4_find_entry(old_inode, dotdot_name,
-+ dotdot_lh, ZNODE_WRITE_LOCK,
-+ dotdot_entry);
-+ if (result == 0) {
-+ /* replace_name() decreases i_nlink on
-+ * @old_dir */
-+ result = replace_name(new_dir,
-+ old_inode,
-+ old_dir,
-+ dotdot_coord, dotdot_lh);
-+ } else
-+ result = RETERR(-EIO);
-+ done_lh(dotdot_lh);
-+ }
-+ }
-+ reiser4_update_dir(new_dir);
-+ reiser4_update_dir(old_dir);
-+ reiser4_update_sd(old_inode);
-+ if (result == 0) {
-+ file_plugin *fplug;
-+
-+ if (new_inode != NULL) {
-+ /* add safe-link for target file (in case we removed
-+ * last reference to the poor fellow */
-+ fplug = inode_file_plugin(new_inode);
-+ if (new_inode->i_nlink == 0)
-+ result = safe_link_add(new_inode, SAFE_UNLINK);
-+ }
-+ }
-+ kfree(old_entry);
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+
-+#if 0
-+int reiser4_rename_common(struct inode *old_dir /* directory where @old
-+ * is located */ ,
-+ struct dentry *old_name /* old name */ ,
-+ struct inode *new_dir /* directory where @new
-+ * is located */ ,
-+ struct dentry *new_name /* new name */ )
-+{
-+ /* From `The Open Group Base Specifications Issue 6'
-+
-+ If either the old or new argument names a symbolic link, rename()
-+ shall operate on the symbolic link itself, and shall not resolve
-+ the last component of the argument. If the old argument and the new
-+ argument resolve to the same existing file, rename() shall return
-+ successfully and perform no other action.
-+
-+ [this is done by VFS: vfs_rename()]
-+
-+ If the old argument points to the pathname of a file that is not a
-+ directory, the new argument shall not point to the pathname of a
-+ directory.
-+
-+ [checked by VFS: vfs_rename->may_delete()]
-+
-+ If the link named by the new argument exists, it shall
-+ be removed and old renamed to new. In this case, a link named new
-+ shall remain visible to other processes throughout the renaming
-+ operation and refer either to the file referred to by new or old
-+ before the operation began.
-+
-+ [we should assure this]
-+
-+ Write access permission is required for
-+ both the directory containing old and the directory containing new.
-+
-+ [checked by VFS: vfs_rename->may_delete(), may_create()]
-+
-+ If the old argument points to the pathname of a directory, the new
-+ argument shall not point to the pathname of a file that is not a
-+ directory.
-+
-+ [checked by VFS: vfs_rename->may_delete()]
-+
-+ If the directory named by the new argument exists, it
-+ shall be removed and old renamed to new. In this case, a link named
-+ new shall exist throughout the renaming operation and shall refer
-+ either to the directory referred to by new or old before the
-+ operation began.
-+
-+ [we should assure this]
-+
-+ If new names an existing directory, it shall be
-+ required to be an empty directory.
-+
-+ [we should check this]
-+
-+ If the old argument points to a pathname of a symbolic link, the
-+ symbolic link shall be renamed. If the new argument points to a
-+ pathname of a symbolic link, the symbolic link shall be removed.
-+
-+ The new pathname shall not contain a path prefix that names
-+ old. Write access permission is required for the directory
-+ containing old and the directory containing new. If the old
-+ argument points to the pathname of a directory, write access
-+ permission may be required for the directory named by old, and, if
-+ it exists, the directory named by new.
-+
-+ [checked by VFS: vfs_rename(), vfs_rename_dir()]
-+
-+ If the link named by the new argument exists and the file's link
-+ count becomes 0 when it is removed and no process has the file
-+ open, the space occupied by the file shall be freed and the file
-+ shall no longer be accessible. If one or more processes have the
-+ file open when the last link is removed, the link shall be removed
-+ before rename() returns, but the removal of the file contents shall
-+ be postponed until all references to the file are closed.
-+
-+ [iput() handles this, but we can do this manually, a la
-+ reiser4_unlink()]
-+
-+ Upon successful completion, rename() shall mark for update the
-+ st_ctime and st_mtime fields of the parent directory of each file.
-+
-+ [N/A]
-+
-+ */
-+ reiser4_context *ctx;
-+ int result;
-+ int is_dir; /* is @old_name directory */
-+ struct inode *old_inode;
-+ struct inode *new_inode;
-+ reiser4_dir_entry_desc old_entry;
-+ reiser4_dir_entry_desc new_entry;
-+ coord_t *new_coord;
-+ struct reiser4_dentry_fsdata *new_fsdata;
-+ lock_handle new_lh;
-+ dir_plugin *dplug;
-+ file_plugin *fplug;
-+
-+ ctx = reiser4_init_context(old_dir->i_sb);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ assert("nikita-2318", old_dir != NULL);
-+ assert("nikita-2319", new_dir != NULL);
-+ assert("nikita-2320", old_name != NULL);
-+ assert("nikita-2321", new_name != NULL);
-+
-+ old_inode = old_name->d_inode;
-+ new_inode = new_name->d_inode;
-+
-+ dplug = inode_dir_plugin(old_dir);
-+ fplug = NULL;
-+
-+ new_fsdata = reiser4_get_dentry_fsdata(new_name);
-+ if (IS_ERR(new_fsdata)) {
-+ result = PTR_ERR(new_fsdata);
-+ goto exit;
-+ }
-+
-+ new_coord = &new_fsdata->dec.entry_coord;
-+ coord_clear_iplug(new_coord);
-+
-+ is_dir = S_ISDIR(old_inode->i_mode);
-+
-+ assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
-+
-+ /* if target is existing directory and it's not empty---return error.
-+
-+ This check is done specifically, because is_dir_empty() requires
-+ tree traversal and have to be done before locks are taken.
-+ */
-+ if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0)
-+ return RETERR(-ENOTEMPTY);
-+
-+ result = can_rename(old_dir, old_inode, new_dir, new_inode);
-+ if (result != 0)
-+ goto exit;
-+
-+ result = hashed_rename_estimate_and_grab(old_dir, old_name,
-+ new_dir, new_name);
-+ if (result != 0)
-+ goto exit;
-+
-+ init_lh(&new_lh);
-+
-+ /* find entry for @new_name */
-+ result = reiser4_find_entry(new_dir, new_name, &new_lh,
-+ ZNODE_WRITE_LOCK, &new_entry);
-+
-+ if (IS_CBKERR(result)) {
-+ done_lh(&new_lh);
-+ goto exit;
-+ }
-+
-+ reiser4_seal_done(&new_fsdata->dec.entry_seal);
-+
-+ /* add or replace name for @old_inode as @new_name */
-+ if (new_inode != NULL) {
-+ /* target (@new_name) exists. */
-+ /* Not clear what to do with objects that are
-+ both directories and files at the same time. */
-+ if (result == CBK_COORD_FOUND) {
-+ result = replace_name(old_inode,
-+ new_dir,
-+ new_inode, new_coord, &new_lh);
-+ if (result == 0)
-+ fplug = inode_file_plugin(new_inode);
-+ } else if (result == CBK_COORD_NOTFOUND) {
-+ /* VFS told us that @new_name is bound to existing
-+ inode, but we failed to find directory entry. */
-+ warning("nikita-2324", "Target not found");
-+ result = RETERR(-ENOENT);
-+ }
-+ } else {
-+ /* target (@new_name) doesn't exists. */
-+ if (result == CBK_COORD_NOTFOUND)
-+ result = add_name(old_inode,
-+ new_dir,
-+ new_name, new_coord, &new_lh, is_dir);
-+ else if (result == CBK_COORD_FOUND) {
-+ /* VFS told us that @new_name is "negative" dentry,
-+ but we found directory entry. */
-+ warning("nikita-2331", "Target found unexpectedly");
-+ result = RETERR(-EIO);
-+ }
-+ }
-+
-+ assert("nikita-3462", ergo(result == 0,
-+ old_inode->i_nlink >= 2 + !!is_dir));
-+
-+ /* We are done with all modifications to the @new_dir, release lock on
-+ node. */
-+ done_lh(&new_lh);
-+
-+ if (fplug != NULL) {
-+ /* detach @new_inode from name-space */
-+ result = fplug->detach(new_inode, new_dir);
-+ if (result != 0)
-+ warning("nikita-2330", "Cannot detach %lli: %i. %s",
-+ (unsigned long long)get_inode_oid(new_inode),
-+ result, possible_leak);
-+ }
-+
-+ if (new_inode != NULL)
-+ reiser4_update_sd(new_inode);
-+
-+ if (result == 0) {
-+ memset(&old_entry, 0, sizeof old_entry);
-+ old_entry.obj = old_inode;
-+
-+ dplug->build_entry_key(old_dir,
-+ &old_name->d_name, &old_entry.key);
-+
-+ /* At this stage new name was introduced for
-+ @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
-+ counters were updated.
-+
-+ We want to remove @old_name now. If @old_inode wasn't
-+ directory this is simple.
-+ */
-+ result = dplug->rem_entry(old_dir, old_name, &old_entry);
-+ /*result = rem_entry_hashed(old_dir, old_name, &old_entry); */
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("nikita-2335",
-+ "Cannot remove old name: %i", result);
-+ } else {
-+ result = reiser4_del_nlink(old_inode, old_dir, 0);
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("nikita-2337",
-+ "Cannot drop link on old: %i", result);
-+ }
-+ }
-+
-+ if (result == 0 && is_dir) {
-+ /* @old_inode is directory. We also have to update
-+ dotdot entry. */
-+ coord_t *dotdot_coord;
-+ lock_handle dotdot_lh;
-+ struct dentry dotdot_name;
-+ reiser4_dir_entry_desc dotdot_entry;
-+ struct reiser4_dentry_fsdata dataonstack;
-+ struct reiser4_dentry_fsdata *fsdata;
-+
-+ memset(&dataonstack, 0, sizeof dataonstack);
-+ memset(&dotdot_entry, 0, sizeof dotdot_entry);
-+ dotdot_entry.obj = old_dir;
-+ memset(&dotdot_name, 0, sizeof dotdot_name);
-+ dotdot_name.d_name.name = "..";
-+ dotdot_name.d_name.len = 2;
-+ /*
-+ * allocate ->d_fsdata on the stack to avoid using
-+ * reiser4_get_dentry_fsdata(). Locking is not needed,
-+ * because dentry is private to the current thread.
-+ */
-+ dotdot_name.d_fsdata = &dataonstack;
-+ init_lh(&dotdot_lh);
-+
-+ fsdata = &dataonstack;
-+ dotdot_coord = &fsdata->dec.entry_coord;
-+ coord_clear_iplug(dotdot_coord);
-+
-+ result = reiser4_find_entry(old_inode,
-+ &dotdot_name,
-+ &dotdot_lh,
-+ ZNODE_WRITE_LOCK,
-+ &dotdot_entry);
-+ if (result == 0) {
-+ /* replace_name() decreases i_nlink on
-+ * @old_dir */
-+ result = replace_name(new_dir,
-+ old_inode,
-+ old_dir,
-+ dotdot_coord, &dotdot_lh);
-+ } else
-+ result = RETERR(-EIO);
-+ done_lh(&dotdot_lh);
-+ }
-+ }
-+ reiser4_update_dir(new_dir);
-+ reiser4_update_dir(old_dir);
-+ reiser4_update_sd(old_inode);
-+ if (result == 0) {
-+ file_plugin *fplug;
-+
-+ if (new_inode != NULL) {
-+ /* add safe-link for target file (in case we removed
-+ * last reference to the poor fellow */
-+ fplug = inode_file_plugin(new_inode);
-+ if (new_inode->i_nlink == 0)
-+ result = safe_link_add(new_inode, SAFE_UNLINK);
-+ }
-+ }
-+ exit:
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+ return result;
-+}
-+#endif
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/acl.h linux-2.6.24/fs/reiser4/plugin/item/acl.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/acl.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/acl.h 2008-01-25 11:39:07.000224175 +0300
-@@ -0,0 +1,66 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
-+#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+#include <linux/fs.h>
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+typedef struct directory_entry_format {
-+ /* key of object stat-data. It's not necessary to store whole
-+ key here, because it's always key of stat-data, so minor
-+ packing locality and offset can be omitted here. But this
-+ relies on particular key allocation scheme for stat-data, so,
-+ for extensibility sake, whole key can be stored here.
-+
-+ We store key as array of bytes, because we don't want 8-byte
-+ alignment of dir entries.
-+ */
-+ obj_key_id id;
-+ /* file name. Null terminated string. */
-+ d8 name[0];
-+} directory_entry_format;
-+
-+void print_de(const char *prefix, coord_t * coord);
-+int extract_key_de(const coord_t * coord, reiser4_key * key);
-+int update_key_de(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh);
-+char *extract_name_de(const coord_t * coord, char *buf);
-+unsigned extract_file_type_de(const coord_t * coord);
-+int add_entry_de(struct inode *dir, coord_t * coord,
-+ lock_handle * lh, const struct dentry *name,
-+ reiser4_dir_entry_desc * entry);
-+int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
-+ lock_handle * lh, reiser4_dir_entry_desc * entry);
-+int max_name_len_de(const struct inode *dir);
-+
-+int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
-+
-+char *extract_dent_name(const coord_t * coord,
-+ directory_entry_format * dent, char *buf);
-+
-+#if REISER4_LARGE_KEY
-+#define DE_NAME_BUF_LEN (24)
-+#else
-+#define DE_NAME_BUF_LEN (16)
-+#endif
-+
-+/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/blackbox.c linux-2.6.24/fs/reiser4/plugin/item/blackbox.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/blackbox.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/blackbox.c 2008-01-25 11:39:07.004225206 +0300
-@@ -0,0 +1,142 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Black box item implementation */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../coord.h"
-+#include "../../tree.h"
-+#include "../../lock.h"
-+
-+#include "blackbox.h"
-+#include "item.h"
-+#include "../plugin.h"
-+
-+int
-+store_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length)
-+{
-+ int result;
-+ reiser4_item_data idata;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ memset(&idata, 0, sizeof idata);
-+
-+ idata.data = data;
-+ idata.user = 0;
-+ idata.length = length;
-+ idata.iplug = item_plugin_by_id(BLACK_BOX_ID);
-+
-+ init_lh(&lh);
-+ result = insert_by_key(tree, key,
-+ &idata, &coord, &lh, LEAF_LEVEL, CBK_UNIQUE);
-+
-+ assert("nikita-3413",
-+ ergo(result == 0,
-+ WITH_COORD(&coord,
-+ item_length_by_coord(&coord) == length)));
-+
-+ done_lh(&lh);
-+ return result;
-+}
-+
-+int
-+load_black_box(reiser4_tree * tree,
-+ reiser4_key * key, void *data, int length, int exact)
-+{
-+ int result;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ init_lh(&lh);
-+ result = coord_by_key(tree, key,
-+ &coord, &lh, ZNODE_READ_LOCK,
-+ exact ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN,
-+ LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
-+
-+ if (result == 0) {
-+ int ilen;
-+
-+ result = zload(coord.node);
-+ if (result == 0) {
-+ ilen = item_length_by_coord(&coord);
-+ if (ilen <= length) {
-+ memcpy(data, item_body_by_coord(&coord), ilen);
-+ unit_key_by_coord(&coord, key);
-+ } else if (exact) {
-+ /*
-+ * item is larger than buffer provided by the
-+ * user. Only issue a warning if @exact is
-+ * set. If @exact is false, we are iterating
-+ * over all safe-links and here we are reaching
-+ * the end of the iteration.
-+ */
-+ warning("nikita-3415",
-+ "Wrong black box length: %i > %i",
-+ ilen, length);
-+ result = RETERR(-EIO);
-+ }
-+ zrelse(coord.node);
-+ }
-+ }
-+
-+ done_lh(&lh);
-+ return result;
-+
-+}
-+
-+int
-+update_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length)
-+{
-+ int result;
-+ coord_t coord;
-+ lock_handle lh;
-+
-+ init_lh(&lh);
-+ result = coord_by_key(tree, key,
-+ &coord, &lh, ZNODE_READ_LOCK,
-+ FIND_EXACT,
-+ LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
-+ if (result == 0) {
-+ int ilen;
-+
-+ result = zload(coord.node);
-+ if (result == 0) {
-+ ilen = item_length_by_coord(&coord);
-+ if (length <= ilen) {
-+ memcpy(item_body_by_coord(&coord), data,
-+ length);
-+ } else {
-+ warning("nikita-3437",
-+ "Wrong black box length: %i < %i",
-+ ilen, length);
-+ result = RETERR(-EIO);
-+ }
-+ zrelse(coord.node);
-+ }
-+ }
-+
-+ done_lh(&lh);
-+ return result;
-+
-+}
-+
-+int kill_black_box(reiser4_tree * tree, const reiser4_key * key)
-+{
-+ return reiser4_cut_tree(tree, key, key, NULL, 1);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/blackbox.h linux-2.6.24/fs/reiser4/plugin/item/blackbox.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/blackbox.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/blackbox.h 2008-01-25 11:39:07.004225206 +0300
-@@ -0,0 +1,33 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* "Black box" entry to fixed-width contain user supplied data */
-+
-+#if !defined( __FS_REISER4_BLACK_BOX_H__ )
-+#define __FS_REISER4_BLACK_BOX_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+extern int store_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length);
-+extern int load_black_box(reiser4_tree * tree,
-+ reiser4_key * key, void *data, int length, int exact);
-+extern int kill_black_box(reiser4_tree * tree, const reiser4_key * key);
-+extern int update_black_box(reiser4_tree * tree,
-+ const reiser4_key * key, void *data, int length);
-+
-+/* __FS_REISER4_BLACK_BOX_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/cde.c linux-2.6.24/fs/reiser4/plugin/item/cde.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/cde.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/cde.c 2008-01-25 11:39:07.004225206 +0300
-@@ -0,0 +1,1008 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry implementation */
-+
-+/* DESCRIPTION:
-+
-+ This is "compound" directory item plugin implementation. This directory
-+ item type is compound (as opposed to the "simple directory item" in
-+ fs/reiser4/plugin/item/sde.[ch]), because it consists of several directory
-+ entries.
-+
-+ The reason behind this decision is disk space efficiency: all directory
-+ entries inside the same directory have identical fragment in their
-+ keys. This, of course, depends on key assignment policy. In our default key
-+ assignment policy, all directory entries have the same locality which is
-+ equal to the object id of their directory.
-+
-+ Composing directory item out of several directory entries for the same
-+ directory allows us to store said key fragment only once. That is, this is
-+ some ad hoc form of key compression (stem compression) that is implemented
-+ here, because general key compression is not supposed to be implemented in
-+ v4.0.
-+
-+ Another decision that was made regarding all directory item plugins, is
-+ that they will store entry keys unaligned. This is for that sake of disk
-+ space efficiency again.
-+
-+ In should be noted, that storing keys unaligned increases CPU consumption,
-+ at least on some architectures.
-+
-+ Internal on-disk structure of the compound directory item is the following:
-+
-+ HEADER cde_item_format. Here number of entries is stored.
-+ ENTRY_HEADER_0 cde_unit_header. Here part of entry key and
-+ ENTRY_HEADER_1 offset of entry body are stored.
-+ ENTRY_HEADER_2 (basically two last parts of key)
-+ ...
-+ ENTRY_HEADER_N
-+ ENTRY_BODY_0 directory_entry_format. Here part of stat data key and
-+ ENTRY_BODY_1 NUL-terminated name are stored.
-+ ENTRY_BODY_2 (part of statadta key in the
-+ sence that since all SDs have
-+ zero offset, this offset is not
-+ stored on disk).
-+ ...
-+ ENTRY_BODY_N
-+
-+ When it comes to the balancing, each directory entry in compound directory
-+ item is unit, that is, something that can be cut from one item and pasted
-+ into another item of the same type. Handling of unit cut and paste is major
-+ reason for the complexity of code below.
-+
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "sde.h"
-+#include "cde.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/quotaops.h>
-+
-+#if 0
-+#define CHECKME(coord) \
-+({ \
-+ const char *message; \
-+ coord_t dup; \
-+ \
-+ coord_dup_nocheck(&dup, (coord)); \
-+ dup.unit_pos = 0; \
-+ assert("nikita-2871", cde_check(&dup, &message) == 0); \
-+})
-+#else
-+#define CHECKME(coord) noop
-+#endif
-+
-+/* return body of compound directory item at @coord */
-+static inline cde_item_format *formatted_at(const coord_t * coord)
-+{
-+ assert("nikita-1282", coord != NULL);
-+ return item_body_by_coord(coord);
-+}
-+
-+/* return entry header at @coord */
-+static inline cde_unit_header *header_at(const coord_t *
-+ coord /* coord of item */ ,
-+ int idx /* index of unit */ )
-+{
-+ assert("nikita-1283", coord != NULL);
-+ return &formatted_at(coord)->entry[idx];
-+}
-+
-+/* return number of units in compound directory item at @coord */
-+static int units(const coord_t * coord /* coord of item */ )
-+{
-+ return le16_to_cpu(get_unaligned(&formatted_at(coord)->num_of_entries));
-+}
-+
-+/* return offset of the body of @idx-th entry in @coord */
-+static unsigned int offset_of(const coord_t * coord /* coord of item */ ,
-+ int idx /* index of unit */ )
-+{
-+ if (idx < units(coord))
-+ return le16_to_cpu(get_unaligned(&header_at(coord, idx)->offset));
-+ else if (idx == units(coord))
-+ return item_length_by_coord(coord);
-+ else
-+ impossible("nikita-1308", "Wrong idx");
-+ return 0;
-+}
-+
-+/* set offset of the body of @idx-th entry in @coord */
-+static void set_offset(const coord_t * coord /* coord of item */ ,
-+ int idx /* index of unit */ ,
-+ unsigned int offset /* new offset */ )
-+{
-+ put_unaligned(cpu_to_le16((__u16) offset), &header_at(coord, idx)->offset);
-+}
-+
-+static void adj_offset(const coord_t * coord /* coord of item */ ,
-+ int idx /* index of unit */ ,
-+ int delta /* offset change */ )
-+{
-+ d16 *doffset;
-+ __u16 offset;
-+
-+ doffset = &header_at(coord, idx)->offset;
-+ offset = le16_to_cpu(get_unaligned(doffset));
-+ offset += delta;
-+ put_unaligned(cpu_to_le16((__u16) offset), doffset);
-+}
-+
-+/* return pointer to @offset-th byte from the beginning of @coord */
-+static char *address(const coord_t * coord /* coord of item */ ,
-+ int offset)
-+{
-+ return ((char *)item_body_by_coord(coord)) + offset;
-+}
-+
-+/* return pointer to the body of @idx-th entry in @coord */
-+static directory_entry_format *entry_at(const coord_t * coord /* coord of
-+ * item */ ,
-+ int idx /* index of unit */ )
-+{
-+ return (directory_entry_format *) address(coord,
-+ (int)offset_of(coord, idx));
-+}
-+
-+/* return number of unit referenced by @coord */
-+static int idx_of(const coord_t * coord /* coord of item */ )
-+{
-+ assert("nikita-1285", coord != NULL);
-+ return coord->unit_pos;
-+}
-+
-+/* find position where entry with @entry_key would be inserted into @coord */
-+static int find(const coord_t * coord /* coord of item */ ,
-+ const reiser4_key * entry_key /* key to look for */ ,
-+ cmp_t * last /* result of last comparison */ )
-+{
-+ int entries;
-+
-+ int left;
-+ int right;
-+
-+ cde_unit_header *header;
-+
-+ assert("nikita-1295", coord != NULL);
-+ assert("nikita-1296", entry_key != NULL);
-+ assert("nikita-1297", last != NULL);
-+
-+ entries = units(coord);
-+ left = 0;
-+ right = entries - 1;
-+ while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
-+ int median;
-+
-+ median = (left + right) >> 1;
-+
-+ header = header_at(coord, median);
-+ *last = de_id_key_cmp(&header->hash, entry_key);
-+ switch (*last) {
-+ case LESS_THAN:
-+ left = median;
-+ break;
-+ case GREATER_THAN:
-+ right = median;
-+ break;
-+ case EQUAL_TO:{
-+ do {
-+ median--;
-+ header--;
-+ } while (median >= 0 &&
-+ de_id_key_cmp(&header->hash,
-+ entry_key) == EQUAL_TO);
-+ return median + 1;
-+ }
-+ }
-+ }
-+ header = header_at(coord, left);
-+ for (; left < entries; ++left, ++header) {
-+ prefetch(header + 1);
-+ *last = de_id_key_cmp(&header->hash, entry_key);
-+ if (*last != LESS_THAN)
-+ break;
-+ }
-+ if (left < entries)
-+ return left;
-+ else
-+ return RETERR(-ENOENT);
-+
-+}
-+
-+/* expand @coord as to accommodate for insertion of @no new entries starting
-+ from @pos, with total bodies size @size. */
-+static int expand_item(const coord_t * coord /* coord of item */ ,
-+ int pos /* unit position */ , int no /* number of new
-+ * units*/ ,
-+ int size /* total size of new units' data */ ,
-+ unsigned int data_size /* free space already reserved
-+ * in the item for insertion */ )
-+{
-+ int entries;
-+ cde_unit_header *header;
-+ char *dent;
-+ int i;
-+
-+ assert("nikita-1310", coord != NULL);
-+ assert("nikita-1311", pos >= 0);
-+ assert("nikita-1312", no > 0);
-+ assert("nikita-1313", data_size >= no * sizeof(directory_entry_format));
-+ assert("nikita-1343",
-+ item_length_by_coord(coord) >=
-+ (int)(size + data_size + no * sizeof *header));
-+
-+ entries = units(coord);
-+
-+ if (pos == entries)
-+ dent = address(coord, size);
-+ else
-+ dent = (char *)entry_at(coord, pos);
-+ /* place where new header will be in */
-+ header = header_at(coord, pos);
-+ /* free space for new entry headers */
-+ memmove(header + no, header,
-+ (unsigned)(address(coord, size) - (char *)header));
-+ /* if adding to the end initialise first new header */
-+ if (pos == entries) {
-+ set_offset(coord, pos, (unsigned)size);
-+ }
-+
-+ /* adjust entry pointer and size */
-+ dent = dent + no * sizeof *header;
-+ size += no * sizeof *header;
-+ /* free space for new entries */
-+ memmove(dent + data_size, dent,
-+ (unsigned)(address(coord, size) - dent));
-+
-+ /* increase counter */
-+ entries += no;
-+ put_unaligned(cpu_to_le16((__u16) entries), &formatted_at(coord)->num_of_entries);
-+
-+ /* [ 0 ... pos ] entries were shifted by no * ( sizeof *header )
-+ bytes. */
-+ for (i = 0; i <= pos; ++i)
-+ adj_offset(coord, i, no * sizeof *header);
-+ /* [ pos + no ... +\infty ) entries were shifted by ( no *
-+ sizeof *header + data_size ) bytes */
-+ for (i = pos + no; i < entries; ++i)
-+ adj_offset(coord, i, no * sizeof *header + data_size);
-+ return 0;
-+}
-+
-+/* insert new @entry into item */
-+static int expand(const coord_t * coord /* coord of item */ ,
-+ struct cde_entry * entry /* entry to insert */ ,
-+ int len /* length of @entry data */ ,
-+ int *pos /* position to insert */ ,
-+ reiser4_dir_entry_desc * dir_entry /* parameters for new
-+ * entry */ )
-+{
-+ cmp_t cmp_res;
-+ int datasize;
-+
-+ *pos = find(coord, &dir_entry->key, &cmp_res);
-+ if (*pos < 0)
-+ *pos = units(coord);
-+
-+ datasize = sizeof(directory_entry_format);
-+ if (is_longname(entry->name->name, entry->name->len))
-+ datasize += entry->name->len + 1;
-+
-+ expand_item(coord, *pos, 1, item_length_by_coord(coord) - len,
-+ datasize);
-+ return 0;
-+}
-+
-+/* paste body of @entry into item */
-+static int paste_entry(const coord_t * coord /* coord of item */ ,
-+ struct cde_entry * entry /* new entry */ ,
-+ int pos /* position to insert */ ,
-+ reiser4_dir_entry_desc * dir_entry /* parameters for
-+ * new entry */ )
-+{
-+ cde_unit_header *header;
-+ directory_entry_format *dent;
-+ const char *name;
-+ int len;
-+
-+ header = header_at(coord, pos);
-+ dent = entry_at(coord, pos);
-+
-+ build_de_id_by_key(&dir_entry->key, &header->hash);
-+ build_inode_key_id(entry->obj, &dent->id);
-+ /* AUDIT unsafe strcpy() operation! It should be replaced with
-+ much less CPU hungry
-+ memcpy( ( char * ) dent -> name, entry -> name -> name , entry -> name -> len );
-+
-+ Also a more major thing is that there should be a way to figure out
-+ amount of space in dent -> name and be able to check that we are
-+ not going to overwrite more than we supposed to */
-+ name = entry->name->name;
-+ len = entry->name->len;
-+ if (is_longname(name, len)) {
-+ strcpy((unsigned char *)dent->name, name);
-+ put_unaligned(0, &dent->name[len]);
-+ }
-+ return 0;
-+}
-+
-+/* estimate how much space is necessary in item to insert/paste set of entries
-+ described in @data. */
-+int estimate_cde(const coord_t * coord /* coord of item */ ,
-+ const reiser4_item_data * data /* parameters for new item */ )
-+{
-+ struct cde_entry_data *e;
-+ int result;
-+ int i;
-+
-+ e = (struct cde_entry_data *) data->data;
-+
-+ assert("nikita-1288", e != NULL);
-+ assert("nikita-1289", e->num_of_entries >= 0);
-+
-+ if (coord == NULL)
-+ /* insert */
-+ result = sizeof(cde_item_format);
-+ else
-+ /* paste */
-+ result = 0;
-+
-+ result += e->num_of_entries *
-+ (sizeof(cde_unit_header) + sizeof(directory_entry_format));
-+ for (i = 0; i < e->num_of_entries; ++i) {
-+ const char *name;
-+ int len;
-+
-+ name = e->entry[i].name->name;
-+ len = e->entry[i].name->len;
-+ assert("nikita-2054", strlen(name) == len);
-+ if (is_longname(name, len))
-+ result += len + 1;
-+ }
-+ ((reiser4_item_data *) data)->length = result;
-+ return result;
-+}
-+
-+/* ->nr_units() method for this item plugin. */
-+pos_in_node_t nr_units_cde(const coord_t * coord /* coord of item */ )
-+{
-+ return units(coord);
-+}
-+
-+/* ->unit_key() method for this item plugin. */
-+reiser4_key *unit_key_cde(const coord_t * coord /* coord of item */ ,
-+ reiser4_key * key /* resulting key */ )
-+{
-+ assert("nikita-1452", coord != NULL);
-+ assert("nikita-1345", idx_of(coord) < units(coord));
-+ assert("nikita-1346", key != NULL);
-+
-+ item_key_by_coord(coord, key);
-+ extract_key_from_de_id(extract_dir_id_from_key(key),
-+ &header_at(coord, idx_of(coord))->hash, key);
-+ return key;
-+}
-+
-+/* mergeable_cde(): implementation of ->mergeable() item method.
-+
-+ Two directory items are mergeable iff they are from the same
-+ directory. That simple.
-+
-+*/
-+int mergeable_cde(const coord_t * p1 /* coord of first item */ ,
-+ const coord_t * p2 /* coord of second item */ )
-+{
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ assert("nikita-1339", p1 != NULL);
-+ assert("nikita-1340", p2 != NULL);
-+
-+ return
-+ (item_plugin_by_coord(p1) == item_plugin_by_coord(p2)) &&
-+ (extract_dir_id_from_key(item_key_by_coord(p1, &k1)) ==
-+ extract_dir_id_from_key(item_key_by_coord(p2, &k2)));
-+
-+}
-+
-+/* ->max_key_inside() method for this item plugin. */
-+reiser4_key *max_key_inside_cde(const coord_t * coord /* coord of item */ ,
-+ reiser4_key * result /* resulting key */ )
-+{
-+ assert("nikita-1342", coord != NULL);
-+
-+ item_key_by_coord(coord, result);
-+ set_key_ordering(result, get_key_ordering(reiser4_max_key()));
-+ set_key_fulloid(result, get_key_fulloid(reiser4_max_key()));
-+ set_key_offset(result, get_key_offset(reiser4_max_key()));
-+ return result;
-+}
-+
-+/* @data contains data which are to be put into tree */
-+int can_contain_key_cde(const coord_t * coord /* coord of item */ ,
-+ const reiser4_key * key /* key to check */ ,
-+ const reiser4_item_data * data /* parameters of new
-+ * item/unit being
-+ * created */ )
-+{
-+ reiser4_key item_key;
-+
-+ /* FIXME-VS: do not rely on anything but iplug field of @data. Only
-+ data->iplug is initialized */
-+ assert("vs-457", data && data->iplug);
-+/* assert( "vs-553", data -> user == 0 );*/
-+ item_key_by_coord(coord, &item_key);
-+
-+ return (item_plugin_by_coord(coord) == data->iplug) &&
-+ (extract_dir_id_from_key(&item_key) ==
-+ extract_dir_id_from_key(key));
-+}
-+
-+#if REISER4_DEBUG
-+/* cde_check ->check() method for compressed directory items
-+
-+ used for debugging, every item should have here the most complete
-+ possible check of the consistency of the item that the inventor can
-+ construct
-+*/
-+int reiser4_check_cde(const coord_t * coord /* coord of item to check */,
-+ const char **error /* where to store error message */)
-+{
-+ int i;
-+ int result;
-+ char *item_start;
-+ char *item_end;
-+ reiser4_key key;
-+
-+ coord_t c;
-+
-+ assert("nikita-1357", coord != NULL);
-+ assert("nikita-1358", error != NULL);
-+
-+ if (!ergo(coord->item_pos != 0,
-+ is_dot_key(item_key_by_coord(coord, &key)))) {
-+ *error = "CDE doesn't start with dot";
-+ return -1;
-+ }
-+ item_start = item_body_by_coord(coord);
-+ item_end = item_start + item_length_by_coord(coord);
-+
-+ coord_dup(&c, coord);
-+ result = 0;
-+ for (i = 0; i < units(coord); ++i) {
-+ directory_entry_format *entry;
-+
-+ if ((char *)(header_at(coord, i) + 1) >
-+ item_end - units(coord) * sizeof *entry) {
-+ *error = "CDE header is out of bounds";
-+ result = -1;
-+ break;
-+ }
-+ entry = entry_at(coord, i);
-+ if ((char *)entry < item_start + sizeof(cde_item_format)) {
-+ *error = "CDE header is too low";
-+ result = -1;
-+ break;
-+ }
-+ if ((char *)(entry + 1) > item_end) {
-+ *error = "CDE header is too high";
-+ result = -1;
-+ break;
-+ }
-+ }
-+
-+ return result;
-+}
-+#endif
-+
-+/* ->init() method for this item plugin. */
-+int init_cde(coord_t * coord /* coord of item */ ,
-+ coord_t * from UNUSED_ARG, reiser4_item_data * data /* structure used for insertion */
-+ UNUSED_ARG)
-+{
-+ put_unaligned(cpu_to_le16(0), &formatted_at(coord)->num_of_entries);
-+ return 0;
-+}
-+
-+/* ->lookup() method for this item plugin. */
-+lookup_result lookup_cde(const reiser4_key * key /* key to search for */ ,
-+ lookup_bias bias /* search bias */ ,
-+ coord_t * coord /* coord of item to lookup in */ )
-+{
-+ cmp_t last_comp;
-+ int pos;
-+
-+ reiser4_key utmost_key;
-+
-+ assert("nikita-1293", coord != NULL);
-+ assert("nikita-1294", key != NULL);
-+
-+ CHECKME(coord);
-+
-+ if (keygt(item_key_by_coord(coord, &utmost_key), key)) {
-+ coord->unit_pos = 0;
-+ coord->between = BEFORE_UNIT;
-+ return CBK_COORD_NOTFOUND;
-+ }
-+ pos = find(coord, key, &last_comp);
-+ if (pos >= 0) {
-+ coord->unit_pos = (int)pos;
-+ switch (last_comp) {
-+ case EQUAL_TO:
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ case GREATER_THAN:
-+ coord->between = BEFORE_UNIT;
-+ return RETERR(-ENOENT);
-+ case LESS_THAN:
-+ default:
-+ impossible("nikita-1298", "Broken find");
-+ return RETERR(-EIO);
-+ }
-+ } else {
-+ coord->unit_pos = units(coord) - 1;
-+ coord->between = AFTER_UNIT;
-+ return (bias ==
-+ FIND_MAX_NOT_MORE_THAN) ? CBK_COORD_FOUND :
-+ CBK_COORD_NOTFOUND;
-+ }
-+}
-+
-+/* ->paste() method for this item plugin. */
-+int paste_cde(coord_t * coord /* coord of item */ ,
-+ reiser4_item_data * data /* parameters of new unit being
-+ * inserted */ ,
-+ carry_plugin_info * info UNUSED_ARG /* todo carry queue */ )
-+{
-+ struct cde_entry_data *e;
-+ int result;
-+ int i;
-+
-+ CHECKME(coord);
-+ e = (struct cde_entry_data *) data->data;
-+
-+ result = 0;
-+ for (i = 0; i < e->num_of_entries; ++i) {
-+ int pos;
-+ int phantom_size;
-+
-+ phantom_size = data->length;
-+ if (units(coord) == 0)
-+ phantom_size -= sizeof(cde_item_format);
-+
-+ result =
-+ expand(coord, e->entry + i, phantom_size, &pos, data->arg);
-+ if (result != 0)
-+ break;
-+ result = paste_entry(coord, e->entry + i, pos, data->arg);
-+ if (result != 0)
-+ break;
-+ }
-+ CHECKME(coord);
-+ return result;
-+}
-+
-+/* amount of space occupied by all entries starting from @idx both headers and
-+ bodies. */
-+static unsigned int part_size(const coord_t * coord /* coord of item */ ,
-+ int idx /* index of unit */ )
-+{
-+ assert("nikita-1299", coord != NULL);
-+ assert("nikita-1300", idx < (int)units(coord));
-+
-+ return sizeof(cde_item_format) +
-+ (idx + 1) * sizeof(cde_unit_header) + offset_of(coord,
-+ idx + 1) -
-+ offset_of(coord, 0);
-+}
-+
-+/* how many but not more than @want units of @source can be merged with
-+ item in @target node. If pend == append - we try to append last item
-+ of @target by first units of @source. If pend == prepend - we try to
-+ "prepend" first item in @target by last units of @source. @target
-+ node has @free_space bytes of free space. Total size of those units
-+ are returned via @size */
-+int can_shift_cde(unsigned free_space /* free space in item */ ,
-+ coord_t * coord /* coord of source item */ ,
-+ znode * target /* target node */ ,
-+ shift_direction pend /* shift direction */ ,
-+ unsigned *size /* resulting number of shifted bytes */ ,
-+ unsigned want /* maximal number of bytes to shift */ )
-+{
-+ int shift;
-+
-+ CHECKME(coord);
-+ if (want == 0) {
-+ *size = 0;
-+ return 0;
-+ }
-+
-+ /* pend == SHIFT_LEFT <==> shifting to the left */
-+ if (pend == SHIFT_LEFT) {
-+ for (shift = min((int)want - 1, units(coord)); shift >= 0;
-+ --shift) {
-+ *size = part_size(coord, shift);
-+ if (target != NULL)
-+ *size -= sizeof(cde_item_format);
-+ if (*size <= free_space)
-+ break;
-+ }
-+ shift = shift + 1;
-+ } else {
-+ int total_size;
-+
-+ assert("nikita-1301", pend == SHIFT_RIGHT);
-+
-+ total_size = item_length_by_coord(coord);
-+ for (shift = units(coord) - want - 1; shift < units(coord) - 1;
-+ ++shift) {
-+ *size = total_size - part_size(coord, shift);
-+ if (target == NULL)
-+ *size += sizeof(cde_item_format);
-+ if (*size <= free_space)
-+ break;
-+ }
-+ shift = units(coord) - shift - 1;
-+ }
-+ if (shift == 0)
-+ *size = 0;
-+ CHECKME(coord);
-+ return shift;
-+}
-+
-+/* ->copy_units() method for this item plugin. */
-+void copy_units_cde(coord_t * target /* coord of target item */ ,
-+ coord_t * source /* coord of source item */ ,
-+ unsigned from /* starting unit */ ,
-+ unsigned count /* how many units to copy */ ,
-+ shift_direction where_is_free_space /* shift direction */ ,
-+ unsigned free_space /* free space in item */ )
-+{
-+ char *header_from;
-+ char *header_to;
-+
-+ char *entry_from;
-+ char *entry_to;
-+
-+ int pos_in_target;
-+ int data_size;
-+ int data_delta;
-+ int i;
-+
-+ assert("nikita-1303", target != NULL);
-+ assert("nikita-1304", source != NULL);
-+ assert("nikita-1305", (int)from < units(source));
-+ assert("nikita-1307", (int)(from + count) <= units(source));
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ assert("nikita-1453", from == 0);
-+ pos_in_target = units(target);
-+ } else {
-+ assert("nikita-1309", (int)(from + count) == units(source));
-+ pos_in_target = 0;
-+ memmove(item_body_by_coord(target),
-+ (char *)item_body_by_coord(target) + free_space,
-+ item_length_by_coord(target) - free_space);
-+ }
-+
-+ CHECKME(target);
-+ CHECKME(source);
-+
-+ /* expand @target */
-+ data_size =
-+ offset_of(source, (int)(from + count)) - offset_of(source,
-+ (int)from);
-+
-+ if (units(target) == 0)
-+ free_space -= sizeof(cde_item_format);
-+
-+ expand_item(target, pos_in_target, (int)count,
-+ (int)(item_length_by_coord(target) - free_space),
-+ (unsigned)data_size);
-+
-+ /* copy first @count units of @source into @target */
-+ data_delta =
-+ offset_of(target, pos_in_target) - offset_of(source, (int)from);
-+
-+ /* copy entries */
-+ entry_from = (char *)entry_at(source, (int)from);
-+ entry_to = (char *)entry_at(source, (int)(from + count));
-+ memmove(entry_at(target, pos_in_target), entry_from,
-+ (unsigned)(entry_to - entry_from));
-+
-+ /* copy headers */
-+ header_from = (char *)header_at(source, (int)from);
-+ header_to = (char *)header_at(source, (int)(from + count));
-+ memmove(header_at(target, pos_in_target), header_from,
-+ (unsigned)(header_to - header_from));
-+
-+ /* update offsets */
-+ for (i = pos_in_target; i < (int)(pos_in_target + count); ++i)
-+ adj_offset(target, i, data_delta);
-+ CHECKME(target);
-+ CHECKME(source);
-+}
-+
-+/* ->cut_units() method for this item plugin. */
-+int cut_units_cde(coord_t * coord /* coord of item */ ,
-+ pos_in_node_t from /* start unit pos */ ,
-+ pos_in_node_t to /* stop unit pos */ ,
-+ struct carry_cut_data *cdata UNUSED_ARG,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ char *header_from;
-+ char *header_to;
-+
-+ char *entry_from;
-+ char *entry_to;
-+
-+ int size;
-+ int entry_delta;
-+ int header_delta;
-+ int i;
-+
-+ unsigned count;
-+
-+ CHECKME(coord);
-+
-+ count = to - from + 1;
-+
-+ assert("nikita-1454", coord != NULL);
-+ assert("nikita-1455", (int)(from + count) <= units(coord));
-+
-+ if (smallest_removed)
-+ unit_key_by_coord(coord, smallest_removed);
-+
-+ if (new_first) {
-+ coord_t next;
-+
-+ /* not everything is cut from item head */
-+ assert("vs-1527", from == 0);
-+ assert("vs-1528", to < units(coord) - 1);
-+
-+ coord_dup(&next, coord);
-+ next.unit_pos++;
-+ unit_key_by_coord(&next, new_first);
-+ }
-+
-+ size = item_length_by_coord(coord);
-+ if (count == (unsigned)units(coord)) {
-+ return size;
-+ }
-+
-+ header_from = (char *)header_at(coord, (int)from);
-+ header_to = (char *)header_at(coord, (int)(from + count));
-+
-+ entry_from = (char *)entry_at(coord, (int)from);
-+ entry_to = (char *)entry_at(coord, (int)(from + count));
-+
-+ /* move headers */
-+ memmove(header_from, header_to,
-+ (unsigned)(address(coord, size) - header_to));
-+
-+ header_delta = header_to - header_from;
-+
-+ entry_from -= header_delta;
-+ entry_to -= header_delta;
-+ size -= header_delta;
-+
-+ /* copy entries */
-+ memmove(entry_from, entry_to,
-+ (unsigned)(address(coord, size) - entry_to));
-+
-+ entry_delta = entry_to - entry_from;
-+ size -= entry_delta;
-+
-+ /* update offsets */
-+
-+ for (i = 0; i < (int)from; ++i)
-+ adj_offset(coord, i, -header_delta);
-+
-+ for (i = from; i < units(coord) - (int)count; ++i)
-+ adj_offset(coord, i, -header_delta - entry_delta);
-+
-+ put_unaligned(cpu_to_le16((__u16) units(coord) - count),
-+ &formatted_at(coord)->num_of_entries);
-+
-+ if (from == 0) {
-+ /* entries from head was removed - move remaining to right */
-+ memmove((char *)item_body_by_coord(coord) +
-+ header_delta + entry_delta, item_body_by_coord(coord),
-+ (unsigned)size);
-+ if (REISER4_DEBUG)
-+ memset(item_body_by_coord(coord), 0,
-+ (unsigned)header_delta + entry_delta);
-+ } else {
-+ /* freed space is already at the end of item */
-+ if (REISER4_DEBUG)
-+ memset((char *)item_body_by_coord(coord) + size, 0,
-+ (unsigned)header_delta + entry_delta);
-+ }
-+
-+ return header_delta + entry_delta;
-+}
-+
-+int kill_units_cde(coord_t * coord /* coord of item */ ,
-+ pos_in_node_t from /* start unit pos */ ,
-+ pos_in_node_t to /* stop unit pos */ ,
-+ struct carry_kill_data *kdata UNUSED_ARG,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ return cut_units_cde(coord, from, to, NULL, smallest_removed, new_first);
-+}
-+
-+/* ->s.dir.extract_key() method for this item plugin. */
-+int extract_key_cde(const coord_t * coord /* coord of item */ ,
-+ reiser4_key * key /* resulting key */ )
-+{
-+ directory_entry_format *dent;
-+
-+ assert("nikita-1155", coord != NULL);
-+ assert("nikita-1156", key != NULL);
-+
-+ dent = entry_at(coord, idx_of(coord));
-+ return extract_key_from_id(&dent->id, key);
-+}
-+
-+int
-+update_key_cde(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh UNUSED_ARG)
-+{
-+ directory_entry_format *dent;
-+ obj_key_id obj_id;
-+ int result;
-+
-+ assert("nikita-2344", coord != NULL);
-+ assert("nikita-2345", key != NULL);
-+
-+ dent = entry_at(coord, idx_of(coord));
-+ result = build_obj_key_id(key, &obj_id);
-+ if (result == 0) {
-+ dent->id = obj_id;
-+ znode_make_dirty(coord->node);
-+ }
-+ return 0;
-+}
-+
-+/* ->s.dir.extract_name() method for this item plugin. */
-+char *extract_name_cde(const coord_t * coord /* coord of item */ , char *buf)
-+{
-+ directory_entry_format *dent;
-+
-+ assert("nikita-1157", coord != NULL);
-+
-+ dent = entry_at(coord, idx_of(coord));
-+ return extract_dent_name(coord, dent, buf);
-+}
-+
-+static int cde_bytes(int pasting, const reiser4_item_data * data)
-+{
-+ int result;
-+
-+ result = data->length;
-+ if (!pasting)
-+ result -= sizeof(cde_item_format);
-+ return result;
-+}
-+
-+/* ->s.dir.add_entry() method for this item plugin */
-+int add_entry_cde(struct inode *dir /* directory object */ ,
-+ coord_t * coord /* coord of item */ ,
-+ lock_handle * lh /* lock handle for insertion */ ,
-+ const struct dentry *name /* name to insert */ ,
-+ reiser4_dir_entry_desc * dir_entry /* parameters of new
-+ * directory entry */ )
-+{
-+ reiser4_item_data data;
-+ struct cde_entry entry;
-+ struct cde_entry_data edata;
-+ int result;
-+
-+ assert("nikita-1656", coord->node == lh->node);
-+ assert("nikita-1657", znode_is_write_locked(coord->node));
-+
-+ edata.num_of_entries = 1;
-+ edata.entry = &entry;
-+
-+ entry.dir = dir;
-+ entry.obj = dir_entry->obj;
-+ entry.name = &name->d_name;
-+
-+ data.data = (char *)&edata;
-+ data.user = 0; /* &edata is not user space */
-+ data.iplug = item_plugin_by_id(COMPOUND_DIR_ID);
-+ data.arg = dir_entry;
-+ assert("nikita-1302", data.iplug != NULL);
-+
-+ result = is_dot_key(&dir_entry->key);
-+ data.length = estimate_cde(result ? coord : NULL, &data);
-+
-+ /* NOTE-NIKITA quota plugin? */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(dir, cde_bytes(result, &data)))
-+ return RETERR(-EDQUOT);
-+
-+ if (result)
-+ result = insert_by_coord(coord, &data, &dir_entry->key, lh, 0);
-+ else
-+ result = reiser4_resize_item(coord, &data, &dir_entry->key,
-+ lh, 0);
-+ return result;
-+}
-+
-+/* ->s.dir.rem_entry() */
-+int rem_entry_cde(struct inode *dir /* directory of item */ ,
-+ const struct qstr *name, coord_t * coord /* coord of item */ ,
-+ lock_handle * lh UNUSED_ARG /* lock handle for
-+ * removal */ ,
-+ reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
-+ * directory entry
-+ * being removed */ )
-+{
-+ coord_t shadow;
-+ int result;
-+ int length;
-+ ON_DEBUG(char buf[DE_NAME_BUF_LEN]);
-+
-+ assert("nikita-2870", strlen(name->name) == name->len);
-+ assert("nikita-2869",
-+ !strcmp(name->name, extract_name_cde(coord, buf)));
-+
-+ length = sizeof(directory_entry_format) + sizeof(cde_unit_header);
-+ if (is_longname(name->name, name->len))
-+ length += name->len + 1;
-+
-+ if (inode_get_bytes(dir) < length) {
-+ warning("nikita-2628", "Dir is broke: %llu: %llu",
-+ (unsigned long long)get_inode_oid(dir),
-+ inode_get_bytes(dir));
-+
-+ return RETERR(-EIO);
-+ }
-+
-+ /* cut_node() is supposed to take pointers to _different_
-+ coords, because it will modify them without respect to
-+ possible aliasing. To work around this, create temporary copy
-+ of @coord.
-+ */
-+ coord_dup(&shadow, coord);
-+ result =
-+ kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
-+ if (result == 0) {
-+ /* NOTE-NIKITA quota plugin? */
-+ DQUOT_FREE_SPACE_NODIRTY(dir, length);
-+ }
-+ return result;
-+}
-+
-+/* ->s.dir.max_name_len() method for this item plugin */
-+int max_name_len_cde(const struct inode *dir /* directory */ )
-+{
-+ return
-+ reiser4_tree_by_inode(dir)->nplug->max_item_size() -
-+ sizeof(directory_entry_format) - sizeof(cde_item_format) -
-+ sizeof(cde_unit_header) - 2;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/cde.h linux-2.6.24/fs/reiser4/plugin/item/cde.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/cde.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/cde.h 2008-01-25 11:39:07.004225206 +0300
-@@ -0,0 +1,87 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Compound directory item. See cde.c for description. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ )
-+#define __FS_REISER4_PLUGIN_COMPRESSED_DE_H__
-+
-+#include "../../forward.h"
-+#include "../../kassign.h"
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry, etc */
-+
-+typedef struct cde_unit_header {
-+ de_id hash;
-+ d16 offset;
-+} cde_unit_header;
-+
-+typedef struct cde_item_format {
-+ d16 num_of_entries;
-+ cde_unit_header entry[0];
-+} cde_item_format;
-+
-+struct cde_entry {
-+ const struct inode *dir;
-+ const struct inode *obj;
-+ const struct qstr *name;
-+};
-+
-+struct cde_entry_data {
-+ int num_of_entries;
-+ struct cde_entry *entry;
-+};
-+
-+/* plugin->item.b.* */
-+reiser4_key *max_key_inside_cde(const coord_t * coord, reiser4_key * result);
-+int can_contain_key_cde(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data *);
-+int mergeable_cde(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_cde(const coord_t * coord);
-+reiser4_key *unit_key_cde(const coord_t * coord, reiser4_key * key);
-+int estimate_cde(const coord_t * coord, const reiser4_item_data * data);
-+void print_cde(const char *prefix, coord_t * coord);
-+int init_cde(coord_t * coord, coord_t * from, reiser4_item_data * data);
-+lookup_result lookup_cde(const reiser4_key * key, lookup_bias bias,
-+ coord_t * coord);
-+int paste_cde(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG);
-+int can_shift_cde(unsigned free_space, coord_t * coord, znode * target,
-+ shift_direction pend, unsigned *size, unsigned want);
-+void copy_units_cde(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction where_is_free_space,
-+ unsigned free_space);
-+int cut_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+void print_cde(const char *prefix, coord_t * coord);
-+int reiser4_check_cde(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.dir.* */
-+int extract_key_cde(const coord_t * coord, reiser4_key * key);
-+int update_key_cde(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh);
-+char *extract_name_cde(const coord_t * coord, char *buf);
-+int add_entry_cde(struct inode *dir, coord_t * coord,
-+ lock_handle * lh, const struct dentry *name,
-+ reiser4_dir_entry_desc * entry);
-+int rem_entry_cde(struct inode *dir, const struct qstr *name, coord_t * coord,
-+ lock_handle * lh, reiser4_dir_entry_desc * entry);
-+int max_name_len_cde(const struct inode *dir);
-+
-+/* __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/ctail.c linux-2.6.24/fs/reiser4/plugin/item/ctail.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/ctail.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/ctail.c 2008-01-25 11:39:07.008226236 +0300
-@@ -0,0 +1,1613 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* ctails (aka "clustered tails") are items for cryptcompress objects */
-+
-+/* DESCRIPTION:
-+
-+Each cryptcompress object is stored on disk as a set of clusters sliced
-+into ctails.
-+
-+Internal on-disk structure:
-+
-+ HEADER (1) Here stored disk cluster shift
-+ BODY
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../object.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+#include "../../super.h"
-+#include "../../context.h"
-+#include "../../page_cache.h"
-+#include "../cluster.h"
-+#include "../../flush.h"
-+#include "../../tree_walk.h"
-+
-+#include <linux/pagevec.h>
-+#include <linux/swap.h>
-+#include <linux/fs.h>
-+
-+/* return body of ctail item at @coord */
-+static ctail_item_format *ctail_formatted_at(const coord_t * coord)
-+{
-+ assert("edward-60", coord != NULL);
-+ return item_body_by_coord(coord);
-+}
-+
-+static int cluster_shift_by_coord(const coord_t * coord)
-+{
-+ return get_unaligned(&ctail_formatted_at(coord)->cluster_shift);
-+}
-+
-+static inline void dclust_set_extension_shift(hint_t * hint)
-+{
-+ assert("edward-1270",
-+ item_id_by_coord(&hint->ext_coord.coord) == CTAIL_ID);
-+ hint->ext_coord.extension.ctail.shift =
-+ cluster_shift_by_coord(&hint->ext_coord.coord);
-+}
-+
-+static loff_t off_by_coord(const coord_t * coord)
-+{
-+ reiser4_key key;
-+ return get_key_offset(item_key_by_coord(coord, &key));
-+}
-+
-+int coord_is_unprepped_ctail(const coord_t * coord)
-+{
-+ assert("edward-1233", coord != NULL);
-+ assert("edward-1234", item_id_by_coord(coord) == CTAIL_ID);
-+ assert("edward-1235",
-+ ergo((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT,
-+ nr_units_ctail(coord) == (pos_in_node_t) UCTAIL_NR_UNITS));
-+
-+ return (int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT;
-+}
-+
-+static cloff_t clust_by_coord(const coord_t * coord, struct inode *inode)
-+{
-+ int shift;
-+
-+ if (inode != NULL) {
-+ shift = inode_cluster_shift(inode);
-+ assert("edward-1236",
-+ ergo(!coord_is_unprepped_ctail(coord),
-+ shift == cluster_shift_by_coord(coord)));
-+ } else {
-+ assert("edward-1237", !coord_is_unprepped_ctail(coord));
-+ shift = cluster_shift_by_coord(coord);
-+ }
-+ return off_by_coord(coord) >> shift;
-+}
-+
-+static int disk_cluster_size(const coord_t * coord)
-+{
-+ assert("edward-1156",
-+ item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
-+ /* calculation of disk cluster size
-+ is meaninless if ctail is unprepped */
-+ assert("edward-1238", !coord_is_unprepped_ctail(coord));
-+
-+ return 1 << cluster_shift_by_coord(coord);
-+}
-+
-+/* true if the key is of first disk cluster item */
-+static int is_disk_cluster_key(const reiser4_key * key, const coord_t * coord)
-+{
-+ assert("edward-1239", item_id_by_coord(coord) == CTAIL_ID);
-+
-+ return coord_is_unprepped_ctail(coord) ||
-+ ((get_key_offset(key) &
-+ ((loff_t) disk_cluster_size(coord) - 1)) == 0);
-+}
-+
-+static char *first_unit(coord_t * coord)
-+{
-+ /* FIXME: warning: pointer of type `void *' used in arithmetic */
-+ return (char *)item_body_by_coord(coord) + sizeof(ctail_item_format);
-+}
-+
-+/* plugin->u.item.b.max_key_inside :
-+ tail_max_key_inside */
-+
-+/* plugin->u.item.b.can_contain_key */
-+int
-+can_contain_key_ctail(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data * data)
-+{
-+ reiser4_key item_key;
-+
-+ if (item_plugin_by_coord(coord) != data->iplug)
-+ return 0;
-+
-+ item_key_by_coord(coord, &item_key);
-+ if (get_key_locality(key) != get_key_locality(&item_key) ||
-+ get_key_objectid(key) != get_key_objectid(&item_key))
-+ return 0;
-+ if (get_key_offset(&item_key) + nr_units_ctail(coord) !=
-+ get_key_offset(key))
-+ return 0;
-+ if (is_disk_cluster_key(key, coord))
-+ return 0;
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.mergeable */
-+int mergeable_ctail(const coord_t * p1, const coord_t * p2)
-+{
-+ reiser4_key key1, key2;
-+
-+ assert("edward-62", item_id_by_coord(p1) == CTAIL_ID);
-+ assert("edward-61", plugin_of_group(item_plugin_by_coord(p1),
-+ UNIX_FILE_METADATA_ITEM_TYPE));
-+
-+ if (item_id_by_coord(p2) != CTAIL_ID) {
-+ /* second item is of another type */
-+ return 0;
-+ }
-+
-+ item_key_by_coord(p1, &key1);
-+ item_key_by_coord(p2, &key2);
-+ if (get_key_locality(&key1) != get_key_locality(&key2) ||
-+ get_key_objectid(&key1) != get_key_objectid(&key2) ||
-+ get_key_type(&key1) != get_key_type(&key2)) {
-+ /* items of different objects */
-+ return 0;
-+ }
-+ if (get_key_offset(&key1) + nr_units_ctail(p1) != get_key_offset(&key2))
-+ /* not adjacent items */
-+ return 0;
-+ if (is_disk_cluster_key(&key2, p2))
-+ return 0;
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.nr_units */
-+pos_in_node_t nr_units_ctail(const coord_t * coord)
-+{
-+ return (item_length_by_coord(coord) -
-+ sizeof(ctail_formatted_at(coord)->cluster_shift));
-+}
-+
-+/* plugin->u.item.b.estimate:
-+ estimate how much space is needed to insert/paste @data->length bytes
-+ into ctail at @coord */
-+int estimate_ctail(const coord_t * coord /* coord of item */ ,
-+ const reiser4_item_data *
-+ data /* parameters for new item */ )
-+{
-+ if (coord == NULL)
-+ /* insert */
-+ return (sizeof(ctail_item_format) + data->length);
-+ else
-+ /* paste */
-+ return data->length;
-+}
-+
-+/* ->init() method for this item plugin. */
-+int init_ctail(coord_t * to /* coord of item */ ,
-+ coord_t * from /* old_item */ ,
-+ reiser4_item_data * data /* structure used for insertion */ )
-+{
-+ int cluster_shift; /* cpu value to convert */
-+
-+ if (data) {
-+ assert("edward-463", data->length > sizeof(ctail_item_format));
-+ cluster_shift = *((int *)(data->arg));
-+ data->length -= sizeof(ctail_item_format);
-+ } else {
-+ assert("edward-464", from != NULL);
-+ assert("edward-855", ctail_ok(from));
-+ cluster_shift = (int)(cluster_shift_by_coord(from));
-+ }
-+ put_unaligned((d8)cluster_shift, &ctail_formatted_at(to)->cluster_shift);
-+ assert("edward-856", ctail_ok(to));
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.lookup:
-+ NULL: We are looking for item keys only */
-+
-+#if REISER4_DEBUG
-+int ctail_ok(const coord_t * coord)
-+{
-+ return coord_is_unprepped_ctail(coord) ||
-+ cluster_shift_ok(cluster_shift_by_coord(coord));
-+}
-+
-+/* plugin->u.item.b.check */
-+int check_ctail(const coord_t * coord, const char **error)
-+{
-+ if (!ctail_ok(coord)) {
-+ if (error)
-+ *error = "bad cluster shift in ctail";
-+ return 1;
-+ }
-+ return 0;
-+}
-+#endif
-+
-+/* plugin->u.item.b.paste */
-+int
-+paste_ctail(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG)
-+{
-+ unsigned old_nr_units;
-+
-+ assert("edward-268", data->data != NULL);
-+ /* copy only from kernel space */
-+ assert("edward-66", data->user == 0);
-+
-+ old_nr_units =
-+ item_length_by_coord(coord) - sizeof(ctail_item_format) -
-+ data->length;
-+
-+ /* ctail items never get pasted in the middle */
-+
-+ if (coord->unit_pos == 0 && coord->between == AT_UNIT) {
-+
-+ /* paste at the beginning when create new item */
-+ assert("edward-450",
-+ item_length_by_coord(coord) ==
-+ data->length + sizeof(ctail_item_format));
-+ assert("edward-451", old_nr_units == 0);
-+ } else if (coord->unit_pos == old_nr_units - 1
-+ && coord->between == AFTER_UNIT) {
-+
-+ /* paste at the end */
-+ coord->unit_pos++;
-+ } else
-+ impossible("edward-453", "bad paste position");
-+
-+ memcpy(first_unit(coord) + coord->unit_pos, data->data, data->length);
-+
-+ assert("edward-857", ctail_ok(coord));
-+
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.fast_paste */
-+
-+/* plugin->u.item.b.can_shift
-+ number of units is returned via return value, number of bytes via @size. For
-+ ctail items they coincide */
-+int
-+can_shift_ctail(unsigned free_space, coord_t * source,
-+ znode * target, shift_direction direction UNUSED_ARG,
-+ unsigned *size /* number of bytes */ , unsigned want)
-+{
-+ /* make sure that that we do not want to shift more than we have */
-+ assert("edward-68", want > 0 && want <= nr_units_ctail(source));
-+
-+ *size = min(want, free_space);
-+
-+ if (!target) {
-+ /* new item will be created */
-+ if (*size <= sizeof(ctail_item_format)) {
-+ *size = 0;
-+ return 0;
-+ }
-+ return *size - sizeof(ctail_item_format);
-+ }
-+ return *size;
-+}
-+
-+/* plugin->u.item.b.copy_units
-+ cooperates with ->can_shift() */
-+void
-+copy_units_ctail(coord_t * target, coord_t * source,
-+ unsigned from, unsigned count /* units */ ,
-+ shift_direction where_is_free_space,
-+ unsigned free_space /* bytes */ )
-+{
-+ /* make sure that item @target is expanded already */
-+ assert("edward-69", (unsigned)item_length_by_coord(target) >= count);
-+ assert("edward-70", free_space == count || free_space == count + 1);
-+
-+ assert("edward-858", ctail_ok(source));
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ /* append item @target with @count first bytes of @source:
-+ this restriction came from ordinary tails */
-+ assert("edward-71", from == 0);
-+ assert("edward-860", ctail_ok(target));
-+
-+ memcpy(first_unit(target) + nr_units_ctail(target) - count,
-+ first_unit(source), count);
-+ } else {
-+ /* target item is moved to right already */
-+ reiser4_key key;
-+
-+ assert("edward-72", nr_units_ctail(source) == from + count);
-+
-+ if (free_space == count) {
-+ init_ctail(target, source, NULL);
-+ } else {
-+ /* new item has been created */
-+ assert("edward-862", ctail_ok(target));
-+ }
-+ memcpy(first_unit(target), first_unit(source) + from, count);
-+
-+ assert("edward-863", ctail_ok(target));
-+
-+ /* new units are inserted before first unit in an item,
-+ therefore, we have to update item key */
-+ item_key_by_coord(source, &key);
-+ set_key_offset(&key, get_key_offset(&key) + from);
-+
-+ node_plugin_by_node(target->node)->update_item_key(target, &key,
-+ NULL /*info */);
-+ }
-+}
-+
-+/* plugin->u.item.b.create_hook */
-+int create_hook_ctail(const coord_t * coord, void *arg)
-+{
-+ assert("edward-864", znode_is_loaded(coord->node));
-+
-+ znode_set_convertible(coord->node);
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.kill_hook */
-+int kill_hook_ctail(const coord_t * coord, pos_in_node_t from,
-+ pos_in_node_t count, carry_kill_data * kdata)
-+{
-+ struct inode *inode;
-+
-+ assert("edward-1157", item_id_by_coord(coord) == CTAIL_ID);
-+ assert("edward-291", znode_is_write_locked(coord->node));
-+
-+ inode = kdata->inode;
-+ if (inode) {
-+ reiser4_key key;
-+ struct cryptcompress_info * info;
-+ cloff_t index;
-+
-+ item_key_by_coord(coord, &key);
-+ info = cryptcompress_inode_data(inode);
-+ index = off_to_clust(get_key_offset(&key), inode);
-+
-+ if (from == 0) {
-+ info->trunc_index = index;
-+ if (is_disk_cluster_key(&key, coord)) {
-+ /*
-+ * first item of disk cluster is to be killed
-+ */
-+ truncate_complete_page_cluster(
-+ inode, index, kdata->params.truncate);
-+ inode_sub_bytes(inode,
-+ inode_cluster_size(inode));
-+ }
-+ }
-+ }
-+ return 0;
-+}
-+
-+/* for shift_hook_ctail(),
-+ return true if the first disk cluster item has dirty child
-+*/
-+static int ctail_convertible(const coord_t * coord)
-+{
-+ int result;
-+ reiser4_key key;
-+ jnode *child = NULL;
-+
-+ assert("edward-477", coord != NULL);
-+ assert("edward-478", item_id_by_coord(coord) == CTAIL_ID);
-+
-+ if (coord_is_unprepped_ctail(coord))
-+ /* unprepped ctail should be converted */
-+ return 1;
-+
-+ item_key_by_coord(coord, &key);
-+ child = jlookup(current_tree,
-+ get_key_objectid(&key),
-+ off_to_pg(off_by_coord(coord)));
-+ if (!child)
-+ return 0;
-+ result = JF_ISSET(child, JNODE_DIRTY);
-+ jput(child);
-+ return result;
-+}
-+
-+/* FIXME-EDWARD */
-+/* plugin->u.item.b.shift_hook */
-+int shift_hook_ctail(const coord_t * item /* coord of item */ ,
-+ unsigned from UNUSED_ARG /* start unit */ ,
-+ unsigned count UNUSED_ARG /* stop unit */ ,
-+ znode * old_node /* old parent */ )
-+{
-+ assert("edward-479", item != NULL);
-+ assert("edward-480", item->node != old_node);
-+
-+ if (!znode_convertible(old_node) || znode_convertible(item->node))
-+ return 0;
-+ if (ctail_convertible(item))
-+ znode_set_convertible(item->node);
-+ return 0;
-+}
-+
-+static int
-+cut_or_kill_ctail_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ int cut, void *p, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ pos_in_node_t count; /* number of units to cut */
-+ char *item;
-+
-+ count = to - from + 1;
-+ item = item_body_by_coord(coord);
-+
-+ assert("edward-74", ergo(from != 0, to == coord_last_unit_pos(coord)));
-+
-+ if (smallest_removed) {
-+ /* store smallest key removed */
-+ item_key_by_coord(coord, smallest_removed);
-+ set_key_offset(smallest_removed,
-+ get_key_offset(smallest_removed) + from);
-+ }
-+
-+ if (new_first) {
-+ assert("vs-1531", from == 0);
-+
-+ item_key_by_coord(coord, new_first);
-+ set_key_offset(new_first,
-+ get_key_offset(new_first) + from + count);
-+ }
-+
-+ if (!cut)
-+ kill_hook_ctail(coord, from, 0, (struct carry_kill_data *)p);
-+
-+ if (from == 0) {
-+ if (count != nr_units_ctail(coord)) {
-+ /* part of item is removed, so move free space at the beginning
-+ of the item and update item key */
-+ reiser4_key key;
-+ memcpy(item + to + 1, item, sizeof(ctail_item_format));
-+ item_key_by_coord(coord, &key);
-+ set_key_offset(&key, get_key_offset(&key) + count);
-+ node_plugin_by_node(coord->node)->update_item_key(coord,
-+ &key,
-+ NULL);
-+ } else {
-+ /* cut_units should not be called to cut evrything */
-+ assert("vs-1532", ergo(cut, 0));
-+ /* whole item is cut, so more then amount of space occupied
-+ by units got freed */
-+ count += sizeof(ctail_item_format);
-+ }
-+ if (REISER4_DEBUG)
-+ memset(item, 0, count);
-+ } else if (REISER4_DEBUG)
-+ memset(item + sizeof(ctail_item_format) + from, 0, count);
-+ return count;
-+}
-+
-+/* plugin->u.item.b.cut_units */
-+int
-+cut_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
-+ carry_cut_data * cdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ return cut_or_kill_ctail_units(item, from, to, 1, NULL,
-+ smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.kill_units */
-+int
-+kill_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ return cut_or_kill_ctail_units(item, from, to, 0, kdata,
-+ smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.s.file.read */
-+int read_ctail(struct file *file UNUSED_ARG, flow_t * f, hint_t * hint)
-+{
-+ uf_coord_t *uf_coord;
-+ coord_t *coord;
-+
-+ uf_coord = &hint->ext_coord;
-+ coord = &uf_coord->coord;
-+ assert("edward-127", f->user == 0);
-+ assert("edward-129", coord && coord->node);
-+ assert("edward-130", coord_is_existing_unit(coord));
-+ assert("edward-132", znode_is_loaded(coord->node));
-+
-+ /* start read only from the beginning of ctail */
-+ assert("edward-133", coord->unit_pos == 0);
-+ /* read only whole ctails */
-+ assert("edward-135", nr_units_ctail(coord) <= f->length);
-+
-+ assert("edward-136", reiser4_schedulable());
-+ assert("edward-886", ctail_ok(coord));
-+
-+ if (f->data)
-+ memcpy(f->data, (char *)first_unit(coord),
-+ (size_t) nr_units_ctail(coord));
-+
-+ dclust_set_extension_shift(hint);
-+ mark_page_accessed(znode_page(coord->node));
-+ move_flow_forward(f, nr_units_ctail(coord));
-+
-+ return 0;
-+}
-+
-+/**
-+ * Prepare transform stream with plain text for page
-+ * @page taking into account synchronization issues.
-+ */
-+static int ctail_read_disk_cluster(struct cluster_handle * clust,
-+ struct inode * inode, struct page * page,
-+ znode_lock_mode mode)
-+{
-+ int result;
-+
-+ assert("edward-1450", mode == ZNODE_READ_LOCK || ZNODE_WRITE_LOCK);
-+ assert("edward-671", clust->hint != NULL);
-+ assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER);
-+ assert("edward-672", cryptcompress_inode_ok(inode));
-+ assert("edward-1527", PageLocked(page));
-+
-+ unlock_page(page);
-+
-+ /* set input stream */
-+ result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM);
-+ if (result) {
-+ lock_page(page);
-+ return result;
-+ }
-+ result = find_disk_cluster(clust, inode, 1 /* read items */, mode);
-+ lock_page(page);
-+ if (result)
-+ return result;
-+ /*
-+ * at this point we have locked position in the tree
-+ */
-+ assert("edward-1528", znode_is_any_locked(clust->hint->lh.node));
-+
-+ if (page->mapping != inode->i_mapping) {
-+ /* page was truncated */
-+ reiser4_unset_hint(clust->hint);
-+ reset_cluster_params(clust);
-+ return AOP_TRUNCATED_PAGE;
-+ }
-+ if (PageUptodate(page)) {
-+ /* disk cluster can be obsolete, don't use it! */
-+ reiser4_unset_hint(clust->hint);
-+ reset_cluster_params(clust);
-+ return 0;
-+ }
-+ if (clust->dstat == FAKE_DISK_CLUSTER ||
-+ clust->dstat == UNPR_DISK_CLUSTER ||
-+ clust->dstat == TRNC_DISK_CLUSTER) {
-+ /*
-+ * this information about disk cluster will be valid
-+ * as long as we keep the position in the tree locked
-+ */
-+ tfm_cluster_set_uptodate(&clust->tc);
-+ return 0;
-+ }
-+ /* now prepare output stream.. */
-+ result = grab_coa(&clust->tc, inode_compression_plugin(inode));
-+ if (result)
-+ return result;
-+ /* ..and fill this with plain text */
-+ result = reiser4_inflate_cluster(clust, inode);
-+ if (result)
-+ return result;
-+ /*
-+ * The stream is ready! It won't be obsolete as
-+ * long as we keep last disk cluster item locked.
-+ */
-+ tfm_cluster_set_uptodate(&clust->tc);
-+ return 0;
-+}
-+
-+/*
-+ * fill one page with plain text.
-+ */
-+int do_readpage_ctail(struct inode * inode, struct cluster_handle * clust,
-+ struct page *page, znode_lock_mode mode)
-+{
-+ int ret;
-+ unsigned cloff;
-+ char *data;
-+ size_t to_page;
-+ struct tfm_cluster * tc = &clust->tc;
-+
-+ assert("edward-212", PageLocked(page));
-+
-+ if (unlikely(page->mapping != inode->i_mapping))
-+ return AOP_TRUNCATED_PAGE;
-+ if (PageUptodate(page))
-+ goto exit;
-+ to_page = pbytes(page_index(page), inode);
-+ if (to_page == 0) {
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ goto exit;
-+ }
-+ if (!tfm_cluster_is_uptodate(&clust->tc)) {
-+ clust->index = pg_to_clust(page->index, inode);
-+
-+ /* this will unlock/lock the page */
-+ ret = ctail_read_disk_cluster(clust, inode, page, mode);
-+
-+ assert("edward-212", PageLocked(page));
-+ if (ret)
-+ return ret;
-+
-+ /* refresh bytes */
-+ to_page = pbytes(page_index(page), inode);
-+ if (to_page == 0) {
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ goto exit;
-+ }
-+ }
-+ if (PageUptodate(page))
-+ /* somebody else fill it already */
-+ goto exit;
-+
-+ assert("edward-119", tfm_cluster_is_uptodate(tc));
-+ assert("edward-1529", znode_is_any_locked(clust->hint->lh.node));
-+
-+ switch (clust->dstat) {
-+ case UNPR_DISK_CLUSTER:
-+ BUG_ON(1);
-+ case TRNC_DISK_CLUSTER:
-+ /*
-+ * Race with truncate!
-+ * We resolve it in favour of the last one (the only way,
-+ * as in this case plain text is unrecoverable)
-+ */
-+ case FAKE_DISK_CLUSTER:
-+ /* fill the page by zeroes */
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ break;
-+ case PREP_DISK_CLUSTER:
-+ /* fill page by transformed stream with plain text */
-+ assert("edward-1058", !PageUptodate(page));
-+ assert("edward-120", tc->len <= inode_cluster_size(inode));
-+
-+ /* page index in this logical cluster */
-+ cloff = pg_to_off_to_cloff(page->index, inode);
-+
-+ data = kmap(page);
-+ memcpy(data, tfm_stream_data(tc, OUTPUT_STREAM) + cloff, to_page);
-+ memset(data + to_page, 0, (size_t) PAGE_CACHE_SIZE - to_page);
-+ flush_dcache_page(page);
-+ kunmap(page);
-+ SetPageUptodate(page);
-+ break;
-+ default:
-+ impossible("edward-1169", "bad disk cluster state");
-+ }
-+ exit:
-+ return 0;
-+}
-+
-+/* plugin->u.item.s.file.readpage */
-+int readpage_ctail(void *vp, struct page *page)
-+{
-+ int result;
-+ hint_t * hint;
-+ struct cluster_handle * clust = vp;
-+
-+ assert("edward-114", clust != NULL);
-+ assert("edward-115", PageLocked(page));
-+ assert("edward-116", !PageUptodate(page));
-+ assert("edward-118", page->mapping && page->mapping->host);
-+ assert("edward-867", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL) {
-+ unlock_page(page);
-+ return RETERR(-ENOMEM);
-+ }
-+ clust->hint = hint;
-+ result = load_file_hint(clust->file, hint);
-+ if (result) {
-+ kfree(hint);
-+ unlock_page(page);
-+ return result;
-+ }
-+ assert("vs-25", hint->ext_coord.lh == &hint->lh);
-+
-+ result = do_readpage_ctail(page->mapping->host, clust, page,
-+ ZNODE_READ_LOCK);
-+ assert("edward-213", PageLocked(page));
-+ assert("edward-1163", ergo(!result, PageUptodate(page)));
-+
-+ unlock_page(page);
-+ done_lh(&hint->lh);
-+ hint->ext_coord.valid = 0;
-+ save_file_hint(clust->file, hint);
-+ kfree(hint);
-+ tfm_cluster_clr_uptodate(&clust->tc);
-+
-+ return result;
-+}
-+
-+/* Helper function for ->readpages() */
-+static int ctail_read_page_cluster(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ int i;
-+ int result;
-+ assert("edward-779", clust != NULL);
-+ assert("edward-1059", clust->win == NULL);
-+ assert("edward-780", inode != NULL);
-+
-+ result = prepare_page_cluster(inode, clust, READ_OP);
-+ if (result)
-+ return result;
-+
-+ assert("edward-781", !tfm_cluster_is_uptodate(&clust->tc));
-+
-+ for (i = 0; i < clust->nr_pages; i++) {
-+ struct page *page = clust->pages[i];
-+ lock_page(page);
-+ result = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK);
-+ unlock_page(page);
-+ if (result)
-+ break;
-+ }
-+ tfm_cluster_clr_uptodate(&clust->tc);
-+ put_page_cluster(clust, inode, READ_OP);
-+ return result;
-+}
-+
-+/* filler for read_cache_pages() */
-+static int ctail_readpages_filler(void * data, struct page * page)
-+{
-+ int ret = 0;
-+ struct cluster_handle * clust = data;
-+ struct inode * inode = clust->file->f_dentry->d_inode;
-+
-+ assert("edward-1525", page->mapping == inode->i_mapping);
-+
-+ if (PageUptodate(page)) {
-+ unlock_page(page);
-+ return 0;
-+ }
-+ if (pbytes(page_index(page), inode) == 0) {
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ return 0;
-+ }
-+ move_cluster_forward(clust, inode, page->index);
-+ unlock_page(page);
-+ /*
-+ * read the whole page cluster
-+ */
-+ ret = ctail_read_page_cluster(clust, inode);
-+
-+ assert("edward-869", !tfm_cluster_is_uptodate(&clust->tc));
-+ return ret;
-+}
-+
-+/*
-+ * We populate a bit more then upper readahead suggests:
-+ * with each nominated page we read the whole page cluster
-+ * this page belongs to.
-+ */
-+int readpages_ctail(struct file *file, struct address_space *mapping,
-+ struct list_head *pages)
-+{
-+ int ret = 0;
-+ hint_t *hint;
-+ struct cluster_handle clust;
-+ struct inode *inode = mapping->host;
-+
-+ assert("edward-1521", inode == file->f_dentry->d_inode);
-+
-+ cluster_init_read(&clust, NULL);
-+ clust.file = file;
-+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
-+ if (hint == NULL) {
-+ warning("vs-28", "failed to allocate hint");
-+ ret = RETERR(-ENOMEM);
-+ goto exit1;
-+ }
-+ clust.hint = hint;
-+ ret = load_file_hint(clust.file, hint);
-+ if (ret) {
-+ warning("edward-1522", "failed to load hint");
-+ goto exit2;
-+ }
-+ assert("vs-26", hint->ext_coord.lh == &hint->lh);
-+ ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
-+ if (ret) {
-+ warning("edward-1523", "failed to alloc pgset");
-+ goto exit3;
-+ }
-+ ret = read_cache_pages(mapping, pages, ctail_readpages_filler, &clust);
-+
-+ assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc));
-+ exit3:
-+ done_lh(&hint->lh);
-+ save_file_hint(file, hint);
-+ hint->ext_coord.valid = 0;
-+ exit2:
-+ kfree(hint);
-+ exit1:
-+ put_cluster_handle(&clust);
-+ return ret;
-+}
-+
-+/*
-+ plugin->u.item.s.file.append_key
-+ key of the first item of the next disk cluster
-+*/
-+reiser4_key *append_key_ctail(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID);
-+ assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord)));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, ((__u64) (clust_by_coord(coord, NULL)) + 1)
-+ << cluster_shift_by_coord(coord));
-+ return key;
-+}
-+
-+static int insert_unprepped_ctail(struct cluster_handle * clust,
-+ struct inode *inode)
-+{
-+ int result;
-+ char buf[UCTAIL_NR_UNITS];
-+ reiser4_item_data data;
-+ reiser4_key key;
-+ int shift = (int)UCTAIL_SHIFT;
-+
-+ memset(buf, 0, (size_t) UCTAIL_NR_UNITS);
-+ result = key_by_inode_cryptcompress(inode,
-+ clust_to_off(clust->index, inode),
-+ &key);
-+ if (result)
-+ return result;
-+ data.user = 0;
-+ data.iplug = item_plugin_by_id(CTAIL_ID);
-+ data.arg = &shift;
-+ data.length = sizeof(ctail_item_format) + (size_t) UCTAIL_NR_UNITS;
-+ data.data = buf;
-+
-+ result = insert_by_coord(&clust->hint->ext_coord.coord,
-+ &data, &key, clust->hint->ext_coord.lh, 0);
-+ return result;
-+}
-+
-+static int
-+insert_cryptcompress_flow(coord_t * coord, lock_handle * lh, flow_t * f,
-+ int cluster_shift)
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ reiser4_item_data *data;
-+ carry_op *op;
-+
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*data));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+ data = (reiser4_item_data *) (lowest_level + 3);
-+
-+ assert("edward-466", coord->between == AFTER_ITEM
-+ || coord->between == AFTER_UNIT || coord->between == BEFORE_ITEM
-+ || coord->between == EMPTY_NODE
-+ || coord->between == BEFORE_UNIT);
-+
-+ if (coord->between == AFTER_UNIT) {
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+ }
-+ op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
-+ 0 /* operate directly on coord -> node */);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+ data->user = 0;
-+ data->iplug = item_plugin_by_id(CTAIL_ID);
-+ data->arg = &cluster_shift;
-+
-+ data->length = 0;
-+ data->data = NULL;
-+
-+ op->u.insert_flow.flags = COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT;
-+ op->u.insert_flow.insert_point = coord;
-+ op->u.insert_flow.flow = f;
-+ op->u.insert_flow.data = data;
-+ op->u.insert_flow.new_nodes = 0;
-+
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* Implementation of CRC_APPEND_ITEM mode of ctail conversion */
-+static int insert_cryptcompress_flow_in_place(coord_t * coord,
-+ lock_handle * lh, flow_t * f,
-+ int cluster_shift)
-+{
-+ int ret;
-+ coord_t pos;
-+ lock_handle lock;
-+
-+ assert("edward-484",
-+ coord->between == AT_UNIT || coord->between == AFTER_ITEM);
-+ assert("edward-485", item_id_by_coord(coord) == CTAIL_ID);
-+
-+ coord_dup(&pos, coord);
-+ pos.unit_pos = 0;
-+ pos.between = AFTER_ITEM;
-+
-+ init_lh(&lock);
-+ copy_lh(&lock, lh);
-+
-+ ret = insert_cryptcompress_flow(&pos, &lock, f, cluster_shift);
-+ done_lh(&lock);
-+ assert("edward-1347", znode_is_write_locked(lh->node));
-+ assert("edward-1228", !ret);
-+ return ret;
-+}
-+
-+/* Implementation of CRC_OVERWRITE_ITEM mode of ctail conversion */
-+static int overwrite_ctail(coord_t * coord, flow_t * f)
-+{
-+ unsigned count;
-+
-+ assert("edward-269", f->user == 0);
-+ assert("edward-270", f->data != NULL);
-+ assert("edward-271", f->length > 0);
-+ assert("edward-272", coord_is_existing_unit(coord));
-+ assert("edward-273", coord->unit_pos == 0);
-+ assert("edward-274", znode_is_write_locked(coord->node));
-+ assert("edward-275", reiser4_schedulable());
-+ assert("edward-467", item_id_by_coord(coord) == CTAIL_ID);
-+ assert("edward-1243", ctail_ok(coord));
-+
-+ count = nr_units_ctail(coord);
-+
-+ if (count > f->length)
-+ count = f->length;
-+ memcpy(first_unit(coord), f->data, count);
-+ move_flow_forward(f, count);
-+ coord->unit_pos += count;
-+ return 0;
-+}
-+
-+/* Implementation of CRC_CUT_ITEM mode of ctail conversion:
-+ cut ctail (part or whole) starting from next unit position */
-+static int cut_ctail(coord_t * coord)
-+{
-+ coord_t stop;
-+
-+ assert("edward-435", coord->between == AT_UNIT &&
-+ coord->item_pos < coord_num_items(coord) &&
-+ coord->unit_pos <= coord_num_units(coord));
-+
-+ if (coord->unit_pos == coord_num_units(coord))
-+ /* nothing to cut */
-+ return 0;
-+ coord_dup(&stop, coord);
-+ stop.unit_pos = coord_last_unit_pos(coord);
-+
-+ return cut_node_content(coord, &stop, NULL, NULL, NULL);
-+}
-+
-+int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
-+ struct inode * inode)
-+{
-+ int result;
-+ assert("edward-1244", inode != NULL);
-+ assert("edward-1245", clust->hint != NULL);
-+ assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER);
-+ assert("edward-1247", clust->reserved == 1);
-+
-+ result = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK);
-+ if (cbk_errored(result))
-+ return result;
-+ assert("edward-1249", result == CBK_COORD_NOTFOUND);
-+ assert("edward-1250", znode_is_write_locked(clust->hint->lh.node));
-+
-+ assert("edward-1295",
-+ clust->hint->ext_coord.lh->node ==
-+ clust->hint->ext_coord.coord.node);
-+
-+ coord_set_between_clusters(&clust->hint->ext_coord.coord);
-+
-+ result = insert_unprepped_ctail(clust, inode);
-+ all_grabbed2free();
-+
-+ assert("edward-1251", !result);
-+ assert("edward-1252", cryptcompress_inode_ok(inode));
-+ assert("edward-1253", znode_is_write_locked(clust->hint->lh.node));
-+ assert("edward-1254",
-+ reiser4_clustered_blocks(reiser4_get_current_sb()));
-+ assert("edward-1255",
-+ znode_convertible(clust->hint->ext_coord.coord.node));
-+
-+ return result;
-+}
-+
-+static int do_convert_ctail(flush_pos_t * pos, cryptcompress_write_mode_t mode)
-+{
-+ int result = 0;
-+ struct convert_item_info * info;
-+
-+ assert("edward-468", pos != NULL);
-+ assert("edward-469", pos->sq != NULL);
-+ assert("edward-845", item_convert_data(pos) != NULL);
-+
-+ info = item_convert_data(pos);
-+ assert("edward-679", info->flow.data != NULL);
-+
-+ switch (mode) {
-+ case CRC_APPEND_ITEM:
-+ assert("edward-1229", info->flow.length != 0);
-+ assert("edward-1256",
-+ cluster_shift_ok(cluster_shift_by_coord(&pos->coord)));
-+ result =
-+ insert_cryptcompress_flow_in_place(&pos->coord,
-+ &pos->lock,
-+ &info->flow,
-+ info->cluster_shift);
-+ break;
-+ case CRC_OVERWRITE_ITEM:
-+ assert("edward-1230", info->flow.length != 0);
-+ overwrite_ctail(&pos->coord, &info->flow);
-+ if (info->flow.length != 0)
-+ break;
-+ case CRC_CUT_ITEM:
-+ assert("edward-1231", info->flow.length == 0);
-+ result = cut_ctail(&pos->coord);
-+ break;
-+ default:
-+ result = RETERR(-EIO);
-+ impossible("edward-244", "bad convert mode");
-+ }
-+ return result;
-+}
-+
-+/* plugin->u.item.f.scan */
-+int scan_ctail(flush_scan * scan)
-+{
-+ int result = 0;
-+ struct page *page;
-+ struct inode *inode;
-+ jnode *node = scan->node;
-+
-+ assert("edward-227", scan->node != NULL);
-+ assert("edward-228", jnode_is_cluster_page(scan->node));
-+ assert("edward-639", znode_is_write_locked(scan->parent_lock.node));
-+
-+ page = jnode_page(node);
-+ inode = page->mapping->host;
-+
-+ if (!reiser4_scanning_left(scan))
-+ return result;
-+ if (!ZF_ISSET(scan->parent_lock.node, JNODE_DIRTY))
-+ znode_make_dirty(scan->parent_lock.node);
-+
-+ if (!znode_convertible(scan->parent_lock.node)) {
-+ if (JF_ISSET(scan->node, JNODE_DIRTY))
-+ znode_set_convertible(scan->parent_lock.node);
-+ else {
-+ warning("edward-681",
-+ "cluster page is already processed");
-+ return -EAGAIN;
-+ }
-+ }
-+ return result;
-+}
-+
-+/* If true, this function attaches children */
-+static int should_attach_convert_idata(flush_pos_t * pos)
-+{
-+ int result;
-+ assert("edward-431", pos != NULL);
-+ assert("edward-432", pos->child == NULL);
-+ assert("edward-619", znode_is_write_locked(pos->coord.node));
-+ assert("edward-470",
-+ item_plugin_by_coord(&pos->coord) ==
-+ item_plugin_by_id(CTAIL_ID));
-+
-+ /* check for leftmost child */
-+ utmost_child_ctail(&pos->coord, LEFT_SIDE, &pos->child);
-+
-+ if (!pos->child)
-+ return 0;
-+ spin_lock_jnode(pos->child);
-+ result = (JF_ISSET(pos->child, JNODE_DIRTY) &&
-+ pos->child->atom == ZJNODE(pos->coord.node)->atom);
-+ spin_unlock_jnode(pos->child);
-+ if (!result && pos->child) {
-+ /* existing child isn't to attach, clear up this one */
-+ jput(pos->child);
-+ pos->child = NULL;
-+ }
-+ return result;
-+}
-+
-+/**
-+ * Collect all needed information about the object here,
-+ * as in-memory inode can be evicted from memory before
-+ * disk update completion.
-+ */
-+static int init_convert_data_ctail(struct convert_item_info * idata,
-+ struct inode *inode)
-+{
-+ assert("edward-813", idata != NULL);
-+ assert("edward-814", inode != NULL);
-+
-+ idata->cluster_shift = inode_cluster_shift(inode);
-+ idata->d_cur = DC_FIRST_ITEM;
-+ idata->d_next = DC_INVALID_STATE;
-+
-+ return 0;
-+}
-+
-+static int alloc_item_convert_data(struct convert_info * sq)
-+{
-+ assert("edward-816", sq != NULL);
-+ assert("edward-817", sq->itm == NULL);
-+
-+ sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
-+ if (sq->itm == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+static void free_item_convert_data(struct convert_info * sq)
-+{
-+ assert("edward-818", sq != NULL);
-+ assert("edward-819", sq->itm != NULL);
-+ assert("edward-820", sq->iplug != NULL);
-+
-+ kfree(sq->itm);
-+ sq->itm = NULL;
-+ return;
-+}
-+
-+static int alloc_convert_data(flush_pos_t * pos)
-+{
-+ assert("edward-821", pos != NULL);
-+ assert("edward-822", pos->sq == NULL);
-+
-+ pos->sq = kmalloc(sizeof(*pos->sq), reiser4_ctx_gfp_mask_get());
-+ if (!pos->sq)
-+ return RETERR(-ENOMEM);
-+ memset(pos->sq, 0, sizeof(*pos->sq));
-+ cluster_init_write(&pos->sq->clust, NULL);
-+ return 0;
-+}
-+
-+void free_convert_data(flush_pos_t * pos)
-+{
-+ struct convert_info *sq;
-+
-+ assert("edward-823", pos != NULL);
-+ assert("edward-824", pos->sq != NULL);
-+
-+ sq = pos->sq;
-+ if (sq->itm)
-+ free_item_convert_data(sq);
-+ put_cluster_handle(&sq->clust);
-+ kfree(pos->sq);
-+ pos->sq = NULL;
-+ return;
-+}
-+
-+static int init_item_convert_data(flush_pos_t * pos, struct inode *inode)
-+{
-+ struct convert_info *sq;
-+
-+ assert("edward-825", pos != NULL);
-+ assert("edward-826", pos->sq != NULL);
-+ assert("edward-827", item_convert_data(pos) != NULL);
-+ assert("edward-828", inode != NULL);
-+
-+ sq = pos->sq;
-+
-+ memset(sq->itm, 0, sizeof(*sq->itm));
-+
-+ /* iplug->init_convert_data() */
-+ return init_convert_data_ctail(sq->itm, inode);
-+}
-+
-+/* create and attach disk cluster info used by 'convert' phase of the flush
-+ squalloc() */
-+static int attach_convert_idata(flush_pos_t * pos, struct inode *inode)
-+{
-+ int ret = 0;
-+ struct convert_item_info *info;
-+ struct cluster_handle *clust;
-+ file_plugin *fplug = inode_file_plugin(inode);
-+ compression_plugin *cplug = inode_compression_plugin(inode);
-+
-+ assert("edward-248", pos != NULL);
-+ assert("edward-249", pos->child != NULL);
-+ assert("edward-251", inode != NULL);
-+ assert("edward-682", cryptcompress_inode_ok(inode));
-+ assert("edward-252",
-+ fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
-+ assert("edward-473",
-+ item_plugin_by_coord(&pos->coord) ==
-+ item_plugin_by_id(CTAIL_ID));
-+
-+ if (!pos->sq) {
-+ ret = alloc_convert_data(pos);
-+ if (ret)
-+ return ret;
-+ }
-+ clust = &pos->sq->clust;
-+ ret = grab_coa(&clust->tc, cplug);
-+ if (ret)
-+ goto err;
-+ ret = set_cluster_by_page(clust,
-+ jnode_page(pos->child),
-+ MAX_CLUSTER_NRPAGES);
-+ if (ret)
-+ goto err;
-+
-+ assert("edward-829", pos->sq != NULL);
-+ assert("edward-250", item_convert_data(pos) == NULL);
-+
-+ pos->sq->iplug = item_plugin_by_id(CTAIL_ID);
-+
-+ ret = alloc_item_convert_data(pos->sq);
-+ if (ret)
-+ goto err;
-+ ret = init_item_convert_data(pos, inode);
-+ if (ret)
-+ goto err;
-+ info = item_convert_data(pos);
-+
-+ ret = checkout_logical_cluster(clust, pos->child, inode);
-+ if (ret)
-+ goto err;
-+
-+ reiser4_deflate_cluster(clust, inode);
-+ inc_item_convert_count(pos);
-+
-+ /* prepare flow for insertion */
-+ fplug->flow_by_inode(inode,
-+ (const char __user *)tfm_stream_data(&clust->tc, OUTPUT_STREAM),
-+ 0 /* kernel space */ ,
-+ clust->tc.len,
-+ clust_to_off(clust->index, inode),
-+ WRITE_OP, &info->flow);
-+ jput(pos->child);
-+ return 0;
-+ err:
-+ jput(pos->child);
-+ free_convert_data(pos);
-+ return ret;
-+}
-+
-+/* clear up disk cluster info */
-+static void detach_convert_idata(struct convert_info * sq)
-+{
-+ struct convert_item_info *info;
-+
-+ assert("edward-253", sq != NULL);
-+ assert("edward-840", sq->itm != NULL);
-+
-+ info = sq->itm;
-+ assert("edward-1212", info->flow.length == 0);
-+
-+ free_item_convert_data(sq);
-+ return;
-+}
-+
-+/* plugin->u.item.f.utmost_child */
-+
-+/* This function sets leftmost child for a first cluster item,
-+ if the child exists, and NULL in other cases.
-+ NOTE-EDWARD: Do not call this for RIGHT_SIDE */
-+
-+int utmost_child_ctail(const coord_t * coord, sideof side, jnode ** child)
-+{
-+ reiser4_key key;
-+
-+ item_key_by_coord(coord, &key);
-+
-+ assert("edward-257", coord != NULL);
-+ assert("edward-258", child != NULL);
-+ assert("edward-259", side == LEFT_SIDE);
-+ assert("edward-260",
-+ item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
-+
-+ if (!is_disk_cluster_key(&key, coord))
-+ *child = NULL;
-+ else
-+ *child = jlookup(current_tree,
-+ get_key_objectid(item_key_by_coord
-+ (coord, &key)),
-+ off_to_pg(get_key_offset(&key)));
-+ return 0;
-+}
-+
-+/* Returns true if @p2 is the next item to @p1
-+ in the _same_ disk cluster.
-+ Disk cluster is a set of items. If ->clustered() != NULL,
-+ with each item the whole disk cluster should be read/modified
-+*/
-+
-+/* Go rightward and check for next disk cluster item, set
-+ * d_next to DC_CHAINED_ITEM, if the last one exists.
-+ * If the current position is last item, go to right neighbor.
-+ * Skip empty nodes. Note, that right neighbors may be not in
-+ * the slum because of races. If so, make it dirty and
-+ * convertible.
-+ */
-+static int next_item_dc_stat(flush_pos_t * pos)
-+{
-+ int ret = 0;
-+ int stop = 0;
-+ znode *cur;
-+ coord_t coord;
-+ lock_handle lh;
-+ lock_handle right_lock;
-+
-+ assert("edward-1232", !node_is_empty(pos->coord.node));
-+ assert("edward-1014",
-+ pos->coord.item_pos < coord_num_items(&pos->coord));
-+ assert("edward-1015", chaining_data_present(pos));
-+ assert("edward-1017",
-+ item_convert_data(pos)->d_next == DC_INVALID_STATE);
-+
-+ item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
-+
-+ if (item_convert_data(pos)->d_cur == DC_AFTER_CLUSTER)
-+ return ret;
-+ if (pos->coord.item_pos < coord_num_items(&pos->coord) - 1)
-+ return ret;
-+
-+ /* Check next slum item.
-+ * Note, that it can not be killed by concurrent truncate,
-+ * as the last one will want the lock held by us.
-+ */
-+ init_lh(&right_lock);
-+ cur = pos->coord.node;
-+
-+ while (!stop) {
-+ init_lh(&lh);
-+ ret = reiser4_get_right_neighbor(&lh,
-+ cur,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (ret)
-+ break;
-+ ret = zload(lh.node);
-+ if (ret) {
-+ done_lh(&lh);
-+ break;
-+ }
-+ coord_init_before_first_item(&coord, lh.node);
-+
-+ if (node_is_empty(lh.node)) {
-+ znode_make_dirty(lh.node);
-+ znode_set_convertible(lh.node);
-+ stop = 0;
-+ } else if (same_disk_cluster(&pos->coord, &coord)) {
-+
-+ item_convert_data(pos)->d_next = DC_CHAINED_ITEM;
-+
-+ if (!ZF_ISSET(lh.node, JNODE_DIRTY)) {
-+ /*
-+ warning("edward-1024",
-+ "next slum item mergeable, "
-+ "but znode %p isn't dirty\n",
-+ lh.node);
-+ */
-+ znode_make_dirty(lh.node);
-+ }
-+ if (!znode_convertible(lh.node)) {
-+ /*
-+ warning("edward-1272",
-+ "next slum item mergeable, "
-+ "but znode %p isn't convertible\n",
-+ lh.node);
-+ */
-+ znode_set_convertible(lh.node);
-+ }
-+ stop = 1;
-+ } else
-+ stop = 1;
-+ zrelse(lh.node);
-+ done_lh(&right_lock);
-+ copy_lh(&right_lock, &lh);
-+ done_lh(&lh);
-+ cur = right_lock.node;
-+ }
-+ done_lh(&right_lock);
-+
-+ if (ret == -E_NO_NEIGHBOR)
-+ ret = 0;
-+ return ret;
-+}
-+
-+static int
-+assign_convert_mode(struct convert_item_info * idata,
-+ cryptcompress_write_mode_t * mode)
-+{
-+ int result = 0;
-+
-+ assert("edward-1025", idata != NULL);
-+
-+ if (idata->flow.length) {
-+ /* append or overwrite */
-+ switch (idata->d_cur) {
-+ case DC_FIRST_ITEM:
-+ case DC_CHAINED_ITEM:
-+ *mode = CRC_OVERWRITE_ITEM;
-+ break;
-+ case DC_AFTER_CLUSTER:
-+ *mode = CRC_APPEND_ITEM;
-+ break;
-+ default:
-+ impossible("edward-1018", "wrong current item state");
-+ }
-+ } else {
-+ /* cut or invalidate */
-+ switch (idata->d_cur) {
-+ case DC_FIRST_ITEM:
-+ case DC_CHAINED_ITEM:
-+ *mode = CRC_CUT_ITEM;
-+ break;
-+ case DC_AFTER_CLUSTER:
-+ result = 1;
-+ break;
-+ default:
-+ impossible("edward-1019", "wrong current item state");
-+ }
-+ }
-+ return result;
-+}
-+
-+/* plugin->u.item.f.convert */
-+/* write ctail in guessed mode */
-+int convert_ctail(flush_pos_t * pos)
-+{
-+ int result;
-+ int nr_items;
-+ cryptcompress_write_mode_t mode = CRC_OVERWRITE_ITEM;
-+
-+ assert("edward-1020", pos != NULL);
-+ assert("edward-1213", coord_num_items(&pos->coord) != 0);
-+ assert("edward-1257", item_id_by_coord(&pos->coord) == CTAIL_ID);
-+ assert("edward-1258", ctail_ok(&pos->coord));
-+ assert("edward-261", pos->coord.node != NULL);
-+
-+ nr_items = coord_num_items(&pos->coord);
-+ if (!chaining_data_present(pos)) {
-+ if (should_attach_convert_idata(pos)) {
-+ /* attach convert item info */
-+ struct inode *inode;
-+
-+ assert("edward-264", pos->child != NULL);
-+ assert("edward-265", jnode_page(pos->child) != NULL);
-+ assert("edward-266",
-+ jnode_page(pos->child)->mapping != NULL);
-+
-+ inode = jnode_page(pos->child)->mapping->host;
-+
-+ assert("edward-267", inode != NULL);
-+
-+ /* attach item convert info by child and put the last one */
-+ result = attach_convert_idata(pos, inode);
-+ pos->child = NULL;
-+ if (result == -E_REPEAT) {
-+ /* jnode became clean, or there is no dirty
-+ pages (nothing to update in disk cluster) */
-+ warning("edward-1021",
-+ "convert_ctail: nothing to attach");
-+ return 0;
-+ }
-+ if (result != 0)
-+ return result;
-+ } else
-+ /* unconvertible */
-+ return 0;
-+ } else {
-+ /* use old convert info */
-+
-+ struct convert_item_info *idata;
-+
-+ idata = item_convert_data(pos);
-+
-+ result = assign_convert_mode(idata, &mode);
-+ if (result) {
-+ /* disk cluster is over,
-+ nothing to update anymore */
-+ detach_convert_idata(pos->sq);
-+ return 0;
-+ }
-+ }
-+
-+ assert("edward-433", chaining_data_present(pos));
-+ assert("edward-1022",
-+ pos->coord.item_pos < coord_num_items(&pos->coord));
-+
-+ /* check if next item is of current disk cluster */
-+ result = next_item_dc_stat(pos);
-+ if (result) {
-+ detach_convert_idata(pos->sq);
-+ return result;
-+ }
-+ result = do_convert_ctail(pos, mode);
-+ if (result) {
-+ detach_convert_idata(pos->sq);
-+ return result;
-+ }
-+ switch (mode) {
-+ case CRC_CUT_ITEM:
-+ assert("edward-1214", item_convert_data(pos)->flow.length == 0);
-+ assert("edward-1215",
-+ coord_num_items(&pos->coord) == nr_items ||
-+ coord_num_items(&pos->coord) == nr_items - 1);
-+ if (item_convert_data(pos)->d_next == DC_CHAINED_ITEM)
-+ break;
-+ if (coord_num_items(&pos->coord) != nr_items) {
-+ /* the item was killed, no more chained items */
-+ detach_convert_idata(pos->sq);
-+ if (!node_is_empty(pos->coord.node))
-+ /* make sure the next item will be scanned */
-+ coord_init_before_item(&pos->coord);
-+ break;
-+ }
-+ case CRC_APPEND_ITEM:
-+ assert("edward-434", item_convert_data(pos)->flow.length == 0);
-+ detach_convert_idata(pos->sq);
-+ break;
-+ case CRC_OVERWRITE_ITEM:
-+ if (coord_is_unprepped_ctail(&pos->coord)) {
-+ /* convert unpprepped ctail to prepped one */
-+ assert("edward-1259",
-+ cluster_shift_ok(item_convert_data(pos)->
-+ cluster_shift));
-+ put_unaligned((d8)item_convert_data(pos)->cluster_shift,
-+ &ctail_formatted_at(&pos->coord)->
-+ cluster_shift);
-+ }
-+ break;
-+ }
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/ctail.h linux-2.6.24/fs/reiser4/plugin/item/ctail.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/ctail.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/ctail.h 2008-01-25 11:39:07.008226236 +0300
-@@ -0,0 +1,102 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Ctail items are fragments (or bodies) of special tipe to provide
-+ optimal storage of encrypted and(or) compressed files. */
-+
-+
-+#if !defined( __FS_REISER4_CTAIL_H__ )
-+#define __FS_REISER4_CTAIL_H__
-+
-+/* Disk format of ctail item */
-+typedef struct ctail_item_format {
-+ /* packed shift;
-+ if its value is different from UCTAIL_SHIFT (see below), then
-+ size of disk cluster is calculated as (1 << cluster_shift) */
-+ d8 cluster_shift;
-+ /* ctail body */
-+ d8 body[0];
-+} __attribute__ ((packed)) ctail_item_format;
-+
-+/* "Unprepped" disk cluster is represented by a single ctail item
-+ with the following "magic" attributes: */
-+/* "magic" cluster_shift */
-+#define UCTAIL_SHIFT 0xff
-+/* How many units unprepped ctail item has */
-+#define UCTAIL_NR_UNITS 1
-+
-+/* The following is a set of various item states in a disk cluster.
-+ Disk cluster is a set of items whose keys belong to the interval
-+ [dc_key , dc_key + disk_cluster_size - 1] */
-+typedef enum {
-+ DC_INVALID_STATE = 0,
-+ DC_FIRST_ITEM = 1,
-+ DC_CHAINED_ITEM = 2,
-+ DC_AFTER_CLUSTER = 3
-+} dc_item_stat;
-+
-+/* ctail-specific extension.
-+ In particular this describes parameters of disk cluster an item belongs to */
-+struct ctail_coord_extension {
-+ int shift; /* this contains cluster_shift extracted from
-+ ctail_item_format (above), or UCTAIL_SHIFT
-+ (the last one is the "magic" of unprepped disk clusters)*/
-+ int dsize; /* size of a prepped disk cluster */
-+ int ncount; /* count of nodes occupied by a disk cluster */
-+};
-+
-+struct cut_list;
-+
-+/* plugin->item.b.* */
-+int can_contain_key_ctail(const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+int mergeable_ctail(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_ctail(const coord_t * coord);
-+int estimate_ctail(const coord_t * coord, const reiser4_item_data * data);
-+void print_ctail(const char *prefix, coord_t * coord);
-+lookup_result lookup_ctail(const reiser4_key *, lookup_bias, coord_t *);
-+
-+int paste_ctail(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG);
-+int init_ctail(coord_t *, coord_t *, reiser4_item_data *);
-+int can_shift_ctail(unsigned free_space, coord_t * coord,
-+ znode * target, shift_direction pend, unsigned *size,
-+ unsigned want);
-+void copy_units_ctail(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction where_is_free_space,
-+ unsigned free_space);
-+int cut_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int ctail_ok(const coord_t * coord);
-+int check_ctail(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.* */
-+int read_ctail(struct file *, flow_t *, hint_t *);
-+int readpage_ctail(void *, struct page *);
-+int readpages_ctail(struct file *, struct address_space *, struct list_head *);
-+reiser4_key *append_key_ctail(const coord_t *, reiser4_key *);
-+int create_hook_ctail(const coord_t * coord, void *arg);
-+int kill_hook_ctail(const coord_t *, pos_in_node_t, pos_in_node_t,
-+ carry_kill_data *);
-+int shift_hook_ctail(const coord_t *, unsigned, unsigned, znode *);
-+
-+/* plugin->u.item.f */
-+int utmost_child_ctail(const coord_t *, sideof, jnode **);
-+int scan_ctail(flush_scan *);
-+int convert_ctail(flush_pos_t *);
-+size_t inode_scaled_cluster_size(struct inode *);
-+
-+#endif /* __FS_REISER4_CTAIL_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/extent.c linux-2.6.24/fs/reiser4/plugin/item/extent.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/extent.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/extent.c 2008-01-25 11:39:07.008226236 +0300
-@@ -0,0 +1,197 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../key.h"
-+#include "../../super.h"
-+#include "../../carry.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../../flush.h"
-+#include "../object.h"
-+
-+/* prepare structure reiser4_item_data. It is used to put one extent unit into tree */
-+/* Audited by: green(2002.06.13) */
-+reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
-+ int nr_extents)
-+{
-+ data->data = ext_unit;
-+ /* data->data is kernel space */
-+ data->user = 0;
-+ data->length = sizeof(reiser4_extent) * nr_extents;
-+ data->arg = NULL;
-+ data->iplug = item_plugin_by_id(EXTENT_POINTER_ID);
-+ return data;
-+}
-+
-+/*
-+ * Number of bytes addressed by the first @nr extent units of the extent item
-+ * @coord is set to: the sum of their widths times the current block size.
-+ */
-+reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr)
-+{
-+	reiser4_extent *unit;
-+	reiser4_block_nr total;
-+	pos_in_node_t left;
-+
-+	unit = item_body_by_coord(coord);
-+	assert("vs-263", nr <= nr_units_extent(coord));
-+
-+	total = 0;
-+	for (left = nr; left != 0; left--)
-+		total += extent_get_width(unit++);
-+
-+	return total * current_blocksize;
-+}
-+
-+/*
-+ * Classify extent unit @ext by its start block: start 0 denotes a hole,
-+ * start 1 an unallocated extent, anything else an allocated extent.
-+ *
-+ * The start is compared at full width. Casting it to int first (as was done
-+ * previously) drops the upper bits of the block number, so an allocated
-+ * extent whose start has low 32 bits equal to 0 or 1 would be mistaken for a
-+ * hole or an unallocated extent.
-+ */
-+extent_state state_of_extent(reiser4_extent * ext)
-+{
-+	reiser4_block_nr start;
-+
-+	start = extent_get_start(ext);
-+	if (start == 0)
-+		return HOLE_EXTENT;
-+	if (start == 1)
-+		return UNALLOCATED_EXTENT;
-+	return ALLOCATED_EXTENT;
-+}
-+
-+/* Return non-zero if the extent unit @item is set to is an unallocated one. */
-+int extent_is_unallocated(const coord_t * item)
-+{
-+	extent_state state;
-+
-+	assert("jmacd-5133", item_is_extent(item));
-+
-+	state = state_of_extent(extent_by_coord(item));
-+	return state == UNALLOCATED_EXTENT;
-+}
-+
-+/*
-+ * set extent's start and width: @start and @width are stored into @ext via
-+ * extent_set_start() and extent_set_width()
-+ */
-+void reiser4_set_extent(reiser4_extent * ext, reiser4_block_nr start,
-+ reiser4_block_nr width)
-+{
-+ extent_set_start(ext, start);
-+ extent_set_width(ext, width);
-+}
-+
-+/**
-+ * reiser4_replace_extent - replace extent and paste 1 or 2 after it
-+ * @h: replace handle. Fields read here: @h->coord (unit to be overwritten),
-+ * @h->lh, @h->overwrite (new content of that unit), @h->new_extents[] and
-+ * @h->nr_new_extents (units to paste after it), @h->paste_key and
-+ * @h->flags (both passed to insert_into_item()). Fields @h->item,
-+ * @h->coord_after, @h->lh_after, @h->watch (and, in debug builds,
-+ * @h->orig_ext and @h->tmp) are used as scratch space.
-+ * @return_inserted_position: selects where @h->coord and @h->lh point on
-+ * successful return, see below
-+ *
-+ * Overwrites one extent, pastes 1 or 2 more ones after overwritten one. If
-+ * @return_inserted_position is 1 - @h->coord and @h->lh are returned set to
-+ * first of newly inserted units, if it is 0 - @h->coord and @h->lh are
-+ * returned set to extent which was overwritten.
-+ *
-+ * Returns 0 on success, otherwise the result of insert_into_item() or of
-+ * zload().
-+ */
-+int reiser4_replace_extent(struct replace_handle *h,
-+ int return_inserted_position)
-+{
-+ int result;
-+ znode *orig_znode;
-+ /*ON_DEBUG(reiser4_extent orig_ext);*/ /* this is for debugging */
-+
-+ assert("vs-990", coord_is_existing_unit(h->coord));
-+ assert("vs-1375", znode_is_write_locked(h->coord->node));
-+ assert("vs-1426", extent_get_width(&h->overwrite) != 0);
-+ assert("vs-1427", extent_get_width(&h->new_extents[0]) != 0);
-+ /* NOTE(review): label "vs-1427" is used by both this and previous assert */
-+ assert("vs-1427", ergo(h->nr_new_extents == 2,
-+ extent_get_width(&h->new_extents[1]) != 0));
-+
-+ /* compose structure for paste */
-+ init_new_extent(&h->item, &h->new_extents[0], h->nr_new_extents);
-+
-+ /* remember the unit to be overwritten in @h->coord_after / @h->lh_after
-+ and have tap @h->watch monitor that position during the insertion */
-+ coord_dup(&h->coord_after, h->coord);
-+ init_lh(&h->lh_after);
-+ copy_lh(&h->lh_after, h->lh);
-+ reiser4_tap_init(&h->watch, &h->coord_after, &h->lh_after, ZNODE_WRITE_LOCK);
-+ reiser4_tap_monitor(&h->watch);
-+
-+ ON_DEBUG(h->orig_ext = *extent_by_coord(h->coord));
-+ orig_znode = h->coord->node;
-+
-+#if REISER4_DEBUG
-+ /* make sure that key is set properly */
-+ unit_key_by_coord(h->coord, &h->tmp);
-+ set_key_offset(&h->tmp,
-+ get_key_offset(&h->tmp) +
-+ extent_get_width(&h->overwrite) * current_blocksize);
-+ assert("vs-1080", keyeq(&h->tmp, &h->paste_key));
-+#endif
-+
-+ /* set insert point after unit to be replaced */
-+ h->coord->between = AFTER_UNIT;
-+
-+ result = insert_into_item(h->coord, return_inserted_position ? h->lh : NULL,
-+ &h->paste_key, &h->item, h->flags);
-+ if (!result) {
-+ /* now we have to replace the unit after which new units were
-+ inserted. Its position is tracked by @watch */
-+ reiser4_extent *ext;
-+ znode *node;
-+
-+ node = h->coord_after.node;
-+ if (node != orig_znode) {
-+ /* tracked unit is now in another node: that node has to be
-+ loaded before its data is accessed */
-+ coord_clear_iplug(&h->coord_after);
-+ result = zload(node);
-+ }
-+
-+ if (likely(!result)) {
-+ ext = extent_by_coord(&h->coord_after);
-+
-+ assert("vs-987", znode_is_loaded(node));
-+ assert("vs-988", !memcmp(ext, &h->orig_ext, sizeof(*ext)));
-+
-+ /* overwrite extent unit */
-+ memcpy(ext, &h->overwrite, sizeof(reiser4_extent));
-+ znode_make_dirty(node);
-+
-+ /* pairs with zload() above */
-+ if (node != orig_znode)
-+ zrelse(node);
-+
-+ if (return_inserted_position == 0) {
-+ /* coord and lh are to be set to overwritten
-+ extent */
-+ assert("vs-1662",
-+ WITH_DATA(node, !memcmp(&h->overwrite,
-+ extent_by_coord(
-+ &h->coord_after),
-+ sizeof(reiser4_extent))));
-+
-+ *h->coord = h->coord_after;
-+ done_lh(h->lh);
-+ copy_lh(h->lh, &h->lh_after);
-+ } else {
-+ /* h->coord and h->lh are to be set to first of
-+ inserted units */
-+ assert("vs-1663",
-+ WITH_DATA(h->coord->node,
-+ !memcmp(&h->new_extents[0],
-+ extent_by_coord(h->coord),
-+ sizeof(reiser4_extent))));
-+ assert("vs-1664", h->lh->node == h->coord->node);
-+ }
-+ }
-+ }
-+ reiser4_tap_done(&h->watch);
-+
-+ return result;
-+}
-+
-+/*
-+ * Return the lock handle linked first on @node's list of lock owners. @node
-+ * is asserted to be write locked, and write locked only once.
-+ */
-+lock_handle *znode_lh(znode *node)
-+{
-+	lock_handle *owner;
-+
-+	assert("vs-1371", znode_is_write_locked(node));
-+	assert("vs-1372", znode_is_wlocked_once(node));
-+
-+	owner = list_entry(node->lock.owners.next, lock_handle, owners_link);
-+	return owner;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/extent_file_ops.c linux-2.6.24/fs/reiser4/plugin/item/extent_file_ops.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/extent_file_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/extent_file_ops.c 2008-01-25 11:40:16.698169785 +0300
-@@ -0,0 +1,1450 @@
-+/* COPYRIGHT 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../object.h"
-+
-+#include <linux/quotaops.h>
-+#include <linux/swap.h>
-+
-+/* Return the extent unit found @offset bytes from the start of @node's data. */
-+static inline reiser4_extent *ext_by_offset(const znode *node, int offset)
-+{
-+	return (reiser4_extent *) (zdata(node) + offset);
-+}
-+
-+/**
-+ * check_uf_coord - verify coord extension
-+ * @uf_coord: coord with extent extension to be verified
-+ * @key: key @uf_coord is expected to correspond to, or NULL to skip the check
-+ *
-+ * Does nothing unless REISER4_DEBUG is set. Asserts that the extension cached
-+ * in @uf_coord agrees with the extent unit in the node, and, if @key is
-+ * specified, that the position described by @uf_coord matches @key.
-+ */
-+static void check_uf_coord(const uf_coord_t *uf_coord, const reiser4_key *key)
-+{
-+#if REISER4_DEBUG
-+	const struct extent_coord_extension *ext_coord = &uf_coord->extension.extent;
-+	const coord_t *coord = &uf_coord->coord;
-+	reiser4_extent *ext;
-+
-+	ext = ext_by_offset(coord->node, ext_coord->ext_offset);
-+
-+	assert("",
-+	       WITH_DATA(coord->node,
-+			 (uf_coord->valid == 1 &&
-+			  coord_is_iplug_set(coord) &&
-+			  item_is_extent(coord) &&
-+			  ext_coord->nr_units == nr_units_extent(coord) &&
-+			  ext == extent_by_coord(coord) &&
-+			  ext_coord->width == extent_get_width(ext) &&
-+			  coord->unit_pos < ext_coord->nr_units &&
-+			  ext_coord->pos_in_unit < ext_coord->width &&
-+			  !memcmp(ext, &ext_coord->extent,
-+				  sizeof(reiser4_extent)))));
-+	if (key != NULL) {
-+		reiser4_key expected;
-+
-+		/* key of the unit advanced by the position within the unit */
-+		unit_key_by_coord(coord, &expected);
-+		set_key_offset(&expected,
-+			       get_key_offset(&expected) +
-+			       (ext_coord->pos_in_unit << PAGE_CACHE_SHIFT));
-+		assert("", keyeq(key, &expected));
-+	}
-+#endif
-+}
-+
-+/*
-+ * Return the extent unit addressed by the extension of @uf_coord. The
-+ * extension is run through check_uf_coord() first.
-+ */
-+static inline reiser4_extent *ext_by_ext_coord(const uf_coord_t *uf_coord)
-+{
-+	const znode *node;
-+	int offset;
-+
-+	check_uf_coord(uf_coord, NULL);
-+
-+	node = uf_coord->coord.node;
-+	offset = uf_coord->extension.extent.ext_offset;
-+	return ext_by_offset(node, offset);
-+}
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * offset_is_in_unit - check whether a file offset falls into an extent unit
-+ * @coord: coord set to the extent unit to test
-+ * @off: file offset
-+ *
-+ * Returns 1 if @off is inside of the byte range addressed by the extent unit
-+ * pointed to by @coord, 0 otherwise. Nothing is modified.
-+ */
-+static int offset_is_in_unit(const coord_t *coord, loff_t off)
-+{
-+	reiser4_extent *ext = extent_by_coord(coord);
-+	reiser4_key unit_key;
-+	__u64 first;
-+
-+	unit_key_extent(coord, &unit_key);
-+	first = get_key_offset(&unit_key);
-+
-+	/* inside means: first <= off < first + blocksize * width of unit */
-+	if (off >= first &&
-+	    off < (first + (current_blocksize * extent_get_width(ext))))
-+		return 1;
-+	return 0;
-+}
-+
-+/*
-+ * Return 1 if the offset of @key falls into the extent unit @coord is set to.
-+ * @key is asserted to lie within the key range of the item.
-+ */
-+static int
-+coord_matches_key_extent(const coord_t * coord, const reiser4_key * key)
-+{
-+	reiser4_key bound;
-+	loff_t off;
-+
-+	assert("vs-771", coord_is_existing_unit(coord));
-+	assert("vs-1258", keylt(key, append_key_extent(coord, &bound)));
-+	assert("vs-1259", keyge(key, item_key_by_coord(coord, &bound)));
-+
-+	off = get_key_offset(key);
-+	return offset_is_in_unit(coord, off);
-+}
-+
-+#endif
-+
-+/**
-+ * can_append - check whether a key continues an extent item
-+ * @key: key to test
-+ * @coord: coord set to the extent item
-+ *
-+ * Returns 1 if @key is equal to the append key of the item @coord is set to.
-+ */
-+static int can_append(const reiser4_key *key, const coord_t *coord)
-+{
-+	reiser4_key buf;
-+	reiser4_key *item_end;
-+
-+	item_end = append_key_extent(coord, &buf);
-+	return keyeq(key, item_end);
-+}
-+
-+/**
-+ * append_hole - extend last extent item of a file with a hole
-+ * @coord: coord set to an extent item on twig level (both are asserted)
-+ * @lh: lock handle, passed on to insert_into_item()
-+ * @key: key the item has to be extended up to; asserted to be not less than
-+ * the append key of the item
-+ *
-+ * If the last unit of the item is a hole already, it is widened in place.
-+ * Otherwise a new hole unit is pasted after it. Returns 0 in the former case
-+ * and the result of insert_into_item() in the latter.
-+ */
-+static int append_hole(coord_t *coord, lock_handle *lh,
-+ const reiser4_key *key)
-+{
-+ reiser4_key append_key;
-+ reiser4_block_nr hole_width;
-+ reiser4_extent *ext, new_ext;
-+ reiser4_item_data idata;
-+
-+ /* last item of file may have to be appended with hole */
-+ assert("vs-708", znode_get_level(coord->node) == TWIG_LEVEL);
-+ assert("vs-714", item_id_by_coord(coord) == EXTENT_POINTER_ID);
-+
-+ /* key of first byte which is not addressed by this extent */
-+ append_key_extent(coord, &append_key);
-+
-+ assert("", keyle(&append_key, key));
-+
-+ /*
-+ * extent item has to be appended with hole. Calculate length of that
-+ * hole (in blocks, rounded up)
-+ */
-+ hole_width = ((get_key_offset(key) - get_key_offset(&append_key) +
-+ current_blocksize - 1) >> current_blocksize_bits);
-+ assert("vs-954", hole_width > 0);
-+
-+ /* set coord after last unit */
-+ coord_init_after_item_end(coord);
-+
-+ /* get last extent in the item */
-+ ext = extent_by_coord(coord);
-+ if (state_of_extent(ext) == HOLE_EXTENT) {
-+ /*
-+ * last extent of a file is hole extent. Widen that extent by
-+ * @hole_width blocks. Note that we do not worry about
-+ * overflowing - extent width is 64 bits
-+ */
-+ reiser4_set_extent(ext, HOLE_EXTENT_START,
-+ extent_get_width(ext) + hole_width);
-+ znode_make_dirty(coord->node);
-+ return 0;
-+ }
-+
-+ /* append last item of the file with hole extent unit */
-+ assert("vs-713", (state_of_extent(ext) == ALLOCATED_EXTENT ||
-+ state_of_extent(ext) == UNALLOCATED_EXTENT));
-+
-+ reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
-+ init_new_extent(&idata, &new_ext, 1);
-+ return insert_into_item(coord, lh, &append_key, &idata, 0);
-+}
-+
-+/**
-+ * check_jnodes - debugging check that a range of jnodes is covered by a twig
-+ * @twig: longterm locked twig node
-+ * @key: key of the first jnode
-+ * @count: number of jnodes (pages) starting at @key
-+ *
-+ * Does nothing unless REISER4_DEBUG is set. Asserts that @twig is a write
-+ * locked twig level node and that the byte range of @count pages starting at
-+ * the offset of @key lies between the smallest key of @twig and the end of
-+ * its last item.
-+ */
-+static void check_jnodes(znode *twig, const reiser4_key *key, int count)
-+{
-+#if REISER4_DEBUG
-+ coord_t c;
-+ reiser4_key node_key, jnode_key;
-+
-+ jnode_key = *key;
-+
-+ assert("", twig != NULL);
-+ assert("", znode_get_level(twig) == TWIG_LEVEL);
-+ assert("", znode_is_write_locked(twig));
-+
-+ /* NOTE(review): return value of zload() is not checked here */
-+ zload(twig);
-+ /* get the smallest key in twig node */
-+ coord_init_first_unit(&c, twig);
-+ unit_key_by_coord(&c, &node_key);
-+ assert("", keyle(&node_key, &jnode_key));
-+
-+ /* upper bound: key of last unit, replaced by the append key of the last
-+ item when its plugin provides one */
-+ coord_init_last_unit(&c, twig);
-+ unit_key_by_coord(&c, &node_key);
-+ if (item_plugin_by_coord(&c)->s.file.append_key)
-+ item_plugin_by_coord(&c)->s.file.append_key(&c, &node_key);
-+ /* jnode_key becomes key of the last byte of the last jnode */
-+ set_key_offset(&jnode_key,
-+ get_key_offset(&jnode_key) + (loff_t)count * PAGE_CACHE_SIZE - 1);
-+ assert("", keylt(&jnode_key, &node_key));
-+ zrelse(twig);
-+#endif
-+}
-+
-+/**
-+ * append_last_extent - append last file item
-+ * @uf_coord: coord to start insertion from
-+ * @key: key of the first of @jnodes
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ *
-+ * There is already at least one extent item of file @inode in the tree. Append
-+ * the last of them with unallocated extent unit of width @count. Assign
-+ * fake block numbers to jnodes corresponding to the inserted extent.
-+ *
-+ * If @key is not the append key of the item, only a hole is appended (see
-+ * append_hole()) and its result is returned; no jnode is handled in that
-+ * call. Otherwise returns 0 if @count is 0, @count on success, or an error
-+ * code.
-+ */
-+static int append_last_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count)
-+{
-+ int result;
-+ reiser4_extent new_ext;
-+ reiser4_item_data idata;
-+ coord_t *coord;
-+ struct extent_coord_extension *ext_coord;
-+ reiser4_extent *ext;
-+ reiser4_block_nr block;
-+ jnode *node;
-+ int i;
-+
-+ coord = &uf_coord->coord;
-+ ext_coord = &uf_coord->extension.extent;
-+ ext = ext_by_ext_coord(uf_coord);
-+
-+ /* check correctness of position in the item */
-+ assert("vs-228", coord->unit_pos == coord_last_unit_pos(coord));
-+ assert("vs-1311", coord->between == AFTER_UNIT);
-+ assert("vs-1302", ext_coord->pos_in_unit == ext_coord->width - 1);
-+
-+ if (!can_append(key, coord)) {
-+ /* hole extent has to be inserted */
-+ result = append_hole(coord, uf_coord->lh, key);
-+ uf_coord->valid = 0;
-+ return result;
-+ }
-+
-+ if (count == 0)
-+ return 0;
-+
-+ assert("", get_key_offset(key) == (loff_t)index_jnode(jnodes[0]) * PAGE_CACHE_SIZE);
-+
-+ /* charge @count blocks to quota; any non-zero result hits BUG_ON() */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host,
-+ count);
-+ BUG_ON(result != 0);
-+
-+ switch (state_of_extent(ext)) {
-+ case UNALLOCATED_EXTENT:
-+ /*
-+ * last extent unit of the file is unallocated one. Increase
-+ * its width by @count
-+ */
-+ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START,
-+ extent_get_width(ext) + count);
-+ znode_make_dirty(coord->node);
-+
-+ /* update coord extension */
-+ ext_coord->width += count;
-+ ON_DEBUG(extent_set_width
-+ (&uf_coord->extension.extent.extent,
-+ ext_coord->width));
-+ break;
-+
-+ case HOLE_EXTENT:
-+ case ALLOCATED_EXTENT:
-+ /*
-+ * last extent unit of the file is either hole or allocated
-+ * one. Append one unallocated extent of width @count
-+ */
-+ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
-+ init_new_extent(&idata, &new_ext, 1);
-+ result = insert_into_item(coord, uf_coord->lh, key, &idata, 0);
-+ uf_coord->valid = 0;
-+ /* NOTE(review): quota charged above is not released on this error
-+ path - confirm whether a caller compensates */
-+ if (result)
-+ return result;
-+ break;
-+
-+ default:
-+ return RETERR(-EIO);
-+ }
-+
-+ /*
-+ * make sure that we hold long term locked twig node containing all
-+ * jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, key, count);
-+
-+ /*
-+ * assign fake block numbers to all jnodes. FIXME: make sure whether
-+ * twig node containing inserted extent item is locked
-+ */
-+ block = fake_blocknr_unformatted(count);
-+ for (i = 0; i < count; i ++, block ++) {
-+ node = jnodes[i];
-+ spin_lock_jnode(node);
-+ JF_SET(node, JNODE_CREATED);
-+ jnode_set_block(node, &block);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+ return count;
-+}
-+
-+/**
-+ * insert_first_hole - insert hole extent into tree
-+ * @coord: coord set between items, where the new item is to be inserted
-+ * @lh: lock handle, passed on to insert_extent_by_coord()
-+ * @key: key of the position the hole has to reach
-+ *
-+ * Inserts a new extent item made of a single hole unit. The item starts at
-+ * offset 0 and is wide enough to cover the offset of @key rounded up to whole
-+ * blocks. Returns the result of insert_extent_by_coord().
-+ */
-+static int insert_first_hole(coord_t *coord, lock_handle *lh,
-+			     const reiser4_key *key)
-+{
-+	reiser4_block_nr nr_blocks;
-+	reiser4_key first_key;
-+	reiser4_extent hole;
-+	reiser4_item_data item;
-+
-+	/* @coord must be set for inserting of new item */
-+	assert("vs-711", coord_is_between_items(coord));
-+
-+	/* number of blocks needed to reach the offset of @key, rounded up */
-+	nr_blocks = ((get_key_offset(key) + current_blocksize - 1) >>
-+		     current_blocksize_bits);
-+	assert("vs-710", nr_blocks > 0);
-+
-+	/* key of the new item: same as @key, but with offset 0 */
-+	first_key = *key;
-+	set_key_offset(&first_key, 0ull);
-+
-+	/* compose body of hole extent and insert item into tree */
-+	reiser4_set_extent(&hole, HOLE_EXTENT_START, nr_blocks);
-+	init_new_extent(&item, &hole, 1);
-+	return insert_extent_by_coord(coord, &item, &first_key, lh);
-+}
-+
-+
-+/**
-+ * insert_first_extent - insert first file item
-+ * @uf_coord: coord to start insertion from
-+ * @key: key of the first of @jnodes
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ * @inode: inode of file
-+ *
-+ * There are no items of file @inode in the tree yet. Insert unallocated extent
-+ * of width @count into tree or hole extent if writing not to the
-+ * beginning. Assign fake block numbers to jnodes corresponding to the inserted
-+ * unallocated extent. Returns number of jnodes or error code. When only a
-+ * hole is inserted, the result of insert_first_hole() is returned and no
-+ * jnode is handled in that call.
-+ */
-+static int insert_first_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count,
-+ struct inode *inode)
-+{
-+ int result;
-+ int i;
-+ reiser4_extent new_ext;
-+ reiser4_item_data idata;
-+ reiser4_block_nr block;
-+ struct unix_file_info *uf_info;
-+ jnode *node;
-+
-+ /* first extent insertion starts at leaf level */
-+ assert("vs-719", znode_get_level(uf_coord->coord.node) == LEAF_LEVEL);
-+ assert("vs-711", coord_is_between_items(&uf_coord->coord));
-+
-+ if (get_key_offset(key) != 0) {
-+ /* write starts past offset 0: put a hole in front of it first */
-+ result = insert_first_hole(&uf_coord->coord, uf_coord->lh, key);
-+ uf_coord->valid = 0;
-+ uf_info = unix_file_inode_data(inode);
-+
-+ /*
-+ * first item insertion is only possible when writing to empty
-+ * file or performing tail conversion
-+ */
-+ assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
-+ (reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED) &&
-+ reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV))));
-+ /* if file was empty - update its state */
-+ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
-+ uf_info->container = UF_CONTAINER_EXTENTS;
-+ return result;
-+ }
-+
-+ if (count == 0)
-+ return 0;
-+
-+ /* charge @count blocks to quota; any non-zero result hits BUG_ON() */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(jnodes[0])->host, count);
-+ BUG_ON(result != 0);
-+
-+ /*
-+ * prepare for tree modification: compose body of item and item data
-+ * structure needed for insertion
-+ */
-+ reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
-+ init_new_extent(&idata, &new_ext, 1);
-+
-+ /* insert extent item into the tree */
-+ result = insert_extent_by_coord(&uf_coord->coord, &idata, key,
-+ uf_coord->lh);
-+ /* NOTE(review): quota charged above is not released on this error path -
-+ confirm whether a caller compensates */
-+ if (result)
-+ return result;
-+
-+ /*
-+ * make sure that we hold long term locked twig node containing all
-+ * jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, key, count);
-+ /*
-+ * assign fake block numbers to all jnodes, capture and mark them dirty
-+ */
-+ block = fake_blocknr_unformatted(count);
-+ for (i = 0; i < count; i ++, block ++) {
-+ node = jnodes[i];
-+ spin_lock_jnode(node);
-+ JF_SET(node, JNODE_CREATED);
-+ jnode_set_block(node, &block);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+ }
-+
-+ /*
-+ * invalidate coordinate, research must be performed to continue
-+ * because write will continue on twig level
-+ */
-+ uf_coord->valid = 0;
-+ return count;
-+}
-+
-+/**
-+ * plug_hole - replace hole extent with unallocated and holes
-+ * @uf_coord: coord (with extent extension) set to a block within a hole unit
-+ * @key: key corresponding to @uf_coord (used by check_uf_coord() only)
-+ * @how: output; set to a number identifying the way the hole was plugged:
-+ * 1 - hole of width 1 converted in place;
-+ * 2 - first block of hole moved to unallocated unit on the left;
-+ * 3 - first block of hole: unit overwritten, rest of hole pasted after it;
-+ * 4 - last block of hole moved to unallocated unit on the right;
-+ * 5 - last block of hole: hole shortened, unallocated unit pasted after it;
-+ * 6 - block in the middle: hole split in two around unallocated unit
-+ *
-+ * Creates an unallocated extent of width 1 within a hole. In worst case two
-+ * additional extents can be created. Returns 0 when the change is made in
-+ * place (cases 1, 2, 4), otherwise the result of reiser4_replace_extent(),
-+ * in which case @uf_coord is marked invalid.
-+ */
-+static int plug_hole(uf_coord_t *uf_coord, const reiser4_key *key, int *how)
-+{
-+ struct replace_handle rh;
-+ reiser4_extent *ext;
-+ reiser4_block_nr width, pos_in_unit;
-+ coord_t *coord;
-+ struct extent_coord_extension *ext_coord;
-+ int return_inserted_position;
-+
-+ check_uf_coord(uf_coord, key);
-+
-+ rh.coord = coord_by_uf_coord(uf_coord);
-+ rh.lh = uf_coord->lh;
-+ rh.flags = 0;
-+
-+ coord = coord_by_uf_coord(uf_coord);
-+ ext_coord = ext_coord_by_uf_coord(uf_coord);
-+ ext = ext_by_ext_coord(uf_coord);
-+
-+ width = ext_coord->width;
-+ pos_in_unit = ext_coord->pos_in_unit;
-+
-+ *how = 0;
-+ if (width == 1) {
-+ /* hole is one block wide: turn the unit itself into unallocated */
-+ reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, 1);
-+ znode_make_dirty(coord->node);
-+ /* update uf_coord */
-+ ON_DEBUG(ext_coord->extent = *ext);
-+ *how = 1;
-+ return 0;
-+ } else if (pos_in_unit == 0) {
-+ /* we deal with first element of extent */
-+ if (coord->unit_pos) {
-+ /* there is an extent to the left */
-+ if (state_of_extent(ext - 1) == UNALLOCATED_EXTENT) {
-+ /*
-+ * left neighboring unit is an unallocated
-+ * extent. Increase its width and decrease
-+ * width of hole
-+ */
-+ extent_set_width(ext - 1,
-+ extent_get_width(ext - 1) + 1);
-+ extent_set_width(ext, width - 1);
-+ znode_make_dirty(coord->node);
-+
-+ /* update coord extension */
-+ coord->unit_pos--;
-+ ext_coord->width = extent_get_width(ext - 1);
-+ ext_coord->pos_in_unit = ext_coord->width - 1;
-+ ext_coord->ext_offset -= sizeof(reiser4_extent);
-+ ON_DEBUG(ext_coord->extent =
-+ *extent_by_coord(coord));
-+ *how = 2;
-+ return 0;
-+ }
-+ }
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, UNALLOCATED_EXTENT_START, 1);
-+ /* extent to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], HOLE_EXTENT_START,
-+ width - 1);
-+ rh.nr_new_extents = 1;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to unit which was replaced */
-+ return_inserted_position = 0;
-+ *how = 3;
-+ } else if (pos_in_unit == width - 1) {
-+ /* we deal with last element of extent */
-+ if (coord->unit_pos < nr_units_extent(coord) - 1) {
-+ /* there is an extent unit to the right */
-+ if (state_of_extent(ext + 1) == UNALLOCATED_EXTENT) {
-+ /*
-+ * right neighboring unit is an unallocated
-+ * extent. Increase its width and decrease
-+ * width of hole
-+ */
-+ extent_set_width(ext + 1,
-+ extent_get_width(ext + 1) + 1);
-+ extent_set_width(ext, width - 1);
-+ znode_make_dirty(coord->node);
-+
-+ /* update coord extension */
-+ coord->unit_pos++;
-+ ext_coord->width = extent_get_width(ext + 1);
-+ ext_coord->pos_in_unit = 0;
-+ ext_coord->ext_offset += sizeof(reiser4_extent);
-+ ON_DEBUG(ext_coord->extent =
-+ *extent_by_coord(coord));
-+ *how = 4;
-+ return 0;
-+ }
-+ }
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, width - 1);
-+ /* extent to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
-+ 1);
-+ rh.nr_new_extents = 1;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to unit which was inserted */
-+ return_inserted_position = 1;
-+ *how = 5;
-+ } else {
-+ /* block is in the middle of the hole: hole, unallocated, hole */
-+ /* extent for replace */
-+ reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START,
-+ pos_in_unit);
-+ /* extents to be inserted */
-+ reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
-+ 1);
-+ reiser4_set_extent(&rh.new_extents[1], HOLE_EXTENT_START,
-+ width - pos_in_unit - 1);
-+ rh.nr_new_extents = 2;
-+
-+ /* have reiser4_replace_extent to return with @coord and
-+ @uf_coord->lh set to first of units which were inserted */
-+ return_inserted_position = 1;
-+ *how = 6;
-+ }
-+ /* paste key: key of the unit advanced by the width of the overwriting
-+ extent */
-+ unit_key_by_coord(coord, &rh.paste_key);
-+ set_key_offset(&rh.paste_key, get_key_offset(&rh.paste_key) +
-+ extent_get_width(&rh.overwrite) * current_blocksize);
-+
-+ uf_coord->valid = 0;
-+ return reiser4_replace_extent(&rh, return_inserted_position);
-+}
-+
-+/**
-+ * overwrite_one_block - assign a block number to a jnode of existing extent
-+ * @uf_coord: coord (with extent extension) of the block; must be AT_UNIT
-+ * @key: key of the block, passed on to plug_hole()
-+ * @node: jnode to assign block number to
-+ * @hole_plugged: if not NULL, *@hole_plugged is set to 1 when a hole was
-+ * plugged
-+ *
-+ * If @node corresponds to hole extent - create unallocated extent for it and
-+ * assign fake block number. If @node corresponds to allocated extent - assign
-+ * block number of jnode. Returns 0 on success, result of plug_hole() on its
-+ * failure, or -EIO for an unexpected extent state.
-+ */
-+static int overwrite_one_block(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode *node, int *hole_plugged)
-+{
-+ int result;
-+ struct extent_coord_extension *ext_coord;
-+ reiser4_extent *ext;
-+ reiser4_block_nr block;
-+ /* filled in by plug_hole(); not looked at afterwards */
-+ int how;
-+
-+ assert("vs-1312", uf_coord->coord.between == AT_UNIT);
-+
-+ result = 0;
-+ ext_coord = ext_coord_by_uf_coord(uf_coord);
-+ ext = ext_by_ext_coord(uf_coord);
-+ assert("", state_of_extent(ext) != UNALLOCATED_EXTENT);
-+
-+ switch (state_of_extent(ext)) {
-+ case ALLOCATED_EXTENT:
-+ /* block number is start of the extent plus position within it */
-+ block = extent_get_start(ext) + ext_coord->pos_in_unit;
-+ break;
-+
-+ case HOLE_EXTENT:
-+ /* charge one block to quota; any non-zero result hits BUG_ON() */
-+ result = DQUOT_ALLOC_BLOCK_NODIRTY(mapping_jnode(node)->host, 1);
-+ BUG_ON(result != 0);
-+ result = plug_hole(uf_coord, key, &how);
-+ /* NOTE(review): quota charged above is not released on this error
-+ path - confirm whether a caller compensates */
-+ if (result)
-+ return result;
-+ block = fake_blocknr_unformatted(1);
-+ if (hole_plugged)
-+ *hole_plugged = 1;
-+ JF_SET(node, JNODE_CREATED);
-+ break;
-+
-+ default:
-+ return RETERR(-EIO);
-+ }
-+
-+ jnode_set_block(node, &block);
-+ return 0;
-+}
-+
-+/**
-+ * move_coord - move coordinate forward
-+ * @uf_coord: coord with extent extension to advance
-+ *
-+ * Advances @uf_coord by one data block pointer, stepping into the next unit
-+ * of the item when the current one is exhausted. Returns 0 if the coord was
-+ * moved. Returns 1 if @uf_coord was invalid on entry or was set to the last
-+ * block pointer of the item; in the latter case it is marked invalid.
-+ */
-+static int move_coord(uf_coord_t *uf_coord)
-+{
-+	struct extent_coord_extension *ext_coord;
-+	reiser4_extent *next;
-+
-+	if (uf_coord->valid == 0)
-+		return 1;
-+
-+	ext_coord = &uf_coord->extension.extent;
-+	if (++ext_coord->pos_in_unit < ext_coord->width)
-+		/* coordinate moved within the unit */
-+		return 0;
-+
-+	/* end of unit is reached. Try to move to next unit */
-+	ext_coord->pos_in_unit = 0;
-+	uf_coord->coord.unit_pos ++;
-+	if (uf_coord->coord.unit_pos >= ext_coord->nr_units) {
-+		/* end of item is reached */
-+		uf_coord->valid = 0;
-+		return 1;
-+	}
-+
-+	/* coordinate moved to next unit: refresh the cached unit data */
-+	ext_coord->ext_offset += sizeof(reiser4_extent);
-+	next = ext_by_offset(uf_coord->coord.node, ext_coord->ext_offset);
-+	ext_coord->width = extent_get_width(next);
-+	ON_DEBUG(ext_coord->extent = *next);
-+	return 0;
-+}
-+
-+/**
-+ * overwrite_extent - map jnodes onto units of an existing extent item
-+ * @uf_coord: coord (with extent extension) corresponding to the first jnode
-+ * @key: key of the first jnode
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array
-+ * @plugged_hole: passed on to overwrite_one_block(); may be NULL
-+ *
-+ * For every jnode which has no block number yet, one is assigned by
-+ * overwrite_one_block(). Every handled jnode is captured and made dirty.
-+ * Processing stops early when @uf_coord becomes invalid or can not be moved
-+ * forward.
-+ *
-+ * Returns number of handled jnodes, or error code of overwrite_one_block().
-+ */
-+static int overwrite_extent(uf_coord_t *uf_coord, const reiser4_key *key,
-+ jnode **jnodes, int count, int *plugged_hole)
-+{
-+ int result;
-+ reiser4_key k;
-+ int i;
-+ jnode *node;
-+
-+ /* @k is the key of the jnode currently being handled */
-+ k = *key;
-+ for (i = 0; i < count; i ++) {
-+ node = jnodes[i];
-+ /* block number 0 means: not assigned yet */
-+ if (*jnode_get_block(node) == 0) {
-+ result = overwrite_one_block(uf_coord, &k, node, plugged_hole);
-+ if (result)
-+ return result;
-+ }
-+ /*
-+ * make sure that we hold long term locked twig node containing
-+ * all jnodes we are about to capture
-+ */
-+ check_jnodes(uf_coord->lh->node, &k, 1);
-+ /*
-+ * capture the jnode and mark it dirty
-+ */
-+ spin_lock_jnode(node);
-+ result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(result != 0);
-+ jnode_make_dirty_locked(node);
-+ spin_unlock_jnode(node);
-+
-+ /* coord was invalidated while handling this jnode: caller has to
-+ repeat the lookup for the remaining ones */
-+ if (uf_coord->valid == 0)
-+ return i + 1;
-+
-+ check_uf_coord(uf_coord, &k);
-+
-+ if (move_coord(uf_coord)) {
-+ /*
-+ * failed to move to the next node pointer. Either end
-+ * of file or end of twig node is reached. In the later
-+ * case we might go to the right neighbor.
-+ */
-+ uf_coord->valid = 0;
-+ return i + 1;
-+ }
-+ set_key_offset(&k, get_key_offset(&k) + PAGE_CACHE_SIZE);
-+ }
-+
-+ return count;
-+}
-+
-+/**
-+ * reiser4_update_extent - make an extent pointer for one jnode
-+ * @inode: inode of the file
-+ * @node: jnode to be handled
-+ * @pos: file offset corresponding to @node
-+ * @plugged_hole: passed on to overwrite_extent(); set to 1 there when a hole
-+ * gets plugged
-+ *
-+ * Looks up the position of @pos in the tree and, depending on what is found,
-+ * appends the last extent item, overwrites an existing unit or inserts the
-+ * first item of the file. Returns 0 on success (the helper handled exactly
-+ * one jnode), or an error code.
-+ */
-+int reiser4_update_extent(struct inode *inode, jnode *node, loff_t pos,
-+ int *plugged_hole)
-+{
-+ int result;
-+ znode *loaded;
-+ uf_coord_t uf_coord;
-+ coord_t *coord;
-+ lock_handle lh;
-+ reiser4_key key;
-+
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+
-+ key_by_inode_and_offset_common(inode, pos, &key);
-+
-+ init_uf_coord(&uf_coord, &lh);
-+ coord = &uf_coord.coord;
-+ result = find_file_item_nohint(coord, &lh, &key,
-+ ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result)) {
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+ }
-+
-+ result = zload(coord->node);
-+ BUG_ON(result != 0);
-+ /* remember the node loaded here: helpers below may move @coord */
-+ loaded = coord->node;
-+
-+ if (coord->between == AFTER_UNIT) {
-+ /*
-+ * append existing extent item with unallocated extent of width
-+ * 1
-+ */
-+ init_coord_extension_extent(&uf_coord,
-+ get_key_offset(&key));
-+ result = append_last_extent(&uf_coord, &key,
-+ &node, 1);
-+ } else if (coord->between == AT_UNIT) {
-+ /*
-+ * overwrite
-+ * not optimal yet. Will be optimized if new write will show
-+ * performance win.
-+ */
-+ init_coord_extension_extent(&uf_coord,
-+ get_key_offset(&key));
-+ result = overwrite_extent(&uf_coord, &key,
-+ &node, 1, plugged_hole);
-+ } else {
-+ /*
-+ * there are no items of this file in the tree yet. Create
-+ * first item of the file inserting one unallocated extent of
-+ * width 1
-+ */
-+ result = insert_first_extent(&uf_coord, &key, &node, 1, inode);
-+ }
-+ assert("", result == 1 || result < 0);
-+ zrelse(loaded);
-+ done_lh(&lh);
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ /* helpers return number of handled jnodes: 1 here means success */
-+ return (result == 1) ? 0 : result;
-+}
-+
-+/**
-+ * update_extents - make extent pointers for an array of jnodes
-+ * @file: file the hint is loaded from and saved to
-+ * @inode: inode of the file
-+ * @jnodes: array of jnodes
-+ * @count: number of jnodes in the array; 0 is the expanding truncate case
-+ * @pos: file offset to work at; only used when @count is 0, otherwise the
-+ * offset is taken from the index of the first jnode
-+ *
-+ * Repeats lookup and append/overwrite/insert until all jnodes are handled.
-+ * Returns the result of the last helper call, or an error code.
-+ */
-+static int update_extents(struct file *file, struct inode *inode,
-+ jnode **jnodes, int count, loff_t pos)
-+{
-+ struct hint hint;
-+ reiser4_key key;
-+ int result;
-+ znode *loaded;
-+
-+ result = load_file_hint(file, &hint);
-+ BUG_ON(result != 0);
-+
-+ if (count != 0)
-+ /*
-+ * count == 0 is special case: expanding truncate
-+ */
-+ pos = (loff_t)index_jnode(jnodes[0]) << PAGE_CACHE_SHIFT;
-+ key_by_inode_and_offset_common(inode, pos, &key);
-+
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+
-+ do {
-+ result = find_file_item(&hint, &key, ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result)) {
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+ }
-+
-+ result = zload(hint.ext_coord.coord.node);
-+ BUG_ON(result != 0);
-+ /* remember the node loaded here: helpers below may move the coord */
-+ loaded = hint.ext_coord.coord.node;
-+
-+ if (hint.ext_coord.coord.between == AFTER_UNIT) {
-+ /*
-+ * append existing extent item with unallocated extent
-+ * of width nr_jnodes
-+ */
-+ if (hint.ext_coord.valid == 0)
-+ /* NOTE: get statistics on this */
-+ init_coord_extension_extent(&hint.ext_coord,
-+ get_key_offset(&key));
-+ result = append_last_extent(&hint.ext_coord, &key,
-+ jnodes, count);
-+ } else if (hint.ext_coord.coord.between == AT_UNIT) {
-+ /*
-+ * overwrite
-+ * not optimal yet. Will be optimized if new write will
-+ * show performance win.
-+ */
-+ if (hint.ext_coord.valid == 0)
-+ /* NOTE: get statistics on this */
-+ init_coord_extension_extent(&hint.ext_coord,
-+ get_key_offset(&key));
-+ result = overwrite_extent(&hint.ext_coord, &key,
-+ jnodes, count, NULL);
-+ } else {
-+ /*
-+ * there are no items of this file in the tree
-+ * yet. Create first item of the file inserting one
-+ * unallocated extent of width nr_jnodes
-+ */
-+ result = insert_first_extent(&hint.ext_coord, &key,
-+ jnodes, count, inode);
-+ }
-+ zrelse(loaded);
-+ if (result < 0) {
-+ done_lh(hint.ext_coord.lh);
-+ break;
-+ }
-+
-+ /* @result jnodes were handled: advance to the remaining ones */
-+ jnodes += result;
-+ count -= result;
-+ set_key_offset(&key, get_key_offset(&key) + result * PAGE_CACHE_SIZE);
-+
-+ /* seal and unlock znode */
-+ if (hint.ext_coord.valid)
-+ reiser4_set_hint(&hint, &key, ZNODE_WRITE_LOCK);
-+ else
-+ reiser4_unset_hint(&hint);
-+
-+ } while (count > 0);
-+
-+ save_file_hint(file, &hint);
-+ assert("", reiser4_lock_counters()->d_refs == 0);
-+ return result;
-+}
-+
-+/**
-+ * write_extent_reserve_space - reserve space for extent write operation
-+ * @inode: inode of the file to be written
-+ *
-+ * Estimates and reserves space which may be required for writing
-+ * WRITE_GRANULARITY pages of file. Returns result of reiser4_grab_space().
-+ */
-+static int write_extent_reserve_space(struct inode *inode)
-+{
-+	reiser4_tree *tree = reiser4_tree_by_inode(inode);
-+	__u64 count;
-+
-+	/*
-+	 * The reservation for writing WRITE_GRANULARITY pages by extents is
-+	 * the sum of three parts:
-+	 *
-+	 * 1. find_file_item may have to insert empty node to the tree (empty
-+	 * leaf node between two extent items): 1 block plus the blocks needed
-+	 * to insert an internal item into twig level.
-+	 *
-+	 * 2. each written page may need 1 block plus the blocks needed to
-+	 * insert or paste to an extent item.
-+	 *
-+	 * 3. stat data update.
-+	 */
-+	count = estimate_one_insert_item(tree) +
-+		WRITE_GRANULARITY * (1 + estimate_one_insert_into_item(tree)) +
-+		estimate_one_insert_item(tree);
-+
-+	grab_space_enable();
-+	return reiser4_grab_space(count, 0 /* flags */);
-+}
-+
-+/*
-+ * Generic code no longer provides filemap_copy_from_user, because it is
-+ * deadlocky (copying from user while holding the page lock is bad).
-+ * As a temporary fix for reiser4, it is defined here.
-+ *
-+ * Copies @bytes bytes from user buffer @buf into @page at @offset and returns
-+ * the number of bytes actually copied.
-+ */
-+static inline size_t
-+filemap_copy_from_user(struct page *page, unsigned long offset,
-+		       const char __user *buf, unsigned bytes)
-+{
-+	char *kaddr;
-+	int left;
-+
-+	/* first attempt: copy through an atomic mapping of the page */
-+	kaddr = kmap_atomic(page, KM_USER0);
-+	left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
-+	kunmap_atomic(kaddr, KM_USER0);
-+	if (left == 0)
-+		return bytes;
-+
-+	/* Do it the slow way */
-+	kaddr = kmap(page);
-+	left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
-+	kunmap(page);
-+
-+	return bytes - left;
-+}
-+
-+/**
-+ * reiser4_write_extent - write method of extent item plugin
-+ * @file: file to write to
-+ * @inode: inode of the file; its mapping supplies the pages written to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write; 0 selects the truncate case
-+ * @pos: position in file to write to (only read by this function)
-+ */
-+ssize_t reiser4_write_extent(struct file *file, struct inode * inode,
-+ const char __user *buf, size_t count, loff_t *pos)
-+{
-+ int have_to_update_extent;
-+ int nr_pages, nr_dirty;
-+ struct page *page;
-+ jnode *jnodes[WRITE_GRANULARITY + 1];
-+ unsigned long index;
-+ unsigned long end;
-+ int i;
-+ int to_page, page_off;
-+ size_t left, written;
-+ int result = 0;
-+
-+ if (write_extent_reserve_space(inode))
-+ return RETERR(-ENOSPC);
-+
-+ if (count == 0) {
-+ /* truncate case: nothing to copy, update_extents gets zero jnodes */
-+ update_extents(file, inode, jnodes, 0, *pos);
-+ return 0;
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ left = count;
-+ index = *pos >> PAGE_CACHE_SHIFT;
-+ /* calculate number of pages which are to be written */
-+ end = ((*pos + count - 1) >> PAGE_CACHE_SHIFT);
-+ nr_pages = end - index + 1;
-+ nr_dirty = 0;
-+ assert("", nr_pages <= WRITE_GRANULARITY + 1); /* jnodes[] has that many slots */
-+
-+ /* get pages and jnodes; each page is left unlocked but referenced */
-+ for (i = 0; i < nr_pages; i ++) {
-+ page = find_or_create_page(inode->i_mapping, index + i,
-+ reiser4_ctx_gfp_mask_get());
-+ if (page == NULL) {
-+ nr_pages = i; /* cleanup at out: covers only the first i entries */
-+ result = RETERR(-ENOMEM);
-+ goto out;
-+ }
-+
-+ jnodes[i] = jnode_of_page(page);
-+ if (IS_ERR(jnodes[i])) {
-+ unlock_page(page);
-+ page_cache_release(page);
-+ nr_pages = i; /* this page is already released above */
-+ result = RETERR(-ENOMEM);
-+ goto out;
-+ }
-+ /* prevent jnode and page from disconnecting */
-+ JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
-+ unlock_page(page);
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ have_to_update_extent = 0;
-+
-+ page_off = (*pos & (PAGE_CACHE_SIZE - 1));
-+ for (i = 0; i < nr_pages; i ++) {
-+ to_page = PAGE_CACHE_SIZE - page_off;
-+ if (to_page > left)
-+ to_page = left;
-+ page = jnode_page(jnodes[i]);
-+ if (page_offset(page) < inode->i_size &&
-+ !PageUptodate(page) && to_page != PAGE_CACHE_SIZE) {
-+ /*
-+ * partial write into a page that lies within i_size and is
-+ * not up to date: read it first. This is not optimal for a
-+ * partial write to the last page of a file whose size is
-+ * not at a page boundary
-+ */
-+ lock_page(page);
-+ if (!PageUptodate(page)) {
-+ result = readpage_unix_file(NULL, page);
-+ BUG_ON(result != 0);
-+ /* wait for read completion */
-+ lock_page(page);
-+ BUG_ON(!PageUptodate(page));
-+ } else
-+ result = 0;
-+ unlock_page(page);
-+ }
-+
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+ fault_in_pages_readable(buf, to_page);
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+
-+ lock_page(page);
-+ if (!PageUptodate(page) && to_page != PAGE_CACHE_SIZE)
-+ simple_prepare_write(file, page, page_off,
-+ page_off + to_page);
-+
-+ written = filemap_copy_from_user(page, page_off, buf, to_page);
-+ if (unlikely(written != to_page)) {
-+ unlock_page(page);
-+ result = RETERR(-EFAULT);
-+ break;
-+ }
-+
-+ flush_dcache_page(page);
-+ reiser4_set_page_dirty_internal(page);
-+ unlock_page(page);
-+ nr_dirty++;
-+
-+ mark_page_accessed(page);
-+ SetPageUptodate(page);
-+
-+ if (jnodes[i]->blocknr == 0)
-+ have_to_update_extent ++;
-+
-+ page_off = 0;
-+ buf += to_page;
-+ left -= to_page;
-+ BUG_ON(get_current_context()->trans->atom != NULL);
-+ }
-+
-+ if (have_to_update_extent) {
-+ update_extents(file, inode, jnodes, nr_dirty, *pos);
-+ } else {
-+ for (i = 0; i < nr_dirty; i ++) {
-+ int ret;
-+ spin_lock_jnode(jnodes[i]);
-+ ret = reiser4_try_capture(jnodes[i],
-+ ZNODE_WRITE_LOCK, 0);
-+ BUG_ON(ret != 0);
-+ jnode_make_dirty_locked(jnodes[i]);
-+ spin_unlock_jnode(jnodes[i]);
-+ }
-+ }
-+out:
-+ for (i = 0; i < nr_pages; i ++) {
-+ page_cache_release(jnode_page(jnodes[i]));
-+ JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
-+ jput(jnodes[i]);
-+ }
-+
-+ /* return the number of bytes copied if any, otherwise @result; the only
-+ errors handled so far are ENOMEM and EFAULT on copy_from_user */
-+
-+ return (count - left) ? (count - left) : result;
-+}
-+
-+int reiser4_do_readpage_extent(reiser4_extent * ext, reiser4_block_nr pos,
-+ struct page *page)
-+{
-+ jnode *j;
-+ struct address_space *mapping;
-+ unsigned long index;
-+ oid_t oid;
-+ reiser4_block_nr block;
-+
-+ mapping = page->mapping;
-+ oid = get_inode_oid(mapping->host);
-+ index = page->index;
-+
-+ switch (state_of_extent(ext)) {
-+ case HOLE_EXTENT:
-+ /*
-+ * it is possible to have hole page with jnode, if page was
-+ * eflushed previously. Without a jnode the page is just zeroed.
-+ */
-+ j = jfind(mapping, index);
-+ if (j == NULL) {
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ return 0;
-+ }
-+ spin_lock_jnode(j);
-+ if (!jnode_page(j)) {
-+ jnode_attach_page(j, page);
-+ } else {
-+ BUG_ON(jnode_page(j) != page);
-+ assert("vs-1504", jnode_page(j) == page);
-+ }
-+ block = *jnode_get_io_block(j);
-+ spin_unlock_jnode(j);
-+ if (block == 0) { /* jnode has no io block: zero-fill instead of reading */
-+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ jput(j);
-+ return 0;
-+ }
-+ break;
-+
-+ case ALLOCATED_EXTENT:
-+ j = jnode_of_page(page);
-+ if (IS_ERR(j))
-+ return PTR_ERR(j); /* NOTE(review): page is not unlocked on this path - confirm caller does it */
-+ if (*jnode_get_block(j) == 0) {
-+ reiser4_block_nr blocknr;
-+
-+ blocknr = extent_get_start(ext) + pos; /* @pos: block offset within the extent */
-+ jnode_set_block(j, &blocknr);
-+ } else
-+ assert("vs-1403",
-+ j->blocknr == extent_get_start(ext) + pos);
-+ break;
-+
-+ case UNALLOCATED_EXTENT:
-+ j = jfind(mapping, index);
-+ assert("nikita-2688", j);
-+ assert("vs-1426", jnode_page(j) == NULL);
-+
-+ spin_lock_jnode(j);
-+ jnode_attach_page(j, page);
-+ spin_unlock_jnode(j);
-+ break;
-+
-+ default:
-+ warning("vs-957", "wrong extent\n");
-+ return RETERR(-EIO); /* NOTE(review): page is not unlocked on this path either */
-+ }
-+
-+ BUG_ON(j == 0);
-+ reiser4_page_io(page, j, READ, reiser4_ctx_gfp_mask_get()); /* result is not checked */
-+ jput(j);
-+ return 0;
-+}
-+
-+/*
-+ * Implements plugin->u.item.s.file.read operation for extent items.
-+ *
-+ * Copies flow->length bytes from the page cache to the user buffer
-+ * flow->data, starting at the file offset held in flow->key. Returns 0 on
-+ * success, PTR_ERR() of a failed read_mapping_page(), -EIO if a page is
-+ * not up to date afterwards, or -EFAULT if the user buffer is not writable.
-+ */
-+int reiser4_read_extent(struct file *file, flow_t *flow, hint_t *hint)
-+{
-+	int result;
-+	struct page *page;
-+	unsigned long cur_page;
-+	unsigned long page_off, count;
-+	struct address_space *mapping;
-+	loff_t file_off;
-+	uf_coord_t *uf_coord;
-+	coord_t *coord;
-+	char *kaddr;
-+
-+	assert("vs-1353", current_blocksize == PAGE_CACHE_SIZE);
-+	assert("vs-572", flow->user == 1);
-+	assert("vs-1351", flow->length > 0);
-+
-+	uf_coord = &hint->ext_coord;
-+
-+	check_uf_coord(uf_coord, NULL);
-+	assert("vs-33", uf_coord->lh == &hint->lh);
-+
-+	coord = &uf_coord->coord;
-+	assert("vs-1119", znode_is_rlocked(coord->node));
-+	assert("vs-1120", znode_is_loaded(coord->node));
-+	assert("vs-1256", coord_matches_key_extent(coord, &flow->key));
-+
-+	mapping = file->f_dentry->d_inode->i_mapping;
-+
-+	/* offset in a file to start read from */
-+	file_off = get_key_offset(&flow->key);
-+	/* offset within the page to start read from */
-+	page_off = (unsigned long)(file_off & (PAGE_CACHE_SIZE - 1));
-+	/* bytes which can be read from the page which contains file_off */
-+	count = PAGE_CACHE_SIZE - page_off;
-+
-+	/* index of page containing offset read is to start from */
-+	cur_page = (unsigned long)(file_off >> PAGE_CACHE_SHIFT);
-+
-+	/* we start having twig node read locked. However, we do not want to
-+	   keep that lock all the time readahead works. So, set a seal and
-+	   release twig node. */
-+	reiser4_set_hint(hint, &flow->key, ZNODE_READ_LOCK);
-+	/* &hint->lh is done-ed */
-+
-+	do {
-+		reiser4_txn_restart_current();
-+		page = read_mapping_page(mapping, cur_page, file);
-+		if (IS_ERR(page))
-+			return PTR_ERR(page);
-+		lock_page(page);
-+		if (!PageUptodate(page)) {
-+			unlock_page(page);
-+			page_cache_release(page);
-+			warning("jmacd-97178", "extent_read: page is not up to date");
-+			return RETERR(-EIO);
-+		}
-+		mark_page_accessed(page);
-+		unlock_page(page);
-+
-+		/* If users can be writing to this page using arbitrary virtual
-+		   addresses, take care about potential aliasing before reading
-+		   the page on the kernel side.
-+		 */
-+		if (mapping_writably_mapped(mapping))
-+			flush_dcache_page(page);
-+
-+		assert("nikita-3034", reiser4_schedulable());
-+
-+		/* number of bytes which are to be read from the page */
-+		if (count > flow->length)
-+			count = flow->length;
-+
-+		result = fault_in_pages_writeable(flow->data, count);
-+		if (result) {
-+			page_cache_release(page);
-+			return RETERR(-EFAULT);
-+		}
-+
-+		kaddr = kmap_atomic(page, KM_USER0);
-+		result = __copy_to_user_inatomic(flow->data,
-+						 kaddr + page_off, count);
-+		kunmap_atomic(kaddr, KM_USER0);
-+		if (result != 0) {
-+			kaddr = kmap(page);
-+			result = __copy_to_user(flow->data, kaddr + page_off, count);
-+			kunmap(page);
-+			if (unlikely(result)) {
-+				page_cache_release(page); /* do not leak the page reference */
-+				return RETERR(-EFAULT);
-+			}
-+		}
-+
-+		page_cache_release(page);
-+
-+		/* increase key (flow->key), update user area pointer (flow->data) */
-+		move_flow_forward(flow, count);
-+
-+		page_off = 0;
-+		cur_page ++;
-+		count = PAGE_CACHE_SIZE;
-+	} while (flow->length);
-+
-+	return 0;
-+}
-+
-+/*
-+ plugin->s.file.readpage; @vp is the uf_coord_t of the extent unit. Call chains recorded by the original author:
-+ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->extent_readpage
-+ or
-+ filemap_nopage->reiser4_readpage->readpage_unix_file->readpage_extent
-+
-+ At the beginning: coord->node is read locked, zloaded, page is locked and not up to date,
-+ coord is set to existing unit inside of extent item (it is not necessary that coord matches to page->index)
-+*/
-+int reiser4_readpage_extent(void *vp, struct page *page)
-+{
-+ uf_coord_t *uf_coord = vp;
-+ ON_DEBUG(coord_t * coord = &uf_coord->coord);
-+ ON_DEBUG(reiser4_key key);
-+
-+ assert("vs-1040", PageLocked(page));
-+ assert("vs-1050", !PageUptodate(page));
-+ assert("vs-1039", page->mapping && page->mapping->host);
-+
-+ assert("vs-1044", znode_is_loaded(coord->node));
-+ assert("vs-758", item_is_extent(coord));
-+ assert("vs-1046", coord_is_existing_unit(coord));
-+ assert("vs-1045", znode_is_rlocked(coord->node));
-+ assert("vs-1047",
-+ page->mapping->host->i_ino ==
-+ get_key_objectid(item_key_by_coord(coord, &key)));
-+ check_uf_coord(uf_coord, NULL);
-+
-+ return reiser4_do_readpage_extent(
-+ ext_by_ext_coord(uf_coord),
-+ uf_coord->extension.extent.pos_in_unit, page);
-+}
-+
-+/**
-+ * get_block_address_extent - map a file block to its disk block
-+ * @coord: coord of an extent unit
-+ * @block: block index within the file, in units of current_blocksize
-+ * @result: set to the disk block number, or 0 if the unit is not allocated
-+ *
-+ * Returns 0, or -EINVAL if @coord is not set to an existing unit. For an
-+ * allocated unit @result is the unit's start block plus the distance of
-+ * @block from the first file block the unit addresses.
-+ */
-+int get_block_address_extent(const coord_t *coord, sector_t block,
-+			     sector_t *result)
-+{
-+	reiser4_extent *ext;
-+	reiser4_key key;
-+	__u64 unit_first;
-+
-+	if (!coord_is_existing_unit(coord))
-+		return RETERR(-EINVAL);
-+
-+	ext = extent_by_coord(coord);
-+	if (state_of_extent(ext) != ALLOCATED_EXTENT) {
-+		/* FIXME: bad things may happen if it is unallocated extent */
-+		*result = 0;
-+		return 0;
-+	}
-+
-+	/* index of the first file block addressed by this unit */
-+	unit_key_by_coord(coord, &key);
-+	unit_first = get_key_offset(&key) >> current_blocksize_bits;
-+
-+	assert("vs-1645", block >= unit_first);
-+	assert("vs-1646", block < unit_first + extent_get_width(ext));
-+
-+	/* same distance from the start of the unit, on disk */
-+	*result = extent_get_start(ext) + (block - unit_first);
-+	return 0;
-+}
-+
-+/*
-+ * plugin->u.item.s.file.append_key: set @key to the key of the first byte
-+ * following the last byte addressed by the extent item at @coord
-+ */
-+reiser4_key *append_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+	__u64 end_off;
-+
-+	item_key_by_coord(coord, key);
-+	end_off = get_key_offset(key) +
-+	    reiser4_extent_size(coord, nr_units_extent(coord));
-+	set_key_offset(key, end_off);
-+	assert("vs-610", get_key_offset(key)
-+	       && (get_key_offset(key) & (current_blocksize - 1)) == 0);
-+	return key;
-+}
-+
-+/* plugin->u.item.s.file.init_coord_extension: fill the extent part of @uf_coord for file offset @lookuped */
-+void init_coord_extension_extent(uf_coord_t * uf_coord, loff_t lookuped)
-+{
-+ coord_t *coord;
-+ struct extent_coord_extension *ext_coord;
-+ reiser4_key key;
-+ loff_t offset;
-+
-+ assert("vs-1295", uf_coord->valid == 0);
-+
-+ coord = &uf_coord->coord;
-+ assert("vs-1288", coord_is_iplug_set(coord));
-+ assert("vs-1327", znode_is_loaded(coord->node));
-+
-+ if (coord->between != AFTER_UNIT && coord->between != AT_UNIT)
-+ return; /* other positions: @uf_coord is left not valid */
-+
-+ ext_coord = &uf_coord->extension.extent;
-+ ext_coord->nr_units = nr_units_extent(coord);
-+ ext_coord->ext_offset =
-+ (char *)extent_by_coord(coord) - zdata(coord->node); /* byte offset of the unit within node data */
-+ ext_coord->width = extent_get_width(extent_by_coord(coord));
-+ ON_DEBUG(ext_coord->extent = *extent_by_coord(coord));
-+ uf_coord->valid = 1;
-+
-+ /* pos_in_unit is the only field of the extended coord not set yet */
-+ if (coord->between == AFTER_UNIT) {
-+ assert("vs-1330",
-+ coord->unit_pos == nr_units_extent(coord) - 1);
-+
-+ ext_coord->pos_in_unit = ext_coord->width - 1; /* last block of the unit */
-+ } else {
-+ /* AT_UNIT: the block of the unit which contains @lookuped */
-+ unit_key_by_coord(coord, &key);
-+ offset = get_key_offset(&key);
-+
-+ assert("vs-1328", offset <= lookuped);
-+ assert("vs-1329",
-+ lookuped <
-+ offset + ext_coord->width * current_blocksize);
-+ ext_coord->pos_in_unit =
-+ ((lookuped - offset) >> current_blocksize_bits);
-+ }
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/extent_flush_ops.c linux-2.6.24/fs/reiser4/plugin/item/extent_flush_ops.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/extent_flush_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/extent_flush_ops.c 2008-01-25 11:39:07.016228297 +0300
-@@ -0,0 +1,1028 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../tree.h"
-+#include "../../jnode.h"
-+#include "../../super.h"
-+#include "../../flush.h"
-+#include "../../carry.h"
-+#include "../object.h"
-+
-+#include <linux/pagemap.h>
-+
-+static reiser4_block_nr extent_unit_start(const coord_t * item);
-+
-+/*
-+ * Pick the leftmost or rightmost (per @side) extent unit of the item @coord
-+ * is set to, storing in @pos_in_unit the first or last block of that unit.
-+ */
-+static reiser4_extent *extent_utmost_ext(const coord_t * coord, sideof side,
-+					 reiser4_block_nr * pos_in_unit)
-+{
-+	reiser4_extent *last;
-+
-+	if (side == LEFT_SIDE) {
-+		/* first unit of the item, first block within it */
-+		*pos_in_unit = 0;
-+		return extent_item(coord);
-+	}
-+
-+	/* otherwise: last unit of the item, last block within it */
-+	assert("vs-363", side == RIGHT_SIDE);
-+	last = extent_item(coord) + coord_last_unit_pos(coord);
-+	*pos_in_unit = extent_get_width(last) - 1;
-+	return last;
-+}
-+
-+/* item_plugin->f.utmost_child */
-+/* Coord is set to extent item. Store in @childp the jnode of the first or last (per @side) unformatted
-+ node pointed by the item: NULL for a hole, otherwise whatever jlookup finds. Always returns 0 */
-+int utmost_child_extent(const coord_t * coord, sideof side, jnode ** childp)
-+{
-+ reiser4_extent *ext;
-+ reiser4_block_nr pos_in_unit;
-+
-+ ext = extent_utmost_ext(coord, side, &pos_in_unit);
-+
-+ switch (state_of_extent(ext)) {
-+ case HOLE_EXTENT:
-+ *childp = NULL;
-+ return 0;
-+ case ALLOCATED_EXTENT:
-+ case UNALLOCATED_EXTENT:
-+ break;
-+ default:
-+ /* this should never happen; control continues to the lookup below */
-+ assert("vs-1417", 0);
-+ }
-+
-+ {
-+ reiser4_key key;
-+ reiser4_tree *tree;
-+ unsigned long index;
-+
-+ if (side == LEFT_SIDE) {
-+ /* get key of first byte addressed by the item */
-+ item_key_by_coord(coord, &key);
-+ } else {
-+ /* get key of the byte next after the last byte addressed by the item */
-+ append_key_extent(coord, &key);
-+ }
-+
-+ assert("vs-544",
-+ (get_key_offset(&key) >> PAGE_CACHE_SHIFT) < ~0ul);
-+ /* index of first or last (depending on @side) page addressed
-+ by the extent; for the right side step back from the append key */
-+ index =
-+ (unsigned long)(get_key_offset(&key) >> PAGE_CACHE_SHIFT);
-+ if (side == RIGHT_SIDE)
-+ index--;
-+
-+ tree = coord->node->zjnode.tree;
-+ *childp = jlookup(tree, get_key_objectid(&key), index);
-+ }
-+
-+ return 0;
-+}
-+
-+/* item_plugin->f.utmost_child_real_block */
-+/* Store in @block the disk block of the first (or, when @side is
-+   RIGHT_SIDE, last) node of the extent unit at @coord, or 0 if that unit is
-+   not allocated. Always returns 0. */
-+int
-+utmost_child_real_block_extent(const coord_t * coord, sideof side,
-+			       reiser4_block_nr * block)
-+{
-+	reiser4_extent *unit = extent_by_coord(coord);
-+	extent_state state = state_of_extent(unit);
-+
-+	if (state == ALLOCATED_EXTENT) {
-+		reiser4_block_nr blk = extent_get_start(unit);
-+
-+		if (side == RIGHT_SIDE)
-+			blk += extent_get_width(unit) - 1;
-+		*block = blk;
-+	} else if (state == HOLE_EXTENT || state == UNALLOCATED_EXTENT) {
-+		/* no real block behind holes and unallocated units */
-+		*block = 0;
-+	} else {
-+		/* this should never happen */
-+		assert("vs-1418", 0);
-+	}
-+
-+	return 0;
-+}
-+
-+/* item_plugin->f.scan */
-+/* Performs leftward or rightward (per scan->direction) scanning starting from an unformatted node
-+ and its parent coordinate. This scan continues, advancing the parent coordinate, until either it
-+ encounters a formatted child or it finishes scanning this node.
-+
-+ If unallocated, the entire extent must be dirty and in the same atom. (Actually, I'm
-+ not sure this last property (same atom) is enforced, but it should be the case since
-+ one atom must write the parent and the others must read the parent, thus fusing?). In
-+ any case, the code below asserts this case for unallocated extents. Unallocated
-+ extents are thus optimized because we can skip to the endpoint when scanning.
-+
-+ It returns control to its caller, which is expected to handle these terminating conditions,
-+ e.g., by loading the next twig. Returns 0 or the error from scan_set_current.
-+*/
-+int reiser4_scan_extent(flush_scan * scan)
-+{
-+ coord_t coord;
-+ jnode *neighbor;
-+ unsigned long scan_index, unit_index, unit_width, scan_max, scan_dist;
-+ reiser4_block_nr unit_start;
-+ __u64 oid;
-+ reiser4_key key;
-+ int ret = 0, allocated, incr;
-+ reiser4_tree *tree;
-+
-+ if (!JF_ISSET(scan->node, JNODE_DIRTY)) {
-+ scan->stop = 1;
-+ return 0; /* Race with truncate, this node is already
-+ * truncated. */
-+ }
-+
-+ coord_dup(&coord, &scan->parent_coord);
-+
-+ assert("jmacd-1404", !reiser4_scan_finished(scan));
-+ assert("jmacd-1405", jnode_get_level(scan->node) == LEAF_LEVEL);
-+ assert("jmacd-1406", jnode_is_unformatted(scan->node));
-+
-+ /* The scan_index variable corresponds to the current page index of the
-+ unformatted block scan position. */
-+ scan_index = index_jnode(scan->node);
-+
-+ assert("jmacd-7889", item_is_extent(&coord));
-+
-+ repeat:
-+ /* objectid of file */
-+ oid = get_key_objectid(item_key_by_coord(&coord, &key));
-+
-+ allocated = !extent_is_unallocated(&coord);
-+ /* Get the values of this extent unit: */
-+ unit_index = extent_unit_index(&coord);
-+ unit_width = extent_unit_width(&coord);
-+ unit_start = extent_unit_start(&coord);
-+
-+ assert("jmacd-7187", unit_width > 0);
-+ assert("jmacd-7188", scan_index >= unit_index);
-+ assert("jmacd-7189", scan_index <= unit_index + unit_width - 1);
-+
-+ /* Depending on the scan direction, we set different maximum values for scan_index
-+ (scan_max) and the number of nodes that would be passed if the scan goes the
-+ entire way (scan_dist). Incr is an integer reflecting the incremental
-+ direction of scan_index. */
-+ if (reiser4_scanning_left(scan)) {
-+ scan_max = unit_index;
-+ scan_dist = scan_index - unit_index;
-+ incr = -1;
-+ } else {
-+ scan_max = unit_index + unit_width - 1;
-+ scan_dist = scan_max - unit_index; /* NOTE(review): left case measures from scan_index; confirm unit_index is intended here */
-+ incr = +1;
-+ }
-+
-+ tree = coord.node->zjnode.tree;
-+
-+ /* If the extent is allocated we have to check each of its blocks. If the extent
-+ is unallocated we can skip to the scan_max. */
-+ if (allocated) {
-+ do {
-+ neighbor = jlookup(tree, oid, scan_index);
-+ if (neighbor == NULL)
-+ goto stop_same_parent;
-+
-+ if (scan->node != neighbor
-+ && !reiser4_scan_goto(scan, neighbor)) {
-+ /* @neighbor was jput() by reiser4_scan_goto */
-+ goto stop_same_parent;
-+ }
-+
-+ ret = scan_set_current(scan, neighbor, 1, &coord);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+
-+ /* reference to @neighbor is stored in @scan, no need
-+ to jput(). */
-+ scan_index += incr;
-+
-+ } while (incr + scan_max != scan_index);
-+
-+ } else {
-+ /* Optimized case for unallocated extents, skip to the end. */
-+ neighbor = jlookup(tree, oid, scan_max /*index */ );
-+ if (neighbor == NULL) {
-+ /* Race with truncate */
-+ scan->stop = 1;
-+ ret = 0;
-+ goto exit;
-+ }
-+
-+ assert("zam-1043",
-+ reiser4_blocknr_is_fake(jnode_get_block(neighbor)));
-+
-+ ret = scan_set_current(scan, neighbor, scan_dist, &coord);
-+ if (ret != 0) {
-+ goto exit;
-+ }
-+ }
-+
-+ if (coord_sideof_unit(&coord, scan->direction) == 0
-+ && item_is_extent(&coord)) {
-+ /* Continue as long as there are more extent units. */
-+
-+ scan_index =
-+ extent_unit_index(&coord) +
-+ (reiser4_scanning_left(scan) ?
-+ extent_unit_width(&coord) - 1 : 0);
-+ goto repeat;
-+ }
-+
-+ if (0) { /* this branch is entered only through the stop_same_parent label */
-+ stop_same_parent:
-+
-+ /* If we are scanning left and we stop in the middle of an allocated
-+ extent, we know the preceder immediately.. */
-+ /* middle of extent is (scan_index - unit_index) != 0. */
-+ if (reiser4_scanning_left(scan) &&
-+ (scan_index - unit_index) != 0) {
-+ /* FIXME(B): Someone should step-through and verify that this preceder
-+ calculation is indeed correct. */
-+ /* @unit_start is starting block (number) of extent
-+ unit. Flush stopped at the @scan_index block from
-+ the beginning of the file, which is (scan_index -
-+ unit_index) block within extent.
-+ */
-+ if (unit_start) {
-+ /* skip preceder update when we are at hole */
-+ scan->preceder_blk =
-+ unit_start + scan_index - unit_index;
-+ check_preceder(scan->preceder_blk);
-+ }
-+ }
-+
-+ /* In this case, we leave coord set to the parent of scan->node. */
-+ scan->stop = 1;
-+
-+ } else {
-+ /* In this case, we are still scanning, coord is set to the next item which is
-+ either off-the-end of the node or not an extent. */
-+ assert("jmacd-8912", scan->stop == 0);
-+ assert("jmacd-7812",
-+ (coord_is_after_sideof_unit(&coord, scan->direction)
-+ || !item_is_extent(&coord)));
-+ }
-+
-+ ret = 0;
-+ exit:
-+ return ret;
-+}
-+
-+/* ask block allocator for @wanted_count blocks; results go to @first_allocated and @allocated */
-+static void extent_allocate_blocks(reiser4_blocknr_hint *preceder,
-+ reiser4_block_nr wanted_count,
-+ reiser4_block_nr *first_allocated,
-+ reiser4_block_nr *allocated,
-+ block_stage_t block_stage)
-+{
-+ *allocated = wanted_count;
-+ preceder->max_dist = 0; /* scan whole disk, if needed */
-+
-+ /* that number of blocks (wanted_count) is either in UNALLOCATED or in GRABBED */
-+ preceder->block_stage = block_stage;
-+
-+ /* FIXME: we do not handle errors here now; a failure only trips check_me */
-+ check_me("vs-420",
-+ reiser4_alloc_blocks(preceder, first_allocated, allocated,
-+ BA_PERMANENT) == 0);
-+ /* update flush_pos's preceder to last allocated block number */
-+ preceder->blk = *first_allocated + *allocated - 1;
-+}
-+
-+/* When flush replaces an unallocated extent with allocated ones, a single
-+   unit may turn into several, so insert_into_item may have to add nodes to
-+   the tree. Grab that space from the inviolable reserve (5%) and return the
-+   number of blocks that were grabbed before doing so. */
-+static reiser4_block_nr reserve_replace(void)
-+{
-+	reiser4_block_nr before = get_current_context()->grabbed_blocks;
-+	reiser4_block_nr extra = estimate_one_insert_into_item(current_tree);
-+
-+	check_me("vpf-340", !reiser4_grab_space_force(extra, BA_RESERVED));
-+	return before;
-+}
-+
-+static void free_replace_reserved(reiser4_block_nr grabbed)
-+{
-+	reiser4_context *cur = get_current_context();
-+	reiser4_block_nr surplus = cur->grabbed_blocks - grabbed;
-+
-+	/* give back whatever is grabbed beyond the saved @grabbed level */
-+	grabbed2free(cur, get_super_private(cur->super), surplus);
-+}
-+
-+/* Index of the first block addressed by unit: its key offset in blocks */
-+__u64 extent_unit_index(const coord_t * item)
-+{
-+	reiser4_key unit_key;
-+
-+	assert("vs-648", coord_is_existing_unit(item));
-+	unit_key_by_coord(item, &unit_key);
-+	return get_key_offset(&unit_key) >> current_blocksize_bits;
-+}
-+
-+/* Width of the extent unit at @item, as given by width_by_coord. AUDIT shouldn't return value be of reiser4_block_nr
-+ type? Josh's answer: who knows? Is a "number of blocks" the same type as "block offset"? */
-+__u64 extent_unit_width(const coord_t * item)
-+{
-+ assert("vs-649", coord_is_existing_unit(item));
-+ return width_by_coord(item);
-+}
-+
-+/* Starting block location of the extent unit @item is set to (its start field, whatever the unit's state) */
-+static reiser4_block_nr extent_unit_start(const coord_t * item)
-+{
-+ return extent_get_start(extent_by_coord(item));
-+}
-+
-+/**
-+ * split_allocated_extent - split an allocated extent unit in two
-+ * @coord: coord of the allocated extent unit to split
-+ * @pos_in_unit: block within the unit at which the second extent starts
-+ *
-+ * replace allocated extent with two allocated extents
-+ */
-+static int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit)
-+{
-+ int result;
-+ struct replace_handle *h;
-+ reiser4_extent *ext;
-+ reiser4_block_nr grabbed;
-+
-+ ext = extent_by_coord(coord);
-+ assert("vs-1410", state_of_extent(ext) == ALLOCATED_EXTENT);
-+ assert("vs-1411", extent_get_width(ext) > pos_in_unit);
-+
-+ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
-+ if (h == NULL)
-+ return RETERR(-ENOMEM);
-+ h->coord = coord;
-+ h->lh = znode_lh(coord->node);
-+ h->pkey = &h->key;
-+ unit_key_by_coord(coord, h->pkey);
-+ set_key_offset(h->pkey,
-+ (get_key_offset(h->pkey) +
-+ pos_in_unit * current_blocksize)); /* key of the second extent */
-+ reiser4_set_extent(&h->overwrite, extent_get_start(ext),
-+ pos_in_unit); /* first part: the leading @pos_in_unit blocks */
-+ reiser4_set_extent(&h->new_extents[0],
-+ extent_get_start(ext) + pos_in_unit,
-+ extent_get_width(ext) - pos_in_unit); /* second part: the rest */
-+ h->nr_new_extents = 1;
-+ h->flags = COPI_DONT_SHIFT_LEFT;
-+ h->paste_key = h->key;
-+
-+ /* reserve space for extent unit paste, @grabbed is reserved before */
-+ grabbed = reserve_replace();
-+ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
-+ extent */);
-+ /* restore reserved */
-+ free_replace_reserved(grabbed);
-+ kfree(h);
-+ return result;
-+}
-+
-+/* replace extent @ext by extent @replace. Try to merge @replace with previous extent of the item (if there is
-+ one and it is allocated and ends exactly where @replace starts). Return 1 if it succeeded, 0 - otherwise */
-+static int try_to_merge_with_left(coord_t *coord, reiser4_extent *ext,
-+ reiser4_extent *replace)
-+{
-+ assert("vs-1415", extent_by_coord(coord) == ext);
-+
-+ if (coord->unit_pos == 0
-+ || state_of_extent(ext - 1) != ALLOCATED_EXTENT)
-+ /* left neighbor of @ext either does not exist or is not allocated extent */
-+ return 0;
-+ if (extent_get_start(ext - 1) + extent_get_width(ext - 1) !=
-+ extent_get_start(replace))
-+ return 0;
-+
-+ /* we can glue, widen previous unit */
-+ extent_set_width(ext - 1,
-+ extent_get_width(ext - 1) + extent_get_width(replace));
-+
-+ if (extent_get_width(ext) != extent_get_width(replace)) {
-+ /* make current extent narrower */
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ extent_get_width(replace));
-+ extent_set_width(ext,
-+ extent_get_width(ext) -
-+ extent_get_width(replace));
-+ } else {
-+ /* current extent completely glued with its left neighbor, remove it */
-+ coord_t from, to;
-+
-+ coord_dup(&from, coord);
-+ from.unit_pos = nr_units_extent(coord) - 1;
-+ coord_dup(&to, &from);
-+
-+ /* currently cut from extent can cut either from the beginning or from the end. Move place which got
-+ freed after unit removal to end of item */
-+ memmove(ext, ext + 1,
-+ (from.unit_pos -
-+ coord->unit_pos) * sizeof(reiser4_extent));
-+ /* wipe part of item which is going to be cut, so that node_check will not be confused */
-+ cut_node_content(&from, &to, NULL, NULL, NULL);
-+ }
-+ znode_make_dirty(coord->node);
-+ /* move coord back, onto the widened left neighbor */
-+ coord->unit_pos--;
-+ return 1;
-+}
-+
-+/**
-+ * conv_extent - replace extent with 2 ones
-+ * @coord: coordinate of extent to be replaced
-+ * @replace: extent to overwrite the one @coord is set to
-+ *
-+ * Overwrites extent @coord is set to and paste one extent unit after
-+ * overwritten one if @replace is shorter than initial extent. Returns 0,
-+ * -ENOMEM, or the result of reiser4_replace_extent.
-+ */
-+static int conv_extent(coord_t *coord, reiser4_extent *replace)
-+{
-+ int result;
-+ struct replace_handle *h;
-+ reiser4_extent *ext;
-+ reiser4_block_nr start, width, new_width;
-+ reiser4_block_nr grabbed;
-+ extent_state state;
-+
-+ ext = extent_by_coord(coord);
-+ state = state_of_extent(ext);
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+ new_width = extent_get_width(replace);
-+
-+ assert("vs-1458", (state == UNALLOCATED_EXTENT ||
-+ state == ALLOCATED_EXTENT));
-+ assert("vs-1459", width >= new_width);
-+
-+ if (try_to_merge_with_left(coord, ext, replace)) {
-+ /* merged @replace with left neighbor. Current unit is either
-+ removed or narrowed */
-+ return 0;
-+ }
-+
-+ if (width == new_width) {
-+ /* same width: replace current extent with @replace in place */
-+ *ext = *replace;
-+ znode_make_dirty(coord->node);
-+ return 0;
-+ }
-+
-+ h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
-+ if (h == NULL)
-+ return RETERR(-ENOMEM);
-+ h->coord = coord;
-+ h->lh = znode_lh(coord->node);
-+ h->pkey = &h->key;
-+ unit_key_by_coord(coord, h->pkey);
-+ set_key_offset(h->pkey,
-+ (get_key_offset(h->pkey) + new_width * current_blocksize));
-+ h->overwrite = *replace;
-+
-+ /* replace @ext with @replace and padding extent which keeps the state of @ext */
-+ reiser4_set_extent(&h->new_extents[0],
-+ (state == ALLOCATED_EXTENT) ?
-+ (start + new_width) :
-+ UNALLOCATED_EXTENT_START,
-+ width - new_width);
-+ h->nr_new_extents = 1;
-+ h->flags = COPI_DONT_SHIFT_LEFT;
-+ h->paste_key = h->key;
-+
-+ /* reserve space for extent unit paste, @grabbed is reserved before */
-+ grabbed = reserve_replace();
-+ result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
-+ extent */);
-+
-+ /* restore reserved */
-+ free_replace_reserved(grabbed);
-+ kfree(h);
-+ return result;
-+}
-+
-+/**
-+ * assign_real_blocknrs - give jnodes of a file range their real block numbers
-+ * @flush_pos: flush position; its flush queue locates the atom and
-+ *             receives the relocated nodes
-+ * @oid: objectid of the file the jnodes belong to
-+ * @index: first jnode on the range
-+ * @count: number of jnodes to assign block numbers to
-+ * @first: start of allocated block range
-+ *
-+ * Assigns consecutive block numbers, starting at @first, to each of @count
-+ * jnodes. Index of first jnode is @index. Jnodes get lookuped with jlookup
-+ * and every one of them must exist. The atom obtained locked from
-+ * atom_locked_by_fq() is unlocked before returning.
-+ */
-+static void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
-+				 unsigned long index, reiser4_block_nr count,
-+				 reiser4_block_nr first)
-+{
-+	unsigned long i;
-+	reiser4_tree *tree;
-+	txn_atom *atom;
-+
-+	atom = atom_locked_by_fq(flush_pos->fq);
-+	assert("vs-1468", atom);
-+	BUG_ON(atom == NULL);
-+
-+	tree = current_tree;
-+	for (i = 0; i < count; ++i, ++index) {
-+		jnode *node;
-+
-+		node = jlookup(tree, oid, index);
-+		assert("", node != NULL);
-+		BUG_ON(node == NULL);
-+
-+		spin_lock_jnode(node);
-+		assert("", !jnode_is_flushprepped(node));
-+		assert("vs-1475", node->atom == atom);
-+		assert("vs-1476", atomic_read(&node->x_count) > 0);
-+
-+		JF_CLR(node, JNODE_FLUSH_RESERVED);
-+		jnode_set_block(node, &first);
-+		unformatted_make_reloc(node, flush_pos->fq);
-+		ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
-+				     FQ_LIST, 0));
-+		spin_unlock_jnode(node);
-+		first++;
-+
-+		/* drop x_count; presumably the reference jlookup took - TODO confirm */
-+		atomic_dec(&node->x_count);
-+	}
-+
-+	spin_unlock_atom(atom);
-+}
-+
-+/**
-+ * make_node_ovrwr - assign node to overwrite set
-+ * @jnodes: overwrite set list head
-+ * @node: jnode to belong to overwrite set; must be neither RELOC nor OVRWR
-+ *
-+ * Sets OVRWR jnode state bit and puts @node to the end of list head @jnodes
-+ * which is an accumulator for nodes before they get to overwrite set list of
-+ * atom. All of this is done with the jnode spin lock held.
-+ */
-+static void make_node_ovrwr(struct list_head *jnodes, jnode *node)
-+{
-+ spin_lock_jnode(node);
-+
-+ assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
-+ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
-+
-+ JF_SET(node, JNODE_OVRWR);
-+ list_move_tail(&node->capture_link, jnodes); /* unlinks @node from the list it was on */
-+ ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0));
-+
-+ spin_unlock_jnode(node);
-+}
-+
-+/**
-+ * mark_jnodes_overwrite - put bunch of jnodes to overwrite set
-+ * @flush_pos: flush position
-+ * @oid: objectid of file jnodes belong to
-+ * @index: starting index
-+ * @width: extent width
-+ *
-+ * Walks the jnodes of one extent of file @oid, from position
-+ * @flush_pos->pos_in_unit up to @width with @index as the first jnode index,
-+ * and moves them to the atom's overwrite set. A jnode that is missing,
-+ * flushprepped or owned by another atom ends the walk and sets the flush
-+ * position's state to POS_INVALID.
-+ */
-+static void mark_jnodes_overwrite(flush_pos_t *flush_pos, oid_t oid,
-+				  unsigned long index, reiser4_block_nr width)
-+{
-+	reiser4_tree *tree = current_tree;
-+	unsigned long pos;
-+	txn_atom *atom;
-+	jnode *cur;
-+	LIST_HEAD(ovrwr);
-+
-+	atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
-+	assert("vs-1478", atom);
-+
-+	for (pos = flush_pos->pos_in_unit; pos < width; pos++, index++) {
-+		cur = jlookup(tree, oid, index);
-+		if (cur == NULL) {
-+			flush_pos->state = POS_INVALID;
-+			break;
-+		}
-+
-+		/* flushprepped node or node of another atom: end of slum */
-+		if (jnode_check_flushprepped(cur) || cur->atom != atom) {
-+			flush_pos->state = POS_INVALID;
-+			atomic_dec(&cur->x_count);
-+			break;
-+		}
-+
-+		/* collect the node on the local list first */
-+		make_node_ovrwr(&ovrwr, cur);
-+		atomic_dec(&cur->x_count);
-+	}
-+
-+	/* hand everything collected so far over to the atom's overwrite
-+	   list in one go */
-+	list_splice_init(&ovrwr, ATOM_OVRWR_LIST(atom)->prev);
-+	spin_unlock_atom(atom);
-+}
-+
-+/**
-+ * allocated_extent_slum_size - count leading not flushprepped jnodes of extent
-+ * @flush_pos: flush position; its flush queue is used to obtain the atom
-+ * @oid: objectid of file jnodes belong to
-+ * @index: index of the first jnode to look at
-+ * @count: maximal number of jnodes to look at
-+ *
-+ * Looks up jnodes (@oid, @index), (@oid, @index + 1), ... and stops at the
-+ * first one which is missing, is flushprepped or belongs to an atom other
-+ * than the one obtained from @flush_pos. Returns the number of jnodes which
-+ * passed all three checks (at most @count).
-+ */
-+static int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
-+ unsigned long index, unsigned long count)
-+{
-+ unsigned long i;
-+ reiser4_tree *tree;
-+ txn_atom *atom;
-+ int nr;
-+
-+ /* the atom is unlocked again before returning */
-+ atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
-+ assert("vs-1468", atom);
-+
-+ nr = 0;
-+ tree = current_tree;
-+ for (i = 0; i < count; ++i, ++index) {
-+ jnode *node;
-+
-+ node = jlookup(tree, oid, index);
-+ if (!node)
-+ break;
-+
-+ if (jnode_check_flushprepped(node)) {
-+ /* drop x_count reference, presumably taken by jlookup() */
-+ atomic_dec(&node->x_count);
-+ break;
-+ }
-+
-+ if (node->atom != atom) {
-+ /*
-+ * this is possible on overwrite: extent_write may
-+ * capture several unformatted nodes without capturing
-+ * any formatted nodes.
-+ */
-+ atomic_dec(&node->x_count);
-+ break;
-+ }
-+
-+ /* besides the reference dropped below, somebody else is expected
-+ to hold one more */
-+ assert("vs-1476", atomic_read(&node->x_count) > 1);
-+ atomic_dec(&node->x_count);
-+ nr ++;
-+ }
-+
-+ spin_unlock_atom(atom);
-+ return nr;
-+}
-+
-+/**
-+ * reiser4_alloc_extent - prepare slum of one extent unit for flushing
-+ * @flush_pos: flush position; ->coord has to be set to an existing unit of
-+ * an extent item and ->state has to be POS_ON_EPOINT (both asserted)
-+ *
-+ * this is called by handle_pos_on_twig to proceed extent unit flush_pos->coord
-+ * is set to. It is to prepare for flushing sequence of not flushprepped nodes
-+ * (slum). It supposes that slum starts at flush_pos->pos_in_unit position
-+ * within the extent. Slum gets to relocate set if flush_pos->leaf_relocate is
-+ * set to 1 (or if the extent is unallocated) and to overwrite set otherwise
-+ *
-+ * Returns 0, the result of split_allocated_extent() when the unit had to be
-+ * split first, or a non-zero result of conv_extent() other than -ENOMEM.
-+ */
-+int reiser4_alloc_extent(flush_pos_t *flush_pos)
-+{
-+ coord_t *coord;
-+ reiser4_extent *ext;
-+ reiser4_extent replace_ext;
-+ oid_t oid;
-+ reiser4_block_nr protected;
-+ reiser4_block_nr start;
-+ __u64 index;
-+ __u64 width;
-+ extent_state state;
-+ int result;
-+ reiser4_block_nr first_allocated;
-+ __u64 allocated;
-+ reiser4_key key;
-+ block_stage_t block_stage;
-+
-+ assert("vs-1468", flush_pos->state == POS_ON_EPOINT);
-+ assert("vs-1469", coord_is_existing_unit(&flush_pos->coord)
-+ && item_is_extent(&flush_pos->coord));
-+
-+ coord = &flush_pos->coord;
-+
-+ ext = extent_by_coord(coord);
-+ state = state_of_extent(ext);
-+ if (state == HOLE_EXTENT) {
-+ /* nothing to flush in a hole */
-+ flush_pos->state = POS_INVALID;
-+ return 0;
-+ }
-+
-+ item_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key);
-+ /* index of the jnode the slum starts with */
-+ index = extent_unit_index(coord) + flush_pos->pos_in_unit;
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+
-+ assert("vs-1457", width > flush_pos->pos_in_unit);
-+
-+ if (flush_pos->leaf_relocate || state == UNALLOCATED_EXTENT) {
-+ /* relocate */
-+ if (flush_pos->pos_in_unit) {
-+ /* split extent unit into two */
-+ result =
-+ split_allocated_extent(coord,
-+ flush_pos->pos_in_unit);
-+ /* nothing is allocated on this call; only the split is
-+ done and its result returned */
-+ flush_pos->pos_in_unit = 0;
-+ return result;
-+ }
-+
-+ /* limit number of nodes to allocate */
-+ if (flush_pos->nr_to_write < width)
-+ width = flush_pos->nr_to_write;
-+
-+ if (state == ALLOCATED_EXTENT) {
-+ /*
-+ * all protected nodes are not flushprepped, therefore
-+ * they are counted as flush_reserved
-+ */
-+ block_stage = BLOCK_FLUSH_RESERVED;
-+ protected = allocated_extent_slum_size(flush_pos, oid,
-+ index, width);
-+ if (protected == 0) {
-+ flush_pos->state = POS_INVALID;
-+ flush_pos->pos_in_unit = 0;
-+ return 0;
-+ }
-+ } else {
-+ block_stage = BLOCK_UNALLOCATED;
-+ protected = width;
-+ }
-+
-+ /*
-+ * look at previous unit if possible. If it is allocated, make
-+ * preceder more precise
-+ */
-+ if (coord->unit_pos &&
-+ (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
-+ reiser4_pos_hint(flush_pos)->blk =
-+ extent_get_start(ext - 1) +
-+ extent_get_width(ext - 1);
-+
-+ /* allocate new block numbers for protected nodes */
-+ /* the start and length actually obtained come back in
-+ first_allocated and allocated */
-+ extent_allocate_blocks(reiser4_pos_hint(flush_pos),
-+ protected,
-+ &first_allocated, &allocated,
-+ block_stage);
-+
-+ if (state == ALLOCATED_EXTENT)
-+ /*
-+ * on relocating - free nodes which are going to be
-+ * relocated
-+ */
-+ reiser4_dealloc_blocks(&start, &allocated,
-+ BLOCK_ALLOCATED, BA_DEFER);
-+
-+ /* assign new block numbers to protected nodes */
-+ assign_real_blocknrs(flush_pos, oid, index, allocated, first_allocated);
-+
-+ /* prepare extent which will replace current one */
-+ reiser4_set_extent(&replace_ext, first_allocated, allocated);
-+
-+ /* adjust extent item */
-+ result = conv_extent(coord, &replace_ext);
-+ /* NOTE(review): a -ENOMEM result is not returned here; execution
-+ continues and the function returns 0 - confirm this is
-+ intended */
-+ if (result != 0 && result != -ENOMEM) {
-+ warning("vs-1461",
-+ "Failed to allocate extent. Should not happen\n");
-+ return result;
-+ }
-+
-+ /*
-+ * break flush: we prepared for flushing as many blocks as we
-+ * were asked for
-+ */
-+ if (flush_pos->nr_to_write == allocated)
-+ flush_pos->state = POS_INVALID;
-+ } else {
-+ /* overwrite */
-+ mark_jnodes_overwrite(flush_pos, oid, index, width);
-+ }
-+ flush_pos->pos_in_unit = 0;
-+ return 0;
-+}
-+
-+/* Tells whether a new item has to be inserted for @key. Returns 0 when the
-+   item @coord is set to is an extent item whose append key equals @key (that
-+   is, @key is glueable to that item), and 1 otherwise */
-+static int must_insert(const coord_t *coord, const reiser4_key *key)
-+{
-+	reiser4_key append_key;
-+
-+	/* only extent items can be appended to */
-+	if (item_id_by_coord(coord) != EXTENT_POINTER_ID)
-+		return 1;
-+
-+	return keyeq(append_key_extent(coord, &append_key), key) ? 0 : 1;
-+}
-+
-+/* copy extent @copy_ext to the end of @node. Depending on the last item of
-+ @node this either inserts a new item after the last one, or widens the last
-+ unit of the last item, or appends a new unit to the last item.
-+ Returns 0 on success or -E_NODE_FULL (any other result trips the assertion
-+ at the end) */
-+static int put_unit_to_end(znode *node, const reiser4_key *key,
-+ reiser4_extent *copy_ext)
-+{
-+ int result;
-+ coord_t coord;
-+ cop_insert_flag flags;
-+ reiser4_extent *last_ext;
-+ reiser4_item_data data;
-+
-+ /* set coord after last unit in an item */
-+ coord_init_last_unit(&coord, node);
-+ coord.between = AFTER_UNIT;
-+
-+ flags =
-+ COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT | COPI_DONT_ALLOCATE;
-+ if (must_insert(&coord, key)) {
-+ /* @key does not continue the last item: create a new item */
-+ result =
-+ insert_by_coord(&coord, init_new_extent(&data, copy_ext, 1),
-+ key, NULL /*lh */ , flags);
-+
-+ } else {
-+ /* try to glue with last unit */
-+ last_ext = extent_by_coord(&coord);
-+ /* HOLE_EXTENT is 0 in extent_state, so the first operand is true
-+ for any state other than hole */
-+ if (state_of_extent(last_ext) &&
-+ extent_get_start(last_ext) + extent_get_width(last_ext) ==
-+ extent_get_start(copy_ext)) {
-+ /* widen last unit of node */
-+ extent_set_width(last_ext,
-+ extent_get_width(last_ext) +
-+ extent_get_width(copy_ext));
-+ znode_make_dirty(node);
-+ return 0;
-+ }
-+
-+ /* FIXME: put an assertion here that we can not merge last unit in @node and new unit */
-+ result =
-+ insert_into_item(&coord, NULL /*lh */ , key,
-+ init_new_extent(&data, copy_ext, 1),
-+ flags);
-+ }
-+
-+ assert("vs-438", result == 0 || result == -E_NODE_FULL);
-+ return result;
-+}
-+
-+/* Copies the extent unit @coord is set to to the end of node @left while
-+ preparing its jnodes for flushing.
-+ @left: node the unit is copied to
-+ @coord: is set to extent unit; has to be the leftmost unit of an extent item
-+ @flush_pos: flush position; ->pos_in_unit has to be 0
-+ @stop_key: on SQUEEZE_CONTINUE is set to the key following the copied part
-+
-+ An unallocated extent, or an allocated one when flush_pos->leaf_relocate is
-+ set, is relocated: new blocks are allocated and an extent describing them is
-+ put to @left. Otherwise the unit is copied as is and its jnodes are put to
-+ the overwrite set.
-+
-+ Returns SQUEEZE_TARGET_FULL if @left has no room for the unit,
-+ SQUEEZE_CONTINUE after the unit was copied, or 0 if an allocated extent has
-+ no jnodes to relocate */
-+squeeze_result squalloc_extent(znode *left, const coord_t *coord,
-+ flush_pos_t *flush_pos,
-+ reiser4_key *stop_key)
-+{
-+ reiser4_extent *ext;
-+ __u64 index;
-+ __u64 width;
-+ reiser4_block_nr start;
-+ extent_state state;
-+ oid_t oid;
-+ reiser4_block_nr first_allocated;
-+ __u64 allocated;
-+ __u64 protected;
-+ reiser4_extent copy_extent;
-+ reiser4_key key;
-+ int result;
-+ block_stage_t block_stage;
-+
-+ assert("vs-1457", flush_pos->pos_in_unit == 0);
-+ assert("vs-1467", coord_is_leftmost_unit(coord));
-+ assert("vs-1467", item_is_extent(coord));
-+
-+ ext = extent_by_coord(coord);
-+ index = extent_unit_index(coord);
-+ start = extent_get_start(ext);
-+ width = extent_get_width(ext);
-+ state = state_of_extent(ext);
-+ unit_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key);
-+
-+ if ((flush_pos->leaf_relocate && state == ALLOCATED_EXTENT) ||
-+ (state == UNALLOCATED_EXTENT)) {
-+ /* relocate */
-+ if (state == ALLOCATED_EXTENT) {
-+ /* all protected nodes are not flushprepped, therefore
-+ * they are counted as flush_reserved */
-+ block_stage = BLOCK_FLUSH_RESERVED;
-+ protected = allocated_extent_slum_size(flush_pos, oid,
-+ index, width);
-+ if (protected == 0) {
-+ flush_pos->state = POS_INVALID;
-+ flush_pos->pos_in_unit = 0;
-+ /* NOTE(review): literal 0 is returned from a
-+ function returning squeeze_result - confirm
-+ which enumerator this is meant to be */
-+ return 0;
-+ }
-+ } else {
-+ block_stage = BLOCK_UNALLOCATED;
-+ protected = width;
-+ }
-+
-+ /*
-+ * look at previous unit if possible. If it is allocated, make
-+ * preceder more precise
-+ */
-+ if (coord->unit_pos &&
-+ (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
-+ reiser4_pos_hint(flush_pos)->blk =
-+ extent_get_start(ext - 1) +
-+ extent_get_width(ext - 1);
-+
-+ /* allocate new block numbers for protected nodes */
-+ extent_allocate_blocks(reiser4_pos_hint(flush_pos),
-+ protected,
-+ &first_allocated, &allocated,
-+ block_stage);
-+
-+ /* prepare extent which will be copied to left */
-+ reiser4_set_extent(&copy_extent, first_allocated, allocated);
-+
-+ result = put_unit_to_end(left, &key, &copy_extent);
-+ if (result == -E_NODE_FULL) {
-+ int target_block_stage;
-+
-+ /* free blocks which were just allocated */
-+ target_block_stage =
-+ (state ==
-+ ALLOCATED_EXTENT) ? BLOCK_FLUSH_RESERVED :
-+ BLOCK_UNALLOCATED;
-+ reiser4_dealloc_blocks(&first_allocated, &allocated,
-+ target_block_stage,
-+ BA_PERMANENT);
-+
-+ /* rewind the preceder. */
-+ flush_pos->preceder.blk = first_allocated;
-+ check_preceder(flush_pos->preceder.blk);
-+
-+ return SQUEEZE_TARGET_FULL;
-+ }
-+
-+ if (state == ALLOCATED_EXTENT) {
-+ /* free nodes which were relocated */
-+ reiser4_dealloc_blocks(&start, &allocated,
-+ BLOCK_ALLOCATED, BA_DEFER);
-+ }
-+
-+ /* assign new block numbers to protected nodes */
-+ assign_real_blocknrs(flush_pos, oid, index, allocated,
-+ first_allocated);
-+
-+ /* advance the key by the number of bytes just handled */
-+ set_key_offset(&key,
-+ get_key_offset(&key) +
-+ (allocated << current_blocksize_bits));
-+ } else {
-+ /*
-+ * overwrite: try to copy unit as it is to left neighbor and
-+ * make all first not flushprepped nodes overwrite nodes
-+ */
-+ reiser4_set_extent(&copy_extent, start, width);
-+ result = put_unit_to_end(left, &key, &copy_extent);
-+ if (result == -E_NODE_FULL)
-+ return SQUEEZE_TARGET_FULL;
-+
-+ if (state != HOLE_EXTENT)
-+ mark_jnodes_overwrite(flush_pos, oid, index, width);
-+ set_key_offset(&key,
-+ get_key_offset(&key) +
-+ (width << current_blocksize_bits));
-+ }
-+ *stop_key = key;
-+ return SQUEEZE_CONTINUE;
-+}
-+
-+/* builds in @key the key for offset @off of @inode by delegating to
-+ key_by_inode_and_offset_common(); returns whatever that function returns */
-+int key_by_offset_extent(struct inode *inode, loff_t off, reiser4_key * key)
-+{
-+ return key_by_inode_and_offset_common(inode, off, key);
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/extent.h linux-2.6.24/fs/reiser4/plugin/item/extent.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/extent.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/extent.h 2008-01-25 11:40:16.698169785 +0300
-@@ -0,0 +1,231 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __REISER4_EXTENT_H__
-+#define __REISER4_EXTENT_H__
-+
-+/* on disk extent: one unit of an extent item. Both fields are 8 bytes wide
-+ (cassert-ed in the setters below) and are kept little-endian; use the
-+ extent_get_*() / extent_set_*() accessors instead of touching them
-+ directly */
-+typedef struct {
-+ reiser4_dblock_nr start; /* first block; see extent_get_start() */
-+ reiser4_dblock_nr width; /* number of blocks; see extent_get_width() */
-+} reiser4_extent;
-+
-+/* counters of extent units and of blocks, kept separately per extent state.
-+ NOTE(review): presumably filled by item_stat_extent() - confirm */
-+struct extent_stat {
-+ int unallocated_units;
-+ int unallocated_blocks;
-+ int allocated_units;
-+ int allocated_blocks;
-+ int hole_units;
-+ int hole_blocks;
-+};
-+
-+/* extents in an extent item can be either holes, or unallocated or allocated
-+ extents. HOLE_EXTENT is the first enumerator and therefore has value 0 */
-+typedef enum {
-+ HOLE_EXTENT,
-+ UNALLOCATED_EXTENT,
-+ ALLOCATED_EXTENT
-+} extent_state;
-+
-+/* NOTE(review): presumably reserved values of the on-disk start field which
-+ mark an extent as hole / unallocated (extent_set_start() range-checks only
-+ start values greater than 1) - confirm against state_of_extent() */
-+#define HOLE_EXTENT_START 0
-+#define UNALLOCATED_EXTENT_START 1
-+#define UNALLOCATED_EXTENT_START2 2
-+
-+/* extent specific part of uf_coord_t; reached as uf_coord->extension.extent
-+ (see ext_coord_by_uf_coord()) */
-+struct extent_coord_extension {
-+ reiser4_block_nr pos_in_unit;
-+ reiser4_block_nr width; /* width of current unit */
-+ pos_in_node_t nr_units; /* number of units */
-+ int ext_offset; /* offset from the beginning of zdata() */
-+ unsigned long expected_page;
-+#if REISER4_DEBUG
-+ /* present in debugging builds only */
-+ reiser4_extent extent;
-+#endif
-+};
-+
-+/* inline accessors to set/get fields of on-disk extent */
-+
-+/* returns start field of @ext converted from little-endian to cpu order */
-+static inline reiser4_block_nr extent_get_start(const reiser4_extent * ext)
-+{
-+ return le64_to_cpu(ext->start);
-+}
-+
-+/* returns width field of @ext converted from little-endian to cpu order */
-+static inline reiser4_block_nr extent_get_width(const reiser4_extent * ext)
-+{
-+ return le64_to_cpu(ext->width);
-+}
-+
-+extern __u64 reiser4_current_block_count(void);
-+
-+/* stores @start into @ext in little-endian order. Values greater than 1 are
-+ asserted to be smaller than reiser4_current_block_count() */
-+static inline void
-+extent_set_start(reiser4_extent * ext, reiser4_block_nr start)
-+{
-+ cassert(sizeof(ext->start) == 8);
-+ assert("nikita-2510",
-+ ergo(start > 1, start < reiser4_current_block_count()));
-+ /* put_unaligned: @ext is not required to be naturally aligned */
-+ put_unaligned(cpu_to_le64(start), &ext->start);
-+}
-+
-+/* stores @width into @ext in little-endian order. @width has to be greater
-+ than 0; if start of @ext is greater than 1 the extent is asserted to end
-+ not beyond reiser4_current_block_count(). The second check reads the start
-+ field, so start has to be set before width */
-+static inline void
-+extent_set_width(reiser4_extent * ext, reiser4_block_nr width)
-+{
-+ cassert(sizeof(ext->width) == 8);
-+ assert("", width > 0);
-+ put_unaligned(cpu_to_le64(width), &ext->width);
-+ assert("nikita-2511",
-+ ergo(extent_get_start(ext) > 1,
-+ extent_get_start(ext) + width <=
-+ reiser4_current_block_count()));
-+}
-+
-+/* body of the item @coord is set to, as array of reiser4_extent. The item is
-+ asserted to be an extent item. @coord is evaluated more than once */
-+#define extent_item(coord) \
-+({ \
-+ assert("nikita-3143", item_is_extent(coord)); \
-+ ((reiser4_extent *)item_body_by_coord (coord)); \
-+})
-+
-+/* pointer to the unit (coord)->unit_pos of the extent item @coord is set to.
-+ @coord is evaluated more than once */
-+#define extent_by_coord(coord) \
-+({ \
-+ assert("nikita-3144", item_is_extent(coord)); \
-+ (extent_item (coord) + (coord)->unit_pos); \
-+})
-+
-+/* width of the extent unit @coord is set to. @coord is evaluated more than
-+ once */
-+#define width_by_coord(coord) \
-+({ \
-+ assert("nikita-3145", item_is_extent(coord)); \
-+ extent_get_width (extent_by_coord(coord)); \
-+})
-+
-+struct carry_cut_data;
-+struct carry_kill_data;
-+
-+/* plugin->u.item.b.* */
-+reiser4_key *max_key_inside_extent(const coord_t *, reiser4_key *);
-+int can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data *);
-+int mergeable_extent(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_extent(const coord_t *);
-+lookup_result lookup_extent(const reiser4_key *, lookup_bias, coord_t *);
-+void init_coord_extent(coord_t *);
-+int init_extent(coord_t *, reiser4_item_data *);
-+int paste_extent(coord_t *, reiser4_item_data *, carry_plugin_info *);
-+int can_shift_extent(unsigned free_space,
-+ coord_t * source, znode * target, shift_direction,
-+ unsigned *size, unsigned want);
-+void copy_units_extent(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction where_is_free_space,
-+ unsigned free_space);
-+int kill_hook_extent(const coord_t *, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *);
-+int create_hook_extent(const coord_t * coord, void *arg);
-+int cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+reiser4_key *unit_key_extent(const coord_t *, reiser4_key *);
-+reiser4_key *max_unit_key_extent(const coord_t *, reiser4_key *);
-+void print_extent(const char *, coord_t *);
-+int utmost_child_extent(const coord_t * coord, sideof side, jnode ** child);
-+int utmost_child_real_block_extent(const coord_t * coord, sideof side,
-+ reiser4_block_nr * block);
-+void item_stat_extent(const coord_t * coord, void *vp);
-+int reiser4_check_extent(const coord_t * coord, const char **error);
-+
-+/* plugin->u.item.s.file.* */
-+ssize_t reiser4_write_extent(struct file *, struct inode * inode,
-+ const char __user *, size_t, loff_t *);
-+int reiser4_read_extent(struct file *, flow_t *, hint_t *);
-+int reiser4_readpage_extent(void *, struct page *);
-+int reiser4_do_readpage_extent(reiser4_extent*, reiser4_block_nr, struct page*);
-+reiser4_key *append_key_extent(const coord_t *, reiser4_key *);
-+void init_coord_extension_extent(uf_coord_t *, loff_t offset);
-+int get_block_address_extent(const coord_t *, sector_t block,
-+ sector_t * result);
-+
-+/* these are used in flush.c
-+ FIXME-VS: should they be somewhere in item_plugin? */
-+int allocate_extent_item_in_place(coord_t *, lock_handle *, flush_pos_t * pos);
-+int allocate_and_copy_extent(znode * left, coord_t * right, flush_pos_t * pos,
-+ reiser4_key * stop_key);
-+
-+int extent_is_unallocated(const coord_t * item); /* True if this extent is unallocated (i.e., not a hole, not allocated). */
-+__u64 extent_unit_index(const coord_t * item); /* Block offset of this unit. */
-+__u64 extent_unit_width(const coord_t * item); /* Number of blocks in this unit. */
-+
-+/* plugin->u.item.f. */
-+int reiser4_scan_extent(flush_scan * scan);
-+extern int key_by_offset_extent(struct inode *, loff_t, reiser4_key *);
-+
-+reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
-+ int nr_extents);
-+reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr);
-+extent_state state_of_extent(reiser4_extent * ext);
-+void reiser4_set_extent(reiser4_extent *, reiser4_block_nr start,
-+ reiser4_block_nr width);
-+int reiser4_update_extent(struct inode *, jnode *, loff_t pos,
-+ int *plugged_hole);
-+
-+#include "../../coord.h"
-+#include "../../lock.h"
-+#include "../../tap.h"
-+
-+/* parameters and working storage of reiser4_replace_extent(). The first
-+ group of fields is filled by the caller, the second one belongs to
-+ reiser4_replace_extent() itself */
-+struct replace_handle {
-+ /* these are to be set before calling reiser4_replace_extent */
-+ coord_t *coord;
-+ lock_handle *lh;
-+ reiser4_key key;
-+ reiser4_key *pkey;
-+ reiser4_extent overwrite;
-+ reiser4_extent new_extents[2];
-+ int nr_new_extents; /* NOTE(review): presumably number of used entries of new_extents[] - confirm */
-+ unsigned flags;
-+
-+ /* these are used by reiser4_replace_extent */
-+ reiser4_item_data item;
-+ coord_t coord_after;
-+ lock_handle lh_after;
-+ tap_t watch;
-+ reiser4_key paste_key;
-+#if REISER4_DEBUG
-+ /* present in debugging builds only */
-+ reiser4_extent orig_ext;
-+ reiser4_key tmp;
-+#endif
-+};
-+
-+/* this structure is kmalloced before calling make_extent to avoid excessive
-+ stack consumption on plug_hole->reiser4_replace_extent. The union member
-+ for the append case is an empty struct; the replace case embeds a whole
-+ struct replace_handle */
-+struct make_extent_handle {
-+ uf_coord_t *uf_coord;
-+ reiser4_block_nr blocknr;
-+ int created;
-+ struct inode *inode;
-+ union {
-+ struct {
-+ } append;
-+ struct replace_handle replace;
-+ } u;
-+};
-+
-+int reiser4_replace_extent(struct replace_handle *,
-+ int return_inserted_position);
-+lock_handle *znode_lh(znode *);
-+
-+/* the reiser4 repacker support */
-+struct repacker_cursor;
-+extern int process_extent_backward_for_repacking(tap_t *,
-+ struct repacker_cursor *);
-+extern int mark_extent_for_repacking(tap_t *, int);
-+
-+/* address of the coord embedded into @uf_coord */
-+#define coord_by_uf_coord(uf_coord) (&((uf_coord)->coord))
-+/* address of the extent specific extension embedded into @uf_coord */
-+#define ext_coord_by_uf_coord(uf_coord) (&((uf_coord)->extension.extent))
-+
-+/* __REISER4_EXTENT_H__ */
-+#endif
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/extent_item_ops.c linux-2.6.24/fs/reiser4/plugin/item/extent_item_ops.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/extent_item_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/extent_item_ops.c 2008-01-25 11:39:07.016228297 +0300
-@@ -0,0 +1,889 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../tree_walk.h" /* check_sibling_list() */
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+
-+#include <linux/quotaops.h>
-+
-+/* item_plugin->b.max_key_inside
-+   stores in @key the key of the item @coord is set to with its offset
-+   replaced by the offset of reiser4_max_key(); returns @key */
-+reiser4_key *max_key_inside_extent(const coord_t * coord, reiser4_key * key)
-+{
-+	const reiser4_key *largest = reiser4_max_key();
-+
-+	item_key_by_coord(coord, key);
-+	set_key_offset(key, get_key_offset(largest));
-+	return key;
-+}
-+
-+/* item_plugin->b.can_contain_key
-+   returns 1 if the item @coord is set to has the item plugin of @data and
-+   its key agrees with @key in locality, objectid and ordering; 0 otherwise */
-+int
-+can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
-+		       const reiser4_item_data * data)
-+{
-+	reiser4_key own_key;
-+
-+	/* items of different plugins never match */
-+	if (item_plugin_by_coord(coord) != data->iplug)
-+		return 0;
-+
-+	item_key_by_coord(coord, &own_key);
-+	return get_key_locality(key) == get_key_locality(&own_key) &&
-+	    get_key_objectid(key) == get_key_objectid(&own_key) &&
-+	    get_key_ordering(key) == get_key_ordering(&own_key);
-+}
-+
-+/* item_plugin->b.mergeable
-+   first item is of extent type (asserted). Returns 1 if @p2 is an extent
-+   item as well, both keys agree in locality, objectid, ordering and type,
-+   and the key offset of @p2 equals the key offset of @p1 plus the size of
-+   all units of @p1; returns 0 otherwise */
-+/* Audited by: green(2002.06.13) */
-+int mergeable_extent(const coord_t * p1, const coord_t * p2)
-+{
-+	reiser4_key left_key, right_key;
-+
-+	assert("vs-299", item_id_by_coord(p1) == EXTENT_POINTER_ID);
-+	/* FIXME-VS: Which is it? Assert or return 0 */
-+	if (item_id_by_coord(p2) != EXTENT_POINTER_ID)
-+		return 0;
-+
-+	item_key_by_coord(p1, &left_key);
-+	item_key_by_coord(p2, &right_key);
-+
-+	/* same object first, then check that @p2 starts where @p1 ends */
-+	return get_key_locality(&left_key) == get_key_locality(&right_key) &&
-+	    get_key_objectid(&left_key) == get_key_objectid(&right_key) &&
-+	    get_key_ordering(&left_key) == get_key_ordering(&right_key) &&
-+	    get_key_type(&left_key) == get_key_type(&right_key) &&
-+	    get_key_offset(&left_key) +
-+	    reiser4_extent_size(p1, nr_units_extent(p1)) ==
-+	    get_key_offset(&right_key);
-+}
-+
-+/* item_plugin->b.nr_units
-+   number of units is the item length divided by the size of one extent */
-+pos_in_node_t nr_units_extent(const coord_t * coord)
-+{
-+	const size_t unit_size = sizeof(reiser4_extent);
-+
-+	/* an extent item consists of whole extents only */
-+	assert("vs-1424", (item_length_by_coord(coord) % unit_size) == 0);
-+	return item_length_by_coord(coord) / unit_size;
-+}
-+
-+/* item_plugin->b.lookup
-+ finds the unit of the extent item @coord is set to which covers the offset
-+ of @key. If there is one, @coord is set to it (AT_UNIT); otherwise @coord is
-+ set after the last unit (AFTER_UNIT). CBK_COORD_FOUND is returned in both
-+ cases. @bias is not used */
-+lookup_result
-+lookup_extent(const reiser4_key * key, lookup_bias bias UNUSED_ARG,
-+ coord_t * coord)
-+{ /* znode and item_pos are
-+ set to an extent item to
-+ look through */
-+ reiser4_key item_key;
-+ reiser4_block_nr lookuped, offset;
-+ unsigned i, nr_units;
-+ reiser4_extent *ext;
-+ unsigned blocksize;
-+ unsigned char blocksize_bits;
-+
-+ item_key_by_coord(coord, &item_key);
-+ /* byte offset the first unit starts at */
-+ offset = get_key_offset(&item_key);
-+
-+ /* key we are looking for must be greater than key of item @coord */
-+ assert("vs-414", keygt(key, &item_key));
-+
-+ /* max_key_inside_extent() overwrites item_key; its offset was already
-+ saved in offset above */
-+ assert("umka-99945",
-+ !keygt(key, max_key_inside_extent(coord, &item_key)));
-+
-+ ext = extent_item(coord);
-+ assert("vs-1350", (char *)ext == (zdata(coord->node) + coord->offset));
-+
-+ /* blocksize is assigned here but not read anywhere in this function */
-+ blocksize = current_blocksize;
-+ blocksize_bits = current_blocksize_bits;
-+
-+ /* offset we are looking for */
-+ lookuped = get_key_offset(key);
-+
-+ nr_units = nr_units_extent(coord);
-+ /* go through all extents until the one which address given offset */
-+ for (i = 0; i < nr_units; i++, ext++) {
-+ /* offset becomes the first byte after unit i */
-+ offset += (extent_get_width(ext) << blocksize_bits);
-+ if (offset > lookuped) {
-+ /* desired byte is somewhere in this extent */
-+ coord->unit_pos = i;
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ }
-+ }
-+
-+ /* set coord after last unit */
-+ coord->unit_pos = nr_units - 1;
-+ coord->between = AFTER_UNIT;
-+ return CBK_COORD_FOUND;
-+}
-+
-+/* item_plugin->b.paste
-+ item @coord is set to has been appended with @data->length of free
-+ space. data->data contains data to be pasted into the item in position
-+ @coord->in_item.unit_pos. It must fit into that free space.
-+ @coord must be set between units.
-+ On return @coord is set to the first of pasted units (AT_UNIT). @info is
-+ not used. Always returns 0
-+*/
-+int
-+paste_extent(coord_t * coord, reiser4_item_data * data,
-+ carry_plugin_info * info UNUSED_ARG)
-+{
-+ unsigned old_nr_units;
-+ reiser4_extent *ext;
-+ int item_length;
-+
-+ ext = extent_item(coord);
-+ /* item length already includes the space for the new units */
-+ item_length = item_length_by_coord(coord);
-+ old_nr_units = (item_length - data->length) / sizeof(reiser4_extent);
-+
-+ /* this is also used to copy extent into newly created item, so
-+ old_nr_units could be 0 */
-+ assert("vs-260", item_length >= data->length);
-+
-+ /* make sure that coord is set properly */
-+ assert("vs-35",
-+ ((!coord_is_existing_unit(coord))
-+ || (!old_nr_units && !coord->unit_pos)));
-+
-+ /* first unit to be moved */
-+ switch (coord->between) {
-+ case AFTER_UNIT:
-+ coord->unit_pos++;
-+ /* fall through: coord becomes AT_UNIT as for BEFORE_UNIT */
-+ case BEFORE_UNIT:
-+ coord->between = AT_UNIT;
-+ break;
-+ case AT_UNIT:
-+ /* allowed only for the first unit of a new, empty item */
-+ assert("vs-331", !old_nr_units && !coord->unit_pos);
-+ break;
-+ default:
-+ impossible("vs-330", "coord is set improperly");
-+ }
-+
-+ /* prepare space for new units */
-+ /* old units starting from unit_pos are moved towards the end of the
-+ item by the number of units being pasted */
-+ memmove(ext + coord->unit_pos + data->length / sizeof(reiser4_extent),
-+ ext + coord->unit_pos,
-+ (old_nr_units - coord->unit_pos) * sizeof(reiser4_extent));
-+
-+ /* copy new data from kernel space */
-+ assert("vs-556", data->user == 0);
-+ memcpy(ext + coord->unit_pos, data->data, (unsigned)data->length);
-+
-+ /* after paste @coord is set to first of pasted units */
-+ assert("vs-332", coord_is_existing_unit(coord));
-+ assert("vs-333",
-+ !memcmp(data->data, extent_by_coord(coord),
-+ (unsigned)data->length));
-+ return 0;
-+}
-+
-+/* item_plugin->b.can_shift
-+ calculates how much of the extent item @source can be shifted.
-+ @free_space: number of bytes available in the target
-+ @want: maximal number of units the caller wants to shift
-+ @size: set to the number of bytes to shift; always whole units
-+ @target and @pend are not used.
-+ Returns the number of units to shift, i.e. *@size / sizeof(reiser4_extent) */
-+int
-+can_shift_extent(unsigned free_space, coord_t * source,
-+ znode * target UNUSED_ARG, shift_direction pend UNUSED_ARG,
-+ unsigned *size, unsigned want)
-+{
-+ *size = item_length_by_coord(source);
-+ if (*size > free_space)
-+ /* never split a unit of extent item */
-+ *size = free_space - free_space % sizeof(reiser4_extent);
-+
-+ /* we can shift *size bytes, calculate how many do we want to shift */
-+ if (*size > want * sizeof(reiser4_extent))
-+ *size = want * sizeof(reiser4_extent);
-+
-+ /* *size is unsigned and sizeof yields size_t, hence %u and %zu */
-+ if (*size % sizeof(reiser4_extent) != 0)
-+ impossible("vs-119", "Wrong extent size: %u %zu", *size,
-+ sizeof(reiser4_extent));
-+ return *size / sizeof(reiser4_extent);
-+
-+}
-+
-+/* item_plugin->b.copy_units
-+ copies @count units of item @source starting from unit @from into item
-+ @target. @free_space is the number of bytes to copy and has to be equal to
-+ @count * sizeof(reiser4_extent).
-+ SHIFT_LEFT: @from has to be 0; units are written to the end of @target.
-+ otherwise: the units have to be the last ones of @source; they are written
-+ to the beginning of @target and key of @target is updated */
-+void
-+copy_units_extent(coord_t * target, coord_t * source,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space, unsigned free_space)
-+{
-+ char *from_ext, *to_ext;
-+
-+ assert("vs-217", free_space == count * sizeof(reiser4_extent));
-+
-+ from_ext = item_body_by_coord(source);
-+ to_ext = item_body_by_coord(target);
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ assert("vs-215", from == 0);
-+
-+ /* At this moment, item length was already updated in the item
-+ header by shifting code, hence nr_units_extent() will
-+ return "new" number of units---one we obtain after copying
-+ units.
-+ */
-+ to_ext +=
-+ (nr_units_extent(target) - count) * sizeof(reiser4_extent);
-+ } else {
-+ reiser4_key key;
-+ coord_t coord;
-+
-+ assert("vs-216",
-+ from + count == coord_last_unit_pos(source) + 1);
-+
-+ /* the last free_space bytes of @source are copied */
-+ from_ext += item_length_by_coord(source) - free_space;
-+
-+ /* new units are inserted before first unit in an item,
-+ therefore, we have to update item key */
-+ coord = *source;
-+ coord.unit_pos = from;
-+ unit_key_extent(&coord, &key);
-+
-+ node_plugin_by_node(target->node)->update_item_key(target, &key,
-+ NULL /*info */);
-+ }
-+
-+ memcpy(to_ext, from_ext, free_space);
-+}
-+
-+/* item_plugin->b.create_hook
-+ @coord is set to the extent item just created.
-+ @arg, if not NULL, is used as coord_t * whose node is asserted to be on the
-+ leaf level; it selects the leaf node whose right delimiting key is set to
-+ the key of the new item. NOTE(review): this comment used to describe @arg
-+ as a znode, but the code treats it as a coord_t * - confirm against callers.
-+ Always returns 0 */
-+int create_hook_extent(const coord_t * coord, void *arg)
-+{
-+ coord_t *child_coord;
-+ znode *node;
-+ reiser4_key key;
-+ reiser4_tree *tree;
-+
-+ if (!arg)
-+ return 0;
-+
-+ child_coord = arg;
-+ tree = znode_get_tree(coord->node);
-+
-+ assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL);
-+
-+ /* tree lock is taken before dk lock; they are released in reverse
-+ order below */
-+ write_lock_tree(tree);
-+ write_lock_dk(tree);
-+ /* find a node on the left level for which right delimiting key has to
-+ be updated */
-+ if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) {
-+ assert("vs-411", znode_is_left_connected(child_coord->node));
-+ /* may be NULL, hence the check below */
-+ node = child_coord->node->left;
-+ } else {
-+ assert("vs-412", coord_wrt(child_coord) == COORD_ON_THE_RIGHT);
-+ node = child_coord->node;
-+ assert("nikita-3314", node != NULL);
-+ }
-+
-+ if (node != NULL) {
-+ znode_set_rd_key(node, item_key_by_coord(coord, &key));
-+
-+ assert("nikita-3282", check_sibling_list(node));
-+ /* break sibling links */
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && node->right) {
-+ ON_DEBUG(node->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ node->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ node->right->left = NULL;
-+ node->right = NULL;
-+ }
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+ return 0;
-+}
-+
-+/* return values of kill_hook_extent(): which part of the item is removed */
-+#define ITEM_TAIL_KILLED 0
-+#define ITEM_HEAD_KILLED 1
-+#define ITEM_KILLED 2
-+
-+/* item_plugin->b.kill_hook
-+ this is called when @count units starting from @from-th one are going to be removed
-+ @coord: extent item units are removed from; its node has to be write locked
-+ @kdata: kill parameters; kdata->buf provides scratch space for five keys
-+ followed by two coords, kdata->params.from_key/to_key (when from_key is
-+ set) give the key range being removed
-+ Updates delimiting keys / sibling links of neighbors as needed, invalidates
-+ pages of the removed range and frees the blocks of the removed units.
-+ Returns ITEM_KILLED, ITEM_TAIL_KILLED or ITEM_HEAD_KILLED
-+ */
-+int
-+kill_hook_extent(const coord_t * coord, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *kdata)
-+{
-+ reiser4_extent *ext;
-+ reiser4_block_nr start, length;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ struct inode *inode;
-+ reiser4_tree *tree;
-+ pgoff_t from_off, to_off, offset, skip;
-+ int retval;
-+
-+ /* these are located in memory kmalloc-ed by kill_node_content */
-+ reiser4_key *min_item_key, *max_item_key, *from_key, *to_key, *key;
-+ coord_t *dup, *next;
-+
-+ assert("zam-811", znode_is_write_locked(coord->node));
-+ assert("nikita-3315", kdata != NULL);
-+ assert("vs-34", kdata->buf != NULL);
-+
-+ /* map structures to kdata->buf */
-+ /* layout: five reiser4_key-s immediately followed by two coord_t-s */
-+ min_item_key = (reiser4_key *) (kdata->buf);
-+ max_item_key = min_item_key + 1;
-+ from_key = max_item_key + 1;
-+ to_key = from_key + 1;
-+ key = to_key + 1;
-+ dup = (coord_t *) (key + 1);
-+ next = dup + 1;
-+
-+ item_key_by_coord(coord, min_item_key);
-+ max_item_key_by_coord(coord, max_item_key);
-+
-+ if (kdata->params.from_key) {
-+ /* key range is given by the caller */
-+ pfrom_key = kdata->params.from_key;
-+ pto_key = kdata->params.to_key;
-+ } else {
-+ /* derive the key range from units @from .. @from + @count - 1 */
-+ assert("vs-1549", from == coord->unit_pos);
-+ unit_key_by_coord(coord, from_key);
-+ pfrom_key = from_key;
-+
-+ coord_dup(dup, coord);
-+ dup->unit_pos = from + count - 1;
-+ max_unit_key_by_coord(dup, to_key);
-+ pto_key = to_key;
-+ }
-+
-+ if (!keylt(pto_key, max_item_key)) {
-+ if (!keygt(pfrom_key, min_item_key)) {
-+ znode *left, *right;
-+
-+ /* item is to be removed completely */
-+ assert("nikita-3316", kdata->left != NULL
-+ && kdata->right != NULL);
-+
-+ left = kdata->left->node;
-+ right = kdata->right->node;
-+
-+ tree = current_tree;
-+ /* we have to do two things:
-+ *
-+ * 1. link left and right formatted neighbors of
-+ * extent being removed, and
-+ *
-+ * 2. update their delimiting keys.
-+ *
-+ * atomicity of these operations is protected by
-+ * taking dk-lock and tree-lock.
-+ */
-+ /* if neighbors of item being removed are znodes -
-+ * link them */
-+ write_lock_tree(tree);
-+ write_lock_dk(tree);
-+ link_left_and_right(left, right);
-+ if (left) {
-+ /* update right delimiting key of left
-+ * neighbor of extent item */
-+ /*coord_t next;
-+ reiser4_key key; */
-+
-+ coord_dup(next, coord);
-+
-+ /* if there is no next item in the node, the right
-+ delimiting key of the node is used */
-+ if (coord_next_item(next))
-+ *key = *znode_get_rd_key(coord->node);
-+ else
-+ item_key_by_coord(next, key);
-+ znode_set_rd_key(left, key);
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+
-+ from_off =
-+ get_key_offset(min_item_key) >> PAGE_CACHE_SHIFT;
-+ to_off =
-+ (get_key_offset(max_item_key) +
-+ 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_KILLED;
-+ } else {
-+ /* tail of item is to be removed */
-+ /* from_off is rounded up to the next page boundary */
-+ from_off =
-+ (get_key_offset(pfrom_key) + PAGE_CACHE_SIZE -
-+ 1) >> PAGE_CACHE_SHIFT;
-+ to_off =
-+ (get_key_offset(max_item_key) +
-+ 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_TAIL_KILLED;
-+ }
-+ } else {
-+ /* head of item is to be removed */
-+ assert("vs-1571", keyeq(pfrom_key, min_item_key));
-+ assert("vs-1572",
-+ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ 0);
-+ assert("vs-1573",
-+ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE -
-+ 1)) == 0);
-+
-+ if (kdata->left->node) {
-+ /* update right delimiting key of left neighbor of extent item */
-+ /*reiser4_key key; */
-+
-+ *key = *pto_key;
-+ set_key_offset(key, get_key_offset(pto_key) + 1);
-+
-+ write_lock_dk(current_tree);
-+ znode_set_rd_key(kdata->left->node, key);
-+ write_unlock_dk(current_tree);
-+ }
-+
-+ from_off = get_key_offset(pfrom_key) >> PAGE_CACHE_SHIFT;
-+ to_off = (get_key_offset(pto_key) + 1) >> PAGE_CACHE_SHIFT;
-+ retval = ITEM_HEAD_KILLED;
-+ }
-+
-+ /* from here on from_off and to_off are page indices: [from_off, to_off)
-+ is the range being killed */
-+ inode = kdata->inode;
-+ assert("vs-1545", inode != NULL);
-+ if (inode != NULL)
-+ /* take care of pages and jnodes corresponding to part of item being killed */
-+ reiser4_invalidate_pages(inode->i_mapping, from_off,
-+ to_off - from_off,
-+ kdata->params.truncate);
-+
-+ ext = extent_item(coord) + from;
-+ /* page index the @from-th unit starts at */
-+ offset =
-+ (get_key_offset(min_item_key) +
-+ reiser4_extent_size(coord, from)) >> PAGE_CACHE_SHIFT;
-+
-+ assert("vs-1551", from_off >= offset);
-+ assert("vs-1552", from_off - offset <= extent_get_width(ext));
-+ /* number of blocks at the beginning of the first unit which stay */
-+ skip = from_off - offset;
-+ offset = from_off;
-+
-+ while (offset < to_off) {
-+ length = extent_get_width(ext) - skip;
-+ if (state_of_extent(ext) == HOLE_EXTENT) {
-+ /* holes own no blocks: nothing to free */
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ continue;
-+ }
-+
-+ if (offset + length > to_off) {
-+ length = to_off - offset;
-+ }
-+
-+ /* NOTE(review): inode is used here without the NULL check
-+ made before reiser4_invalidate_pages() above - confirm
-+ inode can not be NULL */
-+ DQUOT_FREE_BLOCK_NODIRTY(inode, length);
-+
-+ if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
-+ /* some jnodes corresponding to this unallocated extent */
-+ fake_allocated2free(length, 0 /* unformatted */ );
-+
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ continue;
-+ }
-+
-+ assert("vs-1218", state_of_extent(ext) == ALLOCATED_EXTENT);
-+
-+ if (length != 0) {
-+ start = extent_get_start(ext) + skip;
-+
-+ /* BA_DEFER bit parameter is turned on because blocks which get freed are not safe to be freed
-+ immediately */
-+ reiser4_dealloc_blocks(&start, &length,
-+ 0 /* not used */ ,
-+ BA_DEFER
-+ /* unformatted with defer */ );
-+ }
-+ skip = 0;
-+ offset += length;
-+ ext++;
-+ }
-+ return retval;
-+}
-+
-+/* item_plugin->b.kill_units */
-+int
-+kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ reiser4_extent *ext;
-+ reiser4_key item_key;
-+ pos_in_node_t count;
-+ reiser4_key from_key, to_key;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ loff_t off;
-+ int result;
-+
-+ assert("vs-1541",
-+ ((kdata->params.from_key == NULL && kdata->params.to_key == NULL)
-+ || (kdata->params.from_key != NULL
-+ && kdata->params.to_key != NULL)));
-+
-+ if (kdata->params.from_key) {
-+ pfrom_key = kdata->params.from_key;
-+ pto_key = kdata->params.to_key;
-+ } else {
-+ coord_t dup;
-+
-+ /* calculate key range of kill */
-+ assert("vs-1549", from == coord->unit_pos);
-+ unit_key_by_coord(coord, &from_key);
-+ pfrom_key = &from_key;
-+
-+ coord_dup(&dup, coord);
-+ dup.unit_pos = to;
-+ max_unit_key_by_coord(&dup, &to_key);
-+ pto_key = &to_key;
-+ }
-+
-+ item_key_by_coord(coord, &item_key);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_key max_item_key;
-+
-+ max_item_key_by_coord(coord, &max_item_key);
-+
-+ if (new_first) {
-+ /* head of item is to be cut */
-+ assert("vs-1542", keyeq(pfrom_key, &item_key));
-+ assert("vs-1538", keylt(pto_key, &max_item_key));
-+ } else {
-+ /* tail of item is to be cut */
-+ assert("vs-1540", keygt(pfrom_key, &item_key));
-+ assert("vs-1543", !keylt(pto_key, &max_item_key));
-+ }
-+ }
-+#endif
-+
-+ if (smallest_removed)
-+ *smallest_removed = *pfrom_key;
-+
-+ if (new_first) {
-+ /* item head is cut. Item key will change. This new key is calculated here */
-+ assert("vs-1556",
-+ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ (PAGE_CACHE_SIZE - 1));
-+ *new_first = *pto_key;
-+ set_key_offset(new_first, get_key_offset(new_first) + 1);
-+ }
-+
-+ count = to - from + 1;
-+ result = kill_hook_extent(coord, from, count, kdata);
-+ if (result == ITEM_TAIL_KILLED) {
-+ assert("vs-1553",
-+ get_key_offset(pfrom_key) >=
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from));
-+ off =
-+ get_key_offset(pfrom_key) -
-+ (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from));
-+ if (off) {
-+ /* unit @from is to be cut partially. Its width decreases */
-+ ext = extent_item(coord) + from;
-+ extent_set_width(ext,
-+ (off + PAGE_CACHE_SIZE -
-+ 1) >> PAGE_CACHE_SHIFT);
-+ count--;
-+ }
-+ } else {
-+ __u64 max_to_offset;
-+ __u64 rest;
-+
-+ assert("vs-1575", result == ITEM_HEAD_KILLED);
-+ assert("", from == 0);
-+ assert("",
-+ ((get_key_offset(pto_key) + 1) & (PAGE_CACHE_SIZE -
-+ 1)) == 0);
-+ assert("",
-+ get_key_offset(pto_key) + 1 >
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to));
-+ max_to_offset =
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1;
-+ assert("", get_key_offset(pto_key) <= max_to_offset);
-+
-+ rest =
-+ (max_to_offset -
-+ get_key_offset(pto_key)) >> PAGE_CACHE_SHIFT;
-+ if (rest) {
-+ /* unit @to is to be cut partially */
-+ ext = extent_item(coord) + to;
-+
-+ assert("", extent_get_width(ext) > rest);
-+
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ (extent_get_width(ext) -
-+ rest));
-+
-+ extent_set_width(ext, rest);
-+ count--;
-+ }
-+ }
-+ return count * sizeof(reiser4_extent);
-+}
-+
-+/* item_plugin->b.cut_units
-+ this is too similar to kill_units_extent */
-+int
-+cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *cdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ reiser4_extent *ext;
-+ reiser4_key item_key;
-+ pos_in_node_t count;
-+ reiser4_key from_key, to_key;
-+ const reiser4_key *pfrom_key, *pto_key;
-+ loff_t off;
-+
-+ assert("vs-1541",
-+ ((cdata->params.from_key == NULL && cdata->params.to_key == NULL)
-+ || (cdata->params.from_key != NULL
-+ && cdata->params.to_key != NULL)));
-+
-+ if (cdata->params.from_key) {
-+ pfrom_key = cdata->params.from_key;
-+ pto_key = cdata->params.to_key;
-+ } else {
-+ coord_t dup;
-+
-+ /* calculate key range of kill */
-+ coord_dup(&dup, coord);
-+ dup.unit_pos = from;
-+ unit_key_by_coord(&dup, &from_key);
-+
-+ dup.unit_pos = to;
-+ max_unit_key_by_coord(&dup, &to_key);
-+
-+ pfrom_key = &from_key;
-+ pto_key = &to_key;
-+ }
-+
-+ assert("vs-1555",
-+ (get_key_offset(pfrom_key) & (PAGE_CACHE_SIZE - 1)) == 0);
-+ assert("vs-1556",
-+ (get_key_offset(pto_key) & (PAGE_CACHE_SIZE - 1)) ==
-+ (PAGE_CACHE_SIZE - 1));
-+
-+ item_key_by_coord(coord, &item_key);
-+
-+#if REISER4_DEBUG
-+ {
-+ reiser4_key max_item_key;
-+
-+ assert("vs-1584",
-+ get_key_locality(pfrom_key) ==
-+ get_key_locality(&item_key));
-+ assert("vs-1585",
-+ get_key_type(pfrom_key) == get_key_type(&item_key));
-+ assert("vs-1586",
-+ get_key_objectid(pfrom_key) ==
-+ get_key_objectid(&item_key));
-+ assert("vs-1587",
-+ get_key_ordering(pfrom_key) ==
-+ get_key_ordering(&item_key));
-+
-+ max_item_key_by_coord(coord, &max_item_key);
-+
-+ if (new_first != NULL) {
-+ /* head of item is to be cut */
-+ assert("vs-1542", keyeq(pfrom_key, &item_key));
-+ assert("vs-1538", keylt(pto_key, &max_item_key));
-+ } else {
-+ /* tail of item is to be cut */
-+ assert("vs-1540", keygt(pfrom_key, &item_key));
-+ assert("vs-1543", keyeq(pto_key, &max_item_key));
-+ }
-+ }
-+#endif
-+
-+ if (smallest_removed)
-+ *smallest_removed = *pfrom_key;
-+
-+ if (new_first) {
-+ /* item head is cut. Item key will change. This new key is calculated here */
-+ *new_first = *pto_key;
-+ set_key_offset(new_first, get_key_offset(new_first) + 1);
-+ }
-+
-+ count = to - from + 1;
-+
-+ assert("vs-1553",
-+ get_key_offset(pfrom_key) >=
-+ get_key_offset(&item_key) + reiser4_extent_size(coord, from));
-+ off =
-+ get_key_offset(pfrom_key) - (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, from));
-+ if (off) {
-+ /* tail of unit @from is to be cut partially. Its width decreases */
-+ assert("vs-1582", new_first == NULL);
-+ ext = extent_item(coord) + from;
-+ extent_set_width(ext, off >> PAGE_CACHE_SHIFT);
-+ count--;
-+ }
-+
-+ assert("vs-1554",
-+ get_key_offset(pto_key) <=
-+ get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1);
-+ off =
-+ (get_key_offset(&item_key) +
-+ reiser4_extent_size(coord, to + 1) - 1) -
-+ get_key_offset(pto_key);
-+ if (off) {
-+ /* @to_key is smaller than max key of unit @to. Unit @to will not be removed. It gets start increased
-+ and width decreased. */
-+ assert("vs-1583", (off & (PAGE_CACHE_SIZE - 1)) == 0);
-+ ext = extent_item(coord) + to;
-+ if (state_of_extent(ext) == ALLOCATED_EXTENT)
-+ extent_set_start(ext,
-+ extent_get_start(ext) +
-+ (extent_get_width(ext) -
-+ (off >> PAGE_CACHE_SHIFT)));
-+
-+ extent_set_width(ext, (off >> PAGE_CACHE_SHIFT));
-+ count--;
-+ }
-+ return count * sizeof(reiser4_extent);
-+}
-+
-+/* item_plugin->b.unit_key */
-+reiser4_key *unit_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-300", coord_is_existing_unit(coord));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key,
-+ (get_key_offset(key) +
-+ reiser4_extent_size(coord, coord->unit_pos)));
-+
-+ return key;
-+}
-+
-+/* item_plugin->b.max_unit_key */
-+reiser4_key *max_unit_key_extent(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-300", coord_is_existing_unit(coord));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key,
-+ (get_key_offset(key) +
-+ reiser4_extent_size(coord, coord->unit_pos + 1) - 1));
-+ return key;
-+}
-+
-+/* item_plugin->b.estimate
-+ item_plugin->b.item_data_by_flow */
-+
-+#if REISER4_DEBUG
-+
-+/* item_plugin->b.check
-+ used for debugging, every item should have here the most complete
-+ possible check of the consistency of the item that the inventor can
-+ construct
-+*/
-+int reiser4_check_extent(const coord_t * coord /* coord of item to check */,
-+ const char **error /* where to store error message */)
-+{
-+ reiser4_extent *ext, *first;
-+ unsigned i, j;
-+ reiser4_block_nr start, width, blk_cnt;
-+ unsigned num_units;
-+ reiser4_tree *tree;
-+ oid_t oid;
-+ reiser4_key key;
-+ coord_t scan;
-+
-+ assert("vs-933", REISER4_DEBUG);
-+
-+ if (znode_get_level(coord->node) != TWIG_LEVEL) {
-+ *error = "Extent on the wrong level";
-+ return -1;
-+ }
-+ if (item_length_by_coord(coord) % sizeof(reiser4_extent) != 0) {
-+ *error = "Wrong item size";
-+ return -1;
-+ }
-+ ext = first = extent_item(coord);
-+ blk_cnt = reiser4_block_count(reiser4_get_current_sb());
-+ num_units = coord_num_units(coord);
-+ tree = znode_get_tree(coord->node);
-+ item_key_by_coord(coord, &key);
-+ oid = get_key_objectid(&key);
-+ coord_dup(&scan, coord);
-+
-+ for (i = 0; i < num_units; ++i, ++ext) {
-+ __u64 index;
-+
-+ scan.unit_pos = i;
-+ index = extent_unit_index(&scan);
-+
-+#if 0
-+ /* check that all jnodes are present for the unallocated
-+ * extent */
-+ if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
-+ for (j = 0; j < extent_get_width(ext); j++) {
-+ jnode *node;
-+
-+ node = jlookup(tree, oid, index + j);
-+ if (node == NULL) {
-+ print_coord("scan", &scan, 0);
-+ *error = "Jnode missing";
-+ return -1;
-+ }
-+ jput(node);
-+ }
-+ }
-+#endif
-+
-+ start = extent_get_start(ext);
-+ if (start < 2)
-+ continue;
-+ /* extent is allocated one */
-+ width = extent_get_width(ext);
-+ if (start >= blk_cnt) {
-+ *error = "Start too large";
-+ return -1;
-+ }
-+ if (start + width > blk_cnt) {
-+ *error = "End too large";
-+ return -1;
-+ }
-+ /* make sure that this extent does not overlap with other
-+ allocated extents extents */
-+ for (j = 0; j < i; j++) {
-+ if (state_of_extent(first + j) != ALLOCATED_EXTENT)
-+ continue;
-+ if (!
-+ ((extent_get_start(ext) >=
-+ extent_get_start(first + j) +
-+ extent_get_width(first + j))
-+ || (extent_get_start(ext) +
-+ extent_get_width(ext) <=
-+ extent_get_start(first + j)))) {
-+ *error = "Extent overlaps with others";
-+ return -1;
-+ }
-+ }
-+
-+ }
-+
-+ return 0;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/internal.c linux-2.6.24/fs/reiser4/plugin/item/internal.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/internal.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/internal.c 2008-01-25 11:39:07.020229327 +0300
-@@ -0,0 +1,396 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Implementation of internal-item plugin methods. */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "internal.h"
-+#include "item.h"
-+#include "../node/node.h"
-+#include "../plugin.h"
-+#include "../../jnode.h"
-+#include "../../znode.h"
-+#include "../../tree_walk.h"
-+#include "../../tree_mod.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../block_alloc.h"
-+
-+/* see internal.h for explanation */
-+
-+/* plugin->u.item.b.mergeable */
-+int mergeable_internal(const coord_t * p1 UNUSED_ARG /* first item */ ,
-+ const coord_t * p2 UNUSED_ARG /* second item */ )
-+{
-+ /* internal items are not mergeable */
-+ return 0;
-+}
-+
-+/* ->lookup() method for internal items */
-+lookup_result lookup_internal(const reiser4_key * key /* key to look up */ ,
-+ lookup_bias bias UNUSED_ARG /* lookup bias */ ,
-+ coord_t * coord /* coord of item */ )
-+{
-+ reiser4_key ukey;
-+
-+ switch (keycmp(unit_key_by_coord(coord, &ukey), key)) {
-+ default:
-+ impossible("", "keycmp()?!");
-+ case LESS_THAN:
-+ /* FIXME-VS: AFTER_ITEM used to be here. But with new coord
-+ item plugin can not be taken using coord set this way */
-+ assert("vs-681", coord->unit_pos == 0);
-+ coord->between = AFTER_UNIT;
-+ case EQUAL_TO:
-+ return CBK_COORD_FOUND;
-+ case GREATER_THAN:
-+ return CBK_COORD_NOTFOUND;
-+ }
-+}
-+
-+/* return body of internal item at @coord */
-+static internal_item_layout *internal_at(const coord_t * coord /* coord of
-+ * item */ )
-+{
-+ assert("nikita-607", coord != NULL);
-+ assert("nikita-1650",
-+ item_plugin_by_coord(coord) ==
-+ item_plugin_by_id(NODE_POINTER_ID));
-+ return (internal_item_layout *) item_body_by_coord(coord);
-+}
-+
-+void reiser4_update_internal(const coord_t * coord,
-+ const reiser4_block_nr * blocknr)
-+{
-+ internal_item_layout *item = internal_at(coord);
-+ assert("nikita-2959", reiser4_blocknr_is_sane(blocknr));
-+
-+ put_unaligned(cpu_to_le64(*blocknr), &item->pointer);
-+}
-+
-+/* return child block number stored in the internal item at @coord */
-+static reiser4_block_nr pointer_at(const coord_t * coord /* coord of item */ )
-+{
-+ assert("nikita-608", coord != NULL);
-+ return le64_to_cpu(get_unaligned(&internal_at(coord)->pointer));
-+}
-+
-+/* get znode pointed to by internal @item */
-+static znode *znode_at(const coord_t * item /* coord of item */ ,
-+ znode * parent /* parent node */ )
-+{
-+ return child_znode(item, parent, 1, 0);
-+}
-+
-+/* store pointer from internal item into "block". Implementation of
-+ ->down_link() method */
-+void down_link_internal(const coord_t * coord /* coord of item */ ,
-+ const reiser4_key * key UNUSED_ARG /* key to get
-+ * pointer for */ ,
-+ reiser4_block_nr * block /* resulting block number */ )
-+{
-+ ON_DEBUG(reiser4_key item_key);
-+
-+ assert("nikita-609", coord != NULL);
-+ assert("nikita-611", block != NULL);
-+ assert("nikita-612", (key == NULL) ||
-+ /* twig horrors */
-+ (znode_get_level(coord->node) == TWIG_LEVEL)
-+ || keyle(item_key_by_coord(coord, &item_key), key));
-+
-+ *block = pointer_at(coord);
-+ assert("nikita-2960", reiser4_blocknr_is_sane(block));
-+}
-+
-+/* Get the child's block number, or 0 if the block is unallocated. */
-+int
-+utmost_child_real_block_internal(const coord_t * coord, sideof side UNUSED_ARG,
-+ reiser4_block_nr * block)
-+{
-+ assert("jmacd-2059", coord != NULL);
-+
-+ *block = pointer_at(coord);
-+ assert("nikita-2961", reiser4_blocknr_is_sane(block));
-+
-+ if (reiser4_blocknr_is_fake(block)) {
-+ *block = 0;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Return the child. */
-+int
-+utmost_child_internal(const coord_t * coord, sideof side UNUSED_ARG,
-+ jnode ** childp)
-+{
-+ reiser4_block_nr block = pointer_at(coord);
-+ znode *child;
-+
-+ assert("jmacd-2059", childp != NULL);
-+ assert("nikita-2962", reiser4_blocknr_is_sane(&block));
-+
-+ child = zlook(znode_get_tree(coord->node), &block);
-+
-+ if (IS_ERR(child)) {
-+ return PTR_ERR(child);
-+ }
-+
-+ *childp = ZJNODE(child);
-+
-+ return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+static void check_link(znode * left, znode * right)
-+{
-+ znode *scan;
-+
-+ for (scan = left; scan != right; scan = scan->right) {
-+ if (ZF_ISSET(scan, JNODE_RIP))
-+ break;
-+ if (znode_is_right_connected(scan) && scan->right != NULL) {
-+ if (ZF_ISSET(scan->right, JNODE_RIP))
-+ break;
-+ assert("nikita-3285",
-+ znode_is_left_connected(scan->right));
-+ assert("nikita-3265",
-+ ergo(scan != left,
-+ ZF_ISSET(scan, JNODE_HEARD_BANSHEE)));
-+ assert("nikita-3284", scan->right->left == scan);
-+ } else
-+ break;
-+ }
-+}
-+
-+int check__internal(const coord_t * coord, const char **error)
-+{
-+ reiser4_block_nr blk;
-+ znode *child;
-+ coord_t cpy;
-+
-+ blk = pointer_at(coord);
-+ if (!reiser4_blocknr_is_sane(&blk)) {
-+ *error = "Invalid pointer";
-+ return -1;
-+ }
-+ coord_dup(&cpy, coord);
-+ child = znode_at(&cpy, cpy.node);
-+ if (child != NULL) {
-+ znode *left_child;
-+ znode *right_child;
-+
-+ left_child = right_child = NULL;
-+
-+ assert("nikita-3256", znode_invariant(child));
-+ if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) {
-+ left_child = znode_at(&cpy, cpy.node);
-+ if (left_child != NULL) {
-+ read_lock_tree(znode_get_tree(child));
-+ check_link(left_child, child);
-+ read_unlock_tree(znode_get_tree(child));
-+ zput(left_child);
-+ }
-+ }
-+ coord_dup(&cpy, coord);
-+ if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) {
-+ right_child = znode_at(&cpy, cpy.node);
-+ if (right_child != NULL) {
-+ read_lock_tree(znode_get_tree(child));
-+ check_link(child, right_child);
-+ read_unlock_tree(znode_get_tree(child));
-+ zput(right_child);
-+ }
-+ }
-+ zput(child);
-+ }
-+ return 0;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* return true only if this item really points to "block" */
-+/* Audited by: green(2002.06.14) */
-+int has_pointer_to_internal(const coord_t * coord /* coord of item */ ,
-+ const reiser4_block_nr * block /* block number to
-+ * check */ )
-+{
-+ assert("nikita-613", coord != NULL);
-+ assert("nikita-614", block != NULL);
-+
-+ return pointer_at(coord) == *block;
-+}
-+
-+/* hook called by ->create_item() method of node plugin after new internal
-+ item was just created.
-+
-+ This is point where pointer to new node is inserted into tree. Initialize
-+ parent pointer in child znode, insert child into sibling list and slum.
-+
-+*/
-+int create_hook_internal(const coord_t * item /* coord of item */ ,
-+ void *arg /* child's left neighbor, if any */ )
-+{
-+ znode *child;
-+ __u64 child_ptr;
-+
-+ assert("nikita-1252", item != NULL);
-+ assert("nikita-1253", item->node != NULL);
-+ assert("nikita-1181", znode_get_level(item->node) > LEAF_LEVEL);
-+ assert("nikita-1450", item->unit_pos == 0);
-+
-+ /*
-+ * preparing to item insertion build_child_ptr_data sets pointer to
-+ * data to be inserted to jnode's blocknr which is in cpu byte
-+ * order. Node's create_item simply copied those data. As result we
-+ * have child pointer in cpu's byte order. Convert content of internal
-+ * item to little endian byte order.
-+ */
-+ child_ptr = get_unaligned((__u64 *)item_body_by_coord(item));
-+ reiser4_update_internal(item, &child_ptr);
-+
-+ child = znode_at(item, item->node);
-+ if (child != NULL && !IS_ERR(child)) {
-+ znode *left;
-+ int result = 0;
-+ reiser4_tree *tree;
-+
-+ left = arg;
-+ tree = znode_get_tree(item->node);
-+ write_lock_tree(tree);
-+ write_lock_dk(tree);
-+ assert("nikita-1400", (child->in_parent.node == NULL)
-+ || (znode_above_root(child->in_parent.node)));
-+ ++item->node->c_count;
-+ coord_to_parent_coord(item, &child->in_parent);
-+ sibling_list_insert_nolock(child, left);
-+
-+ assert("nikita-3297", ZF_ISSET(child, JNODE_ORPHAN));
-+ ZF_CLR(child, JNODE_ORPHAN);
-+
-+ if ((left != NULL) && !keyeq(znode_get_rd_key(left),
-+ znode_get_rd_key(child))) {
-+ znode_set_rd_key(child, znode_get_rd_key(left));
-+ }
-+ write_unlock_dk(tree);
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return result;
-+ } else {
-+ if (child == NULL)
-+ child = ERR_PTR(-EIO);
-+ return PTR_ERR(child);
-+ }
-+}
-+
-+/* hook called by ->cut_and_kill() method of node plugin just before internal
-+ item is removed.
-+
-+ This is point where empty node is removed from the tree. Clear parent
-+ pointer in child, and mark node for pending deletion.
-+
-+ Node will be actually deleted later and in several installations:
-+
-+ . when last lock on this node will be released, node will be removed from
-+ the sibling list and its lock will be invalidated
-+
-+ . when last reference to this node will be dropped, bitmap will be updated
-+ and node will be actually removed from the memory.
-+
-+*/
-+int kill_hook_internal(const coord_t * item /* coord of item */ ,
-+ pos_in_node_t from UNUSED_ARG /* start unit */ ,
-+ pos_in_node_t count UNUSED_ARG /* stop unit */ ,
-+ struct carry_kill_data *p UNUSED_ARG)
-+{
-+ znode *child;
-+
-+ assert("nikita-1222", item != NULL);
-+ assert("nikita-1224", from == 0);
-+ assert("nikita-1225", count == 1);
-+
-+ child = znode_at(item, item->node);
-+ if (IS_ERR(child))
-+ return PTR_ERR(child);
-+ else if (node_is_empty(child)) {
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1397", znode_is_write_locked(child));
-+ assert("nikita-1398", child->c_count == 0);
-+ assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE));
-+
-+ tree = znode_get_tree(item->node);
-+ write_lock_tree(tree);
-+ init_parent_coord(&child->in_parent, NULL);
-+ --item->node->c_count;
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return 0;
-+ } else {
-+ warning("nikita-1223",
-+ "Cowardly refuse to remove link to non-empty node");
-+ zput(child);
-+ return RETERR(-EIO);
-+ }
-+}
-+
-+/* hook called by ->shift() node plugin method when iternal item was just
-+ moved from one node to another.
-+
-+ Update parent pointer in child and c_counts in old and new parent
-+
-+*/
-+int shift_hook_internal(const coord_t * item /* coord of item */ ,
-+ unsigned from UNUSED_ARG /* start unit */ ,
-+ unsigned count UNUSED_ARG /* stop unit */ ,
-+ znode * old_node /* old parent */ )
-+{
-+ znode *child;
-+ znode *new_node;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1276", item != NULL);
-+ assert("nikita-1277", from == 0);
-+ assert("nikita-1278", count == 1);
-+ assert("nikita-1451", item->unit_pos == 0);
-+
-+ new_node = item->node;
-+ assert("nikita-2132", new_node != old_node);
-+ tree = znode_get_tree(item->node);
-+ child = child_znode(item, old_node, 1, 0);
-+ if (child == NULL)
-+ return 0;
-+ if (!IS_ERR(child)) {
-+ write_lock_tree(tree);
-+ ++new_node->c_count;
-+ assert("nikita-1395", znode_parent(child) == old_node);
-+ assert("nikita-1396", old_node->c_count > 0);
-+ coord_to_parent_coord(item, &child->in_parent);
-+ assert("nikita-1781", znode_parent(child) == new_node);
-+ assert("nikita-1782",
-+ check_tree_pointer(item, child) == NS_FOUND);
-+ --old_node->c_count;
-+ write_unlock_tree(tree);
-+ zput(child);
-+ return 0;
-+ } else
-+ return PTR_ERR(child);
-+}
-+
-+/* plugin->u.item.b.max_key_inside - not defined */
-+
-+/* plugin->u.item.b.nr_units - item.c:single_unit */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/internal.h linux-2.6.24/fs/reiser4/plugin/item/internal.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/internal.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/internal.h 2008-01-25 11:39:07.020229327 +0300
-@@ -0,0 +1,57 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Internal item contains down-link to the child of the internal/twig
-+ node in a tree. It is internal items that are actually used during
-+ tree traversal. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ )
-+#define __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+
-+/* on-disk layout of internal item */
-+typedef struct internal_item_layout {
-+ /* 0 */ reiser4_dblock_nr pointer;
-+ /* 4 */
-+} internal_item_layout;
-+
-+struct cut_list;
-+
-+int mergeable_internal(const coord_t * p1, const coord_t * p2);
-+lookup_result lookup_internal(const reiser4_key * key, lookup_bias bias,
-+ coord_t * coord);
-+/* store pointer from internal item into "block". Implementation of
-+ ->down_link() method */
-+extern void down_link_internal(const coord_t * coord, const reiser4_key * key,
-+ reiser4_block_nr * block);
-+extern int has_pointer_to_internal(const coord_t * coord,
-+ const reiser4_block_nr * block);
-+extern int create_hook_internal(const coord_t * item, void *arg);
-+extern int kill_hook_internal(const coord_t * item, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *);
-+extern int shift_hook_internal(const coord_t * item, unsigned from,
-+ unsigned count, znode * old_node);
-+extern void reiser4_print_internal(const char *prefix, coord_t * coord);
-+
-+extern int utmost_child_internal(const coord_t * coord, sideof side,
-+ jnode ** child);
-+int utmost_child_real_block_internal(const coord_t * coord, sideof side,
-+ reiser4_block_nr * block);
-+
-+extern void reiser4_update_internal(const coord_t * coord,
-+ const reiser4_block_nr * blocknr);
-+/* FIXME: reiserfs has check_internal */
-+extern int check__internal(const coord_t * coord, const char **error);
-+
-+/* __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/item.c linux-2.6.24/fs/reiser4/plugin/item/item.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/item.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/item.c 2008-01-25 11:39:07.020229327 +0300
-@@ -0,0 +1,719 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* definition of item plugins. */
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "sde.h"
-+#include "internal.h"
-+#include "item.h"
-+#include "static_stat.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../tree.h"
-+#include "../../context.h"
-+#include "ctail.h"
-+
-+/* return pointer to item body */
-+void item_body_by_coord_hard(coord_t * coord /* coord to query */ )
-+{
-+ assert("nikita-324", coord != NULL);
-+ assert("nikita-325", coord->node != NULL);
-+ assert("nikita-326", znode_is_loaded(coord->node));
-+ assert("nikita-3200", coord->offset == INVALID_OFFSET);
-+
-+ coord->offset =
-+ node_plugin_by_node(coord->node)->item_by_coord(coord) -
-+ zdata(coord->node);
-+ ON_DEBUG(coord->body_v = coord->node->times_locked);
-+}
-+
-+void *item_body_by_coord_easy(const coord_t * coord /* coord to query */ )
-+{
-+ return zdata(coord->node) + coord->offset;
-+}
-+
-+#if REISER4_DEBUG
-+
-+int item_body_is_valid(const coord_t * coord)
-+{
-+ return
-+ coord->offset ==
-+ node_plugin_by_node(coord->node)->item_by_coord(coord) -
-+ zdata(coord->node);
-+}
-+
-+#endif
-+
-+/* return length of item at @coord */
-+pos_in_node_t item_length_by_coord(const coord_t * coord /* coord to query */ )
-+{
-+ int len;
-+
-+ assert("nikita-327", coord != NULL);
-+ assert("nikita-328", coord->node != NULL);
-+ assert("nikita-329", znode_is_loaded(coord->node));
-+
-+ len = node_plugin_by_node(coord->node)->length_by_coord(coord);
-+ return len;
-+}
-+
-+void obtain_item_plugin(const coord_t * coord)
-+{
-+ assert("nikita-330", coord != NULL);
-+ assert("nikita-331", coord->node != NULL);
-+ assert("nikita-332", znode_is_loaded(coord->node));
-+
-+ coord_set_iplug((coord_t *) coord,
-+ node_plugin_by_node(coord->node)->
-+ plugin_by_coord(coord));
-+ assert("nikita-2479",
-+ coord_iplug(coord) ==
-+ node_plugin_by_node(coord->node)->plugin_by_coord(coord));
-+}
-+
-+/* return id of item */
-+/* Audited by: green(2002.06.15) */
-+item_id item_id_by_coord(const coord_t * coord /* coord to query */ )
-+{
-+ assert("vs-539", coord != NULL);
-+ assert("vs-538", coord->node != NULL);
-+ assert("vs-537", znode_is_loaded(coord->node));
-+ assert("vs-536", item_plugin_by_coord(coord) != NULL);
-+ assert("vs-540",
-+ item_id_by_plugin(item_plugin_by_coord(coord)) < LAST_ITEM_ID);
-+
-+ return item_id_by_plugin(item_plugin_by_coord(coord));
-+}
-+
-+/* return key of item at @coord */
-+/* Audited by: green(2002.06.15) */
-+reiser4_key *item_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-338", coord != NULL);
-+ assert("nikita-339", coord->node != NULL);
-+ assert("nikita-340", znode_is_loaded(coord->node));
-+
-+ return node_plugin_by_node(coord->node)->key_at(coord, key);
-+}
-+
-+/* this returns max key in the item */
-+reiser4_key *max_item_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ coord_t last;
-+
-+ assert("nikita-338", coord != NULL);
-+ assert("nikita-339", coord->node != NULL);
-+ assert("nikita-340", znode_is_loaded(coord->node));
-+
-+ /* make coord pointing to last item's unit */
-+ coord_dup(&last, coord);
-+ last.unit_pos = coord_num_units(&last) - 1;
-+ assert("vs-1560", coord_is_existing_unit(&last));
-+
-+ max_unit_key_by_coord(&last, key);
-+ return key;
-+}
-+
-+/* return key of unit at @coord */
-+reiser4_key *unit_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-772", coord != NULL);
-+ assert("nikita-774", coord->node != NULL);
-+ assert("nikita-775", znode_is_loaded(coord->node));
-+
-+ if (item_plugin_by_coord(coord)->b.unit_key != NULL)
-+ return item_plugin_by_coord(coord)->b.unit_key(coord, key);
-+ else
-+ return item_key_by_coord(coord, key);
-+}
-+
-+/* return the biggest key contained the unit @coord */
-+reiser4_key *max_unit_key_by_coord(const coord_t * coord /* coord to query */ ,
-+ reiser4_key * key /* result */ )
-+{
-+ assert("nikita-772", coord != NULL);
-+ assert("nikita-774", coord->node != NULL);
-+ assert("nikita-775", znode_is_loaded(coord->node));
-+
-+ if (item_plugin_by_coord(coord)->b.max_unit_key != NULL)
-+ return item_plugin_by_coord(coord)->b.max_unit_key(coord, key);
-+ else
-+ return unit_key_by_coord(coord, key);
-+}
-+
-+/* ->max_key_inside() method for items consisting of exactly one key (like
-+ stat-data) */
-+static reiser4_key *max_key_inside_single_key(const coord_t *
-+ coord /* coord of item */ ,
-+ reiser4_key *
-+ result /* resulting key */ )
-+{
-+ assert("nikita-604", coord != NULL);
-+
-+ /* coord -> key is starting key of this item and it has to be already
-+ filled in */
-+ return unit_key_by_coord(coord, result);
-+}
-+
-+/* ->nr_units() method for items consisting of exactly one unit always */
-+pos_in_node_t
-+nr_units_single_unit(const coord_t * coord UNUSED_ARG /* coord of item */ )
-+{
-+ return 1;
-+}
-+
-+static int
-+paste_no_paste(coord_t * coord UNUSED_ARG,
-+ reiser4_item_data * data UNUSED_ARG,
-+ carry_plugin_info * info UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* default ->fast_paste() method */
-+static int
-+agree_to_fast_op(const coord_t * coord UNUSED_ARG /* coord of item */ )
-+{
-+ return 1;
-+}
-+
-+int item_can_contain_key(const coord_t * item /* coord of item */ ,
-+ const reiser4_key * key /* key to check */ ,
-+ const reiser4_item_data * data /* parameters of item
-+ * being created */ )
-+{
-+ item_plugin *iplug;
-+ reiser4_key min_key_in_item;
-+ reiser4_key max_key_in_item;
-+
-+ assert("nikita-1658", item != NULL);
-+ assert("nikita-1659", key != NULL);
-+
-+ iplug = item_plugin_by_coord(item);
-+ if (iplug->b.can_contain_key != NULL)
-+ return iplug->b.can_contain_key(item, key, data);
-+ else {
-+ assert("nikita-1681", iplug->b.max_key_inside != NULL);
-+ item_key_by_coord(item, &min_key_in_item);
-+ iplug->b.max_key_inside(item, &max_key_in_item);
-+
-+ /* can contain key if
-+ min_key_in_item <= key &&
-+ key <= max_key_in_item
-+ */
-+ return keyle(&min_key_in_item, key)
-+ && keyle(key, &max_key_in_item);
-+ }
-+}
-+
-+/* mergeable method for non mergeable items */
-+static int
-+not_mergeable(const coord_t * i1 UNUSED_ARG, const coord_t * i2 UNUSED_ARG)
-+{
-+ return 0;
-+}
-+
-+/* return 0 if @item1 and @item2 are not mergeable, !0 - otherwise */
-+int are_items_mergeable(const coord_t * i1 /* coord of first item */ ,
-+ const coord_t * i2 /* coord of second item */ )
-+{
-+ item_plugin *iplug;
-+ reiser4_key k1;
-+ reiser4_key k2;
-+
-+ assert("nikita-1336", i1 != NULL);
-+ assert("nikita-1337", i2 != NULL);
-+
-+ iplug = item_plugin_by_coord(i1);
-+ assert("nikita-1338", iplug != NULL);
-+
-+ /* NOTE-NIKITA are_items_mergeable() is also called by assertions in
-+ shifting code when nodes are in "suspended" state. */
-+ assert("nikita-1663",
-+ keyle(item_key_by_coord(i1, &k1), item_key_by_coord(i2, &k2)));
-+
-+ if (iplug->b.mergeable != NULL) {
-+ return iplug->b.mergeable(i1, i2);
-+ } else if (iplug->b.max_key_inside != NULL) {
-+ iplug->b.max_key_inside(i1, &k1);
-+ item_key_by_coord(i2, &k2);
-+
-+ /* mergeable if ->max_key_inside() >= key of i2; */
-+ return keyge(iplug->b.max_key_inside(i1, &k1),
-+ item_key_by_coord(i2, &k2));
-+ } else {
-+ item_key_by_coord(i1, &k1);
-+ item_key_by_coord(i2, &k2);
-+
-+ return
-+ (get_key_locality(&k1) == get_key_locality(&k2)) &&
-+ (get_key_objectid(&k1) == get_key_objectid(&k2))
-+ && (iplug == item_plugin_by_coord(i2));
-+ }
-+}
-+
-+int item_is_extent(const coord_t * item)
-+{
-+ assert("vs-482", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == EXTENT_POINTER_ID;
-+}
-+
-+int item_is_tail(const coord_t * item)
-+{
-+ assert("vs-482", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == FORMATTING_ID;
-+}
-+
-+#if REISER4_DEBUG
-+
-+int item_is_statdata(const coord_t * item)
-+{
-+ assert("vs-516", coord_is_existing_item(item));
-+ return plugin_of_group(item_plugin_by_coord(item), STAT_DATA_ITEM_TYPE);
-+}
-+
-+int item_is_ctail(const coord_t * item)
-+{
-+ assert("edward-xx", coord_is_existing_item(item));
-+ return item_id_by_coord(item) == CTAIL_ID;
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+static int change_item(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ /* cannot change constituent item (sd, or dir_item) */
-+ return RETERR(-EINVAL);
-+}
-+
-+static reiser4_plugin_ops item_plugin_ops = {
-+ .init = NULL,
-+ .load = NULL,
-+ .save_len = NULL,
-+ .save = NULL,
-+ .change = change_item
-+};
-+
-+item_plugin item_plugins[LAST_ITEM_ID] = {
-+ [STATIC_STAT_DATA_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = STATIC_STAT_DATA_ID,
-+ .groups = (1 << STAT_DATA_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "sd",
-+ .desc = "stat-data",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_single_key,
-+ .can_contain_key = NULL,
-+ .mergeable = not_mergeable,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = paste_no_paste,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .sd = {
-+ .init_inode = init_inode_static_sd,
-+ .save_len = save_len_static_sd,
-+ .save = save_static_sd
-+ }
-+ }
-+ },
-+ [SIMPLE_DIR_ENTRY_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = SIMPLE_DIR_ENTRY_ID,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "de",
-+ .desc = "directory entry",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_single_key,
-+ .can_contain_key = NULL,
-+ .mergeable = NULL,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .dir = {
-+ .extract_key = extract_key_de,
-+ .update_key = update_key_de,
-+ .extract_name = extract_name_de,
-+ .extract_file_type = extract_file_type_de,
-+ .add_entry = add_entry_de,
-+ .rem_entry = rem_entry_de,
-+ .max_name_len = max_name_len_de
-+ }
-+ }
-+ },
-+ [COMPOUND_DIR_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = COMPOUND_DIR_ID,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE),
-+ .pops = &item_plugin_ops,
-+ .label = "cde",
-+ .desc = "compressed directory entry",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_cde,
-+ .can_contain_key = can_contain_key_cde,
-+ .mergeable = mergeable_cde,
-+ .nr_units = nr_units_cde,
-+ .lookup = lookup_cde,
-+ .init = init_cde,
-+ .paste = paste_cde,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_cde,
-+ .copy_units = copy_units_cde,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_cde,
-+ .kill_units = kill_units_cde,
-+ .unit_key = unit_key_cde,
-+ .max_unit_key = unit_key_cde,
-+ .estimate = estimate_cde,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = reiser4_check_cde
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .dir = {
-+ .extract_key = extract_key_cde,
-+ .update_key = update_key_cde,
-+ .extract_name = extract_name_cde,
-+ .extract_file_type = extract_file_type_de,
-+ .add_entry = add_entry_cde,
-+ .rem_entry = rem_entry_cde,
-+ .max_name_len = max_name_len_cde
-+ }
-+ }
-+ },
-+ [NODE_POINTER_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = NODE_POINTER_ID,
-+ .groups = (1 << INTERNAL_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "internal",
-+ .desc = "internal item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = NULL,
-+ .can_contain_key = NULL,
-+ .mergeable = mergeable_internal,
-+ .nr_units = nr_units_single_unit,
-+ .lookup = lookup_internal,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = create_hook_internal,
-+ .kill_hook = kill_hook_internal,
-+ .shift_hook = shift_hook_internal,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = check__internal
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_internal,
-+ .utmost_child_real_block =
-+ utmost_child_real_block_internal,
-+ .update = reiser4_update_internal,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .internal = {
-+ .down_link = down_link_internal,
-+ .has_pointer_to = has_pointer_to_internal
-+ }
-+ }
-+ },
-+ [EXTENT_POINTER_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = EXTENT_POINTER_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "extent",
-+ .desc = "extent item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_extent,
-+ .can_contain_key = can_contain_key_extent,
-+ .mergeable = mergeable_extent,
-+ .nr_units = nr_units_extent,
-+ .lookup = lookup_extent,
-+ .init = NULL,
-+ .paste = paste_extent,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_extent,
-+ .create_hook = create_hook_extent,
-+ .copy_units = copy_units_extent,
-+ .kill_hook = kill_hook_extent,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_extent,
-+ .kill_units = kill_units_extent,
-+ .unit_key = unit_key_extent,
-+ .max_unit_key = max_unit_key_extent,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = reiser4_check_extent
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_extent,
-+ .utmost_child_real_block =
-+ utmost_child_real_block_extent,
-+ .update = NULL,
-+ .scan = reiser4_scan_extent,
-+ .convert = NULL,
-+ .key_by_offset = key_by_offset_extent
-+ },
-+ .s = {
-+ .file = {
-+ .write = reiser4_write_extent,
-+ .read = reiser4_read_extent,
-+ .readpage = reiser4_readpage_extent,
-+ .get_block = get_block_address_extent,
-+ .append_key = append_key_extent,
-+ .init_coord_extension =
-+ init_coord_extension_extent
-+ }
-+ }
-+ },
-+ [FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = FORMATTING_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "body",
-+ .desc = "body (or tail?) item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_tail,
-+ .can_contain_key = can_contain_key_tail,
-+ .mergeable = mergeable_tail,
-+ .nr_units = nr_units_tail,
-+ .lookup = lookup_tail,
-+ .init = NULL,
-+ .paste = paste_tail,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_tail,
-+ .create_hook = NULL,
-+ .copy_units = copy_units_tail,
-+ .kill_hook = kill_hook_tail,
-+ .shift_hook = NULL,
-+ .cut_units = cut_units_tail,
-+ .kill_units = kill_units_tail,
-+ .unit_key = unit_key_tail,
-+ .max_unit_key = unit_key_tail,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = NULL,
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = NULL,
-+ .convert = NULL
-+ },
-+ .s = {
-+ .file = {
-+ .write = reiser4_write_tail,
-+ .read = reiser4_read_tail,
-+ .readpage = readpage_tail,
-+ .get_block = get_block_address_tail,
-+ .append_key = append_key_tail,
-+ .init_coord_extension =
-+ init_coord_extension_tail
-+ }
-+ }
-+ },
-+ [CTAIL_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = CTAIL_ID,
-+ .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "ctail",
-+ .desc = "cryptcompress tail item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = max_key_inside_tail,
-+ .can_contain_key = can_contain_key_ctail,
-+ .mergeable = mergeable_ctail,
-+ .nr_units = nr_units_ctail,
-+ .lookup = NULL,
-+ .init = init_ctail,
-+ .paste = paste_ctail,
-+ .fast_paste = agree_to_fast_op,
-+ .can_shift = can_shift_ctail,
-+ .create_hook = create_hook_ctail,
-+ .copy_units = copy_units_ctail,
-+ .kill_hook = kill_hook_ctail,
-+ .shift_hook = shift_hook_ctail,
-+ .cut_units = cut_units_ctail,
-+ .kill_units = kill_units_ctail,
-+ .unit_key = unit_key_tail,
-+ .max_unit_key = unit_key_tail,
-+ .estimate = estimate_ctail,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = check_ctail
-+#endif
-+ },
-+ .f = {
-+ .utmost_child = utmost_child_ctail,
-+ /* FIXME-EDWARD: write this */
-+ .utmost_child_real_block = NULL,
-+ .update = NULL,
-+ .scan = scan_ctail,
-+ .convert = convert_ctail
-+ },
-+ .s = {
-+ .file = {
-+ .write = NULL,
-+ .read = read_ctail,
-+ .readpage = readpage_ctail,
-+ .get_block = get_block_address_tail,
-+ .append_key = append_key_ctail,
-+ .init_coord_extension =
-+ init_coord_extension_tail
-+ }
-+ }
-+ },
-+ [BLACK_BOX_ID] = {
-+ .h = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .id = BLACK_BOX_ID,
-+ .groups = (1 << OTHER_ITEM_TYPE),
-+ .pops = NULL,
-+ .label = "blackbox",
-+ .desc = "black box item",
-+ .linkage = {NULL, NULL}
-+ },
-+ .b = {
-+ .max_key_inside = NULL,
-+ .can_contain_key = NULL,
-+ .mergeable = not_mergeable,
-+ .nr_units = nr_units_single_unit,
-+ /* to need for ->lookup method */
-+ .lookup = NULL,
-+ .init = NULL,
-+ .paste = NULL,
-+ .fast_paste = NULL,
-+ .can_shift = NULL,
-+ .copy_units = NULL,
-+ .create_hook = NULL,
-+ .kill_hook = NULL,
-+ .shift_hook = NULL,
-+ .cut_units = NULL,
-+ .kill_units = NULL,
-+ .unit_key = NULL,
-+ .max_unit_key = NULL,
-+ .estimate = NULL,
-+ .item_data_by_flow = NULL,
-+#if REISER4_DEBUG
-+ .check = NULL
-+#endif
-+ }
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/item.h linux-2.6.24/fs/reiser4/plugin/item/item.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/item.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/item.h 2008-01-25 11:40:16.698169785 +0300
-@@ -0,0 +1,398 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* first read balance.c comments before reading this */
-+
-+/* An item_plugin implements all of the operations required for
-+ balancing that are item specific. */
-+
-+/* an item plugin also implements other operations that are specific to that
-+ item. These go into the item specific operations portion of the item
-+ handler, and all of the item specific portions of the item handler are put
-+ into a union. */
-+
-+#if !defined( __REISER4_ITEM_H__ )
-+#define __REISER4_ITEM_H__
-+
-+#include "../../forward.h"
-+#include "../plugin_header.h"
-+#include "../../dformat.h"
-+#include "../../seal.h"
-+#include "../../plugin/file/file.h"
-+
-+#include <linux/fs.h> /* for struct file, struct inode */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+typedef enum {
-+ STAT_DATA_ITEM_TYPE,
-+ DIR_ENTRY_ITEM_TYPE,
-+ INTERNAL_ITEM_TYPE,
-+ UNIX_FILE_METADATA_ITEM_TYPE,
-+ OTHER_ITEM_TYPE
-+} item_type_id;
-+
-+/* this is the part of each item plugin that all items are expected to
-+ support or at least explicitly fail to support by setting the
-+ pointer to null. */
-+struct balance_ops {
-+ /* operations called by balancing
-+
-+ It is interesting to consider that some of these item
-+ operations could be given sources or targets that are not
-+ really items in nodes. This could be ok/useful.
-+
-+ */
-+ /* maximal key that can _possibly_ be occupied by this item
-+
-+ When inserting, and node ->lookup() method (called by
-+ coord_by_key()) reaches an item after binary search,
-+ the ->max_key_inside() item plugin method is used to determine
-+ whether new item should pasted into existing item
-+ (new_key<=max_key_inside()) or new item has to be created
-+ (new_key>max_key_inside()).
-+
-+ For items that occupy exactly one key (like stat-data)
-+ this method should return this key. For items that can
-+ grow indefinitely (extent, directory item) this should
-+ return reiser4_max_key().
-+
-+ For example extent with the key
-+
-+ (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
-+
-+ ->max_key_inside is (LOCALITY,4,OBJID,0xffffffffffffffff), and
-+ */
-+ reiser4_key *(*max_key_inside) (const coord_t *, reiser4_key *);
-+
-+ /* true if item @coord can merge data at @key. */
-+ int (*can_contain_key) (const coord_t *, const reiser4_key *,
-+ const reiser4_item_data *);
-+ /* mergeable() - check items for mergeability
-+
-+ Optional method. Returns true if two items can be merged.
-+
-+ */
-+ int (*mergeable) (const coord_t *, const coord_t *);
-+
-+ /* number of atomic things in an item.
-+ NOTE FOR CONTRIBUTORS: use a generic method
-+ nr_units_single_unit() for solid (atomic) items, as
-+ tree operations use it as a criterion of solidness
-+ (see is_solid_item macro) */
-+ pos_in_node_t(*nr_units) (const coord_t *);
-+
-+ /* search within item for a unit within the item, and return a
-+ pointer to it. This can be used to calculate how many
-+ bytes to shrink an item if you use pointer arithmetic and
-+ compare to the start of the item body if the item's data
-+ are continuous in the node, if the item's data are not
-+ continuous in the node, all sorts of other things are maybe
-+ going to break as well. */
-+ lookup_result(*lookup) (const reiser4_key *, lookup_bias, coord_t *);
-+ /* method called by ode_plugin->create_item() to initialise new
-+ item */
-+ int (*init) (coord_t * target, coord_t * from,
-+ reiser4_item_data * data);
-+ /* method called (e.g., by reiser4_resize_item()) to place new data
-+ into item when it grows */
-+ int (*paste) (coord_t *, reiser4_item_data *, carry_plugin_info *);
-+ /* return true if paste into @coord is allowed to skip
-+ carry. That is, if such paste would require any changes
-+ at the parent level
-+ */
-+ int (*fast_paste) (const coord_t *);
-+ /* how many but not more than @want units of @source can be
-+ shifted into @target node. If pend == append - we try to
-+ append last item of @target by first units of @source. If
-+ pend == prepend - we try to "prepend" first item in @target
-+ by last units of @source. @target node has @free_space
-+ bytes of free space. Total size of those units are returned
-+ via @size.
-+
-+ @target is not NULL if shifting to the mergeable item and
-+ NULL is new item will be created during shifting.
-+ */
-+ int (*can_shift) (unsigned free_space, coord_t *,
-+ znode *, shift_direction, unsigned *size,
-+ unsigned want);
-+
-+ /* starting off @from-th unit of item @source append or
-+ prepend @count units to @target. @target has been already
-+ expanded by @free_space bytes. That must be exactly what is
-+ needed for those items in @target. If @where_is_free_space
-+ == SHIFT_LEFT - free space is at the end of @target item,
-+ othersize - it is in the beginning of it. */
-+ void (*copy_units) (coord_t *, coord_t *,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space,
-+ unsigned free_space);
-+
-+ int (*create_hook) (const coord_t *, void *);
-+ /* do whatever is necessary to do when @count units starting
-+ from @from-th one are removed from the tree */
-+ /* FIXME-VS: this is used to be here for, in particular,
-+ extents and items of internal type to free blocks they point
-+ to at the same time with removing items from a
-+ tree. Problems start, however, when dealloc_block fails due
-+ to some reason. Item gets removed, but blocks it pointed to
-+ are not freed. It is not clear how to fix this for items of
-+ internal type because a need to remove internal item may
-+ appear in the middle of balancing, and there is no way to
-+ undo changes made. OTOH, if space allocator involves
-+ balancing to perform dealloc_block - this will probably
-+ break balancing due to deadlock issues
-+ */
-+ int (*kill_hook) (const coord_t *, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *);
-+ int (*shift_hook) (const coord_t *, unsigned from, unsigned count,
-+ znode * _node);
-+
-+ /* unit @*from contains @from_key. unit @*to contains @to_key. Cut all keys between @from_key and @to_key
-+ including boundaries. When units are cut from item beginning - move space which gets freed to head of
-+ item. When units are cut from item end - move freed space to item end. When units are cut from the middle of
-+ item - move freed space to item head. Return amount of space which got freed. Save smallest removed key in
-+ @smallest_removed if it is not 0. Save new first item key in @new_first_key if it is not 0
-+ */
-+ int (*cut_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *,
-+ reiser4_key * smallest_removed,
-+ reiser4_key * new_first_key);
-+
-+ /* like cut_units, except that these units are removed from the
-+ tree, not only from a node */
-+ int (*kill_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *,
-+ reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+
-+ /* if @key_of_coord == 1 - returned key of coord, otherwise -
-+ key of unit is returned. If @coord is not set to certain
-+ unit - ERR_PTR(-ENOENT) is returned */
-+ reiser4_key *(*unit_key) (const coord_t *, reiser4_key *);
-+ reiser4_key *(*max_unit_key) (const coord_t *, reiser4_key *);
-+ /* estimate how much space is needed for paste @data into item at
-+ @coord. if @coord==0 - estimate insertion, otherwise - estimate
-+ pasting
-+ */
-+ int (*estimate) (const coord_t *, const reiser4_item_data *);
-+
-+ /* converts flow @f to item data. @coord == 0 on insert */
-+ int (*item_data_by_flow) (const coord_t *, const flow_t *,
-+ reiser4_item_data *);
-+
-+ /*void (*show) (struct seq_file *, coord_t *); */
-+
-+#if REISER4_DEBUG
-+ /* used for debugging, every item should have here the most
-+ complete possible check of the consistency of the item that
-+ the inventor can construct */
-+ int (*check) (const coord_t *, const char **error);
-+#endif
-+
-+};
-+
-+struct flush_ops {
-+ /* return the right or left child of @coord, only if it is in memory */
-+ int (*utmost_child) (const coord_t *, sideof side, jnode ** child);
-+
-+ /* return whether the right or left child of @coord has a non-fake
-+ block number. */
-+ int (*utmost_child_real_block) (const coord_t *, sideof side,
-+ reiser4_block_nr *);
-+ /* relocate child at @coord to the @block */
-+ void (*update) (const coord_t *, const reiser4_block_nr *);
-+ /* count unformatted nodes per item for leave relocation policy, etc.. */
-+ int (*scan) (flush_scan * scan);
-+ /* convert item by flush */
-+ int (*convert) (flush_pos_t * pos);
-+ /* backward mapping from jnode offset to a key. */
-+ int (*key_by_offset) (struct inode *, loff_t, reiser4_key *);
-+};
-+
-+/* operations specific to the directory item */
-+struct dir_entry_iops {
-+ /* extract stat-data key from directory entry at @coord and place it
-+ into @key. */
-+ int (*extract_key) (const coord_t *, reiser4_key * key);
-+ /* update object key in item. */
-+ int (*update_key) (const coord_t *, const reiser4_key *, lock_handle *);
-+ /* extract name from directory entry at @coord and return it */
-+ char *(*extract_name) (const coord_t *, char *buf);
-+ /* extract file type (DT_* stuff) from directory entry at @coord and
-+ return it */
-+ unsigned (*extract_file_type) (const coord_t *);
-+ int (*add_entry) (struct inode * dir,
-+ coord_t *, lock_handle *,
-+ const struct dentry * name,
-+ reiser4_dir_entry_desc * entry);
-+ int (*rem_entry) (struct inode * dir, const struct qstr * name,
-+ coord_t *, lock_handle *,
-+ reiser4_dir_entry_desc * entry);
-+ int (*max_name_len) (const struct inode * dir);
-+};
-+
-+/* operations specific to items regular (unix) file metadata are built of */
-+struct file_iops{
-+ int (*write) (struct file *, struct inode *,
-+ const char __user *, size_t, loff_t *pos);
-+ int (*read) (struct file *, flow_t *, hint_t *);
-+ int (*readpage) (void *, struct page *);
-+ int (*get_block) (const coord_t *, sector_t, sector_t *);
-+ /*
-+ * key of first byte which is not addressed by the item @coord is set
-+ * to.
-+ * For example, for extent item with the key
-+ *
-+ * (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
-+ *
-+ * ->append_key is
-+ *
-+ * (LOCALITY,4,OBJID,STARTING-OFFSET + BLK * block_size)
-+ */
-+ reiser4_key *(*append_key) (const coord_t *, reiser4_key *);
-+
-+ void (*init_coord_extension) (uf_coord_t *, loff_t);
-+};
-+
-+/* operations specific to items of stat data type */
-+struct sd_iops {
-+ int (*init_inode) (struct inode * inode, char *sd, int len);
-+ int (*save_len) (struct inode * inode);
-+ int (*save) (struct inode * inode, char **area);
-+};
-+
-+/* operations specific to internal item */
-+struct internal_iops{
-+ /* all tree traversal want to know from internal item is where
-+ to go next. */
-+ void (*down_link) (const coord_t * coord,
-+ const reiser4_key * key, reiser4_block_nr * block);
-+ /* check that given internal item contains given pointer. */
-+ int (*has_pointer_to) (const coord_t * coord,
-+ const reiser4_block_nr * block);
-+};
-+
-+struct item_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* methods common for all item types */
-+ struct balance_ops b; /* balance operations */
-+ struct flush_ops f; /* flush operates with items via this methods */
-+
-+ /* methods specific to particular type of item */
-+ union {
-+ struct dir_entry_iops dir;
-+ struct file_iops file;
-+ struct sd_iops sd;
-+ struct internal_iops internal;
-+ } s;
-+};
-+
-+#define is_solid_item(iplug) ((iplug)->b.nr_units == nr_units_single_unit)
-+
-+static inline item_id item_id_by_plugin(item_plugin * plugin)
-+{
-+ return plugin->h.id;
-+}
-+
-+static inline char get_iplugid(item_plugin * iplug)
-+{
-+ assert("nikita-2838", iplug != NULL);
-+ assert("nikita-2839", iplug->h.id < 0xff);
-+ return (char)item_id_by_plugin(iplug);
-+}
-+
-+extern unsigned long znode_times_locked(const znode * z);
-+
-+static inline void coord_set_iplug(coord_t * coord, item_plugin * iplug)
-+{
-+ assert("nikita-2837", coord != NULL);
-+ assert("nikita-2838", iplug != NULL);
-+ coord->iplugid = get_iplugid(iplug);
-+ ON_DEBUG(coord->plug_v = znode_times_locked(coord->node));
-+}
-+
-+static inline item_plugin *coord_iplug(const coord_t * coord)
-+{
-+ assert("nikita-2833", coord != NULL);
-+ assert("nikita-2834", coord->iplugid != INVALID_PLUGID);
-+ assert("nikita-3549", coord->plug_v == znode_times_locked(coord->node));
-+ return (item_plugin *) plugin_by_id(REISER4_ITEM_PLUGIN_TYPE,
-+ coord->iplugid);
-+}
-+
-+extern int item_can_contain_key(const coord_t * item, const reiser4_key * key,
-+ const reiser4_item_data *);
-+extern int are_items_mergeable(const coord_t * i1, const coord_t * i2);
-+extern int item_is_extent(const coord_t *);
-+extern int item_is_tail(const coord_t *);
-+extern int item_is_statdata(const coord_t * item);
-+extern int item_is_ctail(const coord_t *);
-+
-+extern pos_in_node_t item_length_by_coord(const coord_t * coord);
-+extern pos_in_node_t nr_units_single_unit(const coord_t * coord);
-+extern item_id item_id_by_coord(const coord_t * coord /* coord to query */ );
-+extern reiser4_key *item_key_by_coord(const coord_t * coord, reiser4_key * key);
-+extern reiser4_key *max_item_key_by_coord(const coord_t *, reiser4_key *);
-+extern reiser4_key *unit_key_by_coord(const coord_t * coord, reiser4_key * key);
-+extern reiser4_key *max_unit_key_by_coord(const coord_t * coord,
-+ reiser4_key * key);
-+extern void obtain_item_plugin(const coord_t * coord);
-+
-+#if defined(REISER4_DEBUG)
-+extern int znode_is_loaded(const znode * node);
-+#endif
-+
-+/* return plugin of item at @coord */
-+static inline item_plugin *item_plugin_by_coord(const coord_t *
-+ coord /* coord to query */ )
-+{
-+ assert("nikita-330", coord != NULL);
-+ assert("nikita-331", coord->node != NULL);
-+ assert("nikita-332", znode_is_loaded(coord->node));
-+
-+ if (unlikely(!coord_is_iplug_set(coord)))
-+ obtain_item_plugin(coord);
-+ return coord_iplug(coord);
-+}
-+
-+/* this returns true if item is of internal type */
-+static inline int item_is_internal(const coord_t * item)
-+{
-+ assert("vs-483", coord_is_existing_item(item));
-+ return plugin_of_group(item_plugin_by_coord(item), INTERNAL_ITEM_TYPE);
-+}
-+
-+extern void item_body_by_coord_hard(coord_t * coord);
-+extern void *item_body_by_coord_easy(const coord_t * coord);
-+#if REISER4_DEBUG
-+extern int item_body_is_valid(const coord_t * coord);
-+#endif
-+
-+/* return pointer to item body */
-+static inline void *item_body_by_coord(const coord_t *
-+ coord /* coord to query */ )
-+{
-+ assert("nikita-324", coord != NULL);
-+ assert("nikita-325", coord->node != NULL);
-+ assert("nikita-326", znode_is_loaded(coord->node));
-+
-+ if (coord->offset == INVALID_OFFSET)
-+ item_body_by_coord_hard((coord_t *) coord);
-+ assert("nikita-3201", item_body_is_valid(coord));
-+ assert("nikita-3550", coord->body_v == znode_times_locked(coord->node));
-+ return item_body_by_coord_easy(coord);
-+}
-+
-+/* __REISER4_ITEM_H__ */
-+#endif
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/Makefile linux-2.6.24/fs/reiser4/plugin/item/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/item/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/Makefile 2008-01-25 11:39:07.024230357 +0300
-@@ -0,0 +1,18 @@
-+obj-$(CONFIG_REISER4_FS) += item_plugins.o
-+
-+item_plugins-objs := \
-+ item.o \
-+ static_stat.o \
-+ sde.o \
-+ cde.o \
-+ blackbox.o \
-+ internal.o \
-+ tail.o \
-+ ctail.o \
-+ extent.o \
-+ extent_item_ops.o \
-+ extent_file_ops.o \
-+ extent_flush_ops.o
-+
-+
-+
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/sde.c linux-2.6.24/fs/reiser4/plugin/item/sde.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/sde.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/sde.c 2008-01-25 11:39:07.024230357 +0300
-@@ -0,0 +1,190 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry implementation */
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../coord.h"
-+#include "sde.h"
-+#include "item.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../carry.h"
-+#include "../../tree.h"
-+#include "../../inode.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/quotaops.h>
-+
-+/* ->extract_key() method of simple directory item plugin. */
-+int extract_key_de(const coord_t * coord /* coord of item */ ,
-+ reiser4_key * key /* resulting key */ )
-+{
-+ directory_entry_format *dent;
-+
-+ assert("nikita-1458", coord != NULL);
-+ assert("nikita-1459", key != NULL);
-+
-+ dent = (directory_entry_format *) item_body_by_coord(coord);
-+ assert("nikita-1158", item_length_by_coord(coord) >= (int)sizeof *dent);
-+ return extract_key_from_id(&dent->id, key);
-+}
-+
-+int
-+update_key_de(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh UNUSED_ARG)
-+{
-+ directory_entry_format *dent;
-+ obj_key_id obj_id;
-+ int result;
-+
-+ assert("nikita-2342", coord != NULL);
-+ assert("nikita-2343", key != NULL);
-+
-+ dent = (directory_entry_format *) item_body_by_coord(coord);
-+ result = build_obj_key_id(key, &obj_id);
-+ if (result == 0) {
-+ dent->id = obj_id;
-+ znode_make_dirty(coord->node);
-+ }
-+ return 0;
-+}
-+
-+char *extract_dent_name(const coord_t * coord, directory_entry_format * dent,
-+ char *buf)
-+{
-+ reiser4_key key;
-+
-+ unit_key_by_coord(coord, &key);
-+ if (get_key_type(&key) != KEY_FILE_NAME_MINOR)
-+ reiser4_print_address("oops", znode_get_block(coord->node));
-+ if (!is_longname_key(&key)) {
-+ if (is_dot_key(&key))
-+ return (char *)".";
-+ else
-+ return extract_name_from_key(&key, buf);
-+ } else
-+ return (char *)dent->name;
-+}
-+
-+/* ->extract_name() method of simple directory item plugin. */
-+char *extract_name_de(const coord_t * coord /* coord of item */ , char *buf)
-+{
-+ directory_entry_format *dent;
-+
-+ assert("nikita-1460", coord != NULL);
-+
-+ dent = (directory_entry_format *) item_body_by_coord(coord);
-+ return extract_dent_name(coord, dent, buf);
-+}
-+
-+/* ->extract_file_type() method of simple directory item plugin. */
-+unsigned extract_file_type_de(const coord_t * coord UNUSED_ARG /* coord of
-+ * item */ )
-+{
-+ assert("nikita-1764", coord != NULL);
-+ /* we don't store file type in the directory entry yet.
-+
-+ But see comments at kassign.h:obj_key_id
-+ */
-+ return DT_UNKNOWN;
-+}
-+
-+/* Insert a new simple directory entry item.
-+
-+   @dir:   directory the entry is added to (charged for the space)
-+   @coord: coord at which the item is inserted
-+   @lh:    insertion lock handle
-+   @de:    dentry supplying the name to add
-+   @entry: parameters of the new entry (its key and the target object)
-+
-+   The item consists of the fixed directory_entry_format part, followed by
-+   the zero-terminated name only when is_longname() says the name has to be
-+   stored in the body.
-+
-+   Returns 0 on success, -EDQUOT when quota refuses the space, or the error
-+   returned by insert_by_coord(). On an insertion error the quota charge
-+   taken here is released again. */
-+int add_entry_de(struct inode *dir /* directory of item */ ,
-+		 coord_t * coord /* coord of item */ ,
-+		 lock_handle * lh /* insertion lock handle */ ,
-+		 const struct dentry *de /* name to add */ ,
-+		 reiser4_dir_entry_desc * entry /* parameters of new directory
-+						 * entry */ )
-+{
-+	reiser4_item_data data;
-+	directory_entry_format *dent;
-+	int result;
-+	const char *name;
-+	int len;
-+	int longname;
-+
-+	name = de->d_name.name;
-+	len = de->d_name.len;
-+	assert("nikita-1163", strlen(name) == len);
-+
-+	longname = is_longname(name, len);
-+
-+	data.length = sizeof *dent;
-+	if (longname)
-+		data.length += len + 1;
-+	data.data = NULL;
-+	data.user = 0;
-+	data.iplug = item_plugin_by_id(SIMPLE_DIR_ENTRY_ID);
-+
-+	/* NOTE-NIKITA quota plugin */
-+	if (DQUOT_ALLOC_SPACE_NODIRTY(dir, data.length))
-+		return -EDQUOT;
-+
-+	result = insert_by_coord(coord, &data, &entry->key, lh, 0 /*flags */ );
-+	if (result != 0) {
-+		/* nothing was inserted: give back the space charged above so
-+		   that a failed insertion does not leak quota */
-+		DQUOT_FREE_SPACE_NODIRTY(dir, data.length);
-+		return result;
-+	}
-+
-+	dent = (directory_entry_format *) item_body_by_coord(coord);
-+	build_inode_key_id(entry->obj, &dent->id);
-+	if (longname) {
-+		memcpy(dent->name, name, len);
-+		put_unaligned(0, &dent->name[len]);
-+	}
-+	return 0;
-+}
-+
-+/* Remove the directory entry item at @coord from directory @dir.
-+
-+   @name, @lh and @entry are not used. Returns -EIO when the directory
-+   accounts for fewer bytes than the item occupies, otherwise the result of
-+   kill_node_content(). Quota for the item is released only after a
-+   successful removal. */
-+int rem_entry_de(struct inode *dir, const struct qstr *name UNUSED_ARG,
-+		 coord_t *coord, lock_handle *lh UNUSED_ARG,
-+		 reiser4_dir_entry_desc *entry UNUSED_ARG)
-+{
-+	coord_t shadow;
-+	int result;
-+	int length;
-+
-+	length = item_length_by_coord(coord);
-+	if (inode_get_bytes(dir) < length) {
-+		warning("nikita-2627", "Dir is broke: %llu: %llu",
-+			(unsigned long long)get_inode_oid(dir),
-+			inode_get_bytes(dir));
-+
-+		return RETERR(-EIO);
-+	}
-+
-+	/* The removal routine takes pointers to two _different_ coords and
-+	   modifies them without regard to aliasing, hence the temporary
-+	   copy of @coord. */
-+	coord_dup(&shadow, coord);
-+	result = kill_node_content(coord, &shadow,
-+				   NULL, NULL, NULL, NULL, NULL, 0);
-+	if (result != 0)
-+		return result;
-+
-+	/* NOTE-NIKITA quota plugin */
-+	DQUOT_FREE_SPACE_NODIRTY(dir, length);
-+	return 0;
-+}
-+
-+/* Longest name that fits into a directory entry of @dir: the node plugin's
-+   maximal item size less the fixed entry header and 2 further bytes
-+   (NOTE(review): the purpose of the extra 2 bytes is not visible here). */
-+int max_name_len_de(const struct inode *dir)
-+{
-+	return reiser4_tree_by_inode(dir)->nplug->max_item_size() -
-+	    (sizeof(directory_entry_format) + 2);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/sde.h linux-2.6.24/fs/reiser4/plugin/item/sde.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/sde.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/sde.h 2008-01-25 11:39:07.024230357 +0300
-@@ -0,0 +1,66 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Directory entry. */
-+
-+#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
-+#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "../../kassign.h"
-+#include "../../key.h"
-+
-+#include <linux/fs.h>
-+#include <linux/dcache.h> /* for struct dentry */
-+
-+/* Body of a simple directory entry item: packed id of the target object,
-+   optionally followed by the entry name. */
-+typedef struct directory_entry_format {
-+ /* key of object stat-data. It's not necessary to store whole
-+ key here, because it's always key of stat-data, so minor
-+ packing locality and offset can be omitted here. But this
-+ relies on particular key allocation scheme for stat-data, so,
-+ for extensibility sake, whole key can be stored here.
-+
-+ We store key as array of bytes, because we don't want 8-byte
-+ alignment of dir entries.
-+ */
-+ obj_key_id id;
-+ /* file name. Null terminated string. */
-+ /* Zero-length trailing array: add_entry_de() reserves room for it
-+ (name plus terminating 0) only when is_longname() reports that
-+ the name has to be kept in the item body. */
-+ d8 name[0];
-+} directory_entry_format;
-+
-+/* Entry points of the simple directory entry item (see sde.c). */
-+
-+void print_de(const char *prefix, coord_t * coord);
-+/* build the key of the object an entry refers to */
-+int extract_key_de(const coord_t * coord, reiser4_key * key);
-+/* make an entry refer to the object identified by @key */
-+int update_key_de(const coord_t * coord, const reiser4_key * key,
-+ lock_handle * lh);
-+/* name of the entry at @coord; @buf is scratch space for names that are
-+ recovered from the key */
-+char *extract_name_de(const coord_t * coord, char *buf);
-+/* always DT_UNKNOWN: file type is not stored in the entry */
-+unsigned extract_file_type_de(const coord_t * coord);
-+/* insert / remove an entry, charging / releasing quota of @dir */
-+int add_entry_de(struct inode *dir, coord_t * coord,
-+ lock_handle * lh, const struct dentry *name,
-+ reiser4_dir_entry_desc * entry);
-+int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
-+ lock_handle * lh, reiser4_dir_entry_desc * entry);
-+/* upper bound on the length of a name storable in @dir */
-+int max_name_len_de(const struct inode *dir);
-+
-+/* NOTE(review): no definition of de_rem_and_shrink() is visible in this
-+ part of the patch - confirm it is still implemented somewhere. */
-+int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
-+
-+/* shared name extraction helper; returns dent->name for long names */
-+char *extract_dent_name(const coord_t * coord,
-+ directory_entry_format * dent, char *buf);
-+
-+/* Buffer length selected by key size: 24 with large keys, 16 otherwise.
-+ Presumably the size callers must give the @buf argument of the name
-+ extraction functions - TODO confirm against callers. */
-+#if REISER4_LARGE_KEY
-+#define DE_NAME_BUF_LEN (24)
-+#else
-+#define DE_NAME_BUF_LEN (16)
-+#endif
-+
-+/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/static_stat.c linux-2.6.24/fs/reiser4/plugin/item/static_stat.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/static_stat.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/static_stat.c 2008-01-25 11:39:07.024230357 +0300
-@@ -0,0 +1,1107 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* stat data manipulation. */
-+
-+#include "../../forward.h"
-+#include "../../super.h"
-+#include "../../vfs_ops.h"
-+#include "../../inode.h"
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../object.h"
-+#include "../plugin.h"
-+#include "../plugin_header.h"
-+#include "static_stat.h"
-+#include "item.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+
-+/* see static_stat.h for explanation */
-+
-+/* Cursor helper used while dumping/loading inode/plugin state to/from the
-+   stat-data: step *@area forward by @size_of bytes and shrink the remaining
-+   *@length by the same amount. The remaining length must not go negative. */
-+static void move_on(int *length, char **area, int size_of)
-+{
-+	assert("nikita-615", length != NULL);
-+	assert("nikita-616", area != NULL);
-+
-+	*area += size_of;
-+	*length -= size_of;
-+
-+	assert("nikita-617", *length >= 0);
-+}
-+
-+/* Loading helper: warn that the stat-data of @inode is shorter than needed
-+   for the part named by @where and return -EINVAL. Can only happen on disk
-+   corruption. */
-+static int not_enough_space(struct inode *inode, const char *where)
-+{
-+	assert("nikita-618", inode != NULL);
-+
-+	warning("nikita-619", "Not enough space in %llu while loading %s",
-+		(unsigned long long)get_inode_oid(inode), where);
-+	return RETERR(-EINVAL);
-+}
-+
-+/* Loading helper: warn that plugin id @id found in the stat-data of @inode
-+   is not valid and return -EINVAL. */
-+static int unknown_plugin(reiser4_plugin_id id, struct inode *inode)
-+{
-+	warning("nikita-620", "Unknown plugin %i in %llu",
-+		id, (unsigned long long)get_inode_oid(inode));
-+	return RETERR(-EINVAL);
-+}
-+
-+/* this is installed as ->init_inode() method of
-+ item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+ Copies data from on-disk stat-data format into inode.
-+ Handles stat-data extensions.
-+
-+ The stat-data starts with reiser4_stat_data_base, whose extmask says
-+ which extensions follow. Bits are walked from bit 0 upwards; for each
-+ extension bit the ->present() method of the extension is called when
-+ the bit is set, ->absent() (if provided) when it is clear. Every 16th
-+ bit is not an extension: when set, another 16-bit mask chunk follows
-+ in the body.
-+
-+ Returns 0 on success, -EINVAL for an unknown extension, a truncated
-+ mask or too many mask chunks, or the error of an extension method. */
-+/* was sd_load */
-+int init_inode_static_sd(struct inode *inode /* object being processed */ ,
-+ char *sd /* stat-data body */ ,
-+ int len /* length of stat-data */ )
-+{
-+ int result;
-+ int bit;
-+ int chunk;
-+ __u16 mask;
-+ __u64 bigmask;
-+ reiser4_stat_data_base *sd_base;
-+ reiser4_inode *state;
-+
-+ assert("nikita-625", inode != NULL);
-+ assert("nikita-626", sd != NULL);
-+
-+ result = 0;
-+ sd_base = (reiser4_stat_data_base *) sd;
-+ state = reiser4_inode_data(inode);
-+ mask = le16_to_cpu(get_unaligned(&sd_base->extmask));
-+ bigmask = mask;
-+ reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
-+
-+ move_on(&len, &sd, sizeof *sd_base);
-+ /* keep going past an exhausted mask until every "important"
-+ extension had its ->absent() method called */
-+ for (bit = 0, chunk = 0;
-+ mask != 0 || bit <= LAST_IMPORTANT_SD_EXTENSION;
-+ ++bit, mask >>= 1) {
-+ if (((bit + 1) % 16) != 0) {
-+ /* handle extension */
-+ sd_ext_plugin *sdplug;
-+
-+ if (bit >= LAST_SD_EXTENSION) {
-+ warning("vpf-1904",
-+ "No such extension %i in inode %llu",
-+ bit,
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+
-+ sdplug = sd_ext_plugin_by_id(bit);
-+ if (sdplug == NULL) {
-+ warning("nikita-627",
-+ "No such extension %i in inode %llu",
-+ bit,
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ if (mask & 1) {
-+ assert("nikita-628", sdplug->present);
-+ /* alignment is not supported in node layout
-+ plugin yet.
-+ result = align( inode, &len, &sd,
-+ sdplug -> alignment );
-+ if( result != 0 )
-+ return result; */
-+ result = sdplug->present(inode, &sd, &len);
-+ } else if (sdplug->absent != NULL)
-+ result = sdplug->absent(inode);
-+ if (result)
-+ break;
-+ /* else, we are looking at the last bit in 16-bit
-+ portion of bitmask */
-+ } else if (mask & 1) {
-+ /* next portion of bitmask */
-+ if (len < (int)sizeof(d16)) {
-+ warning("nikita-629",
-+ "No space for bitmap in inode %llu",
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ mask = le16_to_cpu(get_unaligned((d16 *)sd));
-+ /* NOTE(review): this leaves the newest chunk in the low 16
-+ bits of bigmask, while save_static_sd() writes the low
-+ 16 bits of extmask as the first chunk - confirm the
-+ intended layout for masks longer than one chunk. */
-+ bigmask <<= 16;
-+ bigmask |= mask;
-+ move_on(&len, &sd, sizeof(d16));
-+ ++chunk;
-+ if (chunk == 3) {
-+ /* NOTE(review): the bit cleared below is already
-+ clear on this path, so the statement is a no-op -
-+ confirm whether the condition is inverted. */
-+ if (!(mask & 0x8000)) {
-+ /* clear last bit */
-+ mask &= ~0x8000;
-+ continue;
-+ }
-+ /* too much */
-+ warning("nikita-630",
-+ "Too many extensions in %llu",
-+ (unsigned long long)
-+ get_inode_oid(inode));
-+
-+ result = RETERR(-EINVAL);
-+ break;
-+ }
-+ } else
-+ /* bitmask exhausted */
-+ break;
-+ }
-+ state->extmask = bigmask;
-+ /* common initialisations */
-+ /* NOTE(review): sizeof() makes this subtraction unsigned, so the
-+ test is true for any non-zero difference, including the case
-+ where len is the smaller operand - confirm that is intended. */
-+ if (len - (bit / 16 * sizeof(d16)) > 0) {
-+ /* alignment in save_len_static_sd() is taken into account
-+ -edward */
-+ warning("nikita-631", "unused space in inode %llu",
-+ (unsigned long long)get_inode_oid(inode));
-+ }
-+
-+ return result;
-+}
-+
-+/* Estimate the size of the stat-data needed to store @inode.
-+   Installed as ->save_len() method of
-+   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+
-+   The result is the base stat-data size, plus ->save_len() of every
-+   extension whose bit is set in the inode's extmask, plus one d16 for each
-+   complete group of 16 mask bits that was walked. Alignment of extensions
-+   is not supported. */
-+/* was sd_len */
-+int save_len_static_sd(struct inode *inode)
-+{
-+	unsigned int total;
-+	__u64 pending;
-+	int bit;
-+
-+	assert("nikita-632", inode != NULL);
-+
-+	total = sizeof(reiser4_stat_data_base);
-+	pending = reiser4_inode_data(inode)->extmask;
-+	bit = 0;
-+	while (pending != 0) {
-+		if (pending & 1) {
-+			sd_ext_plugin *sdplug;
-+
-+			sdplug = sd_ext_plugin_by_id(bit);
-+			assert("nikita-633", sdplug != NULL);
-+			total += sdplug->save_len(inode);
-+		}
-+		pending >>= 1;
-+		++bit;
-+	}
-+	total += bit / 16 * sizeof(d16);
-+	return total;
-+}
-+
-+/* Save @inode into stat-data.
-+   Installed as ->save() method of
-+   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
-+
-+   @inode: object being processed
-+   @area:  in: where to save the stat-data; out: first byte after what
-+           has been written
-+
-+   The low 16 bits of the inode's extmask are stored in the base
-+   stat-data. Then the mask is walked from bit 0: for a set extension bit
-+   the ->save() method of that extension appends its data; for a set 16th
-+   bit the current low 16 bits of the remaining mask are appended as the
-+   next mask chunk. Alignment of extensions is not supported.
-+
-+   Returns 0 on success or the first error returned by an extension's
-+   ->save() method. */
-+/* was sd_save */
-+int save_static_sd(struct inode *inode /* object being processed */ ,
-+		   char **area /* where to save stat-data */ )
-+{
-+	int result;
-+	__u64 emask;
-+	int bit;
-+	reiser4_stat_data_base *sd_base;
-+
-+	assert("nikita-634", inode != NULL);
-+	assert("nikita-635", area != NULL);
-+
-+	result = 0;
-+	emask = reiser4_inode_data(inode)->extmask;
-+	sd_base = (reiser4_stat_data_base *) * area;
-+	put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), &sd_base->extmask);
-+
-+	*area += sizeof *sd_base;
-+	for (bit = 0; emask != 0; ++bit, emask >>= 1) {
-+		if (!(emask & 1))
-+			continue;
-+		if ((bit + 1) % 16 != 0) {
-+			sd_ext_plugin *sdplug;
-+
-+			sdplug = sd_ext_plugin_by_id(bit);
-+			assert("nikita-636", sdplug != NULL);
-+			result = sdplug->save(inode, area);
-+			if (result)
-+				break;
-+		} else {
-+			/* continuation bit: emit the next chunk of the mask */
-+			put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)),
-+				      (d16 *)(*area));
-+			*area += sizeof(d16);
-+		}
-+	}
-+	return result;
-+}
-+
-+/* stat-data extension handling functions. */
-+
-+/* Load the light-weight extension (mode, nlink, size) from *@area into
-+   @inode and advance *@area / *@len past it. A mode carrying both S_IFREG
-+   and S_IFIFO marks a partially converted file: S_IFIFO is dropped and
-+   REISER4_PART_MIXED is set instead. Returns -EINVAL via not_enough_space()
-+   when fewer bytes than the extension needs remain. */
-+static int present_lw_sd(struct inode *inode, char **area, int *len)
-+{
-+	reiser4_light_weight_stat *sd_lw;
-+
-+	if (*len < (int)sizeof(reiser4_light_weight_stat))
-+		return not_enough_space(inode, "lw sd");
-+
-+	sd_lw = (reiser4_light_weight_stat *)*area;
-+
-+	inode->i_mode = le16_to_cpu(get_unaligned(&sd_lw->mode));
-+	inode->i_nlink = le32_to_cpu(get_unaligned(&sd_lw->nlink));
-+	inode->i_size = le64_to_cpu(get_unaligned(&sd_lw->size));
-+	if ((inode->i_mode & S_IFMT) == (S_IFREG | S_IFIFO)) {
-+		inode->i_mode &= ~S_IFIFO;
-+		warning("", "partially converted file is encountered");
-+		reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
-+	}
-+	move_on(len, area, sizeof *sd_lw);
-+	return 0;
-+}
-+
-+/* Space needed to save the light-weight extension: always the size of
-+   reiser4_light_weight_stat, whatever @inode is. */
-+static int save_len_lw_sd(struct inode *inode UNUSED_ARG)
-+{
-+	return sizeof(reiser4_light_weight_stat);
-+}
-+
-+/* Store mode, nlink and size of @inode as a light-weight extension at *@area
-+   and advance *@area past it. S_IFIFO is or-ed into the saved mode while
-+   the inode carries REISER4_PART_MIXED. Always returns 0. */
-+static int save_lw_sd(struct inode *inode, char **area)
-+{
-+	reiser4_light_weight_stat *sd;
-+	mode_t delta;
-+
-+	assert("nikita-2705", inode != NULL);
-+	assert("nikita-2706", area != NULL);
-+	assert("nikita-2707", *area != NULL);
-+
-+	sd = (reiser4_light_weight_stat *)*area;
-+
-+	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
-+		delta = S_IFIFO;
-+	else
-+		delta = 0;
-+
-+	put_unaligned(cpu_to_le16(inode->i_mode | delta), &sd->mode);
-+	put_unaligned(cpu_to_le32(inode->i_nlink), &sd->nlink);
-+	put_unaligned(cpu_to_le64((__u64) inode->i_size), &sd->size);
-+	*area += sizeof *sd;
-+	return 0;
-+}
-+
-+/* Load the unix extension (uid, gid, second-resolution times and either
-+   rdev or the byte count) from *@area into @inode, then advance *@area and
-+   *@len past it. Block and character devices take rdev from the shared
-+   field, every other object its byte count. Returns -EINVAL via
-+   not_enough_space() when the extension does not fit into *@len. */
-+static int present_unix_sd(struct inode *inode, char **area, int *len)
-+{
-+	reiser4_unix_stat *sd;
-+
-+	assert("nikita-637", inode != NULL);
-+	assert("nikita-638", area != NULL);
-+	assert("nikita-639", *area != NULL);
-+	assert("nikita-640", len != NULL);
-+	assert("nikita-641", *len > 0);
-+
-+	if (*len < (int)sizeof(reiser4_unix_stat))
-+		return not_enough_space(inode, "unix sd");
-+
-+	sd = (reiser4_unix_stat *)*area;
-+
-+	inode->i_uid = le32_to_cpu(get_unaligned(&sd->uid));
-+	inode->i_gid = le32_to_cpu(get_unaligned(&sd->gid));
-+	inode->i_atime.tv_sec = le32_to_cpu(get_unaligned(&sd->atime));
-+	inode->i_mtime.tv_sec = le32_to_cpu(get_unaligned(&sd->mtime));
-+	inode->i_ctime.tv_sec = le32_to_cpu(get_unaligned(&sd->ctime));
-+	if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
-+		inode->i_rdev = le64_to_cpu(get_unaligned(&sd->u.rdev));
-+	else
-+		inode_set_bytes(inode,
-+				(loff_t) le64_to_cpu(get_unaligned(&sd->u.bytes)));
-+	move_on(len, area, sizeof *sd);
-+	return 0;
-+}
-+
-+/* Called when the stat-data has no unix extension: fill @inode with the
-+   default uid/gid of its super block, the current time for all three
-+   timestamps and a byte count equal to i_size. Always returns 0. */
-+static int absent_unix_sd(struct inode *inode)
-+{
-+	inode->i_uid = get_super_private(inode->i_sb)->default_uid;
-+	inode->i_gid = get_super_private(inode->i_sb)->default_gid;
-+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-+	inode_set_bytes(inode, inode->i_size);
-+
-+	/* Flag the inode as lightweight so that the caller (lookup_common)
-+	   finishes initialisation by copying [ug]id from a parent. */
-+	reiser4_inode_set_flag(inode, REISER4_LIGHT_WEIGHT);
-+	return 0;
-+}
-+
-+/* Space needed to save the unix extension: always the size of
-+   reiser4_unix_stat, whatever @inode is. */
-+/* Audited by: green(2002.06.14) */
-+static int save_len_unix_sd(struct inode *inode UNUSED_ARG)
-+{
-+	return sizeof(reiser4_unix_stat);
-+}
-+
-+/* Store uid, gid, the seconds of the three timestamps and either rdev (block
-+   and character devices) or the byte count (everything else) of @inode as a
-+   unix extension at *@area, then advance *@area past it. Always returns 0. */
-+static int save_unix_sd(struct inode *inode, char **area)
-+{
-+	reiser4_unix_stat *sd;
-+
-+	assert("nikita-642", inode != NULL);
-+	assert("nikita-643", area != NULL);
-+	assert("nikita-644", *area != NULL);
-+
-+	sd = (reiser4_unix_stat *)*area;
-+
-+	/* ownership */
-+	put_unaligned(cpu_to_le32(inode->i_uid), &sd->uid);
-+	put_unaligned(cpu_to_le32(inode->i_gid), &sd->gid);
-+
-+	/* timestamps, seconds only */
-+	put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_sec), &sd->atime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_sec), &sd->mtime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_sec), &sd->ctime);
-+
-+	if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
-+		put_unaligned(cpu_to_le64(inode->i_rdev), &sd->u.rdev);
-+	else
-+		put_unaligned(cpu_to_le64((__u64) inode_get_bytes(inode)),
-+			      &sd->u.bytes);
-+	*area += sizeof *sd;
-+	return 0;
-+}
-+
-+/* Load the large-times extension: the nanosecond parts of atime, mtime and
-+   ctime are read from *@area into @inode, then *@area / *@len are advanced
-+   past the extension. Returns -EINVAL via not_enough_space() when it does
-+   not fit into *@len. */
-+static int
-+present_large_times_sd(struct inode *inode, char **area, int *len)
-+{
-+	reiser4_large_times_stat *sd_lt;
-+
-+	if (*len < (int)sizeof(reiser4_large_times_stat))
-+		return not_enough_space(inode, "large times sd");
-+
-+	sd_lt = (reiser4_large_times_stat *)*area;
-+
-+	inode->i_atime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->atime));
-+	inode->i_mtime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->mtime));
-+	inode->i_ctime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->ctime));
-+
-+	move_on(len, area, sizeof *sd_lt);
-+	return 0;
-+}
-+
-+/* Space needed to save the large-times extension: always the size of
-+   reiser4_large_times_stat, whatever @inode is. */
-+static int
-+save_len_large_times_sd(struct inode *inode UNUSED_ARG)
-+{
-+	return sizeof(reiser4_large_times_stat);
-+}
-+
-+/* Store the nanosecond parts of atime, mtime and ctime of @inode as a
-+   large-times extension at *@area and advance *@area past it. Always
-+   returns 0. */
-+static int
-+save_large_times_sd(struct inode *inode, char **area)
-+{
-+	reiser4_large_times_stat *sd;
-+
-+	assert("nikita-2817", inode != NULL);
-+	assert("nikita-2818", area != NULL);
-+	assert("nikita-2819", *area != NULL);
-+
-+	sd = (reiser4_large_times_stat *)*area;
-+
-+	put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_nsec), &sd->atime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_nsec), &sd->mtime);
-+	put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_nsec), &sd->ctime);
-+
-+	*area += sizeof *sd;
-+	return 0;
-+}
-+
-+/* symlink stat data extension */
-+
-+/* Give @inode a private, zero-terminated copy of the first @len bytes of
-+   @target: the copy is hung on inode->i_private and the inode is flagged
-+   REISER4_GENERIC_PTR_USED. Returns 0, or -ENOMEM when the allocation
-+   fails (i_private is NULL in that case). */
-+static int
-+symlink_target_to_inode(struct inode *inode, const char *target, int len)
-+{
-+	char *copy;
-+
-+	assert("vs-845", inode->i_private == NULL);
-+	assert("vs-846", !reiser4_inode_get_flag(inode,
-+						 REISER4_GENERIC_PTR_USED));
-+	/* FIXME-VS: this is prone to deadlock. Not more than other similar
-+	   places, though */
-+	copy = kmalloc((size_t) len + 1, reiser4_ctx_gfp_mask_get());
-+	inode->i_private = copy;
-+	if (copy == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	memcpy(copy, target, (size_t) len);
-+	copy[len] = 0;
-+	reiser4_inode_set_flag(inode, REISER4_GENERIC_PTR_USED);
-+	return 0;
-+}
-+
-+/* Load the symlink extension; this is called on read_inode.
-+
-+   @inode: object being processed; inode->i_size gives the target length
-+   @area:  position in stat-data, advanced past target and its ending 0
-+   @len:   bytes remaining in stat-data, reduced accordingly
-+
-+   The target body found at *@area is copied to the inode with
-+   symlink_target_to_inode().
-+
-+   Returns 0 on success, -EINVAL (via not_enough_space()) when the target
-+   plus its terminator does not fit into the remaining stat-data, -EIO when
-+   the body is not zero terminated, or the error of
-+   symlink_target_to_inode(). */
-+static int present_symlink_sd(struct inode *inode, char **area, int *len)
-+{
-+	int result;
-+	int length;
-+	reiser4_symlink_stat *sd;
-+
-+	length = (int)inode->i_size;
-+	/*
-+	 * *len is number of bytes in stat data item from *area to the end of
-+	 * item. It must be not less than size of symlink + 1 for ending 0,
-+	 * because the byte at offset @length is inspected below. A negative
-+	 * @length (i_size that does not fit into int) is refused as well.
-+	 */
-+	if (length < 0 || length >= *len)
-+		return not_enough_space(inode, "symlink");
-+
-+	if (*(*area + length) != 0) {
-+		warning("vs-840", "Symlink is not zero terminated");
-+		return RETERR(-EIO);
-+	}
-+
-+	sd = (reiser4_symlink_stat *) * area;
-+	result = symlink_target_to_inode(inode, sd->body, length);
-+
-+	move_on(len, area, length + 1);
-+	return result;
-+}
-+
-+/* Space needed to save the symlink extension of @inode: the target, whose
-+   length is inode->i_size, plus one byte for the ending 0. */
-+static int save_len_symlink_sd(struct inode *inode)
-+{
-+	return 1 + inode->i_size;
-+}
-+
-+/* this is called on create and update stat data. Do nothing on update but
-+ update @area
-+
-+ On create (REISER4_GENERIC_PTR_USED not yet set) inode->i_private holds
-+ the target string supplied by the caller: it is replaced by a private
-+ copy made with symlink_target_to_inode() and the target is written,
-+ zero terminated, into the stat-data at *@area.
-+ In both cases *@area is advanced by i_size + 1.
-+ Returns 0 or the error of symlink_target_to_inode(). */
-+static int save_symlink_sd(struct inode *inode, char **area)
-+{
-+ int result;
-+ int length;
-+ reiser4_symlink_stat *sd;
-+
-+ length = (int)inode->i_size;
-+ /* inode->i_size must be set already */
-+ assert("vs-841", length);
-+
-+ result = 0;
-+ sd = (reiser4_symlink_stat *) * area;
-+ if (!reiser4_inode_get_flag(inode, REISER4_GENERIC_PTR_USED)) {
-+ const char *target;
-+
-+ /* i_private must be NULL before symlink_target_to_inode()
-+ is called (it asserts so), hence the hand-over through a
-+ local */
-+ target = (const char *)(inode->i_private);
-+ inode->i_private = NULL;
-+
-+ result = symlink_target_to_inode(inode, target, length);
-+
-+ /* copy symlink to stat data */
-+ /* NOTE(review): this runs even when the copy above failed;
-+ @target itself is still valid then - confirm intended */
-+ memcpy(sd->body, target, (size_t) length);
-+ (*area)[length] = 0;
-+ } else {
-+ /* there is nothing to do in update but move area */
-+ assert("vs-844",
-+ !memcmp(inode->i_private, sd->body,
-+ (size_t) length + 1));
-+ }
-+
-+ *area += (length + 1);
-+ return result;
-+}
-+
-+/* Load the flags extension: inode->i_flags is read from *@area, then *@area
-+   and *@len are advanced past the extension. Returns -EINVAL via
-+   not_enough_space() when the extension does not fit into *@len. */
-+static int present_flags_sd(struct inode *inode, char **area, int *len)
-+{
-+	reiser4_flags_stat *sd;
-+
-+	assert("nikita-645", inode != NULL);
-+	assert("nikita-646", area != NULL);
-+	assert("nikita-647", *area != NULL);
-+	assert("nikita-648", len != NULL);
-+	assert("nikita-649", *len > 0);
-+
-+	if (*len < (int)sizeof(reiser4_flags_stat))
-+		return not_enough_space(inode, "generation and attrs");
-+
-+	sd = (reiser4_flags_stat *)*area;
-+	inode->i_flags = le32_to_cpu(get_unaligned(&sd->flags));
-+	move_on(len, area, sizeof *sd);
-+	return 0;
-+}
-+
-+/* Space needed to save the flags extension: always the size of
-+   reiser4_flags_stat, whatever @inode is. */
-+/* Audited by: green(2002.06.14) */
-+static int save_len_flags_sd(struct inode *inode UNUSED_ARG)
-+{
-+	return sizeof(reiser4_flags_stat);
-+}
-+
-+/* Store inode->i_flags as a flags extension at *@area and advance *@area
-+   past it. Always returns 0. */
-+static int save_flags_sd(struct inode *inode, char **area)
-+{
-+	reiser4_flags_stat *sd;
-+
-+	assert("nikita-650", inode != NULL);
-+	assert("nikita-651", area != NULL);
-+	assert("nikita-652", *area != NULL);
-+
-+	sd = (reiser4_flags_stat *)*area;
-+	*area += sizeof *sd;
-+	put_unaligned(cpu_to_le32(inode->i_flags), &sd->flags);
-+	return 0;
-+}
-+
-+static int absent_plugin_sd(struct inode *inode);
-+/* Load a plugin-set extension: a reiser4_plugin_stat header holding the
-+ number of slots, followed by that many reiser4_plugin_slot records, each
-+ optionally followed by plugin-specific data consumed by the plugin's
-+ ->load() method.
-+
-+ With @is_pset set the plugins go to the inode's plugin set and
-+ plugin_mask, otherwise to its heir set and heir_mask.
-+
-+ Returns 0 on success; -EINVAL for truncated data, an invalid set member,
-+ an unknown plugin id or a member listed twice; otherwise the error of
-+ ->load(), aset_set_unsafe() or absent_plugin_sd(). */
-+static int present_plugin_sd(struct inode *inode /* object being processed */ ,
-+ char **area /* position in stat-data */ ,
-+ int *len /* remaining length */,
-+ int is_pset /* 1 if plugin set, 0 if heir set. */)
-+{
-+ reiser4_plugin_stat *sd;
-+ reiser4_plugin *plugin;
-+ reiser4_inode *info;
-+ int i;
-+ __u16 mask;
-+ int result;
-+ int num_of_plugins;
-+
-+ assert("nikita-653", inode != NULL);
-+ assert("nikita-654", area != NULL);
-+ assert("nikita-655", *area != NULL);
-+ assert("nikita-656", len != NULL);
-+ assert("nikita-657", *len > 0);
-+
-+ if (*len < (int)sizeof(reiser4_plugin_stat))
-+ return not_enough_space(inode, "plugin");
-+
-+ sd = (reiser4_plugin_stat *) * area;
-+ info = reiser4_inode_data(inode);
-+
-+ mask = 0;
-+ num_of_plugins = le16_to_cpu(get_unaligned(&sd->plugins_no));
-+ move_on(len, area, sizeof *sd);
-+ result = 0;
-+ for (i = 0; i < num_of_plugins; ++i) {
-+ reiser4_plugin_slot *slot;
-+ reiser4_plugin_type type;
-+ pset_member memb;
-+
-+ /* the slot is only dereferenced after the length check */
-+ slot = (reiser4_plugin_slot *) * area;
-+ if (*len < (int)sizeof *slot)
-+ return not_enough_space(inode, "additional plugin");
-+
-+ memb = le16_to_cpu(get_unaligned(&slot->pset_memb));
-+ type = aset_member_to_type_unsafe(memb);
-+
-+ if (type == REISER4_PLUGIN_TYPES) {
-+ warning("nikita-3502",
-+ "wrong %s member (%i) for %llu", is_pset ?
-+ "pset" : "hset", memb,
-+ (unsigned long long)get_inode_oid(inode));
-+ return RETERR(-EINVAL);
-+ }
-+ plugin = plugin_by_disk_id(reiser4_tree_by_inode(inode),
-+ type, &slot->id);
-+ if (plugin == NULL)
-+ return unknown_plugin(le16_to_cpu(get_unaligned(&slot->id)), inode);
-+
-+ /* plugin is loaded into inode, mark this into inode's
-+ bitmask of loaded non-standard plugins */
-+ /* NOTE(review): mask is 16 bits wide, so this presumes every
-+ valid memb is below 16 - confirm against PSET_LAST */
-+ if (!(mask & (1 << memb))) {
-+ mask |= (1 << memb);
-+ } else {
-+ warning("nikita-658", "duplicate plugin for %llu",
-+ (unsigned long long)get_inode_oid(inode));
-+ return RETERR(-EINVAL);
-+ }
-+ move_on(len, area, sizeof *slot);
-+ /* load plugin data, if any */
-+ if (plugin->h.pops != NULL && plugin->h.pops->load)
-+ result = plugin->h.pops->load(inode, plugin, area, len);
-+ else
-+ result = aset_set_unsafe(is_pset ? &info->pset :
-+ &info->hset, memb, plugin);
-+ if (result)
-+ return result;
-+ }
-+ if (is_pset) {
-+ /* if object plugin wasn't loaded from stat-data, guess it by
-+ mode bits */
-+ plugin = file_plugin_to_plugin(inode_file_plugin(inode));
-+ if (plugin == NULL)
-+ result = absent_plugin_sd(inode);
-+ info->plugin_mask = mask;
-+ } else
-+ info->heir_mask = mask;
-+
-+ return result;
-+}
-+
-+/* Load a plugin-set extension into the plugin set (pset) of @inode. */
-+static int present_pset_sd(struct inode *inode, char **area, int *len)
-+{
-+	return present_plugin_sd(inode, area, len, 1 /* pset */);
-+}
-+
-+/* Determine object plugin for @inode based on i_mode.
-+
-+   Many objects in a reiser4 file system are handled by standard object
-+   plugins emulating the traditional unix objects (regular file, directory,
-+   symlink, fifo, ...). No plugin id is stored in the stat-data of such
-+   objects; the plugin is derived from the file type encoded in the mode
-+   bits (see stat(2)).
-+
-+   The file plugin of the inode's plugin set is always installed, the
-+   directory plugin only for directories (NULL otherwise). Returns 0, or
-+   -EIO when the mode holds no known file type. */
-+static int
-+guess_plugin_by_mode(struct inode *inode)
-+{
-+	reiser4_inode *info;
-+	int fplug_id = -1;
-+	int dplug_id = -1;
-+
-+	assert("nikita-736", inode != NULL);
-+
-+	switch (inode->i_mode & S_IFMT) {
-+	case S_IFREG:
-+		fplug_id = UNIX_FILE_PLUGIN_ID;
-+		break;
-+	case S_IFDIR:
-+		fplug_id = DIRECTORY_FILE_PLUGIN_ID;
-+		dplug_id = HASHED_DIR_PLUGIN_ID;
-+		break;
-+	case S_IFLNK:
-+		fplug_id = SYMLINK_FILE_PLUGIN_ID;
-+		break;
-+	case S_IFSOCK:
-+	case S_IFBLK:
-+	case S_IFCHR:
-+	case S_IFIFO:
-+		fplug_id = SPECIAL_FILE_PLUGIN_ID;
-+		break;
-+	default:
-+		warning("nikita-737", "wrong file mode: %o", inode->i_mode);
-+		return RETERR(-EIO);
-+	}
-+
-+	info = reiser4_inode_data(inode);
-+	set_plugin(&info->pset, PSET_FILE, (fplug_id >= 0) ?
-+		   plugin_by_id(REISER4_FILE_PLUGIN_TYPE, fplug_id) : NULL);
-+	set_plugin(&info->pset, PSET_DIR, (dplug_id >= 0) ?
-+		   plugin_by_id(REISER4_DIR_PLUGIN_TYPE, dplug_id) : NULL);
-+	return 0;
-+}
-+
-+/* Called when no object plugin came from the stat-data: derive the plugins
-+   of @inode from its mode bits. The result of guess_plugin_by_mode() is
-+   returned as is, i.e. 0 or -EIO for an unrecognized file type.
-+   A more logical but a bit more complex solution for a wrong mode would be
-+   to add a "bad-file plugin". */
-+/* Audited by: green(2002.06.14) */
-+static int absent_plugin_sd(struct inode *inode)
-+{
-+	assert("nikita-659", inode != NULL);
-+
-+	/* FIXME-VS: activate was called here */
-+	return guess_plugin_by_mode(inode);
-+}
-+
-+/* Helper for save_len_plugin_sd(): return @len increased by the space needed
-+   to save @plugin as member @memb of the plugin set (@is_pset != 0) or heir
-+   set of @inode.
-+
-+   Nothing is added for a NULL plugin or for a member whose bit is not set
-+   in the corresponding mask of the inode. Otherwise one slot is added, plus
-+   whatever the plugin's own ->save_len() method reports, if it has one.
-+   No alignment is applied, to stay compatible with save_plug() -edward */
-+/* Audited by: green(2002.06.14) */
-+static int len_for(reiser4_plugin *plugin, struct inode *inode,
-+		   pset_member memb, int len, int is_pset)
-+{
-+	reiser4_inode *info;
-+
-+	assert("nikita-661", inode != NULL);
-+
-+	if (plugin == NULL)
-+		return len;
-+
-+	info = reiser4_inode_data(inode);
-+	if (is_pset) {
-+		if (!(info->plugin_mask & (1 << memb)))
-+			return len;
-+	} else if (!(info->heir_mask & (1 << memb)))
-+		return len;
-+
-+	len += sizeof(reiser4_plugin_slot);
-+	/* non-standard plugin, call method */
-+	if (plugin->h.pops && plugin->h.pops->save_len != NULL)
-+		len += plugin->h.pops->save_len(inode, plugin);
-+	return len;
-+}
-+
-+/* Space needed to save the plugin set (@is_pset != 0) or heir set of @inode:
-+   0 when the corresponding mask is empty, otherwise the extension header
-+   plus the contribution of every set member as computed by len_for(). */
-+static int save_len_plugin_sd(struct inode *inode, int is_pset)
-+{
-+	reiser4_inode *state;
-+	pset_member memb;
-+	int len;
-+
-+	assert("nikita-663", inode != NULL);
-+
-+	state = reiser4_inode_data(inode);
-+
-+	/* common case: no non-standard plugins */
-+	if ((is_pset ? state->plugin_mask : state->heir_mask) == 0)
-+		return 0;
-+
-+	len = sizeof(reiser4_plugin_stat);
-+	for (memb = 0; memb < PSET_LAST; ++memb)
-+		len = len_for(aset_get(is_pset ? state->pset : state->hset,
-+				       memb),
-+			      inode, memb, len, is_pset);
-+
-+	assert("nikita-664", len > (int)sizeof(reiser4_plugin_stat));
-+	return len;
-+}
-+
-+/* Space needed to save the plugin set (pset) of @inode. */
-+static int save_len_pset_sd(struct inode *inode)
-+{
-+	return save_len_plugin_sd(inode, 1 /* pset */);
-+}
-+
-+/* Helper for save_plugin_sd(): save @plugin, member @memb of the plugin set
-+   (@is_pset != 0) or heir set of @inode, at *@area.
-+
-+   Nothing is written for a NULL plugin or for a member whose bit is not
-+   set in the corresponding mask of the inode. Otherwise a slot holding the
-+   member number and the plugin id is written, *@area is advanced, *@count
-+   is incremented and the plugin's ->save() method, if any, appends its own
-+   data. Returns 0 or the result of ->save(). */
-+static int save_plug(reiser4_plugin *plugin, struct inode *inode, int memb,
-+		     char **area, int *count, int is_pset)
-+{
-+	reiser4_plugin_slot *slot;
-+	int fake_len;
-+
-+	assert("nikita-665", inode != NULL);
-+	assert("nikita-666", area != NULL);
-+	assert("nikita-667", *area != NULL);
-+
-+	if (plugin == NULL)
-+		return 0;
-+
-+	if (is_pset) {
-+		if (!(reiser4_inode_data(inode)->plugin_mask & (1 << memb)))
-+			return 0;
-+	} else if (!(reiser4_inode_data(inode)->heir_mask & (1 << memb)))
-+		return 0;
-+
-+	slot = (reiser4_plugin_slot *)*area;
-+	put_unaligned(cpu_to_le16(memb), &slot->pset_memb);
-+	put_unaligned(cpu_to_le16(plugin->h.id), &slot->id);
-+
-+	/* move_on() wants a remaining length; there is none when saving, so
-+	   feed it a dummy that is large enough */
-+	fake_len = (int)0xffff;
-+	move_on(&fake_len, area, sizeof *slot);
-+	++*count;
-+
-+	if (plugin->h.pops != NULL && plugin->h.pops->save != NULL)
-+		return plugin->h.pops->save(inode, plugin, area);
-+	return 0;
-+}
-+
-+/* Save the plugin set (@is_pset != 0) or heir set of @inode at *@area.
-+
-+   Nothing is written when the corresponding mask of the inode is empty.
-+   Otherwise room for the extension header is skipped, every set member is
-+   offered to save_plug() until one fails, and finally the number of
-+   plugins actually saved is stored in the header. Returns 0 or the first
-+   error from save_plug(). */
-+static int save_plugin_sd(struct inode *inode, char **area, int is_pset)
-+{
-+	reiser4_plugin_stat *sd;
-+	reiser4_inode *state;
-+	pset_member memb;
-+	int saved;
-+	int fake_len;
-+	int result;
-+
-+	assert("nikita-669", inode != NULL);
-+	assert("nikita-670", area != NULL);
-+	assert("nikita-671", *area != NULL);
-+
-+	state = reiser4_inode_data(inode);
-+	if ((is_pset ? state->plugin_mask : state->heir_mask) == 0)
-+		return 0;
-+
-+	sd = (reiser4_plugin_stat *)*area;
-+	fake_len = (int)0xffff;
-+	move_on(&fake_len, area, sizeof *sd);
-+
-+	saved = 0;
-+	result = 0;
-+	for (memb = 0; result == 0 && memb < PSET_LAST; ++memb)
-+		result = save_plug(aset_get(is_pset ? state->pset :
-+					    state->hset, memb),
-+				   inode, memb, area, &saved, is_pset);
-+
-+	/* the header is filled in even when saving stopped early */
-+	put_unaligned(cpu_to_le16((__u16)saved), &sd->plugins_no);
-+	return result;
-+}
-+
-+/* Save the plugin set (pset) of @inode at *@area. */
-+static int save_pset_sd(struct inode *inode, char **area)
-+{
-+	return save_plugin_sd(inode, area, 1 /* pset */);
-+}
-+
-+/* Load a plugin-set extension into the heir set (hset) of @inode. */
-+static int present_hset_sd(struct inode *inode, char **area, int *len)
-+{
-+	return present_plugin_sd(inode, area, len, 0 /* hset */);
-+}
-+
-+/* Space needed to save the heir set (hset) of @inode. */
-+static int save_len_hset_sd(struct inode *inode)
-+{
-+	return save_len_plugin_sd(inode, 0 /* hset */);
-+}
-+
-+/* Save the heir set (hset) of @inode at *@area. */
-+static int save_hset_sd(struct inode *inode, char **area)
-+{
-+	return save_plugin_sd(inode, area, 0 /* hset */);
-+}
-+
-+/* Helper for the crypto stat-data extension: build a crypto info from the
-+   on-disk record @sd and attach it to @inode.
-+
-+   The key size and the key id (as many bytes as the fipsize of the inode's
-+   digest plugin) are taken from @sd; no secret key is loaded. On success
-+   the inode is flagged REISER4_CRYPTO_STAT_LOADED and 0 is returned,
-+   otherwise the error encoded by reiser4_alloc_crypto_info(). */
-+static int extract_crypto_info(struct inode *inode, reiser4_crypto_stat *sd)
-+{
-+	struct reiser4_crypto_info *cinfo;
-+
-+	assert("edward-11", !inode_crypto_info(inode));
-+	assert("edward-1413",
-+	       !reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED));
-+
-+	/* create and attach a crypto-stat without secret key loaded */
-+	cinfo = reiser4_alloc_crypto_info(inode);
-+	if (IS_ERR(cinfo))
-+		return PTR_ERR(cinfo);
-+
-+	memcpy(cinfo->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize);
-+	cinfo->keysize = le16_to_cpu(get_unaligned(&sd->keysize));
-+
-+	reiser4_attach_crypto_info(inode, cinfo);
-+	reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
-+	return 0;
-+}
-+
-+/* crypto stat-data extension */
-+
-+static int present_crypto_sd(struct inode *inode, char **area, int *len)
-+{
-+ int result;
-+ reiser4_crypto_stat *sd;
-+ digest_plugin *dplug = inode_digest_plugin(inode);
-+
-+ assert("edward-06", dplug != NULL);
-+ assert("edward-684", dplug->fipsize);
-+ assert("edward-07", area != NULL);
-+ assert("edward-08", *area != NULL);
-+ assert("edward-09", len != NULL);
-+ assert("edward-10", *len > 0);
-+
-+ if (*len < (int)sizeof(reiser4_crypto_stat)) {
-+ return not_enough_space(inode, "crypto-sd");
-+ }
-+ /* *len is number of bytes in stat data item from *area to the end of
-+ item. It must be not less than size of this extension */
-+ assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len);
-+
-+ sd = (reiser4_crypto_stat *) * area;
-+ result = extract_crypto_info(inode, sd);
-+ move_on(len, area, sizeof(*sd) + dplug->fipsize);
-+
-+ return result;
-+}
-+
-+static int save_len_crypto_sd(struct inode *inode)
-+{
-+ return sizeof(reiser4_crypto_stat) +
-+ inode_digest_plugin(inode)->fipsize;
-+}
-+
-+static int save_crypto_sd(struct inode *inode, char **area)
-+{
-+ int result = 0;
-+ reiser4_crypto_stat *sd;
-+ struct reiser4_crypto_info * info = inode_crypto_info(inode);
-+ digest_plugin *dplug = inode_digest_plugin(inode);
-+
-+ assert("edward-12", dplug != NULL);
-+ assert("edward-13", area != NULL);
-+ assert("edward-14", *area != NULL);
-+ assert("edward-15", info != NULL);
-+ assert("edward-1414", info->keyid != NULL);
-+ assert("edward-1415", info->keysize != 0);
-+ assert("edward-76", reiser4_inode_data(inode) != NULL);
-+
-+ if (!reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) {
-+ /* file is just created */
-+ sd = (reiser4_crypto_stat *) *area;
-+ /* copy everything but private key to the disk stat-data */
-+ put_unaligned(cpu_to_le16(info->keysize), &sd->keysize);
-+ memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize);
-+ reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
-+ }
-+ *area += (sizeof(*sd) + dplug->fipsize);
-+ return result;
-+}
-+
-+static int eio(struct inode *inode, char **area, int *len)
-+{
-+ return RETERR(-EIO);
-+}
-+
-+sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION] = {
-+ [LIGHT_WEIGHT_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = LIGHT_WEIGHT_STAT,
-+ .pops = NULL,
-+ .label = "light-weight sd",
-+ .desc = "sd for light-weight files",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_lw_sd,
-+ .absent = NULL,
-+ .save_len = save_len_lw_sd,
-+ .save = save_lw_sd,
-+ .alignment = 8
-+ },
-+ [UNIX_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = UNIX_STAT,
-+ .pops = NULL,
-+ .label = "unix-sd",
-+ .desc = "unix stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_unix_sd,
-+ .absent = absent_unix_sd,
-+ .save_len = save_len_unix_sd,
-+ .save = save_unix_sd,
-+ .alignment = 8
-+ },
-+ [LARGE_TIMES_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = LARGE_TIMES_STAT,
-+ .pops = NULL,
-+ .label = "64time-sd",
-+ .desc = "nanosecond resolution for times",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_large_times_sd,
-+ .absent = NULL,
-+ .save_len = save_len_large_times_sd,
-+ .save = save_large_times_sd,
-+ .alignment = 8
-+ },
-+ [SYMLINK_STAT] = {
-+ /* stat data of symlink has this extension */
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = SYMLINK_STAT,
-+ .pops = NULL,
-+ .label = "symlink-sd",
-+ .desc =
-+ "stat data is appended with symlink name",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_symlink_sd,
-+ .absent = NULL,
-+ .save_len = save_len_symlink_sd,
-+ .save = save_symlink_sd,
-+ .alignment = 8
-+ },
-+ [PLUGIN_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = PLUGIN_STAT,
-+ .pops = NULL,
-+ .label = "plugin-sd",
-+ .desc = "plugin stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_pset_sd,
-+ .absent = absent_plugin_sd,
-+ .save_len = save_len_pset_sd,
-+ .save = save_pset_sd,
-+ .alignment = 8
-+ },
-+ [HEIR_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = HEIR_STAT,
-+ .pops = NULL,
-+ .label = "heir-plugin-sd",
-+ .desc = "heir plugin stat-data fields",
-+ .linkage = {NULL,NULL}
-+ },
-+ .present = present_hset_sd,
-+ .absent = NULL,
-+ .save_len = save_len_hset_sd,
-+ .save = save_hset_sd,
-+ .alignment = 8
-+ },
-+ [FLAGS_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = FLAGS_STAT,
-+ .pops = NULL,
-+ .label = "flags-sd",
-+ .desc = "inode bit flags",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = present_flags_sd,
-+ .absent = NULL,
-+ .save_len = save_len_flags_sd,
-+ .save = save_flags_sd,
-+ .alignment = 8
-+ },
-+ [CAPABILITIES_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = CAPABILITIES_STAT,
-+ .pops = NULL,
-+ .label = "capabilities-sd",
-+ .desc = "capabilities",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = eio,
-+ .absent = NULL,
-+ .save_len = save_len_flags_sd,
-+ .save = save_flags_sd,
-+ .alignment = 8
-+ },
-+ [CRYPTO_STAT] = {
-+ .h = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .id = CRYPTO_STAT,
-+ .pops = NULL,
-+ .label = "crypto-sd",
-+ .desc = "secret key size and id",
-+ .linkage = {NULL, NULL}
-+ },
-+ .present = present_crypto_sd,
-+ .absent = NULL,
-+ .save_len = save_len_crypto_sd,
-+ .save = save_crypto_sd,
-+ .alignment = 8
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/static_stat.h linux-2.6.24/fs/reiser4/plugin/item/static_stat.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/static_stat.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/static_stat.h 2008-01-25 11:39:07.028231388 +0300
-@@ -0,0 +1,224 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* This describes the static_stat item, used to hold all information needed by the stat() syscall.
-+
-+In the case where each file has not less than the fields needed by the
-+stat() syscall, it is more compact to store those fields in this
-+struct.
-+
-+If this item does not exist, then all stats are dynamically resolved.
-+At the moment, we either resolve all stats dynamically or all of them
-+statically. If you think this is not fully optimal, and the rest of
-+reiser4 is working, then fix it...:-)
-+
-+*/
-+
-+#if !defined( __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ )
-+#define __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+
-+#include <linux/fs.h> /* for struct inode */
-+
-+/* Stat data layout: goals and implementation.
-+
-+ We want to be able to have lightweight files which have complete flexibility in what semantic metadata is attached to
-+ them, including not having semantic metadata attached to them.
-+
-+ There is one problem with doing that, which is that if in fact you have exactly the same metadata for most files you
-+ want to store, then it takes more space to store that metadata in a dynamically sized structure than in a statically
-+ sized structure because the statically sized structure knows without recording it what the names and lengths of the
-+ attributes are.
-+
-+ This leads to a natural compromise, which is to special case those files which have simply the standard unix file
-+ attributes, and only employ the full dynamic stat data mechanism for those files that differ from the standard unix
-+ file in their use of file attributes.
-+
-+ Yet this compromise deserves to be compromised a little.
-+
-+ We accommodate the case where you have no more than the standard unix file attributes by using an "extension
-+ bitmask": each bit in it indicates presence or absence of or particular stat data extension (see sd_ext_bits enum).
-+
-+ If the first bit of the extension bitmask bit is 0, we have light-weight file whose attributes are either inherited
-+ from parent directory (as uid, gid) or initialised to some sane values.
-+
-+ To capitalize on existing code infrastructure, extensions are
-+ implemented as plugins of type REISER4_SD_EXT_PLUGIN_TYPE.
-+ Each stat-data extension plugin implements four methods:
-+
-+ ->present() called by sd_load() when this extension is found in stat-data
-+ ->absent() called by sd_load() when this extension is not found in stat-data
-+ ->save_len() called by sd_len() to calculate total length of stat-data
-+ ->save() called by sd_save() to store extension data into stat-data
-+
-+ Implementation is in fs/reiser4/plugin/item/static_stat.c
-+*/
-+
-+/* stat-data extension. Please order this by presumed frequency of use */
-+typedef enum {
-+ /* support for light-weight files */
-+ LIGHT_WEIGHT_STAT,
-+ /* data required to implement unix stat(2) call. Layout is in
-+ reiser4_unix_stat. If this is not present, file is light-weight */
-+ UNIX_STAT,
-+ /* this contains additional set of 32bit [anc]time fields to implement
-+ nanosecond resolution. Layout is in reiser4_large_times_stat. Usage
-+ if this extension is governed by 32bittimes mount option. */
-+ LARGE_TIMES_STAT,
-+ /* stat data has link name included */
-+ SYMLINK_STAT,
-+ /* on-disk slots of non-standard plugins for main plugin table
-+ (@reiser4_inode->pset), that is, plugins that cannot be deduced
-+ from file mode bits), for example, aggregation, interpolation etc. */
-+ PLUGIN_STAT,
-+ /* this extension contains persistent inode flags. These flags are
-+ single bits: immutable, append, only, etc. Layout is in
-+ reiser4_flags_stat. */
-+ FLAGS_STAT,
-+ /* this extension contains capabilities sets, associated with this
-+ file. Layout is in reiser4_capabilities_stat */
-+ CAPABILITIES_STAT,
-+ /* this extension contains size and public id of the secret key.
-+ Layout is in reiser4_crypto_stat */
-+ CRYPTO_STAT,
-+ /* on-disk slots of non-default plugins for inheritance, which
-+ are extracted to special plugin table (@reiser4_inode->hset).
-+ By default, children of the object will inherit plugins from
-+ its main plugin table (pset). */
-+ HEIR_STAT,
-+ LAST_SD_EXTENSION,
-+ /*
-+ * init_inode_static_sd() iterates over extension mask until all
-+ * non-zero bits are processed. This means, that neither ->present(),
-+ * nor ->absent() methods will be called for stat-data extensions that
-+ * go after last present extension. But some basic extensions, we want
-+ * either ->absent() or ->present() method to be called, because these
-+ * extensions set up something in inode even when they are not
-+ * present. This is what LAST_IMPORTANT_SD_EXTENSION is for: for all
-+ * extensions before and including LAST_IMPORTANT_SD_EXTENSION either
-+ * ->present(), or ->absent() method will be called, independently of
-+ * what other extensions are present.
-+ */
-+ LAST_IMPORTANT_SD_EXTENSION = PLUGIN_STAT
-+} sd_ext_bits;
-+
-+/* minimal stat-data. This allows to support light-weight files. */
-+typedef struct reiser4_stat_data_base {
-+ /* 0 */ __le16 extmask;
-+ /* 2 */
-+} PACKED reiser4_stat_data_base;
-+
-+typedef struct reiser4_light_weight_stat {
-+ /* 0 */ __le16 mode;
-+ /* 2 */ __le32 nlink;
-+ /* 6 */ __le64 size;
-+ /* size in bytes */
-+ /* 14 */
-+} PACKED reiser4_light_weight_stat;
-+
-+typedef struct reiser4_unix_stat {
-+ /* owner id */
-+ /* 0 */ __le32 uid;
-+ /* group id */
-+ /* 4 */ __le32 gid;
-+ /* access time */
-+ /* 8 */ __le32 atime;
-+ /* modification time */
-+ /* 12 */ __le32 mtime;
-+ /* change time */
-+ /* 16 */ __le32 ctime;
-+ union {
-+ /* minor:major for device files */
-+ /* 20 */ __le64 rdev;
-+ /* bytes used by file */
-+ /* 20 */ __le64 bytes;
-+ } u;
-+ /* 28 */
-+} PACKED reiser4_unix_stat;
-+
-+/* symlink stored as part of inode */
-+typedef struct reiser4_symlink_stat {
-+ char body[0];
-+} PACKED reiser4_symlink_stat;
-+
-+typedef struct reiser4_plugin_slot {
-+ /* 0 */ __le16 pset_memb;
-+ /* 2 */ __le16 id;
-+ /* 4 *//* here plugin stores its persistent state */
-+} PACKED reiser4_plugin_slot;
-+
-+/* stat-data extension for files with non-standard plugin. */
-+typedef struct reiser4_plugin_stat {
-+ /* number of additional plugins, associated with this object */
-+ /* 0 */ __le16 plugins_no;
-+ /* 2 */ reiser4_plugin_slot slot[0];
-+ /* 2 */
-+} PACKED reiser4_plugin_stat;
-+
-+/* stat-data extension for inode flags. Currently it is just fixed-width 32
-+ * bit mask. If need arise, this can be replaced with variable width
-+ * bitmask. */
-+typedef struct reiser4_flags_stat {
-+ /* 0 */ __le32 flags;
-+ /* 4 */
-+} PACKED reiser4_flags_stat;
-+
-+typedef struct reiser4_capabilities_stat {
-+ /* 0 */ __le32 effective;
-+ /* 8 */ __le32 permitted;
-+ /* 16 */
-+} PACKED reiser4_capabilities_stat;
-+
-+typedef struct reiser4_cluster_stat {
-+/* this defines cluster size (an attribute of cryptcompress objects) as PAGE_SIZE << cluster shift */
-+ /* 0 */ d8 cluster_shift;
-+ /* 1 */
-+} PACKED reiser4_cluster_stat;
-+
-+typedef struct reiser4_crypto_stat {
-+ /* secret key size, bits */
-+ /* 0 */ d16 keysize;
-+ /* secret key id */
-+ /* 2 */ d8 keyid[0];
-+ /* 2 */
-+} PACKED reiser4_crypto_stat;
-+
-+typedef struct reiser4_large_times_stat {
-+ /* access time */
-+ /* 0 */ d32 atime;
-+ /* modification time */
-+ /* 4 */ d32 mtime;
-+ /* change time */
-+ /* 8 */ d32 ctime;
-+ /* 12 */
-+} PACKED reiser4_large_times_stat;
-+
-+/* this structure is filled by sd_item_stat */
-+typedef struct sd_stat {
-+ int dirs;
-+ int files;
-+ int others;
-+} sd_stat;
-+
-+/* plugin->item.common.* */
-+extern void print_sd(const char *prefix, coord_t * coord);
-+extern void item_stat_static_sd(const coord_t * coord, void *vp);
-+
-+/* plugin->item.s.sd.* */
-+extern int init_inode_static_sd(struct inode *inode, char *sd, int len);
-+extern int save_len_static_sd(struct inode *inode);
-+extern int save_static_sd(struct inode *inode, char **area);
-+
-+/* __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/tail.c linux-2.6.24/fs/reiser4/plugin/item/tail.c
---- linux-2.6.24.orig/fs/reiser4/plugin/item/tail.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/tail.c 2008-01-25 11:40:16.698169785 +0300
-@@ -0,0 +1,808 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "item.h"
-+#include "../../inode.h"
-+#include "../../page_cache.h"
-+#include "../../carry.h"
-+#include "../../vfs_ops.h"
-+
-+#include <linux/quotaops.h>
-+#include <asm/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/writeback.h>
-+
-+/* plugin->u.item.b.max_key_inside */
-+reiser4_key *max_key_inside_tail(const coord_t *coord, reiser4_key *key)
-+{
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, get_key_offset(reiser4_max_key()));
-+ return key;
-+}
-+
-+/* plugin->u.item.b.can_contain_key */
-+int can_contain_key_tail(const coord_t *coord, const reiser4_key *key,
-+ const reiser4_item_data *data)
-+{
-+ reiser4_key item_key;
-+
-+ if (item_plugin_by_coord(coord) != data->iplug)
-+ return 0;
-+
-+ item_key_by_coord(coord, &item_key);
-+ if (get_key_locality(key) != get_key_locality(&item_key) ||
-+ get_key_objectid(key) != get_key_objectid(&item_key))
-+ return 0;
-+
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.mergeable
-+ first item is of tail type */
-+/* Audited by: green(2002.06.14) */
-+int mergeable_tail(const coord_t *p1, const coord_t *p2)
-+{
-+ reiser4_key key1, key2;
-+
-+ assert("vs-535", plugin_of_group(item_plugin_by_coord(p1),
-+ UNIX_FILE_METADATA_ITEM_TYPE));
-+ assert("vs-365", item_id_by_coord(p1) == FORMATTING_ID);
-+
-+ if (item_id_by_coord(p2) != FORMATTING_ID) {
-+ /* second item is of another type */
-+ return 0;
-+ }
-+
-+ item_key_by_coord(p1, &key1);
-+ item_key_by_coord(p2, &key2);
-+ if (get_key_locality(&key1) != get_key_locality(&key2) ||
-+ get_key_objectid(&key1) != get_key_objectid(&key2)
-+ || get_key_type(&key1) != get_key_type(&key2)) {
-+ /* items of different objects */
-+ return 0;
-+ }
-+ if (get_key_offset(&key1) + nr_units_tail(p1) != get_key_offset(&key2)) {
-+ /* not adjacent items */
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* plugin->u.item.b.print
-+ plugin->u.item.b.check */
-+
-+/* plugin->u.item.b.nr_units */
-+pos_in_node_t nr_units_tail(const coord_t * coord)
-+{
-+ return item_length_by_coord(coord);
-+}
-+
-+/* plugin->u.item.b.lookup */
-+lookup_result
-+lookup_tail(const reiser4_key * key, lookup_bias bias, coord_t * coord)
-+{
-+ reiser4_key item_key;
-+ __u64 lookuped, offset;
-+ unsigned nr_units;
-+
-+ item_key_by_coord(coord, &item_key);
-+ offset = get_key_offset(item_key_by_coord(coord, &item_key));
-+ nr_units = nr_units_tail(coord);
-+
-+ /* key we are looking for must be greater than key of item @coord */
-+ assert("vs-416", keygt(key, &item_key));
-+
-+ /* offset we are looking for */
-+ lookuped = get_key_offset(key);
-+
-+ if (lookuped >= offset && lookuped < offset + nr_units) {
-+ /* byte we are looking for is in this item */
-+ coord->unit_pos = lookuped - offset;
-+ coord->between = AT_UNIT;
-+ return CBK_COORD_FOUND;
-+ }
-+
-+ /* set coord after last unit */
-+ coord->unit_pos = nr_units - 1;
-+ coord->between = AFTER_UNIT;
-+ return bias ==
-+ FIND_MAX_NOT_MORE_THAN ? CBK_COORD_FOUND : CBK_COORD_NOTFOUND;
-+}
-+
-+/* plugin->u.item.b.paste */
-+int
-+paste_tail(coord_t *coord, reiser4_item_data *data,
-+ carry_plugin_info *info UNUSED_ARG)
-+{
-+ unsigned old_item_length;
-+ char *item;
-+
-+ /* length the item had before resizing has been performed */
-+ old_item_length = item_length_by_coord(coord) - data->length;
-+
-+ /* tail items never get pasted in the middle */
-+ assert("vs-363",
-+ (coord->unit_pos == 0 && coord->between == BEFORE_UNIT) ||
-+ (coord->unit_pos == old_item_length - 1 &&
-+ coord->between == AFTER_UNIT) ||
-+ (coord->unit_pos == 0 && old_item_length == 0
-+ && coord->between == AT_UNIT));
-+
-+ item = item_body_by_coord(coord);
-+ if (coord->unit_pos == 0)
-+ /* make space for pasted data when pasting at the beginning of
-+ the item */
-+ memmove(item + data->length, item, old_item_length);
-+
-+ if (coord->between == AFTER_UNIT)
-+ coord->unit_pos++;
-+
-+ if (data->data) {
-+ assert("vs-554", data->user == 0 || data->user == 1);
-+ if (data->user) {
-+ assert("nikita-3035", reiser4_schedulable());
-+ /* copy from user space */
-+ if (__copy_from_user(item + coord->unit_pos,
-+ (const char __user *)data->data,
-+ (unsigned)data->length))
-+ return RETERR(-EFAULT);
-+ } else
-+ /* copy from kernel space */
-+ memcpy(item + coord->unit_pos, data->data,
-+ (unsigned)data->length);
-+ } else {
-+ memset(item + coord->unit_pos, 0, (unsigned)data->length);
-+ }
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.fast_paste */
-+
-+/* plugin->u.item.b.can_shift
-+ number of units is returned via return value, number of bytes via @size. For
-+ tail items they coincide */
-+int
-+can_shift_tail(unsigned free_space, coord_t * source UNUSED_ARG,
-+ znode * target UNUSED_ARG, shift_direction direction UNUSED_ARG,
-+ unsigned *size, unsigned want)
-+{
-+ /* make sure that that we do not want to shift more than we have */
-+ assert("vs-364", want > 0
-+ && want <= (unsigned)item_length_by_coord(source));
-+
-+ *size = min(want, free_space);
-+ return *size;
-+}
-+
-+/* plugin->u.item.b.copy_units */
-+void
-+copy_units_tail(coord_t * target, coord_t * source,
-+ unsigned from, unsigned count,
-+ shift_direction where_is_free_space,
-+ unsigned free_space UNUSED_ARG)
-+{
-+ /* make sure that item @target is expanded already */
-+ assert("vs-366", (unsigned)item_length_by_coord(target) >= count);
-+ assert("vs-370", free_space >= count);
-+
-+ if (where_is_free_space == SHIFT_LEFT) {
-+ /* append item @target with @count first bytes of @source */
-+ assert("vs-365", from == 0);
-+
-+ memcpy((char *)item_body_by_coord(target) +
-+ item_length_by_coord(target) - count,
-+ (char *)item_body_by_coord(source), count);
-+ } else {
-+ /* target item is moved to right already */
-+ reiser4_key key;
-+
-+ assert("vs-367",
-+ (unsigned)item_length_by_coord(source) == from + count);
-+
-+ memcpy((char *)item_body_by_coord(target),
-+ (char *)item_body_by_coord(source) + from, count);
-+
-+ /* new units are inserted before first unit in an item,
-+ therefore, we have to update item key */
-+ item_key_by_coord(source, &key);
-+ set_key_offset(&key, get_key_offset(&key) + from);
-+
-+ node_plugin_by_node(target->node)->update_item_key(target, &key,
-+ NULL /*info */);
-+ }
-+}
-+
-+/* plugin->u.item.b.create_hook */
-+
-+/* item_plugin->b.kill_hook
-+ this is called when @count units starting from @from-th one are going to be removed
-+ */
-+int
-+kill_hook_tail(const coord_t * coord, pos_in_node_t from,
-+ pos_in_node_t count, struct carry_kill_data *kdata)
-+{
-+ reiser4_key key;
-+ loff_t start, end;
-+
-+ assert("vs-1577", kdata);
-+ assert("vs-1579", kdata->inode);
-+
-+ item_key_by_coord(coord, &key);
-+ start = get_key_offset(&key) + from;
-+ end = start + count;
-+ fake_kill_hook_tail(kdata->inode, start, end, kdata->params.truncate);
-+ return 0;
-+}
-+
-+/* plugin->u.item.b.shift_hook */
-+
-+/* helper for kill_units_tail and cut_units_tail */
-+static int
-+do_cut_or_kill(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ pos_in_node_t count;
-+
-+ /* this method is only called to remove part of item */
-+ assert("vs-374", (to - from + 1) < item_length_by_coord(coord));
-+ /* tails items are never cut from the middle of an item */
-+ assert("vs-396", ergo(from != 0, to == coord_last_unit_pos(coord)));
-+ assert("vs-1558", ergo(from == 0, to < coord_last_unit_pos(coord)));
-+
-+ count = to - from + 1;
-+
-+ if (smallest_removed) {
-+ /* store smallest key removed */
-+ item_key_by_coord(coord, smallest_removed);
-+ set_key_offset(smallest_removed,
-+ get_key_offset(smallest_removed) + from);
-+ }
-+ if (new_first) {
-+ /* head of item is cut */
-+ assert("vs-1529", from == 0);
-+
-+ item_key_by_coord(coord, new_first);
-+ set_key_offset(new_first,
-+ get_key_offset(new_first) + from + count);
-+ }
-+
-+ if (REISER4_DEBUG)
-+ memset((char *)item_body_by_coord(coord) + from, 0, count);
-+ return count;
-+}
-+
-+/* plugin->u.item.b.cut_units */
-+int
-+cut_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *cdata UNUSED_ARG,
-+ reiser4_key * smallest_removed, reiser4_key * new_first)
-+{
-+ return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.kill_units */
-+int
-+kill_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *kdata, reiser4_key * smallest_removed,
-+ reiser4_key * new_first)
-+{
-+ kill_hook_tail(coord, from, to - from + 1, kdata);
-+ return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
-+}
-+
-+/* plugin->u.item.b.unit_key */
-+reiser4_key *unit_key_tail(const coord_t * coord, reiser4_key * key)
-+{
-+ assert("vs-375", coord_is_existing_unit(coord));
-+
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, (get_key_offset(key) + coord->unit_pos));
-+
-+ return key;
-+}
-+
-+/* plugin->u.item.b.estimate
-+ plugin->u.item.b.item_data_by_flow */
-+
-+/* tail redpage function. It is called from readpage_tail(). */
-+static int do_readpage_tail(uf_coord_t *uf_coord, struct page *page)
-+{
-+ tap_t tap;
-+ int result;
-+ coord_t coord;
-+ lock_handle lh;
-+ int count, mapped;
-+ struct inode *inode;
-+ char *pagedata;
-+
-+ /* saving passed coord in order to do not move it by tap. */
-+ init_lh(&lh);
-+ copy_lh(&lh, uf_coord->lh);
-+ inode = page->mapping->host;
-+ coord_dup(&coord, &uf_coord->coord);
-+
-+ reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
-+
-+ if ((result = reiser4_tap_load(&tap)))
-+ goto out_tap_done;
-+
-+ /* lookup until page is filled up. */
-+ for (mapped = 0; mapped < PAGE_CACHE_SIZE; ) {
-+ /* number of bytes to be copied to page */
-+ count = item_length_by_coord(&coord) - coord.unit_pos;
-+ if (count > PAGE_CACHE_SIZE - mapped)
-+ count = PAGE_CACHE_SIZE - mapped;
-+
-+ /* attach @page to address space and get data address */
-+ pagedata = kmap_atomic(page, KM_USER0);
-+
-+ /* copy tail item to page */
-+ memcpy(pagedata + mapped,
-+ ((char *)item_body_by_coord(&coord) + coord.unit_pos),
-+ count);
-+ mapped += count;
-+
-+ flush_dcache_page(page);
-+
-+ /* dettach page from address space */
-+ kunmap_atomic(pagedata, KM_USER0);
-+
-+ /* Getting next tail item. */
-+ if (mapped < PAGE_CACHE_SIZE) {
-+ /*
-+ * unlock page in order to avoid keep it locked
-+ * during tree lookup, which takes long term locks
-+ */
-+ unlock_page(page);
-+
-+ /* getting right neighbour. */
-+ result = go_dir_el(&tap, RIGHT_SIDE, 0);
-+
-+ /* lock page back */
-+ lock_page(page);
-+ if (PageUptodate(page)) {
-+ /*
-+ * another thread read the page, we have
-+ * nothing to do
-+ */
-+ result = 0;
-+ goto out_unlock_page;
-+ }
-+
-+ if (result) {
-+ if (result == -E_NO_NEIGHBOR) {
-+ /*
-+ * rigth neighbor is not a formatted
-+ * node
-+ */
-+ result = 0;
-+ goto done;
-+ } else {
-+ goto out_tap_relse;
-+ }
-+ } else {
-+ if (!inode_file_plugin(inode)->
-+ owns_item(inode, &coord)) {
-+ /* item of another file is found */
-+ result = 0;
-+ goto done;
-+ }
-+ }
-+ }
-+ }
-+
-+ done:
-+ if (mapped != PAGE_CACHE_SIZE)
-+ zero_user_page(page, mapped, PAGE_CACHE_SIZE - mapped,
-+ KM_USER0);
-+ SetPageUptodate(page);
-+ out_unlock_page:
-+ unlock_page(page);
-+ out_tap_relse:
-+ reiser4_tap_relse(&tap);
-+ out_tap_done:
-+ reiser4_tap_done(&tap);
-+ return result;
-+}
-+
-+/*
-+ plugin->s.file.readpage
-+ reiser4_read->unix_file_read->page_cache_readahead->reiser4_readpage->unix_file_readpage->readpage_tail
-+ or
-+ filemap_nopage->reiser4_readpage->readpage_unix_file->->readpage_tail
-+
-+ At the beginning: coord->node is read locked, zloaded, page is locked, coord is set to existing unit inside of tail
-+ item. */
-+int readpage_tail(void *vp, struct page *page)
-+{
-+ uf_coord_t *uf_coord = vp;
-+ ON_DEBUG(coord_t * coord = &uf_coord->coord);
-+ ON_DEBUG(reiser4_key key);
-+
-+ assert("umka-2515", PageLocked(page));
-+ assert("umka-2516", !PageUptodate(page));
-+ assert("umka-2517", !jprivate(page) && !PagePrivate(page));
-+ assert("umka-2518", page->mapping && page->mapping->host);
-+
-+ assert("umka-2519", znode_is_loaded(coord->node));
-+ assert("umka-2520", item_is_tail(coord));
-+ assert("umka-2521", coord_is_existing_unit(coord));
-+ assert("umka-2522", znode_is_rlocked(coord->node));
-+ assert("umka-2523",
-+ page->mapping->host->i_ino ==
-+ get_key_objectid(item_key_by_coord(coord, &key)));
-+
-+ return do_readpage_tail(uf_coord, page);
-+}
-+
-+/**
-+ * overwrite_tail
-+ * @flow:
-+ * @coord:
-+ *
-+ * Overwrites tail item or its part by user data. Returns number of bytes
-+ * written or error code.
-+ */
-+static int overwrite_tail(flow_t *flow, coord_t *coord)
-+{
-+ unsigned count;
-+
-+ assert("vs-570", flow->user == 1);
-+ assert("vs-946", flow->data);
-+ assert("vs-947", coord_is_existing_unit(coord));
-+ assert("vs-948", znode_is_write_locked(coord->node));
-+ assert("nikita-3036", reiser4_schedulable());
-+
-+ count = item_length_by_coord(coord) - coord->unit_pos;
-+ if (count > flow->length)
-+ count = flow->length;
-+
-+ if (__copy_from_user((char *)item_body_by_coord(coord) + coord->unit_pos,
-+ (const char __user *)flow->data, count))
-+ return RETERR(-EFAULT);
-+
-+ znode_make_dirty(coord->node);
-+ return count;
-+}
-+
-+/**
-+ * insert_first_tail
-+ * @inode:
-+ * @flow:
-+ * @coord:
-+ * @lh:
-+ *
-+ * Returns number of bytes written or error code.
-+ */
-+static ssize_t insert_first_tail(struct inode *inode, flow_t *flow,
-+ coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ loff_t to_write;
-+ struct unix_file_info *uf_info;
-+
-+ if (get_key_offset(&flow->key) != 0) {
-+ /*
-+ * file is empty and we have to write not to the beginning of
-+ * file. Create a hole at the beginning of file. On success
-+ * insert_flow returns 0 as number of written bytes which is
-+ * what we have to return on padding a file with holes
-+ */
-+ flow->data = NULL;
-+ flow->length = get_key_offset(&flow->key);
-+ set_key_offset(&flow->key, 0);
-+ /*
-+ * holes in files built of tails are stored just like if there
-+ * were real data which are all zeros. Therefore we have to
-+ * allocate quota here as well
-+ */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+
-+ uf_info = unix_file_inode_data(inode);
-+
-+ /*
-+ * first item insertion is only possible when writing to empty
-+ * file or performing tail conversion
-+ */
-+ assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
-+ (reiser4_inode_get_flag(inode,
-+ REISER4_PART_MIXED) &&
-+ reiser4_inode_get_flag(inode,
-+ REISER4_PART_IN_CONV))));
-+ /* if file was empty - update its state */
-+ if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
-+ uf_info->container = UF_CONTAINER_TAILS;
-+ return result;
-+ }
-+
-+ /* check quota before appending data */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+
-+ to_write = flow->length;
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return (to_write - flow->length) ? (to_write - flow->length) : result;
-+}
-+
-+/**
-+ * append_tail
-+ * @inode:
-+ * @flow:
-+ * @coord:
-+ * @lh:
-+ *
-+ * Returns number of bytes written or error code.
-+ */
-+static ssize_t append_tail(struct inode *inode,
-+ flow_t *flow, coord_t *coord, lock_handle *lh)
-+{
-+ int result;
-+ reiser4_key append_key;
-+ loff_t to_write;
-+
-+ if (!keyeq(&flow->key, append_key_tail(coord, &append_key))) {
-+ flow->data = NULL;
-+ flow->length = get_key_offset(&flow->key) - get_key_offset(&append_key);
-+ set_key_offset(&flow->key, get_key_offset(&append_key));
-+ /*
-+ * holes in files built of tails are stored just like if there
-+ * were real data which are all zeros. Therefore we have to
-+ * allocate quota here as well
-+ */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return result;
-+ }
-+
-+ /* check quota before appending data */
-+ if (DQUOT_ALLOC_SPACE_NODIRTY(inode, flow->length))
-+ return RETERR(-EDQUOT);
-+
-+ to_write = flow->length;
-+ result = reiser4_insert_flow(coord, lh, flow);
-+ if (flow->length)
-+ DQUOT_FREE_SPACE_NODIRTY(inode, flow->length);
-+ return (to_write - flow->length) ? (to_write - flow->length) : result;
-+}
-+
-+/**
-+ * write_extent_reserve_space - reserve space for a tail write (despite "extent")
-+ * @inode: inode of the file to be written; its tree supplies the estimates
-+ *
-+ * Estimates and reserves space which may be required for writing one flow to a
-+ * file. Returns the result of reiser4_grab_space().
-+ */
-+static int write_extent_reserve_space(struct inode *inode)
-+{
-+ __u64 count;
-+ reiser4_tree *tree;
-+
-+ /*
-+ * to write one flow to a file by tails we have to reserve disk space for:
-+ *
-+ * 1. find_file_item may have to insert empty node to the tree (empty
-+ * leaf node between two extent items). This requires 1 block and
-+ * number of blocks which are necessary to perform insertion of an
-+ * internal item into twig level.
-+ *
-+ * 2. flow insertion
-+ *
-+ * 3. stat data update
-+ */
-+ tree = reiser4_tree_by_inode(inode);
-+ count = estimate_one_insert_item(tree) + /* presumably item 1 above */
-+ estimate_insert_flow(tree->height) + /* presumably item 2 above */
-+ estimate_one_insert_item(tree); /* presumably item 3 above */
-+ grab_space_enable();
-+ return reiser4_grab_space(count, 0 /* flags */);
-+}
-+
-+#define PAGE_PER_FLOW 4 /* cap, in pages, on the user data faulted in per call below */
-+/* Touch at most PAGE_PER_FLOW pages of @buf in page-sized steps; returns the number of bytes covered. */
-+static loff_t faultin_user_pages(const char __user *buf, size_t count)
-+{
-+ loff_t faulted;
-+ int to_fault;
-+
-+ if (count > PAGE_PER_FLOW * PAGE_CACHE_SIZE)
-+ count = PAGE_PER_FLOW * PAGE_CACHE_SIZE;
-+ faulted = 0;
-+ while (count > 0) {
-+ to_fault = PAGE_CACHE_SIZE;
-+ if (count < to_fault)
-+ to_fault = count;
-+ fault_in_pages_readable(buf + faulted, to_fault); /* return value is not checked */
-+ count -= to_fault;
-+ faulted += to_fault;
-+ }
-+ return faulted;
-+}
-+
-+/**
-+ * reiser4_write_tail - write method of tail item plugin
-+ * @file: file to write to
-+ * @buf: address of user-space buffer
-+ * @count: number of bytes to write
-+ * @pos: position in file to write to
-+ * @inode: inode the flow key is built from; must not be NULL
-+ * Returns number of written bytes or error code.
-+ */
-+ssize_t reiser4_write_tail(struct file *file, struct inode * inode,
-+ const char __user *buf, size_t count, loff_t *pos)
-+{
-+ struct hint hint;
-+ int result;
-+ flow_t flow;
-+ coord_t *coord;
-+ lock_handle *lh;
-+ znode *loaded;
-+
-+ assert("edward-1548", inode != NULL);
-+
-+ if (write_extent_reserve_space(inode)) /* any reservation failure is reported as -ENOSPC */
-+ return RETERR(-ENOSPC);
-+
-+ result = load_file_hint(file, &hint);
-+ BUG_ON(result != 0);
-+
-+ flow.length = faultin_user_pages(buf, count); /* at most PAGE_PER_FLOW pages per call */
-+ flow.user = 1;
-+ memcpy(&flow.data, &buf, sizeof(buf)); /* copies the pointer value, not the user data */
-+ flow.op = WRITE_OP;
-+ key_by_inode_and_offset_common(inode, *pos, &flow.key);
-+
-+ result = find_file_item(&hint, &flow.key, ZNODE_WRITE_LOCK, inode);
-+ if (IS_CBKERR(result))
-+ return result; /* NOTE(review): hint is neither saved nor released on this path - confirm */
-+
-+ coord = &hint.ext_coord.coord;
-+ lh = hint.ext_coord.lh;
-+
-+ result = zload(coord->node);
-+ BUG_ON(result != 0);
-+ loaded = coord->node; /* kept so zrelse() below gets the node that was zload()ed */
-+
-+ if (coord->between == AFTER_UNIT) {
-+ /* append with data or hole */
-+ result = append_tail(inode, &flow, coord, lh);
-+ } else if (coord->between == AT_UNIT) {
-+ /* overwrite */
-+ result = overwrite_tail(&flow, coord);
-+ } else {
-+ /* no items of this file yet. insert data or hole */
-+ result = insert_first_tail(inode, &flow, coord, lh);
-+ }
-+ zrelse(loaded);
-+ if (result < 0) {
-+ done_lh(lh);
-+ return result;
-+ }
-+
-+ /* seal and unlock znode */
-+ hint.ext_coord.valid = 0; /* forces the else branch: reiser4_set_hint() below is never reached */
-+ if (hint.ext_coord.valid)
-+ reiser4_set_hint(&hint, &flow.key, ZNODE_WRITE_LOCK);
-+ else
-+ reiser4_unset_hint(&hint);
-+
-+ save_file_hint(file, &hint);
-+ return result;
-+}
-+
-+#if REISER4_DEBUG
-+/* Debug-only check: is the offset of @key the offset of the unit @coord points at? */
-+static int
-+coord_matches_key_tail(const coord_t * coord, const reiser4_key * key)
-+{
-+ reiser4_key item_key; /* filled only inside the assert() arguments below */
-+
-+ assert("vs-1356", coord_is_existing_unit(coord));
-+ assert("vs-1354", keylt(key, append_key_tail(coord, &item_key)));
-+ assert("vs-1355", keyge(key, item_key_by_coord(coord, &item_key)));
-+ return get_key_offset(key) ==
-+ get_key_offset(&item_key) + coord->unit_pos;
-+
-+}
-+
-+#endif
-+
-+/* plugin->u.item.s.file.read: copy bytes of the tail item at @hint's coord into the user buffer of @f */
-+int reiser4_read_tail(struct file *file UNUSED_ARG, flow_t *f, hint_t *hint)
-+{
-+ unsigned count;
-+ int item_length;
-+ coord_t *coord;
-+ uf_coord_t *uf_coord;
-+
-+ uf_coord = &hint->ext_coord;
-+ coord = &uf_coord->coord;
-+
-+ assert("vs-571", f->user == 1);
-+ assert("vs-571", f->data); /* NOTE(review): label "vs-571" repeats the one on the line above */
-+ assert("vs-967", coord && coord->node);
-+ assert("vs-1117", znode_is_rlocked(coord->node));
-+ assert("vs-1118", znode_is_loaded(coord->node));
-+
-+ assert("nikita-3037", reiser4_schedulable());
-+ assert("vs-1357", coord_matches_key_tail(coord, &f->key));
-+
-+ /* calculate number of bytes to read off the item */
-+ item_length = item_length_by_coord(coord);
-+ count = item_length_by_coord(coord) - coord->unit_pos;
-+ if (count > f->length)
-+ count = f->length;
-+
-+ /* user page has to be brought in so that major page fault does not
-+ * occur here when longterm lock is held */
-+ if (__copy_to_user((char __user *)f->data,
-+ ((char *)item_body_by_coord(coord) + coord->unit_pos),
-+ count))
-+ return RETERR(-EFAULT);
-+
-+ /* probably mark_page_accessed() should only be called if
-+ * coord->unit_pos is zero. */
-+ mark_page_accessed(znode_page(coord->node));
-+ move_flow_forward(f, count);
-+
-+ coord->unit_pos += count;
-+ if (item_length == coord->unit_pos) { /* whole item consumed: step back onto its last unit */
-+ coord->unit_pos--;
-+ coord->between = AFTER_UNIT;
-+ }
-+ reiser4_set_hint(hint, &f->key, ZNODE_READ_LOCK);
-+ return 0;
-+}
-+
-+/*
-+ plugin->u.item.s.file.append_key
-+ key of the first byte past the last byte addressed by this item; stored in @key and returned
-+*/
-+reiser4_key *append_key_tail(const coord_t * coord, reiser4_key * key)
-+{
-+ item_key_by_coord(coord, key);
-+ set_key_offset(key, get_key_offset(key) + item_length_by_coord(coord)); /* item offset + item length */
-+ return key;
-+}
-+
-+/* plugin->u.item.s.file.init_coord_extension: tails only mark the coord extension as valid */
-+void init_coord_extension_tail(uf_coord_t * uf_coord, loff_t lookuped)
-+{
-+ uf_coord->valid = 1; /* @lookuped is not used */
-+}
-+
-+/*
-+ plugin->u.item.s.file.get_block: store the block number of the node holding the item in *@block; @lblock is unused
-+*/
-+int
-+get_block_address_tail(const coord_t * coord, sector_t lblock, sector_t * block)
-+{
-+ assert("nikita-3252", znode_get_level(coord->node) == LEAF_LEVEL);
-+
-+ if (reiser4_blocknr_is_fake(znode_get_block(coord->node)))
-+ /* if node hasn't obtained its block number yet, return 0.
-+ * Let's avoid upsetting users with some cosmic numbers beyond
-+ * the device capacity. */
-+ *block = 0;
-+ else
-+ *block = *znode_get_block(coord->node);
-+ return 0; /* always succeeds */
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/item/tail.h linux-2.6.24/fs/reiser4/plugin/item/tail.h
---- linux-2.6.24.orig/fs/reiser4/plugin/item/tail.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/item/tail.h 2008-01-25 11:40:16.702170815 +0300
-@@ -0,0 +1,58 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined( __REISER4_TAIL_H__ )
-+#define __REISER4_TAIL_H__
-+
-+struct tail_coord_extension { /* presumably a placeholder: tails keep no per-coord state here */
-+ int not_used;
-+};
-+
-+struct cut_list;
-+
-+/* plugin->u.item.b.* */
-+reiser4_key *max_key_inside_tail(const coord_t *, reiser4_key *);
-+int can_contain_key_tail(const coord_t * coord, const reiser4_key * key,
-+ const reiser4_item_data *);
-+int mergeable_tail(const coord_t * p1, const coord_t * p2);
-+pos_in_node_t nr_units_tail(const coord_t *);
-+lookup_result lookup_tail(const reiser4_key *, lookup_bias, coord_t *);
-+int paste_tail(coord_t *, reiser4_item_data *, carry_plugin_info *);
-+int can_shift_tail(unsigned free_space, coord_t * source,
-+ znode * target, shift_direction, unsigned *size,
-+ unsigned want);
-+void copy_units_tail(coord_t * target, coord_t * source, unsigned from,
-+ unsigned count, shift_direction, unsigned free_space);
-+int kill_hook_tail(const coord_t *, pos_in_node_t from, pos_in_node_t count,
-+ struct carry_kill_data *);
-+int cut_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_cut_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+int kill_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
-+ struct carry_kill_data *, reiser4_key * smallest_removed,
-+ reiser4_key * new_first);
-+reiser4_key *unit_key_tail(const coord_t *, reiser4_key *);
-+
-+/* plugin->u.item.s.* */
-+ssize_t reiser4_write_tail(struct file *file, struct inode * inode,
-+ const char __user *buf, size_t count, loff_t *pos);
-+int reiser4_read_tail(struct file *, flow_t *, hint_t *);
-+int readpage_tail(void *vp, struct page *page);
-+reiser4_key *append_key_tail(const coord_t *, reiser4_key *);
-+void init_coord_extension_tail(uf_coord_t *, loff_t offset);
-+int get_block_address_tail(const coord_t *, sector_t, sector_t *);
-+int item_balance_dirty_pages(struct address_space *, const flow_t *,
-+ hint_t *, int back_to_dirty, int set_hint);
-+
-+/* __REISER4_TAIL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/Makefile linux-2.6.24/fs/reiser4/plugin/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/Makefile 2008-01-25 11:39:07.032232418 +0300
-@@ -0,0 +1,26 @@
-+obj-$(CONFIG_REISER4_FS) += plugins.o
-+# plugins.o is a composite object linked from the files listed below
-+plugins-objs := \
-+ plugin.o \
-+ plugin_set.o \
-+ object.o \
-+ inode_ops.o \
-+ inode_ops_rename.o \
-+ file_ops.o \
-+ file_ops_readdir.o \
-+ file_plugin_common.o \
-+ dir_plugin_common.o \
-+ digest.o \
-+ hash.o \
-+ fibration.o \
-+ tail_policy.o \
-+ regular.o
-+# plugin subdirectories, descended into under the same CONFIG_REISER4_FS switch
-+obj-$(CONFIG_REISER4_FS) += item/
-+obj-$(CONFIG_REISER4_FS) += file/
-+obj-$(CONFIG_REISER4_FS) += dir/
-+obj-$(CONFIG_REISER4_FS) += node/
-+obj-$(CONFIG_REISER4_FS) += compress/
-+obj-$(CONFIG_REISER4_FS) += space/
-+obj-$(CONFIG_REISER4_FS) += disk_format/
-+obj-$(CONFIG_REISER4_FS) += security/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/node/Makefile linux-2.6.24/fs/reiser4/plugin/node/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/node/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/node/Makefile 2008-01-25 11:39:07.032232418 +0300
-@@ -0,0 +1,5 @@
-+obj-$(CONFIG_REISER4_FS) += node_plugins.o
-+# node_plugins.o is a composite object linked from the files listed below
-+node_plugins-objs := \
-+ node.o \
-+ node40.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/node/node40.c linux-2.6.24/fs/reiser4/plugin/node/node40.c
---- linux-2.6.24.orig/fs/reiser4/plugin/node/node40.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/node/node40.c 2008-01-25 11:39:07.036233449 +0300
-@@ -0,0 +1,2924 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "../item/item.h"
-+#include "node.h"
-+#include "node40.h"
-+#include "../plugin.h"
-+#include "../../jnode.h"
-+#include "../../znode.h"
-+#include "../../pool.h"
-+#include "../../carry.h"
-+#include "../../tap.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../reiser4.h"
-+
-+#include <asm/uaccess.h>
-+#include <linux/types.h>
-+#include <linux/prefetch.h>
-+
-+/* leaf 40 format:
-+
-+ [node header | item 0, item 1, .., item N-1 | free space | item_head N-1, .. item_head 1, item head 0 ]
-+ plugin_id (16) key
-+ free_space (16) pluginid (16)
-+ free_space_start (16) offset (16)
-+ level (8)
-+ num_items (16)
-+ magic (32)
-+ flush_time (32)
-+*/
-+/* NIKITA-FIXME-HANS: I told you guys not less than 10 times to not call it r4fs. Change to "ReIs". */
-+/* magic number stored in ->magic of the node header; nh40_set_magic() writes it little-endian */
-+static const __u32 REISER4_NODE_MAGIC = 0x52344653; /* bytes 'R','4','F','S', most significant first */
-+
-+static int prepare_for_update(znode * left, znode * right,
-+ carry_plugin_info * info);
-+
-+/* the node40 header sits at the very beginning of the node data, so this is zdata(node) cast */
-+static inline node40_header *node40_node_header(const znode * node /* node to
-+ * query */ )
-+{
-+ assert("nikita-567", node != NULL);
-+ assert("nikita-568", znode_page(node) != NULL);
-+ assert("nikita-569", zdata(node) != NULL);
-+ return (node40_header *) zdata(node);
-+}
-+
-+/* get/set fields of node40_header: unaligned access, multi-byte fields little-endian on disk. NOTE(review): the layout sketch near the top of the file lists flush_time (32), yet flush_id is read as 64-bit and mkfs_id is written - confirm against node40_header */
-+#define nh40_get_magic(nh) le32_to_cpu(get_unaligned(&(nh)->magic))
-+#define nh40_get_free_space(nh) le16_to_cpu(get_unaligned(&(nh)->free_space))
-+#define nh40_get_free_space_start(nh) le16_to_cpu(get_unaligned(&(nh)->free_space_start))
-+#define nh40_get_level(nh) get_unaligned(&(nh)->level)
-+#define nh40_get_num_items(nh) le16_to_cpu(get_unaligned(&(nh)->nr_items))
-+#define nh40_get_flush_id(nh) le64_to_cpu(get_unaligned(&(nh)->flush_id))
-+
-+#define nh40_set_magic(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->magic)
-+#define nh40_set_free_space(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space)
-+#define nh40_set_free_space_start(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space_start)
-+#define nh40_set_level(nh, value) put_unaligned(value, &(nh)->level)
-+#define nh40_set_num_items(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->nr_items)
-+#define nh40_set_mkfs_id(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->mkfs_id)
-+
-+/* plugin field of node header should be read/set by
-+ plugin_by_disk_id/save_disk_plugin */
-+
-+/* item headers grow backwards from the node end: header @pos is the (pos + 1)-th item_header40 counted from the end */
-+static inline item_header40 *node40_ih_at(const znode * node, unsigned pos)
-+{
-+ return (item_header40 *) (zdata(node) + znode_size(node)) - pos - 1;
-+}
-+
-+/* header of the item @coord points at: the same arithmetic as
-+ node40_ih_at(), with node = coord->node and pos = coord->item_pos */
-+static inline item_header40 *node40_ih_at_coord(const coord_t * coord)
-+{
-+ return (item_header40 *) (zdata(coord->node) +
-+ znode_size(coord->node)) - (coord->item_pos) -
-+ 1;
-+}
-+
-+/* get/set the ->offset field of item_header40 (unaligned, 16-bit little-endian on disk) */
-+#define ih40_get_offset(ih) le16_to_cpu(get_unaligned(&(ih)->offset))
-+
-+#define ih40_set_offset(ih, value) put_unaligned(cpu_to_le16(value), &(ih)->offset)
-+
-+/* plugin field of item header should be read/set by
-+ plugin_by_disk_id/save_disk_plugin */
-+
-+/* plugin methods */
-+
-+/* plugin->u.node.item_overhead: per-item cost is one item_header40, whatever @node and @f are
-+ look for description of this method in plugin/node/node.h */
-+size_t
-+item_overhead_node40(const znode * node UNUSED_ARG, flow_t * f UNUSED_ARG)
-+{
-+ return sizeof(item_header40);
-+}
-+
-+/* plugin->u.node.free_space: returns the free_space field of the node header; @node must be loaded
-+ look for description of this method in plugin/node/node.h */
-+size_t free_space_node40(znode * node)
-+{
-+ assert("nikita-577", node != NULL);
-+ assert("nikita-578", znode_is_loaded(node));
-+ assert("nikita-579", zdata(node) != NULL);
-+
-+ return nh40_get_free_space(node40_node_header(node));
-+}
-+
-+/* private inline version of node40_num_of_items() for use in this file. This
-+ is necessary, because address of node40_num_of_items() is taken and it is
-+ never inlined as a result. Note: the 16-bit on-disk count is returned as short. */
-+static inline short node40_num_of_items_internal(const znode * node)
-+{
-+ return nh40_get_num_items(node40_node_header(node));
-+}
-+
-+#if REISER4_DEBUG
-+static inline void check_num_items(const znode * node) /* debug: cached and on-disk counts agree, node write-locked */
-+{
-+ assert("nikita-2749",
-+ node40_num_of_items_internal(node) == node->nr_items);
-+ assert("nikita-2746", znode_is_write_locked(node));
-+}
-+#else
-+#define check_num_items(node) noop
-+#endif
-+
-+/* plugin->u.node.num_of_items: out-of-line wrapper around node40_num_of_items_internal()
-+ look for description of this method in plugin/node/node.h */
-+int num_of_items_node40(const znode * node)
-+{
-+ return node40_num_of_items_internal(node);
-+}
-+
-+static void
-+node40_set_num_items(znode * node, node40_header * nh, unsigned value)
-+{
-+ assert("nikita-2751", node != NULL);
-+ assert("nikita-2750", nh == node40_node_header(node)); /* @nh must be the header of @node */
-+
-+ check_num_items(node);
-+ nh40_set_num_items(nh, value); /* on-disk header count ... */
-+ node->nr_items = value; /* ... and the in-memory copy are updated together */
-+ check_num_items(node);
-+}
-+
-+/* plugin->u.node.item_by_coord: address of the item body, i.e. node data + offset from the item header
-+ look for description of this method in plugin/node/node.h */
-+char *item_by_coord_node40(const coord_t * coord)
-+{
-+ item_header40 *ih;
-+ char *p;
-+
-+ /* @coord is set to existing item */
-+ assert("nikita-596", coord != NULL);
-+ assert("vs-255", coord_is_existing_item(coord));
-+
-+ ih = node40_ih_at_coord(coord);
-+ p = zdata(coord->node) + ih40_get_offset(ih);
-+ return p;
-+}
-+
-+/* plugin->u.node.length_by_coord: item length = start of whatever follows the item minus its own offset
-+ look for description of this method in plugin/node/node.h */
-+int length_by_coord_node40(const coord_t * coord)
-+{
-+ item_header40 *ih;
-+ int result;
-+
-+ /* @coord is set to existing item */
-+ assert("vs-256", coord != NULL);
-+ assert("vs-257", coord_is_existing_item(coord));
-+
-+ ih = node40_ih_at_coord(coord);
-+ if ((int)coord->item_pos ==
-+ node40_num_of_items_internal(coord->node) - 1)
-+ result = /* last item: it ends where free space starts */
-+ nh40_get_free_space_start(node40_node_header(coord->node)) -
-+ ih40_get_offset(ih);
-+ else
-+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); /* ih - 1 is the header of item_pos + 1 */
-+
-+ return result;
-+}
-+
-+static pos_in_node_t
-+node40_item_length(const znode * node, pos_in_node_t item_pos)
-+{
-+ item_header40 *ih;
-+ pos_in_node_t result;
-+
-+ /* @item_pos must address an existing item of @node; labels below repeat those of length_by_coord_node40() */
-+ assert("vs-256", node != NULL);
-+ assert("vs-257", node40_num_of_items_internal(node) > item_pos);
-+
-+ ih = node40_ih_at(node, item_pos);
-+ if (item_pos == node40_num_of_items_internal(node) - 1)
-+ result = /* last item: it ends where free space starts */
-+ nh40_get_free_space_start(node40_node_header(node)) -
-+ ih40_get_offset(ih);
-+ else
-+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); /* ih - 1 is the header of item_pos + 1 */
-+
-+ return result;
-+}
-+
-+/* plugin->u.node.plugin_by_coord: resolve the plugin id stored in the item header to an item plugin
-+ look for description of this method in plugin/node/node.h */
-+item_plugin *plugin_by_coord_node40(const coord_t * coord)
-+{
-+ item_header40 *ih;
-+ item_plugin *result;
-+
-+ /* @coord is set to existing item */
-+ assert("vs-258", coord != NULL);
-+ assert("vs-259", coord_is_existing_item(coord));
-+
-+ ih = node40_ih_at_coord(coord);
-+ /* pass NULL instead of the current tree. This is a time-critical call. */
-+ result = item_plugin_by_disk_id(NULL, &ih->plugin_id);
-+ return result;
-+}
-+
-+/* plugin->u.node.key_at: copy the key stored in the item header into @key and return @key
-+ look for description of this method in plugin/node/node.h */
-+reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key)
-+{
-+ item_header40 *ih;
-+
-+ assert("nikita-1765", coord_is_existing_item(coord));
-+
-+ /* @coord is set to existing item */
-+ ih = node40_ih_at_coord(coord);
-+ memcpy(key, &ih->key, sizeof(reiser4_key)); /* bytewise copy of the header's key */
-+ return key;
-+}
-+
-+/* VS-FIXME-HANS: please review whether the below are properly disabled when debugging is disabled. Neither macro is used by lookup_node40() before the #undef lines that follow it. */
-+
-+#define NODE_INCSTAT(n, counter) \
-+ reiser4_stat_inc_at_level(znode_get_level(n), node.lookup.counter)
-+
-+#define NODE_ADDSTAT(n, counter, val) \
-+ reiser4_stat_add_at_level(znode_get_level(n), node.lookup.counter, val)
-+
-+/* plugin->u.node.lookup: find the item of @node for @key - binary search over item headers, then a sequential scan, then the item plugin's own lookup
-+ look for description of this method in plugin/node/node.h */
-+node_search_result lookup_node40(znode * node /* node to query */ ,
-+ const reiser4_key * key /* key to look for */ ,
-+ lookup_bias bias /* search bias */ ,
-+ coord_t * coord /* resulting coord */ )
-+{
-+ int left;
-+ int right;
-+ int found;
-+ int items;
-+
-+ item_header40 *lefth;
-+ item_header40 *righth;
-+
-+ item_plugin *iplug;
-+ item_header40 *bstop;
-+ item_header40 *ih;
-+ cmp_t order;
-+
-+ assert("nikita-583", node != NULL);
-+ assert("nikita-584", key != NULL);
-+ assert("nikita-585", coord != NULL);
-+ assert("nikita-2693", znode_is_any_locked(node));
-+ cassert(REISER4_SEQ_SEARCH_BREAK > 2);
-+
-+ items = node_num_items(node);
-+
-+ if (unlikely(items == 0)) {
-+ coord_init_first_unit(coord, node);
-+ return NS_NOT_FOUND;
-+ }
-+
-+ /* binary search for item that can contain given key */
-+ left = 0;
-+ right = items - 1;
-+ coord->node = node;
-+ coord_clear_iplug(coord);
-+ found = 0;
-+
-+ lefth = node40_ih_at(node, left);
-+ righth = node40_ih_at(node, right);
-+
-+ /* It is known that for small arrays sequential search is on average
-+ more efficient than binary. This is because sequential search is
-+ coded as tight loop that can be better optimized by compilers and
-+ for small array size gain from this optimization makes sequential
-+ search the winner. Another, maybe more important, reason for this,
-+ is that sequential array is more CPU cache friendly, whereas binary
-+ search effectively destroys CPU caching.
-+
-+ Critical here is the notion of "smallness". Reasonable value of
-+ REISER4_SEQ_SEARCH_BREAK can be found by playing with code in
-+ fs/reiser4/ulevel/ulevel.c:test_search().
-+
-+ Don't try to further optimize sequential search by scanning from
-+ right to left in attempt to use more efficient loop termination
-+ condition (comparison with 0). This doesn't work.
-+
-+ */
-+
-+ while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
-+ int median;
-+ item_header40 *medianh;
-+
-+ median = (left + right) / 2;
-+ medianh = node40_ih_at(node, median);
-+
-+ assert("nikita-1084", median >= 0);
-+ assert("nikita-1085", median < items);
-+ switch (keycmp(key, &medianh->key)) {
-+ case LESS_THAN:
-+ right = median;
-+ righth = medianh;
-+ break;
-+ default:
-+ wrong_return_value("nikita-586", "keycmp"); /* no break: falls through to GREATER_THAN if this returns */
-+ case GREATER_THAN:
-+ left = median;
-+ lefth = medianh;
-+ break;
-+ case EQUAL_TO:
-+ do { /* walk towards item 0 while keys stay equal: settle on the leftmost match */
-+ --median;
-+ /* headers are ordered from right to left */
-+ ++medianh;
-+ } while (median >= 0 && keyeq(key, &medianh->key));
-+ right = left = median + 1;
-+ ih = lefth = righth = medianh - 1;
-+ found = 1;
-+ break;
-+ }
-+ }
-+ /* sequential scan. Item headers, and, therefore, keys are stored at
-+ the rightmost part of a node from right to left. We are trying to
-+ access memory from left to right, and hence, scan in _descending_
-+ order of item numbers.
-+ */
-+ if (!found) {
-+ for (left = right, ih = righth; left >= 0; ++ih, --left) {
-+ cmp_t comparison;
-+
-+ prefetchkey(&(ih + 1)->key);
-+ comparison = keycmp(&ih->key, key);
-+ if (comparison == GREATER_THAN)
-+ continue;
-+ if (comparison == EQUAL_TO) {
-+ found = 1;
-+ do {
-+ --left;
-+ ++ih;
-+ } while (left >= 0 && keyeq(&ih->key, key));
-+ ++left; /* step back onto the leftmost equal key */
-+ --ih;
-+ } else {
-+ assert("nikita-1256", comparison == LESS_THAN);
-+ }
-+ break;
-+ }
-+ if (unlikely(left < 0)) /* every scanned key was greater than @key */
-+ left = 0;
-+ }
-+
-+ assert("nikita-3212", right >= left);
-+ assert("nikita-3214",
-+ equi(found, keyeq(&node40_ih_at(node, left)->key, key)));
-+
-+ coord_set_item_pos(coord, left);
-+ coord->unit_pos = 0;
-+ coord->between = AT_UNIT;
-+
-+ /* key < leftmost key in a node or node is corrupted and keys
-+ are not sorted */
-+ bstop = node40_ih_at(node, (unsigned)left);
-+ order = keycmp(&bstop->key, key);
-+ if (unlikely(order == GREATER_THAN)) {
-+ if (unlikely(left != 0)) {
-+ /* screw up */
-+ warning("nikita-587", "Key less than %i key in a node",
-+ left);
-+ reiser4_print_key("key", key);
-+ reiser4_print_key("min", &bstop->key);
-+ print_coord_content("coord", coord);
-+ return RETERR(-EIO);
-+ } else {
-+ coord->between = BEFORE_UNIT;
-+ return NS_NOT_FOUND;
-+ }
-+ }
-+ /* left <= key, ok */
-+ iplug = item_plugin_by_disk_id(znode_get_tree(node), &bstop->plugin_id);
-+
-+ if (unlikely(iplug == NULL)) {
-+ warning("nikita-588", "Unknown plugin %i",
-+ le16_to_cpu(get_unaligned(&bstop->plugin_id)));
-+ reiser4_print_key("key", key);
-+ print_coord_content("coord", coord);
-+ return RETERR(-EIO);
-+ }
-+
-+ coord_set_iplug(coord, iplug);
-+
-+ /* if exact key from item header was found by binary search, no
-+ further checks are necessary. */
-+ if (found) {
-+ assert("nikita-1259", order == EQUAL_TO);
-+ return NS_FOUND;
-+ }
-+ if (iplug->b.max_key_inside != NULL) {
-+ reiser4_key max_item_key;
-+
-+ /* key > max_item_key --- outside of an item */
-+ if (keygt(key, iplug->b.max_key_inside(coord, &max_item_key))) {
-+ coord->unit_pos = 0;
-+ coord->between = AFTER_ITEM;
-+ /* FIXME-VS: key we are looking for does not fit into
-+ found item. Return NS_NOT_FOUND then. Without that
-+ the following case does not work: there is extent of
-+ file 10000, 10001. File 10000, 10002 has been just
-+ created. When writing to position 0 in that file -
-+ traverse_tree will stop here on twig level. When we
-+ want it to go down to leaf level
-+ */
-+ return NS_NOT_FOUND;
-+ }
-+ }
-+
-+ if (iplug->b.lookup != NULL) {
-+ return iplug->b.lookup(key, bias, coord); /* let the item plugin position @coord inside the item */
-+ } else {
-+ assert("nikita-1260", order == LESS_THAN);
-+ coord->between = AFTER_UNIT;
-+ return (bias == FIND_EXACT) ? NS_NOT_FOUND : NS_FOUND;
-+ }
-+}
-+
-+#undef NODE_ADDSTAT
-+#undef NODE_INCSTAT
-+
-+/* plugin->u.node.estimate: room left in @node for an item body once one item
-+   header is paid for; 0 if not even the header fits (see plugin/node/node.h) */
-+size_t estimate_node40(znode * node)
-+{
-+	size_t space;
-+
-+	assert("nikita-597", node != NULL);
-+	/* size_t is unsigned: compare before subtracting, or a nearly full node
-+	 * would wrap to a huge estimate instead of yielding 0 */
-+	space = free_space_node40(node);
-+	return (space > sizeof(item_header40)) ? space - sizeof(item_header40) : 0;
-+}
-+
-+/* plugin->u.node.check: consistency check of @node; returns 0 if sane, -1 with *@error set to a message otherwise
-+ look for description of this method in plugin/node/node.h */
-+int check_node40(const znode * node /* node to check */ ,
-+ __u32 flags /* check flags */ ,
-+ const char **error /* where to store error message */ )
-+{
-+ int nr_items;
-+ int i;
-+ reiser4_key prev;
-+ unsigned old_offset;
-+ tree_level level;
-+ coord_t coord;
-+ int result;
-+
-+ assert("nikita-580", node != NULL);
-+ assert("nikita-581", error != NULL);
-+ assert("nikita-2948", znode_is_loaded(node));
-+
-+ if (ZF_ISSET(node, JNODE_HEARD_BANSHEE)) /* nodes carrying this flag are not checked */
-+ return 0;
-+
-+ assert("nikita-582", zdata(node) != NULL);
-+
-+ nr_items = node40_num_of_items_internal(node);
-+ if (nr_items < 0) {
-+ *error = "Negative number of items";
-+ return -1;
-+ }
-+
-+ if (flags & REISER4_NODE_DKEYS)
-+ prev = *znode_get_ld_key((znode *) node);
-+ else
-+ prev = *reiser4_min_key();
-+
-+ old_offset = 0;
-+ coord_init_zero(&coord);
-+ coord.node = (znode *) node;
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ level = znode_get_level(node);
-+ for (i = 0; i < nr_items; i++) {
-+ item_header40 *ih;
-+ reiser4_key unit_key;
-+ unsigned j;
-+
-+ ih = node40_ih_at(node, (unsigned)i);
-+ coord_set_item_pos(&coord, i);
-+ if ((ih40_get_offset(ih) >=
-+ znode_size(node) - nr_items * sizeof(item_header40)) ||
-+ (ih40_get_offset(ih) < sizeof(node40_header))) {
-+ *error = "Offset is out of bounds";
-+ return -1;
-+ }
-+ if (ih40_get_offset(ih) <= old_offset) {
-+ *error = "Offsets are in wrong order";
-+ return -1;
-+ }
-+ if ((i == 0) && (ih40_get_offset(ih) != sizeof(node40_header))) {
-+ *error = "Wrong offset of first item";
-+ return -1;
-+ }
-+ old_offset = ih40_get_offset(ih);
-+
-+ if (keygt(&prev, &ih->key)) {
-+ *error = "Keys are in wrong order";
-+ return -1;
-+ }
-+ if (!keyeq(&ih->key, unit_key_by_coord(&coord, &unit_key))) {
-+ *error = "Wrong key of first unit";
-+ return -1;
-+ }
-+ prev = ih->key;
-+ for (j = 0; j < coord_num_units(&coord); ++j) {
-+ coord.unit_pos = j;
-+ unit_key_by_coord(&coord, &unit_key);
-+ if (keygt(&prev, &unit_key)) {
-+ *error = "Unit keys are in wrong order";
-+ return -1;
-+ }
-+ prev = unit_key;
-+ }
-+ coord.unit_pos = 0;
-+ if (level != TWIG_LEVEL && item_is_extent(&coord)) {
-+ *error = "extent on the wrong level";
-+ return -1;
-+ }
-+ if (level == LEAF_LEVEL && item_is_internal(&coord)) {
-+ *error = "internal item on the wrong level";
-+ return -1;
-+ }
-+ if (level != LEAF_LEVEL &&
-+ !item_is_internal(&coord) && !item_is_extent(&coord)) {
-+ *error = "wrong item on the internal level";
-+ return -1;
-+ }
-+ if (level > TWIG_LEVEL && !item_is_internal(&coord)) {
-+ *error = "non-internal item on the internal level";
-+ return -1;
-+ }
-+#if REISER4_DEBUG
-+ if (item_plugin_by_coord(&coord)->b.check
-+ && item_plugin_by_coord(&coord)->b.check(&coord, error))
-+ return -1;
-+#endif
-+ if (i) {
-+ coord_t prev_coord;
-+ /* two neighboring items can not be mergeable */
-+ coord_dup(&prev_coord, &coord);
-+ coord_prev_item(&prev_coord);
-+ if (are_items_mergeable(&prev_coord, &coord)) {
-+ *error = "mergeable items in one node";
-+ return -1;
-+ }
-+
-+ }
-+ }
-+
-+ if ((flags & REISER4_NODE_DKEYS) && !node_is_empty(node)) {
-+ coord_t coord; /* shadows the function-level coord */
-+ item_plugin *iplug;
-+
-+ coord_init_last_unit(&coord, node);
-+ iplug = item_plugin_by_coord(&coord);
-+ if ((item_is_extent(&coord) || item_is_tail(&coord)) &&
-+ iplug->s.file.append_key != NULL) {
-+ reiser4_key mkey;
-+
-+ iplug->s.file.append_key(&coord, &mkey);
-+ set_key_offset(&mkey, get_key_offset(&mkey) - 1); /* append key minus one offset unit */
-+ read_lock_dk(current_tree);
-+ result = keygt(&mkey, znode_get_rd_key((znode *) node));
-+ read_unlock_dk(current_tree);
-+ if (result) {
-+ *error = "key of rightmost item is too large";
-+ return -1;
-+ }
-+ }
-+ }
-+ if (flags & REISER4_NODE_DKEYS) {
-+ read_lock_tree(current_tree);
-+ read_lock_dk(current_tree);
-+
-+ flags |= REISER4_NODE_TREE_STABLE; /* set unconditionally: assuming ergo(a, b) is "a implies b", the !STABLE arms below always hold */
-+
-+ if (keygt(&prev, znode_get_rd_key((znode *) node))) {
-+ if (flags & REISER4_NODE_TREE_STABLE) {
-+ *error = "Last key is greater than rdkey";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ }
-+ if (keygt
-+ (znode_get_ld_key((znode *) node),
-+ znode_get_rd_key((znode *) node))) {
-+ *error = "ldkey is greater than rdkey";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) &&
-+ (node->left != NULL) &&
-+ !ZF_ISSET(node->left, JNODE_HEARD_BANSHEE) &&
-+ ergo(flags & REISER4_NODE_TREE_STABLE,
-+ !keyeq(znode_get_rd_key(node->left),
-+ znode_get_ld_key((znode *) node)))
-+ && ergo(!(flags & REISER4_NODE_TREE_STABLE),
-+ keygt(znode_get_rd_key(node->left),
-+ znode_get_ld_key((znode *) node)))) {
-+ *error = "left rdkey or ldkey is wrong";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ (node->right != NULL) &&
-+ !ZF_ISSET(node->right, JNODE_HEARD_BANSHEE) &&
-+ ergo(flags & REISER4_NODE_TREE_STABLE,
-+ !keyeq(znode_get_rd_key((znode *) node),
-+ znode_get_ld_key(node->right)))
-+ && ergo(!(flags & REISER4_NODE_TREE_STABLE),
-+ keygt(znode_get_rd_key((znode *) node),
-+ znode_get_ld_key(node->right)))) {
-+ *error = "rdkey or right ldkey is wrong";
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ return -1;
-+ }
-+
-+ read_unlock_dk(current_tree);
-+ read_unlock_tree(current_tree);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.node.parse: verify level and magic of the on-disk header, then cache the item count in node->nr_items
-+ look for description of this method in plugin/node/node.h */
-+int parse_node40(znode * node /* node to parse */ )
-+{
-+ node40_header *header;
-+ int result;
-+ d8 level;
-+
-+ header = node40_node_header((znode *) node);
-+ result = -EIO; /* stays -EIO unless both checks below pass */
-+ level = nh40_get_level(header);
-+ if (unlikely(((__u8) znode_get_level(node)) != level))
-+ warning("nikita-494", "Wrong level found in node: %i != %i",
-+ znode_get_level(node), level);
-+ else if (unlikely(nh40_get_magic(header) != REISER4_NODE_MAGIC))
-+ warning("nikita-495",
-+ "Wrong magic in tree node: want %x, got %x",
-+ REISER4_NODE_MAGIC, nh40_get_magic(header));
-+ else {
-+ node->nr_items = node40_num_of_items_internal(node);
-+ result = 0;
-+ }
-+ return RETERR(result);
-+}
-+
-+/* plugin->u.node.init: write a fresh, empty node40 header into @node; always returns 0
-+ look for description of this method in plugin/node/node.h */
-+int init_node40(znode * node /* node to initialise */ )
-+{
-+ node40_header *header;
-+
-+ assert("nikita-570", node != NULL);
-+ assert("nikita-572", zdata(node) != NULL);
-+
-+ header = node40_node_header(node);
-+ memset(header, 0, sizeof(node40_header));
-+ nh40_set_free_space(header, znode_size(node) - sizeof(node40_header)); /* everything after the header is free */
-+ nh40_set_free_space_start(header, sizeof(node40_header));
-+ /* sane hypothesis: 0 in CPU format is 0 in disk format */
-+ /* items: 0 */
-+ save_plugin_id(node_plugin_to_plugin(node->nplug),
-+ &header->common_header.plugin_id);
-+ nh40_set_level(header, znode_get_level(node));
-+ nh40_set_magic(header, REISER4_NODE_MAGIC);
-+ node->nr_items = 0;
-+ nh40_set_mkfs_id(header, reiser4_mkfs_id(reiser4_get_current_sb()));
-+
-+ /* flags: 0 */
-+ return 0;
-+}
-+
-+#ifdef GUESS_EXISTS /* compiled only when GUESS_EXISTS is defined */
-+int guess_node40(const znode * node /* node to guess plugin of */ )
-+{
-+ node40_header *nethack;
-+
-+ assert("nikita-1058", node != NULL);
-+ nethack = node40_node_header(node);
-+ return /* non-zero iff the magic matches and the stored plugin id resolves to NODE40_ID */
-+ (nh40_get_magic(nethack) == REISER4_NODE_MAGIC) &&
-+ (plugin_by_disk_id(znode_get_tree(node),
-+ REISER4_NODE_PLUGIN_TYPE,
-+ &nethack->common_header.plugin_id)->h.id ==
-+ NODE40_ID);
-+}
-+#endif
-+
-+/* plugin->u.node.change_item_size: resize the item at @coord by @by bytes, shifting the bodies and offsets of all following items
-+ look for description of this method in plugin/node/node.h */
-+void change_item_size_node40(coord_t * coord, int by)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ char *item_data;
-+ int item_length;
-+ unsigned i;
-+
-+ /* make sure that @coord addresses an existing item */
-+ assert("vs-210", coord_is_existing_item(coord));
-+
-+ nh = node40_node_header(coord->node);
-+
-+ item_data = item_by_coord_node40(coord);
-+ item_length = length_by_coord_node40(coord);
-+
-+ /* move item bodies: everything between the end of this item and the start of free space */
-+ ih = node40_ih_at_coord(coord);
-+ memmove(item_data + item_length + by, item_data + item_length,
-+ nh40_get_free_space_start(node40_node_header(coord->node)) -
-+ (ih40_get_offset(ih) + item_length));
-+
-+ /* update offsets of moved items */
-+ for (i = coord->item_pos + 1; i < nh40_get_num_items(nh); i++) {
-+ ih = node40_ih_at(coord->node, i);
-+ ih40_set_offset(ih, ih40_get_offset(ih) + by);
-+ }
-+
-+ /* update node header: free space shrinks by @by and starts @by bytes later */
-+ nh40_set_free_space(nh, nh40_get_free_space(nh) - by);
-+ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) + by);
-+}
-+
-+/* returns non-zero when the block address of @node differs from the root
-+ block recorded in the tree @node belongs to */
-+static int should_notify_parent(const znode * node)
-+{
-+ /* FIXME_JMACD This looks equivalent to znode_is_root(), right? -josh */
-+ return !disk_addr_eq(znode_get_block(node),
-+ &znode_get_tree(node)->root_block);
-+}
-+
-+/* plugin->u.node.create_item
-+ look for description of this method in plugin/node/node.h
-+
-+ Creates a new item with key @key, described by @data, at the position
-+ @target points to. Bodies and headers of the items that follow are moved to
-+ make room, the node header is updated, the item body is initialised and/or
-+ filled, and @target is left pointing at unit 0 of the new item. Always
-+ returns 0. */
-+int
-+create_item_node40(coord_t *target, const reiser4_key *key,
-+ reiser4_item_data *data, carry_plugin_info *info)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ unsigned offset;
-+ unsigned i;
-+
-+ nh = node40_node_header(target->node);
-+
-+ assert("vs-212", coord_is_between_items(target));
-+ /* node must have enough free space */
-+ assert("vs-254",
-+ free_space_node40(target->node) >=
-+ data->length + sizeof(item_header40));
-+ assert("vs-1410", data->length >= 0);
-+
-+ if (coord_set_to_right(target))
-+ /* there are not items to the right of @target, so, new item
-+ will be inserted after last one */
-+ coord_set_item_pos(target, nh40_get_num_items(nh));
-+
-+ if (target->item_pos < nh40_get_num_items(nh)) {
-+ /* there are items to be moved to prepare space for new
-+ item */
-+ ih = node40_ih_at_coord(target);
-+ /* new item will start at this offset */
-+ offset = ih40_get_offset(ih);
-+
-+ memmove(zdata(target->node) + offset + data->length,
-+ zdata(target->node) + offset,
-+ nh40_get_free_space_start(nh) - offset);
-+ /* update headers of moved items */
-+ for (i = target->item_pos; i < nh40_get_num_items(nh); i++) {
-+ ih = node40_ih_at(target->node, i);
-+ ih40_set_offset(ih, ih40_get_offset(ih) + data->length);
-+ }
-+
-+ /* @ih is set to item header of the last item, move item headers */
-+ memmove(ih - 1, ih,
-+ sizeof(item_header40) * (nh40_get_num_items(nh) -
-+ target->item_pos));
-+ } else {
-+ /* new item will start at this offset */
-+ offset = nh40_get_free_space_start(nh);
-+ }
-+
-+ /* make item header for the new item */
-+ ih = node40_ih_at_coord(target);
-+ memcpy(&ih->key, key, sizeof(reiser4_key));
-+ ih40_set_offset(ih, offset);
-+ save_plugin_id(item_plugin_to_plugin(data->iplug), &ih->plugin_id);
-+
-+ /* update node header */
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) - data->length -
-+ sizeof(item_header40));
-+ nh40_set_free_space_start(nh,
-+ nh40_get_free_space_start(nh) + data->length);
-+ node40_set_num_items(target->node, nh, nh40_get_num_items(nh) + 1);
-+
-+ /* FIXME: check how does create_item work when between is set to BEFORE_UNIT */
-+ target->unit_pos = 0;
-+ target->between = AT_UNIT;
-+ coord_clear_iplug(target);
-+
-+ /* initialize item */
-+ if (data->iplug->b.init != NULL) {
-+ data->iplug->b.init(target, NULL, data);
-+ }
-+ /* copy item body */
-+ if (data->iplug->b.paste != NULL) {
-+ data->iplug->b.paste(target, data, info);
-+ } else if (data->data != NULL) {
-+ if (data->user) {
-+ /* AUDIT: Are we really should not check that pointer
-+ from userspace was valid and data bytes were
-+ available? How will we return -EFAULT of some kind
-+ without this check? */
-+ assert("nikita-3038", reiser4_schedulable());
-+ /* copy data from user space */
-+ /* NOTE(review): the result of __copy_from_user() is
-+ discarded here, so a failed copy is not reported to
-+ the caller */
-+ __copy_from_user(zdata(target->node) + offset,
-+ (const char __user *)data->data,
-+ (unsigned)data->length);
-+ } else
-+ /* copy from kernel space */
-+ memcpy(zdata(target->node) + offset, data->data,
-+ (unsigned)data->length);
-+ }
-+
-+ if (target->item_pos == 0) {
-+ /* left delimiting key has to be updated */
-+ prepare_for_update(NULL, target->node, info);
-+ }
-+
-+ /* give the item plugin a chance to react to the newly created item */
-+ if (item_plugin_by_coord(target)->b.create_hook != NULL) {
-+ item_plugin_by_coord(target)->b.create_hook(target, data->arg);
-+ }
-+
-+ return 0;
-+}
-+
-+/* plugin->u.node.update_item_key
-+ look for description of this method in plugin/node/node.h
-+
-+ Overwrites the key stored in the header of the item at @target with @key.
-+ When that item is the first one in the node, prepare_for_update() is
-+ called for the node as well. */
-+void
-+update_item_key_node40(coord_t * target, const reiser4_key * key,
-+ carry_plugin_info * info)
-+{
-+ item_header40 *ih;
-+
-+ ih = node40_ih_at_coord(target);
-+ memcpy(&ih->key, key, sizeof(reiser4_key));
-+
-+ if (target->item_pos == 0) {
-+ prepare_for_update(NULL, target->node, info);
-+ }
-+}
-+
-+/* these bits encode cut mode; they are or-ed together in cut40_info.mode */
-+#define CMODE_TAIL 1
-+#define CMODE_WHOLE 2
-+#define CMODE_HEAD 4
-+
-+/* description of a cut/kill operation: filled in by parse_cut() and
-+ prepare_for_compact(), consumed by compact(). Fields which are not used
-+ keep the value MAX_POS_IN_NODE assigned by init_cinfo() */
-+struct cut40_info {
-+ int mode;
-+ pos_in_node_t tail_removed; /* position of item which gets tail removed */
-+ pos_in_node_t first_removed; /* position of the leftmost item among items removed completely */
-+ pos_in_node_t removed_count; /* number of items removed completely */
-+ pos_in_node_t head_removed; /* position of item which gets head removed */
-+
-+ pos_in_node_t freed_space_start; /* offset in node where the gap left by removal starts */
-+ pos_in_node_t freed_space_end; /* offset in node where that gap ends */
-+ pos_in_node_t first_moved; /* position of first item whose header offset compact() adjusts */
-+ pos_in_node_t head_removed_location; /* new offset of item which got head removed, if set */
-+};
-+
-+/* put @cinfo into its initial state: no cut mode bits set and every position
-+ field marked as unset by storing MAX_POS_IN_NODE in it */
-+static void init_cinfo(struct cut40_info *cinfo)
-+{
-+ cinfo->mode = 0;
-+ cinfo->tail_removed = MAX_POS_IN_NODE;
-+ cinfo->first_removed = MAX_POS_IN_NODE;
-+ cinfo->removed_count = MAX_POS_IN_NODE;
-+ cinfo->head_removed = MAX_POS_IN_NODE;
-+ cinfo->freed_space_start = MAX_POS_IN_NODE;
-+ cinfo->freed_space_end = MAX_POS_IN_NODE;
-+ cinfo->first_moved = MAX_POS_IN_NODE;
-+ cinfo->head_removed_location = MAX_POS_IN_NODE;
-+}
-+
-+/* complete cut_node40/kill_node40 by removing the gap described by @cinfo:
-+ item bodies behind the gap are moved down over it, offsets in headers of
-+ moved items are corrected, headers of completely removed items are
-+ squeezed out and free space accounting in the node header is updated */
-+static void compact(znode * node, struct cut40_info *cinfo)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ pos_in_node_t freed;
-+ pos_in_node_t pos, nr_items;
-+
-+ assert("vs-1526", (cinfo->freed_space_start != MAX_POS_IN_NODE &&
-+ cinfo->freed_space_end != MAX_POS_IN_NODE &&
-+ cinfo->first_moved != MAX_POS_IN_NODE));
-+ assert("vs-1523", cinfo->freed_space_end >= cinfo->freed_space_start);
-+
-+ nh = node40_node_header(node);
-+ nr_items = nh40_get_num_items(nh);
-+
-+ /* remove gap made up by removal */
-+ memmove(zdata(node) + cinfo->freed_space_start,
-+ zdata(node) + cinfo->freed_space_end,
-+ nh40_get_free_space_start(nh) - cinfo->freed_space_end);
-+
-+ /* update item headers of moved items - change their locations */
-+ pos = cinfo->first_moved;
-+ ih = node40_ih_at(node, pos);
-+ if (cinfo->head_removed_location != MAX_POS_IN_NODE) {
-+ /* item which lost its head gets an explicit new offset
-+ instead of being shifted by @freed like the rest */
-+ assert("vs-1580", pos == cinfo->head_removed);
-+ ih40_set_offset(ih, cinfo->head_removed_location);
-+ pos++;
-+ ih--;
-+ }
-+
-+ freed = cinfo->freed_space_end - cinfo->freed_space_start;
-+ /* header of item pos + 1 is found at ih - 1, hence ih-- */
-+ for (; pos < nr_items; pos++, ih--) {
-+ assert("vs-1581", ih == node40_ih_at(node, pos));
-+ ih40_set_offset(ih, ih40_get_offset(ih) - freed);
-+ }
-+
-+ /* free space start moved to left */
-+ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - freed);
-+
-+ if (cinfo->removed_count != MAX_POS_IN_NODE) {
-+ /* number of items changed. Remove item headers of those items */
-+ ih = node40_ih_at(node, nr_items - 1);
-+ memmove(ih + cinfo->removed_count, ih,
-+ sizeof(item_header40) * (nr_items -
-+ cinfo->removed_count -
-+ cinfo->first_removed));
-+ /* space of the removed item headers becomes free as well */
-+ freed += sizeof(item_header40) * cinfo->removed_count;
-+ node40_set_num_items(node, nh, nr_items - cinfo->removed_count);
-+ }
-+
-+ /* total amount of free space increased */
-+ nh40_set_free_space(nh, nh40_get_free_space(nh) + freed);
-+}
-+
-+/* make the item at @coord @delta bytes shorter by moving everything stored
-+ behind the item @delta bytes towards the item, fixing offsets of the items
-+ which follow and updating free space accounting. Always returns 0. */
-+int shrink_item_node40(coord_t * coord, int delta)
-+{
-+ node40_header *nh;
-+ item_header40 *ih;
-+ pos_in_node_t pos;
-+ pos_in_node_t nr_items;
-+ char *end;
-+ znode *node;
-+ int off;
-+
-+ assert("nikita-3487", coord != NULL);
-+ assert("nikita-3488", delta >= 0);
-+
-+ node = coord->node;
-+ nh = node40_node_header(node);
-+ nr_items = nh40_get_num_items(nh);
-+
-+ ih = node40_ih_at_coord(coord);
-+ assert("nikita-3489", delta <= length_by_coord_node40(coord));
-+ /* @off is the offset of the first byte behind the item, @end points there */
-+ off = ih40_get_offset(ih) + length_by_coord_node40(coord);
-+ end = zdata(node) + off;
-+
-+ /* remove gap made up by removal */
-+ memmove(end - delta, end, nh40_get_free_space_start(nh) - off);
-+
-+ /* update item headers of moved items - change their locations */
-+ pos = coord->item_pos + 1;
-+ ih = node40_ih_at(node, pos);
-+ for (; pos < nr_items; pos++, ih--) {
-+ assert("nikita-3490", ih == node40_ih_at(node, pos));
-+ ih40_set_offset(ih, ih40_get_offset(ih) - delta);
-+ }
-+
-+ /* free space start moved to left */
-+ nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - delta);
-+ /* total amount of free space increased */
-+ nh40_set_free_space(nh, nh40_get_free_space(nh) + delta);
-+ /*
-+ * This method does _not_ change the number of items. Hence, it cannot
-+ * make node empty. Also it doesn't remove items at all, which means
-+ * that no keys have to be updated either.
-+ */
-+ return 0;
-+}
-+
-+/* this is used by cut_node40 and kill_node40. It analyses input parameters and calculates cut mode. There are 2 types
-+ of cut. First is when a unit is removed from the middle of an item. In this case this function returns 1. All the
-+ rest fits into second case: 0 or 1 of items getting tail cut, 0 or more items removed completely and 0 or 1 item
-+ getting head cut. Function returns 0 in this case and describes the cut in @cinfo (mode bits and item positions) */
-+static int
-+parse_cut(struct cut40_info *cinfo, const struct cut_kill_params *params)
-+{
-+ reiser4_key left_key, right_key;
-+ reiser4_key min_from_key, max_to_key;
-+ const reiser4_key *from_key, *to_key;
-+
-+ init_cinfo(cinfo);
-+
-+ /* calculate minimal key stored in first item of items to be cut (params->from) */
-+ item_key_by_coord(params->from, &min_from_key);
-+ /* and max key stored in last item of items to be cut (params->to) */
-+ max_item_key_by_coord(params->to, &max_to_key);
-+
-+ /* if cut key range is not defined in input parameters - define it using cut coord range */
-+ if (params->from_key == NULL) {
-+ assert("vs-1513", params->to_key == NULL);
-+ unit_key_by_coord(params->from, &left_key);
-+ from_key = &left_key;
-+ max_unit_key_by_coord(params->to, &right_key);
-+ to_key = &right_key;
-+ } else {
-+ from_key = params->from_key;
-+ to_key = params->to_key;
-+ }
-+
-+ if (params->from->item_pos == params->to->item_pos) {
-+ /* cut range lies within one item */
-+ if (keylt(&min_from_key, from_key)
-+ && keylt(to_key, &max_to_key))
-+ /* both ends of the item survive: cut from the middle */
-+ return 1;
-+
-+ if (keygt(from_key, &min_from_key)) {
-+ /* tail of item is to be cut */
-+ cinfo->tail_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_TAIL;
-+ } else if (keylt(to_key, &max_to_key)) {
-+ /* head of item is to be cut */
-+ cinfo->head_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_HEAD;
-+ } else {
-+ /* item is removed completely */
-+ cinfo->first_removed = params->from->item_pos;
-+ cinfo->removed_count = 1;
-+ cinfo->mode |= CMODE_WHOLE;
-+ }
-+ } else {
-+ /* start with the items strictly between @from and @to; the
-+ boundary items are added below if they go away entirely */
-+ cinfo->first_removed = params->from->item_pos + 1;
-+ cinfo->removed_count =
-+ params->to->item_pos - params->from->item_pos - 1;
-+
-+ if (keygt(from_key, &min_from_key)) {
-+ /* first item is not cut completely */
-+ cinfo->tail_removed = params->from->item_pos;
-+ cinfo->mode |= CMODE_TAIL;
-+ } else {
-+ /* first item is removed completely as well */
-+ cinfo->first_removed--;
-+ cinfo->removed_count++;
-+ }
-+ if (keylt(to_key, &max_to_key)) {
-+ /* last item is not cut completely */
-+ cinfo->head_removed = params->to->item_pos;
-+ cinfo->mode |= CMODE_HEAD;
-+ } else {
-+ /* last item is removed completely as well */
-+ cinfo->removed_count++;
-+ }
-+ if (cinfo->removed_count)
-+ cinfo->mode |= CMODE_WHOLE;
-+ }
-+
-+ return 0;
-+}
-+
-+/* for each of @count items of @node starting at position @from call the
-+ kill_hook method of its item plugin, if the plugin has one, over all
-+ units of the item */
-+static void
-+call_kill_hooks(znode * node, pos_in_node_t from, pos_in_node_t count,
-+ carry_kill_data * kdata)
-+{
-+ coord_t coord;
-+ item_plugin *iplug;
-+ pos_in_node_t pos;
-+
-+ coord.node = node;
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ for (pos = 0; pos < count; pos++) {
-+ coord_set_item_pos(&coord, from + pos);
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ iplug = item_plugin_by_coord(&coord);
-+ if (iplug->b.kill_hook) {
-+ iplug->b.kill_hook(&coord, 0, coord_num_units(&coord),
-+ kdata);
-+ }
-+ }
-+}
-+
-+/* this is used to kill item partially: forwards units @from..@to of the item
-+ at @coord to the kill_units method of its item plugin and returns what
-+ that method returns. @data is the struct carry_kill_data of the operation */
-+static pos_in_node_t
-+kill_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
-+ reiser4_key * smallest_removed, reiser4_key * new_first_key)
-+{
-+ struct carry_kill_data *kdata;
-+ item_plugin *iplug;
-+
-+ kdata = data;
-+ iplug = item_plugin_by_coord(coord);
-+
-+ assert("vs-1524", iplug->b.kill_units);
-+ return iplug->b.kill_units(coord, from, to, kdata, smallest_removed,
-+ new_first_key);
-+}
-+
-+/* call item plugin to kill tail of item: units from coord->unit_pos up to
-+ the last unit of the item at @coord */
-+static pos_in_node_t
-+kill_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
-+{
-+ struct carry_kill_data *kdata;
-+ pos_in_node_t to;
-+
-+ kdata = data;
-+ to = coord_last_unit_pos(coord);
-+ return kill_units(coord, coord->unit_pos, to, kdata, smallest_removed,
-+ NULL);
-+}
-+
-+/* call item plugin to kill head of item: units from 0 up to coord->unit_pos
-+ of the item at @coord. @new_first_key is handed on to kill_units() */
-+static pos_in_node_t
-+kill_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
-+ reiser4_key * new_first_key)
-+{
-+ return kill_units(coord, 0, coord->unit_pos, data, smallest_removed,
-+ new_first_key);
-+}
-+
-+/* this is used to cut item partially: forwards units @from..@to of the item
-+ at @coord to the cut_units method of its item plugin and returns what that
-+ method returns. @data is the carry_cut_data of the operation */
-+static pos_in_node_t
-+cut_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
-+ reiser4_key * smallest_removed, reiser4_key * new_first_key)
-+{
-+ carry_cut_data *cdata;
-+ item_plugin *iplug;
-+
-+ cdata = data;
-+ iplug = item_plugin_by_coord(coord);
-+ assert("vs-302", iplug->b.cut_units);
-+ return iplug->b.cut_units(coord, from, to, cdata, smallest_removed,
-+ new_first_key);
-+}
-+
-+/* call item plugin to cut tail of item: units from coord->unit_pos up to the
-+ last unit */
-+static pos_in_node_t
-+cut_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
-+{
-+ carry_cut_data *cdata;
-+ pos_in_node_t to;
-+
-+ cdata = data;
-+ /* NOTE(review): last unit is taken from cdata->params.from, whereas
-+ kill_tail() takes it from @coord. The visible caller passes
-+ params->from as @coord, so both presumably agree - confirm */
-+ to = coord_last_unit_pos(cdata->params.from);
-+ return cut_units(coord, coord->unit_pos, to, data, smallest_removed, NULL);
-+}
-+
-+/* call item plugin to cut head of item: units from 0 up to coord->unit_pos
-+ of the item at @coord. @new_first_key is handed on to cut_units() */
-+static pos_in_node_t
-+cut_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
-+ reiser4_key * new_first_key)
-+{
-+ return cut_units(coord, 0, coord->unit_pos, data, smallest_removed,
-+ new_first_key);
-+}
-+
-+/* this returns 1 if key of first item changed, 0 - if it did not.
-+
-+ Common part of cut_node40 (@is_cut != 0) and kill_node40 (@is_cut == 0):
-+ classifies the operation with parse_cut(), lets item plugins remove units
-+ of partially removed items, calls kill hooks for completely removed items
-+ (kill only) and records in @cinfo the gap which compact() has to close */
-+static int
-+prepare_for_compact(struct cut40_info *cinfo,
-+ const struct cut_kill_params *params, int is_cut,
-+ void *data, carry_plugin_info * info)
-+{
-+ znode *node;
-+ item_header40 *ih;
-+ pos_in_node_t freed;
-+ pos_in_node_t item_pos;
-+ coord_t coord;
-+ reiser4_key new_first_key;
-+ pos_in_node_t(*kill_units_f) (coord_t *, pos_in_node_t, pos_in_node_t,
-+ void *, reiser4_key *, reiser4_key *);
-+ pos_in_node_t(*kill_tail_f) (coord_t *, void *, reiser4_key *);
-+ pos_in_node_t(*kill_head_f) (coord_t *, void *, reiser4_key *,
-+ reiser4_key *);
-+ int retval;
-+
-+ retval = 0;
-+
-+ node = params->from->node;
-+
-+ assert("vs-184", node == params->to->node);
-+ assert("vs-312", !node_is_empty(node));
-+ assert("vs-297",
-+ coord_compare(params->from, params->to) != COORD_CMP_ON_RIGHT);
-+
-+ /* choose cut or kill flavour of the unit removal helpers */
-+ if (is_cut) {
-+ kill_units_f = cut_units;
-+ kill_tail_f = cut_tail;
-+ kill_head_f = cut_head;
-+ } else {
-+ kill_units_f = kill_units;
-+ kill_tail_f = kill_tail;
-+ kill_head_f = kill_head;
-+ }
-+
-+ if (parse_cut(cinfo, params) == 1) {
-+ /* cut from the middle of item */
-+ freed =
-+ kill_units_f(params->from, params->from->unit_pos,
-+ params->to->unit_pos, data,
-+ params->smallest_removed, NULL);
-+
-+ item_pos = params->from->item_pos;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) - freed;
-+ cinfo->freed_space_end = cinfo->freed_space_start + freed;
-+ cinfo->first_moved = item_pos + 1;
-+ } else {
-+ assert("vs-1521", (cinfo->tail_removed != MAX_POS_IN_NODE ||
-+ cinfo->first_removed != MAX_POS_IN_NODE ||
-+ cinfo->head_removed != MAX_POS_IN_NODE));
-+
-+ switch (cinfo->mode) {
-+ case CMODE_TAIL:
-+ /* one item gets cut partially from its end */
-+ assert("vs-1562",
-+ cinfo->tail_removed == params->from->item_pos);
-+
-+ freed =
-+ kill_tail_f(params->from, data,
-+ params->smallest_removed);
-+
-+ item_pos = cinfo->tail_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) -
-+ freed;
-+ cinfo->freed_space_end =
-+ cinfo->freed_space_start + freed;
-+ cinfo->first_moved = cinfo->tail_removed + 1;
-+ break;
-+
-+ case CMODE_WHOLE:
-+ /* one or more items get removed completely */
-+ assert("vs-1563",
-+ cinfo->first_removed == params->from->item_pos);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ item_pos = cinfo->first_removed;
-+ ih = node40_ih_at(node, item_pos);
-+
-+ if (params->smallest_removed)
-+ memcpy(params->smallest_removed, &ih->key,
-+ sizeof(reiser4_key));
-+
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+
-+ /* step to the last of the removed items; its end is the
-+ end of the freed space */
-+ item_pos += (cinfo->removed_count - 1);
-+ ih -= (cinfo->removed_count - 1);
-+ cinfo->freed_space_end =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos);
-+ cinfo->first_moved = item_pos + 1;
-+ if (cinfo->first_removed == 0)
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_HEAD:
-+ /* one item gets cut partially from its head */
-+ assert("vs-1565",
-+ cinfo->head_removed == params->from->item_pos);
-+
-+ freed =
-+ kill_head_f(params->to, data,
-+ params->smallest_removed,
-+ &new_first_key);
-+
-+ item_pos = cinfo->head_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+ cinfo->freed_space_end = ih40_get_offset(ih) + freed;
-+ cinfo->first_moved = cinfo->head_removed + 1;
-+
-+ /* item head is removed, therefore, item key changed */
-+ coord.node = node;
-+ coord_set_item_pos(&coord, item_pos);
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ update_item_key_node40(&coord, &new_first_key, NULL);
-+ if (item_pos == 0)
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_TAIL | CMODE_WHOLE:
-+ /* one item gets cut from its end and one or more items get removed completely */
-+ assert("vs-1566",
-+ cinfo->tail_removed == params->from->item_pos);
-+ assert("vs-1567",
-+ cinfo->first_removed == cinfo->tail_removed + 1);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+
-+ freed =
-+ kill_tail_f(params->from, data,
-+ params->smallest_removed);
-+
-+ item_pos = cinfo->tail_removed;
-+ ih = node40_ih_at(node, item_pos);
-+ cinfo->freed_space_start =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos) -
-+ freed;
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ /* step to the last of the removed items; its end is the
-+ end of the freed space */
-+ item_pos += cinfo->removed_count;
-+ ih -= cinfo->removed_count;
-+ cinfo->freed_space_end =
-+ ih40_get_offset(ih) + node40_item_length(node,
-+ item_pos);
-+ cinfo->first_moved = item_pos + 1;
-+ break;
-+
-+ case CMODE_WHOLE | CMODE_HEAD:
-+ /* one or more items get removed completely and one item gets cut partially from its head */
-+ assert("vs-1568",
-+ cinfo->first_removed == params->from->item_pos);
-+ assert("vs-1564", cinfo->removed_count > 0
-+ && cinfo->removed_count != MAX_POS_IN_NODE);
-+ assert("vs-1569",
-+ cinfo->head_removed ==
-+ cinfo->first_removed + cinfo->removed_count);
-+
-+ /* call kill hook for all items removed completely */
-+ if (is_cut == 0)
-+ call_kill_hooks(node, cinfo->first_removed,
-+ cinfo->removed_count, data);
-+
-+ item_pos = cinfo->first_removed;
-+ ih = node40_ih_at(node, item_pos);
-+
-+ if (params->smallest_removed)
-+ memcpy(params->smallest_removed, &ih->key,
-+ sizeof(reiser4_key));
-+
-+ freed =
-+ kill_head_f(params->to, data, NULL, &new_first_key);
-+
-+ cinfo->freed_space_start = ih40_get_offset(ih);
-+
-+ ih = node40_ih_at(node, cinfo->head_removed);
-+ /* this is the most complex case. Item which got head removed and items which are to be moved
-+ intact change their location differently. */
-+ cinfo->freed_space_end = ih40_get_offset(ih) + freed;
-+ cinfo->first_moved = cinfo->head_removed;
-+ cinfo->head_removed_location = cinfo->freed_space_start;
-+
-+ /* item head is removed, therefore, item key changed */
-+ coord.node = node;
-+ coord_set_item_pos(&coord, cinfo->head_removed);
-+ coord.unit_pos = 0;
-+ coord.between = AT_UNIT;
-+ update_item_key_node40(&coord, &new_first_key, NULL);
-+
-+ assert("vs-1579", cinfo->first_removed == 0);
-+ /* key of first item of the node changes */
-+ retval = 1;
-+ break;
-+
-+ case CMODE_TAIL | CMODE_HEAD:
-+ /* one item gets cut from its end and its neighbor gets cut from its head */
-+ impossible("vs-1576", "this can not happen currently");
-+ break;
-+
-+ case CMODE_TAIL | CMODE_WHOLE | CMODE_HEAD:
-+ impossible("vs-1577", "this can not happen currently");
-+ break;
-+ default:
-+ impossible("vs-1578", "unexpected cut mode");
-+ break;
-+ }
-+ }
-+ return retval;
-+}
-+
-+/* plugin->u.node.kill
-+ return value is number of items removed completely
-+
-+ Runs prepare_for_compact() in kill mode, closes the resulting gap with
-+ compact() and, when @info is given, schedules removal of the node if it
-+ became empty (unless DELETE_RETAIN_EMPTY is set in kdata->flags) or an
-+ update if the key of its first item changed. The node is marked dirty. */
-+int kill_node40(struct carry_kill_data *kdata, carry_plugin_info * info)
-+{
-+ znode *node;
-+ struct cut40_info cinfo;
-+ int first_key_changed;
-+
-+ node = kdata->params.from->node;
-+
-+ first_key_changed =
-+ prepare_for_compact(&cinfo, &kdata->params, 0 /* not cut */ , kdata,
-+ info);
-+ compact(node, &cinfo);
-+
-+ if (info) {
-+ /* it is not called by node40_shift, so we have to take care
-+ of changes on upper levels */
-+ if (node_is_empty(node)
-+ && !(kdata->flags & DELETE_RETAIN_EMPTY))
-+ /* all contents of node is deleted */
-+ prepare_removal_node40(node, info);
-+ else if (first_key_changed) {
-+ prepare_for_update(NULL, node, info);
-+ }
-+ }
-+
-+ coord_clear_iplug(kdata->params.from);
-+ coord_clear_iplug(kdata->params.to);
-+
-+ znode_make_dirty(node);
-+ /* removed_count keeps MAX_POS_IN_NODE when no item was removed entirely */
-+ return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
-+}
-+
-+/* plugin->u.node.cut
-+ return value is number of items removed completely
-+
-+ Runs prepare_for_compact() in cut mode, closes the resulting gap with
-+ compact() and, when @info is given, schedules removal of the node if it
-+ became empty or an update if the key of its first item changed. The node
-+ is marked dirty. */
-+int cut_node40(struct carry_cut_data *cdata, carry_plugin_info * info)
-+{
-+ znode *node;
-+ struct cut40_info cinfo;
-+ int first_key_changed;
-+
-+ node = cdata->params.from->node;
-+
-+ first_key_changed =
-+ prepare_for_compact(&cinfo, &cdata->params, 1 /* cut */ , cdata,
-+ info);
-+ compact(node, &cinfo);
-+
-+ if (info) {
-+ /* it is not called by node40_shift, so we have to take care
-+ of changes on upper levels */
-+ if (node_is_empty(node))
-+ /* all contents of node is deleted */
-+ prepare_removal_node40(node, info);
-+ else if (first_key_changed) {
-+ prepare_for_update(NULL, node, info);
-+ }
-+ }
-+
-+ coord_clear_iplug(cdata->params.from);
-+ coord_clear_iplug(cdata->params.to);
-+
-+ znode_make_dirty(node);
-+ /* removed_count keeps MAX_POS_IN_NODE when no item was removed entirely */
-+ return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
-+}
-+
-+/* this structure is used by shift method of node40 plugin */
-+struct shift_params {
-+ shift_direction pend; /* when @pend == append - we are shifting to
-+ left, when @pend == prepend - to right */
-+ coord_t wish_stop; /* when shifting to left this is last unit we
-+ want shifted, when shifting to right - this
-+ is set to unit we want to start shifting
-+ from */
-+ znode *target; /* node the units are shifted into */
-+ int everything; /* it is set to 1 if everything we have to shift is
-+ shifted, 0 - otherwise */
-+
-+ /* FIXME-VS: get rid of read_stop */
-+
-+ /* these are set by estimate_shift */
-+ coord_t real_stop; /* this will be set to last unit which will be
-+ really shifted */
-+
-+ /* coordinate in source node before operation of unit which becomes
-+ first after shift to left or last after shift to right */
-+ union {
-+ coord_t future_first;
-+ coord_t future_last;
-+ } u;
-+
-+ unsigned merging_units; /* number of units of first item which have to
-+ be merged with last item of target node */
-+ unsigned merging_bytes; /* number of bytes in those units */
-+
-+ unsigned entire; /* items shifted in their entirety */
-+ unsigned entire_bytes; /* number of bytes in those items */
-+
-+ unsigned part_units; /* number of units of partially copied item */
-+ unsigned part_bytes; /* number of bytes in those units */
-+
-+ unsigned shift_bytes; /* total number of bytes in items shifted (item
-+ headers not included) */
-+
-+};
-+
-+/* returns what the item_overhead method of the node plugin of @item reports
-+ for item->node */
-+static int item_creation_overhead(coord_t *item)
-+{
-+ return node_plugin_by_coord(item)->item_overhead(item->node, NULL);
-+}
-+
-+/* how many units are there in @source starting from source->unit_pos
-+ but not further than @stop_coord. @source must be set to the first unit of
-+ its item when shifting to left and to the last unit when shifting to right */
-+static int
-+wanted_units(coord_t *source, coord_t *stop_coord, shift_direction pend)
-+{
-+ if (pend == SHIFT_LEFT) {
-+ assert("vs-181", source->unit_pos == 0);
-+ } else {
-+ assert("vs-182",
-+ source->unit_pos == coord_last_unit_pos(source));
-+ }
-+
-+ if (source->item_pos != stop_coord->item_pos) {
-+ /* @source and @stop_coord are different items */
-+ return coord_last_unit_pos(source) + 1;
-+ }
-+
-+ /* same item: count units between the item boundary and @stop_coord,
-+ both included */
-+ if (pend == SHIFT_LEFT) {
-+ return stop_coord->unit_pos + 1;
-+ } else {
-+ return source->unit_pos - stop_coord->unit_pos + 1;
-+ }
-+}
-+
-+/* this calculates what can be copied from @shift->wish_stop.node to
-+ @shift->target. Results are stored in @shift: merging_*, entire*, part_*
-+ and shift_bytes counters, real_stop and the everything flag.
-+
-+ NOTE(review): the counters are only assigned or incremented here, never
-+ reset - presumably the caller zeroes *shift beforehand; confirm */
-+static void
-+estimate_shift(struct shift_params *shift, const reiser4_context * ctx)
-+{
-+ unsigned target_free_space, size;
-+ pos_in_node_t stop_item; /* item which estimating should not consider */
-+ unsigned want; /* number of units of item we want shifted */
-+ coord_t source; /* item being estimated */
-+ item_plugin *iplug;
-+
-+ /* shifting to left/right starts from first/last units of
-+ @shift->wish_stop.node */
-+ if (shift->pend == SHIFT_LEFT) {
-+ coord_init_first_unit(&source, shift->wish_stop.node);
-+ } else {
-+ coord_init_last_unit(&source, shift->wish_stop.node);
-+ }
-+ shift->real_stop = source;
-+
-+ /* free space in target node and number of items in source */
-+ target_free_space = znode_free_space(shift->target);
-+
-+ shift->everything = 0;
-+ if (!node_is_empty(shift->target)) {
-+ /* target node is not empty, check for boundary items
-+ mergeability */
-+ coord_t to;
-+
-+ /* item we try to merge @source with */
-+ if (shift->pend == SHIFT_LEFT) {
-+ coord_init_last_unit(&to, shift->target);
-+ } else {
-+ coord_init_first_unit(&to, shift->target);
-+ }
-+
-+ if ((shift->pend == SHIFT_LEFT) ? are_items_mergeable(&to,
-+ &source) :
-+ are_items_mergeable(&source, &to)) {
-+ /* how many units of @source do we want to merge to
-+ item @to */
-+ want =
-+ wanted_units(&source, &shift->wish_stop,
-+ shift->pend);
-+
-+ /* how many units of @source we can merge to item
-+ @to */
-+ iplug = item_plugin_by_coord(&source);
-+ if (iplug->b.can_shift != NULL)
-+ shift->merging_units =
-+ iplug->b.can_shift(target_free_space,
-+ &source, shift->target,
-+ shift->pend, &size,
-+ want);
-+ else {
-+ shift->merging_units = 0;
-+ size = 0;
-+ }
-+ shift->merging_bytes = size;
-+ shift->shift_bytes += size;
-+ /* update stop coord to be set to last unit of @source
-+ we can merge to @target */
-+ if (shift->merging_units)
-+ /* at least one unit can be shifted */
-+ shift->real_stop.unit_pos =
-+ (shift->merging_units - source.unit_pos -
-+ 1) * shift->pend;
-+ else {
-+ /* nothing can be shifted */
-+ if (shift->pend == SHIFT_LEFT)
-+ coord_init_before_first_item(&shift->
-+ real_stop,
-+ source.
-+ node);
-+ else
-+ coord_init_after_last_item(&shift->
-+ real_stop,
-+ source.node);
-+ }
-+ assert("nikita-2081", shift->real_stop.unit_pos + 1);
-+
-+ if (shift->merging_units != want) {
-+ /* we could not copy as many as we want, so,
-+ there is no reason for estimating any
-+ longer */
-+ return;
-+ }
-+
-+ /* boundary item is merged; go on with the next item */
-+ target_free_space -= size;
-+ coord_add_item_pos(&source, shift->pend);
-+ }
-+ }
-+
-+ /* number of item nothing of which we want to shift */
-+ stop_item = shift->wish_stop.item_pos + shift->pend;
-+
-+ /* calculate how many items can be copied into given free
-+ space as whole */
-+ for (; source.item_pos != stop_item;
-+ coord_add_item_pos(&source, shift->pend)) {
-+ if (shift->pend == SHIFT_RIGHT)
-+ source.unit_pos = coord_last_unit_pos(&source);
-+
-+ /* how many units of @source do we want to copy */
-+ want = wanted_units(&source, &shift->wish_stop, shift->pend);
-+
-+ if (want == coord_last_unit_pos(&source) + 1) {
-+ /* we want this item to be copied entirely */
-+ size =
-+ item_length_by_coord(&source) +
-+ item_creation_overhead(&source);
-+ if (size <= target_free_space) {
-+ /* item fits into target node as whole */
-+ target_free_space -= size;
-+ shift->shift_bytes +=
-+ size - item_creation_overhead(&source);
-+ shift->entire_bytes +=
-+ size - item_creation_overhead(&source);
-+ shift->entire++;
-+
-+ /* update shift->real_stop coord to be set to
-+ last unit of @source we can merge to
-+ @target */
-+ shift->real_stop = source;
-+ if (shift->pend == SHIFT_LEFT)
-+ shift->real_stop.unit_pos =
-+ coord_last_unit_pos(&shift->
-+ real_stop);
-+ else
-+ shift->real_stop.unit_pos = 0;
-+ continue;
-+ }
-+ }
-+
-+ /* we reach here only for an item which does not fit into
-+ target node in its entirety. This item may be either
-+ partially shifted, or not shifted at all. We will have to
-+ create new item in target node, so decrease amount of free
-+ space by an item creation overhead. We can reach here also
-+ if stop coord is in this item */
-+ if (target_free_space >=
-+ (unsigned)item_creation_overhead(&source)) {
-+ target_free_space -= item_creation_overhead(&source);
-+ iplug = item_plugin_by_coord(&source);
-+ if (iplug->b.can_shift) {
-+ shift->part_units = iplug->b.can_shift(target_free_space,
-+ &source,
-+ NULL, /* target */
-+ shift->pend,
-+ &size,
-+ want);
-+ } else {
-+ target_free_space = 0;
-+ shift->part_units = 0;
-+ size = 0;
-+ }
-+ } else {
-+ target_free_space = 0;
-+ shift->part_units = 0;
-+ size = 0;
-+ }
-+ shift->part_bytes = size;
-+ shift->shift_bytes += size;
-+
-+ /* set @shift->real_stop to last unit of @source we can merge
-+ to @shift->target */
-+ if (shift->part_units) {
-+ shift->real_stop = source;
-+ shift->real_stop.unit_pos =
-+ (shift->part_units - source.unit_pos -
-+ 1) * shift->pend;
-+ assert("nikita-2082", shift->real_stop.unit_pos + 1);
-+ }
-+
-+ if (want != shift->part_units)
-+ /* not everything wanted were shifted */
-+ return;
-+ /* a partially shifted item always ends the estimation */
-+ break;
-+ }
-+
-+ shift->everything = 1;
-+}
-+
-+/* copy @count units of item @source starting from unit @from into item
-+ @target by means of the copy_units method of the item plugin; both items
-+ must have the same item plugin. When shifting to right the key of @target
-+ is then set to the key of its first unit */
-+static void
-+copy_units(coord_t * target, coord_t * source, unsigned from, unsigned count,
-+ shift_direction dir, unsigned free_space)
-+{
-+ item_plugin *iplug;
-+
-+ assert("nikita-1463", target != NULL);
-+ assert("nikita-1464", source != NULL);
-+ assert("nikita-1465", from + count <= coord_num_units(source));
-+
-+ iplug = item_plugin_by_coord(source);
-+ assert("nikita-1468", iplug == item_plugin_by_coord(target));
-+ iplug->b.copy_units(target, source, from, count, dir, free_space);
-+
-+ if (dir == SHIFT_RIGHT) {
-+ /* FIXME-VS: this looks not necessary. update_item_key was
-+ called already by copy_units method */
-+ reiser4_key split_key;
-+
-+ assert("nikita-1469", target->unit_pos == 0);
-+
-+ unit_key_by_coord(target, &split_key);
-+ node_plugin_by_coord(target)->update_item_key(target,
-+ &split_key, NULL);
-+ }
-+}
-+
-+/* copy part of @shift->real_stop.node starting either from its beginning or
-+ from its end and ending at @shift->real_stop to either the end or the
-+ beginning of @shift->target */
-+static void copy(struct shift_params *shift)
-+{
-+ node40_header *nh;
-+ coord_t from;
-+ coord_t to;
-+ item_header40 *from_ih, *to_ih;
-+ int free_space_start;
-+ int new_items;
-+ unsigned old_items;
-+ int old_offset;
-+ unsigned i;
-+
-+ nh = node40_node_header(shift->target);
-+ free_space_start = nh40_get_free_space_start(nh);
-+ old_items = nh40_get_num_items(nh);
-+ new_items = shift->entire + (shift->part_units ? 1 : 0);
-+ assert("vs-185",
-+ shift->shift_bytes ==
-+ shift->merging_bytes + shift->entire_bytes + shift->part_bytes);
-+
-+ from = shift->wish_stop;
-+
-+ coord_init_first_unit(&to, shift->target);
-+
-+ /* NOTE:NIKITA->VS not sure what I am doing: shift->target is empty,
-+ hence to.between is set to EMPTY_NODE above. Looks like we want it
-+ to be AT_UNIT.
-+
-+ Oh, wonders of ->betweeness...
-+
-+ */
-+ to.between = AT_UNIT;
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* copying to left */
-+
-+ coord_set_item_pos(&from, 0);
-+ from_ih = node40_ih_at(from.node, 0);
-+
-+ coord_set_item_pos(&to,
-+ node40_num_of_items_internal(to.node) - 1);
-+ if (shift->merging_units) {
-+ /* expand last item, so that plugin methods will see
-+ correct data */
-+ free_space_start += shift->merging_bytes;
-+ nh40_set_free_space_start(nh,
-+ (unsigned)free_space_start);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ shift->merging_bytes);
-+
-+ /* appending last item of @target */
-+ copy_units(&to, &from, 0, /* starting from 0-th unit */
-+ shift->merging_units, SHIFT_LEFT,
-+ shift->merging_bytes);
-+ coord_inc_item_pos(&from);
-+ from_ih--;
-+ coord_inc_item_pos(&to);
-+ }
-+
-+ to_ih = node40_ih_at(shift->target, old_items);
-+ if (shift->entire) {
-+ /* copy @entire items entirely */
-+
-+ /* copy item headers */
-+ memcpy(to_ih - shift->entire + 1,
-+ from_ih - shift->entire + 1,
-+ shift->entire * sizeof(item_header40));
-+ /* update item header offset */
-+ old_offset = ih40_get_offset(from_ih);
-+ /* AUDIT: Looks like if we calculate old_offset + free_space_start here instead of just old_offset, we can perform one "add" operation less per each iteration */
-+ for (i = 0; i < shift->entire; i++, to_ih--, from_ih--)
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(from_ih) -
-+ old_offset + free_space_start);
-+
-+ /* copy item bodies */
-+ memcpy(zdata(shift->target) + free_space_start, zdata(from.node) + old_offset, /*ih40_get_offset (from_ih), */
-+ shift->entire_bytes);
-+
-+ coord_add_item_pos(&from, (int)shift->entire);
-+ coord_add_item_pos(&to, (int)shift->entire);
-+ }
-+
-+ nh40_set_free_space_start(nh,
-+ free_space_start +
-+ shift->shift_bytes -
-+ shift->merging_bytes);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ (shift->shift_bytes - shift->merging_bytes +
-+ sizeof(item_header40) * new_items));
-+
-+ /* update node header */
-+ node40_set_num_items(shift->target, nh, old_items + new_items);
-+ assert("vs-170",
-+ nh40_get_free_space(nh) < znode_size(shift->target));
-+
-+ if (shift->part_units) {
-+ /* copy heading part (@part units) of @source item as
-+ a new item into @target->node */
-+
-+ /* copy item header of partially copied item */
-+ coord_set_item_pos(&to,
-+ node40_num_of_items_internal(to.node)
-+ - 1);
-+ memcpy(to_ih, from_ih, sizeof(item_header40));
-+ ih40_set_offset(to_ih,
-+ nh40_get_free_space_start(nh) -
-+ shift->part_bytes);
-+ if (item_plugin_by_coord(&to)->b.init)
-+ item_plugin_by_coord(&to)->b.init(&to, &from,
-+ NULL);
-+ copy_units(&to, &from, 0, shift->part_units, SHIFT_LEFT,
-+ shift->part_bytes);
-+ }
-+
-+ } else {
-+ /* copying to right */
-+
-+ coord_set_item_pos(&from,
-+ node40_num_of_items_internal(from.node) - 1);
-+ from_ih = node40_ih_at_coord(&from);
-+
-+ coord_set_item_pos(&to, 0);
-+
-+ /* prepare space for new items */
-+ memmove(zdata(to.node) + sizeof(node40_header) +
-+ shift->shift_bytes,
-+ zdata(to.node) + sizeof(node40_header),
-+ free_space_start - sizeof(node40_header));
-+ /* update item headers of moved items */
-+ to_ih = node40_ih_at(to.node, 0);
-+ /* first item gets @merging_bytes longer. free space appears
-+ at its beginning */
-+ if (!node_is_empty(to.node))
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(to_ih) +
-+ shift->shift_bytes -
-+ shift->merging_bytes);
-+
-+ for (i = 1; i < old_items; i++)
-+ ih40_set_offset(to_ih - i,
-+ ih40_get_offset(to_ih - i) +
-+ shift->shift_bytes);
-+
-+ /* move item headers to make space for new items */
-+ memmove(to_ih - old_items + 1 - new_items,
-+ to_ih - old_items + 1,
-+ sizeof(item_header40) * old_items);
-+ to_ih -= (new_items - 1);
-+
-+ nh40_set_free_space_start(nh,
-+ free_space_start +
-+ shift->shift_bytes);
-+ nh40_set_free_space(nh,
-+ nh40_get_free_space(nh) -
-+ (shift->shift_bytes +
-+ sizeof(item_header40) * new_items));
-+
-+ /* update node header */
-+ node40_set_num_items(shift->target, nh, old_items + new_items);
-+ assert("vs-170",
-+ nh40_get_free_space(nh) < znode_size(shift->target));
-+
-+ if (shift->merging_units) {
-+ coord_add_item_pos(&to, new_items);
-+ to.unit_pos = 0;
-+ to.between = AT_UNIT;
-+ /* prepend first item of @to */
-+ copy_units(&to, &from,
-+ coord_last_unit_pos(&from) -
-+ shift->merging_units + 1,
-+ shift->merging_units, SHIFT_RIGHT,
-+ shift->merging_bytes);
-+ coord_dec_item_pos(&from);
-+ from_ih++;
-+ }
-+
-+ if (shift->entire) {
-+ /* copy @entire items entirely */
-+
-+ /* copy item headers */
-+ memcpy(to_ih, from_ih,
-+ shift->entire * sizeof(item_header40));
-+
-+ /* update item header offset */
-+ old_offset =
-+ ih40_get_offset(from_ih + shift->entire - 1);
-+ /* AUDIT: old_offset + sizeof (node40_header) + shift->part_bytes calculation can be taken off the loop. */
-+ for (i = 0; i < shift->entire; i++, to_ih++, from_ih++)
-+ ih40_set_offset(to_ih,
-+ ih40_get_offset(from_ih) -
-+ old_offset +
-+ sizeof(node40_header) +
-+ shift->part_bytes);
-+ /* copy item bodies */
-+ coord_add_item_pos(&from, -(int)(shift->entire - 1));
-+ memcpy(zdata(to.node) + sizeof(node40_header) +
-+ shift->part_bytes, item_by_coord_node40(&from),
-+ shift->entire_bytes);
-+ coord_dec_item_pos(&from);
-+ }
-+
-+ if (shift->part_units) {
-+ coord_set_item_pos(&to, 0);
-+ to.unit_pos = 0;
-+ to.between = AT_UNIT;
-+ /* copy heading part (@part units) of @source item as
-+ a new item into @target->node */
-+
-+ /* copy item header of partially copied item */
-+ memcpy(to_ih, from_ih, sizeof(item_header40));
-+ ih40_set_offset(to_ih, sizeof(node40_header));
-+ if (item_plugin_by_coord(&to)->b.init)
-+ item_plugin_by_coord(&to)->b.init(&to, &from,
-+ NULL);
-+ copy_units(&to, &from,
-+ coord_last_unit_pos(&from) -
-+ shift->part_units + 1, shift->part_units,
-+ SHIFT_RIGHT, shift->part_bytes);
-+ }
-+ }
-+}
-+
-+/* remove everything either before or after @fact_stop. Number of items
-+ removed completely is returned */
-+static int delete_copied(struct shift_params *shift)
-+{
-+ coord_t from;
-+ coord_t to;
-+ struct carry_cut_data cdata;
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* we were shifting to left, remove everything from the
-+ beginning of @shift->wish_stop->node upto
-+ @shift->wish_stop */
-+ coord_init_first_unit(&from, shift->real_stop.node);
-+ to = shift->real_stop;
-+
-+ /* store old coordinate of unit which will be first after
-+ shift to left */
-+ shift->u.future_first = to;
-+ coord_next_unit(&shift->u.future_first);
-+ } else {
-+ /* we were shifting to right, remove everything from
-+ @shift->stop_coord upto to end of
-+ @shift->stop_coord->node */
-+ from = shift->real_stop;
-+ coord_init_last_unit(&to, from.node);
-+
-+ /* store old coordinate of unit which will be last after
-+ shift to right */
-+ shift->u.future_last = from;
-+ coord_prev_unit(&shift->u.future_last);
-+ }
-+
-+ cdata.params.from = &from;
-+ cdata.params.to = &to;
-+ cdata.params.from_key = NULL;
-+ cdata.params.to_key = NULL;
-+ cdata.params.smallest_removed = NULL;
-+ return cut_node40(&cdata, NULL);
-+}
-+
-+/* something was moved between @left and @right. Add carry operation to @info
-+ list to have carry to update delimiting key between them */
-+static int
-+prepare_for_update(znode * left, znode * right, carry_plugin_info * info)
-+{
-+ carry_op *op;
-+ carry_node *cn;
-+
-+ if (info == NULL)
-+ /* nowhere to send operation to. */
-+ return 0;
-+
-+ if (!should_notify_parent(right))
-+ return 0;
-+
-+ op = node_post_carry(info, COP_UPDATE, right, 1);
-+ if (IS_ERR(op) || op == NULL)
-+ return op ? PTR_ERR(op) : -EIO;
-+
-+ if (left != NULL) {
-+ carry_node *reference;
-+
-+ if (info->doing)
-+ reference = insert_carry_node(info->doing,
-+ info->todo, left);
-+ else
-+ reference = op->node;
-+ assert("nikita-2992", reference != NULL);
-+ cn = reiser4_add_carry(info->todo, POOLO_BEFORE, reference);
-+ if (IS_ERR(cn))
-+ return PTR_ERR(cn);
-+ cn->parent = 1;
-+ cn->node = left;
-+ if (ZF_ISSET(left, JNODE_ORPHAN))
-+ cn->left_before = 1;
-+ op->u.update.left = cn;
-+ } else
-+ op->u.update.left = NULL;
-+ return 0;
-+}
-+
-+/* plugin->u.node.prepare_removal
-+ to delete a pointer to @empty from the tree add corresponding carry
-+ operation (delete) to @info list */
-+int prepare_removal_node40(znode * empty, carry_plugin_info * info)
-+{
-+ carry_op *op;
-+ reiser4_tree *tree;
-+
-+ if (!should_notify_parent(empty))
-+ return 0;
-+ /* already on a road to Styx */
-+ if (ZF_ISSET(empty, JNODE_HEARD_BANSHEE))
-+ return 0;
-+ op = node_post_carry(info, COP_DELETE, empty, 1);
-+ if (IS_ERR(op) || op == NULL)
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+
-+ op->u.delete.child = NULL;
-+ op->u.delete.flags = 0;
-+
-+ /* fare thee well */
-+ tree = znode_get_tree(empty);
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ znode_set_ld_key(empty, znode_get_rd_key(empty));
-+ if (znode_is_left_connected(empty) && empty->left)
-+ znode_set_rd_key(empty->left, znode_get_rd_key(empty));
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+
-+ ZF_SET(empty, JNODE_HEARD_BANSHEE);
-+ return 0;
-+}
-+
-+/* something were shifted from @insert_coord->node to @shift->target, update
-+ @insert_coord correspondingly */
-+static void
-+adjust_coord(coord_t * insert_coord, struct shift_params *shift, int removed,
-+ int including_insert_coord)
-+{
-+ /* item plugin was invalidated by shifting */
-+ coord_clear_iplug(insert_coord);
-+
-+ if (node_is_empty(shift->wish_stop.node)) {
-+ assert("vs-242", shift->everything);
-+ if (including_insert_coord) {
-+ if (shift->pend == SHIFT_RIGHT) {
-+ /* set @insert_coord before first unit of
-+ @shift->target node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->target);
-+ } else {
-+ /* set @insert_coord after last in target node */
-+ coord_init_after_last_item(insert_coord,
-+ shift->target);
-+ }
-+ } else {
-+ /* set @insert_coord inside of empty node. There is
-+ only one possible coord within an empty
-+ node. init_first_unit will set that coord */
-+ coord_init_first_unit(insert_coord,
-+ shift->wish_stop.node);
-+ }
-+ return;
-+ }
-+
-+ if (shift->pend == SHIFT_RIGHT) {
-+ /* there was shifting to right */
-+ if (shift->everything) {
-+ /* everything wanted was shifted */
-+ if (including_insert_coord) {
-+ /* @insert_coord is set before first unit of
-+ @to node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->target);
-+ insert_coord->between = BEFORE_UNIT;
-+ } else {
-+ /* @insert_coord is set after last unit of
-+ @insert->node */
-+ coord_init_last_unit(insert_coord,
-+ shift->wish_stop.node);
-+ insert_coord->between = AFTER_UNIT;
-+ }
-+ }
-+ return;
-+ }
-+
-+ /* there was shifting to left */
-+ if (shift->everything) {
-+ /* everything wanted was shifted */
-+ if (including_insert_coord) {
-+ /* @insert_coord is set after last unit in @to node */
-+ coord_init_after_last_item(insert_coord, shift->target);
-+ } else {
-+ /* @insert_coord is set before first unit in the same
-+ node */
-+ coord_init_before_first_item(insert_coord,
-+ shift->wish_stop.node);
-+ }
-+ return;
-+ }
-+
-+ /* FIXME-VS: the code below is complicated because with between ==
-+ AFTER_ITEM unit_pos is set to 0 */
-+
-+ if (!removed) {
-+ /* no items were shifted entirely */
-+ assert("vs-195", shift->merging_units == 0
-+ || shift->part_units == 0);
-+
-+ if (shift->real_stop.item_pos == insert_coord->item_pos) {
-+ if (shift->merging_units) {
-+ if (insert_coord->between == AFTER_UNIT) {
-+ assert("nikita-1441",
-+ insert_coord->unit_pos >=
-+ shift->merging_units);
-+ insert_coord->unit_pos -=
-+ shift->merging_units;
-+ } else if (insert_coord->between == BEFORE_UNIT) {
-+ assert("nikita-2090",
-+ insert_coord->unit_pos >
-+ shift->merging_units);
-+ insert_coord->unit_pos -=
-+ shift->merging_units;
-+ }
-+
-+ assert("nikita-2083",
-+ insert_coord->unit_pos + 1);
-+ } else {
-+ if (insert_coord->between == AFTER_UNIT) {
-+ assert("nikita-1442",
-+ insert_coord->unit_pos >=
-+ shift->part_units);
-+ insert_coord->unit_pos -=
-+ shift->part_units;
-+ } else if (insert_coord->between == BEFORE_UNIT) {
-+ assert("nikita-2089",
-+ insert_coord->unit_pos >
-+ shift->part_units);
-+ insert_coord->unit_pos -=
-+ shift->part_units;
-+ }
-+
-+ assert("nikita-2084",
-+ insert_coord->unit_pos + 1);
-+ }
-+ }
-+ return;
-+ }
-+
-+ /* we shifted to left and there was no enough space for everything */
-+ switch (insert_coord->between) {
-+ case AFTER_UNIT:
-+ case BEFORE_UNIT:
-+ if (shift->real_stop.item_pos == insert_coord->item_pos)
-+ insert_coord->unit_pos -= shift->part_units;
-+ case AFTER_ITEM:
-+ coord_add_item_pos(insert_coord, -removed);
-+ break;
-+ default:
-+ impossible("nikita-2087", "not ready");
-+ }
-+ assert("nikita-2085", insert_coord->unit_pos + 1);
-+}
-+
-+static int call_shift_hooks(struct shift_params *shift)
-+{
-+ unsigned i, shifted;
-+ coord_t coord;
-+ item_plugin *iplug;
-+
-+ assert("vs-275", !node_is_empty(shift->target));
-+
-+ /* number of items shift touches */
-+ shifted =
-+ shift->entire + (shift->merging_units ? 1 : 0) +
-+ (shift->part_units ? 1 : 0);
-+
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* moved items are at the end */
-+ coord_init_last_unit(&coord, shift->target);
-+ coord.unit_pos = 0;
-+
-+ assert("vs-279", shift->pend == 1);
-+ for (i = 0; i < shifted; i++) {
-+ unsigned from, count;
-+
-+ iplug = item_plugin_by_coord(&coord);
-+ if (i == 0 && shift->part_units) {
-+ assert("vs-277",
-+ coord_num_units(&coord) ==
-+ shift->part_units);
-+ count = shift->part_units;
-+ from = 0;
-+ } else if (i == shifted - 1 && shift->merging_units) {
-+ count = shift->merging_units;
-+ from = coord_num_units(&coord) - count;
-+ } else {
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ }
-+
-+ if (iplug->b.shift_hook) {
-+ iplug->b.shift_hook(&coord, from, count,
-+ shift->wish_stop.node);
-+ }
-+ coord_add_item_pos(&coord, -shift->pend);
-+ }
-+ } else {
-+ /* moved items are at the beginning */
-+ coord_init_first_unit(&coord, shift->target);
-+
-+ assert("vs-278", shift->pend == -1);
-+ for (i = 0; i < shifted; i++) {
-+ unsigned from, count;
-+
-+ iplug = item_plugin_by_coord(&coord);
-+ if (i == 0 && shift->part_units) {
-+ assert("vs-277",
-+ coord_num_units(&coord) ==
-+ shift->part_units);
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ } else if (i == shifted - 1 && shift->merging_units) {
-+ count = shift->merging_units;
-+ from = 0;
-+ } else {
-+ count = coord_num_units(&coord);
-+ from = 0;
-+ }
-+
-+ if (iplug->b.shift_hook) {
-+ iplug->b.shift_hook(&coord, from, count,
-+ shift->wish_stop.node);
-+ }
-+ coord_add_item_pos(&coord, -shift->pend);
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/* shift to left is completed. Return 1 if unit @old was moved to left neighbor */
-+static int
-+unit_moved_left(const struct shift_params *shift, const coord_t * old)
-+{
-+ assert("vs-944", shift->real_stop.node == old->node);
-+
-+ if (shift->real_stop.item_pos < old->item_pos)
-+ return 0;
-+ if (shift->real_stop.item_pos == old->item_pos) {
-+ if (shift->real_stop.unit_pos < old->unit_pos)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* shift to right is completed. Return 1 if unit @old was moved to right
-+ neighbor */
-+static int
-+unit_moved_right(const struct shift_params *shift, const coord_t * old)
-+{
-+ assert("vs-944", shift->real_stop.node == old->node);
-+
-+ if (shift->real_stop.item_pos > old->item_pos)
-+ return 0;
-+ if (shift->real_stop.item_pos == old->item_pos) {
-+ if (shift->real_stop.unit_pos > old->unit_pos)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+/* coord @old was set in node from which shift was performed. What was shifted
-+ is stored in @shift. Update @old correspondingly to performed shift */
-+static coord_t *adjust_coord2(const struct shift_params *shift,
-+ const coord_t * old, coord_t * new)
-+{
-+ coord_clear_iplug(new);
-+ new->between = old->between;
-+
-+ coord_clear_iplug(new);
-+ if (old->node == shift->target) {
-+ if (shift->pend == SHIFT_LEFT) {
-+ /* coord which is set inside of left neighbor does not
-+ change during shift to left */
-+ coord_dup(new, old);
-+ return new;
-+ }
-+ new->node = old->node;
-+ coord_set_item_pos(new,
-+ old->item_pos + shift->entire +
-+ (shift->part_units ? 1 : 0));
-+ new->unit_pos = old->unit_pos;
-+ if (old->item_pos == 0 && shift->merging_units)
-+ new->unit_pos += shift->merging_units;
-+ return new;
-+ }
-+
-+ assert("vs-977", old->node == shift->wish_stop.node);
-+ if (shift->pend == SHIFT_LEFT) {
-+ if (unit_moved_left(shift, old)) {
-+ /* unit @old moved to left neighbor. Calculate its
-+ coordinate there */
-+ new->node = shift->target;
-+ coord_set_item_pos(new,
-+ node_num_items(shift->target) -
-+ shift->entire -
-+ (shift->part_units ? 1 : 0) +
-+ old->item_pos);
-+
-+ new->unit_pos = old->unit_pos;
-+ if (shift->merging_units) {
-+ coord_dec_item_pos(new);
-+ if (old->item_pos == 0) {
-+ /* unit_pos only changes if item got
-+ merged */
-+ new->unit_pos =
-+ coord_num_units(new) -
-+ (shift->merging_units -
-+ old->unit_pos);
-+ }
-+ }
-+ } else {
-+ /* unit @old did not move to left neighbor.
-+
-+ Use _nocheck, because @old is outside of its node.
-+ */
-+ coord_dup_nocheck(new, old);
-+ coord_add_item_pos(new,
-+ -shift->u.future_first.item_pos);
-+ if (new->item_pos == 0)
-+ new->unit_pos -= shift->u.future_first.unit_pos;
-+ }
-+ } else {
-+ if (unit_moved_right(shift, old)) {
-+ /* unit @old moved to right neighbor */
-+ new->node = shift->target;
-+ coord_set_item_pos(new,
-+ old->item_pos -
-+ shift->real_stop.item_pos);
-+ if (new->item_pos == 0) {
-+ /* unit @old might change unit pos */
-+ coord_set_item_pos(new,
-+ old->unit_pos -
-+ shift->real_stop.unit_pos);
-+ }
-+ } else {
-+ /* unit @old did not move to right neighbor, therefore
-+ it did not change */
-+ coord_dup(new, old);
-+ }
-+ }
-+ coord_set_iplug(new, item_plugin_by_coord(new));
-+ return new;
-+}
-+
-+/* this is called when shift is completed (something of source node is copied
-+ to target and deleted in source) to update all taps set in current
-+ context */
-+static void update_taps(const struct shift_params *shift)
-+{
-+ tap_t *tap;
-+ coord_t new;
-+
-+ for_all_taps(tap) {
-+ /* update only taps set to nodes participating in shift */
-+ if (tap->coord->node == shift->wish_stop.node
-+ || tap->coord->node == shift->target)
-+ tap_to_coord(tap,
-+ adjust_coord2(shift, tap->coord, &new));
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+
-+struct shift_check {
-+ reiser4_key key;
-+ __u16 plugin_id;
-+ union {
-+ __u64 bytes;
-+ __u64 entries;
-+ void *unused;
-+ } u;
-+};
-+
-+void *shift_check_prepare(const znode * left, const znode * right)
-+{
-+ pos_in_node_t i, nr_items;
-+ int mergeable;
-+ struct shift_check *data;
-+ item_header40 *ih;
-+
-+ if (node_is_empty(left) || node_is_empty(right))
-+ mergeable = 0;
-+ else {
-+ coord_t l, r;
-+
-+ coord_init_last_unit(&l, left);
-+ coord_init_first_unit(&r, right);
-+ mergeable = are_items_mergeable(&l, &r);
-+ }
-+ nr_items =
-+ node40_num_of_items_internal(left) +
-+ node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
-+ data =
-+ kmalloc(sizeof(struct shift_check) * nr_items,
-+ reiser4_ctx_gfp_mask_get());
-+ if (data != NULL) {
-+ coord_t coord;
-+ pos_in_node_t item_pos;
-+
-+ coord_init_first_unit(&coord, left);
-+ i = 0;
-+
-+ for (item_pos = 0;
-+ item_pos < node40_num_of_items_internal(left);
-+ item_pos++) {
-+
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ data[i].key = ih->key;
-+ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i].u.bytes = coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i].u.bytes =
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i].u.entries = coord_num_units(&coord);
-+ break;
-+ default:
-+ data[i].u.unused = NULL;
-+ break;
-+ }
-+ i++;
-+ }
-+
-+ coord_init_first_unit(&coord, right);
-+
-+ if (mergeable) {
-+ assert("vs-1609", i != 0);
-+
-+ ih = node40_ih_at_coord(&coord);
-+
-+ assert("vs-1589",
-+ data[i - 1].plugin_id ==
-+ le16_to_cpu(get_unaligned(&ih->plugin_id)));
-+ switch (data[i - 1].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i - 1].u.bytes += coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i - 1].u.bytes +=
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i - 1].u.entries +=
-+ coord_num_units(&coord);
-+ break;
-+ default:
-+ impossible("vs-1605", "wrong mergeable item");
-+ break;
-+ }
-+ item_pos = 1;
-+ } else
-+ item_pos = 0;
-+ for (; item_pos < node40_num_of_items_internal(right);
-+ item_pos++) {
-+
-+ assert("vs-1604", i < nr_items);
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ data[i].key = ih->key;
-+ data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ data[i].u.bytes = coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ data[i].u.bytes =
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ data[i].u.entries = coord_num_units(&coord);
-+ break;
-+ default:
-+ data[i].u.unused = NULL;
-+ break;
-+ }
-+ i++;
-+ }
-+ assert("vs-1606", i == nr_items);
-+ }
-+ return data;
-+}
-+
-+void shift_check(void *vp, const znode * left, const znode * right)
-+{
-+ pos_in_node_t i, nr_items;
-+ coord_t coord;
-+ __u64 last_bytes;
-+ int mergeable;
-+ item_header40 *ih;
-+ pos_in_node_t item_pos;
-+ struct shift_check *data;
-+
-+ data = (struct shift_check *)vp;
-+
-+ if (data == NULL)
-+ return;
-+
-+ if (node_is_empty(left) || node_is_empty(right))
-+ mergeable = 0;
-+ else {
-+ coord_t l, r;
-+
-+ coord_init_last_unit(&l, left);
-+ coord_init_first_unit(&r, right);
-+ mergeable = are_items_mergeable(&l, &r);
-+ }
-+
-+ nr_items =
-+ node40_num_of_items_internal(left) +
-+ node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
-+
-+ i = 0;
-+ last_bytes = 0;
-+
-+ coord_init_first_unit(&coord, left);
-+
-+ for (item_pos = 0; item_pos < node40_num_of_items_internal(left);
-+ item_pos++) {
-+
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ assert("vs-1611", i == item_pos);
-+ assert("vs-1590", keyeq(&ih->key, &data[i].key));
-+ assert("vs-1591",
-+ le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
-+ if ((i < (node40_num_of_items_internal(left) - 1))
-+ || !mergeable) {
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ assert("vs-1592",
-+ data[i].u.bytes ==
-+ coord_num_units(&coord));
-+ break;
-+ case EXTENT_POINTER_ID:
-+ assert("vs-1593",
-+ data[i].u.bytes ==
-+ reiser4_extent_size(&coord,
-+ coord_num_units
-+ (&coord)));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ assert("vs-1594",
-+ data[i].u.entries ==
-+ coord_num_units(&coord));
-+ break;
-+ default:
-+ break;
-+ }
-+ }
-+ if (item_pos == (node40_num_of_items_internal(left) - 1)
-+ && mergeable) {
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ last_bytes = coord_num_units(&coord);
-+ break;
-+ case EXTENT_POINTER_ID:
-+ last_bytes =
-+ reiser4_extent_size(&coord,
-+ coord_num_units(&coord));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ last_bytes = coord_num_units(&coord);
-+ break;
-+ default:
-+ impossible("vs-1595", "wrong mergeable item");
-+ break;
-+ }
-+ }
-+ i++;
-+ }
-+
-+ coord_init_first_unit(&coord, right);
-+ if (mergeable) {
-+ ih = node40_ih_at_coord(&coord);
-+
-+ assert("vs-1589",
-+ data[i - 1].plugin_id == le16_to_cpu(get_unaligned(&ih->plugin_id)));
-+ assert("vs-1608", last_bytes != 0);
-+ switch (data[i - 1].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ assert("vs-1596",
-+ data[i - 1].u.bytes ==
-+ last_bytes + coord_num_units(&coord));
-+ break;
-+
-+ case EXTENT_POINTER_ID:
-+ assert("vs-1597",
-+ data[i - 1].u.bytes ==
-+ last_bytes + reiser4_extent_size(&coord,
-+ coord_num_units
-+ (&coord)));
-+ break;
-+
-+ case COMPOUND_DIR_ID:
-+ assert("vs-1598",
-+ data[i - 1].u.bytes ==
-+ last_bytes + coord_num_units(&coord));
-+ break;
-+ default:
-+ impossible("vs-1599", "wrong mergeable item");
-+ break;
-+ }
-+ item_pos = 1;
-+ } else
-+ item_pos = 0;
-+
-+ for (; item_pos < node40_num_of_items_internal(right); item_pos++) {
-+
-+ coord_set_item_pos(&coord, item_pos);
-+ ih = node40_ih_at_coord(&coord);
-+
-+ assert("vs-1612", keyeq(&ih->key, &data[i].key));
-+ assert("vs-1613",
-+ le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
-+ switch (data[i].plugin_id) {
-+ case CTAIL_ID:
-+ case FORMATTING_ID:
-+ assert("vs-1600",
-+ data[i].u.bytes == coord_num_units(&coord));
-+ break;
-+ case EXTENT_POINTER_ID:
-+ assert("vs-1601",
-+ data[i].u.bytes ==
-+ reiser4_extent_size(&coord,
-+ coord_num_units
-+ (&coord)));
-+ break;
-+ case COMPOUND_DIR_ID:
-+ assert("vs-1602",
-+ data[i].u.entries == coord_num_units(&coord));
-+ break;
-+ default:
-+ break;
-+ }
-+ i++;
-+ }
-+
-+ assert("vs-1603", i == nr_items);
-+ kfree(data);
-+}
-+
-+#endif
-+
-+/* plugin->u.node.shift
-+ look for description of this method in plugin/node/node.h */
-+int shift_node40(coord_t * from, znode * to, shift_direction pend, int delete_child, /* if @from->node becomes empty - it will be
-+ deleted from the tree if this is set to 1 */
-+ int including_stop_coord, carry_plugin_info * info)
-+{
-+ struct shift_params shift;
-+ int result;
-+ znode *left, *right;
-+ znode *source;
-+ int target_empty;
-+
-+ assert("nikita-2161", coord_check(from));
-+
-+ memset(&shift, 0, sizeof(shift));
-+ shift.pend = pend;
-+ shift.wish_stop = *from;
-+ shift.target = to;
-+
-+ assert("nikita-1473", znode_is_write_locked(from->node));
-+ assert("nikita-1474", znode_is_write_locked(to));
-+
-+ source = from->node;
-+
-+ /* set @shift.wish_stop to rightmost/leftmost unit among units we want
-+ shifted */
-+ if (pend == SHIFT_LEFT) {
-+ result = coord_set_to_left(&shift.wish_stop);
-+ left = to;
-+ right = from->node;
-+ } else {
-+ result = coord_set_to_right(&shift.wish_stop);
-+ left = from->node;
-+ right = to;
-+ }
-+
-+ if (result) {
-+ /* move insertion coord even if there is nothing to move */
-+ if (including_stop_coord) {
-+ /* move insertion coord (@from) */
-+ if (pend == SHIFT_LEFT) {
-+ /* after last item in target node */
-+ coord_init_after_last_item(from, to);
-+ } else {
-+ /* before first item in target node */
-+ coord_init_before_first_item(from, to);
-+ }
-+ }
-+
-+ if (delete_child && node_is_empty(shift.wish_stop.node))
-+ result =
-+ prepare_removal_node40(shift.wish_stop.node, info);
-+ else
-+ result = 0;
-+ /* there is nothing to shift */
-+ assert("nikita-2078", coord_check(from));
-+ return result;
-+ }
-+
-+ target_empty = node_is_empty(to);
-+
-+ /* when first node plugin with item body compression is implemented,
-+ this must be changed to call node specific plugin */
-+
-+ /* shift->stop_coord is updated to last unit which really will be
-+ shifted */
-+ estimate_shift(&shift, get_current_context());
-+ if (!shift.shift_bytes) {
-+ /* we could not shift anything */
-+ assert("nikita-2079", coord_check(from));
-+ return 0;
-+ }
-+
-+ copy(&shift);
-+
-+ /* result value of this is important. It is used by adjust_coord below */
-+ result = delete_copied(&shift);
-+
-+ assert("vs-1610", result >= 0);
-+ assert("vs-1471",
-+ ((reiser4_context *) current->journal_info)->magic ==
-+ context_magic);
-+
-+ /* item which has been moved from one node to another might want to do
-+ something on that event. This can be done by item's shift_hook
-+ method, which will be now called for every moved items */
-+ call_shift_hooks(&shift);
-+
-+ assert("vs-1472",
-+ ((reiser4_context *) current->journal_info)->magic ==
-+ context_magic);
-+
-+ update_taps(&shift);
-+
-+ assert("vs-1473",
-+ ((reiser4_context *) current->journal_info)->magic ==
-+ context_magic);
-+
-+ /* adjust @from pointer in accordance with @including_stop_coord flag
-+ and amount of data which was really shifted */
-+ adjust_coord(from, &shift, result, including_stop_coord);
-+
-+ if (target_empty)
-+ /*
-+ * items were shifted into empty node. Update delimiting key.
-+ */
-+ result = prepare_for_update(NULL, left, info);
-+
-+ /* add update operation to @info, which is the list of operations to
-+ be performed on a higher level */
-+ result = prepare_for_update(left, right, info);
-+ if (!result && node_is_empty(source) && delete_child) {
-+ /* all contents of @from->node is moved to @to and @from->node
-+ has to be removed from the tree, so, on higher level we
-+ will be removing the pointer to node @from->node */
-+ result = prepare_removal_node40(source, info);
-+ }
-+ assert("nikita-2080", coord_check(from));
-+ return result ? result : (int)shift.shift_bytes;
-+}
-+
-+/* plugin->u.node.fast_insert()
-+ look for description of this method in plugin/node/node.h */
-+int fast_insert_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.fast_paste()
-+ look for description of this method in plugin/node/node.h */
-+int fast_paste_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.fast_cut()
-+ look for description of this method in plugin/node/node.h */
-+int fast_cut_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
-+{
-+ return 1;
-+}
-+
-+/* plugin->u.node.modify - not defined */
-+
-+/* plugin->u.node.max_item_size */
-+int max_item_size_node40(void)
-+{
-+ return reiser4_get_current_sb()->s_blocksize - sizeof(node40_header) -
-+ sizeof(item_header40);
-+}
-+
-+/* plugin->u.node.set_item_plugin */
-+int set_item_plugin_node40(coord_t *coord, item_id id)
-+{
-+ item_header40 *ih;
-+
-+ ih = node40_ih_at_coord(coord);
-+ put_unaligned(cpu_to_le16(id), &ih->plugin_id);
-+ coord->iplugid = id;
-+ return 0;
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/node/node40.h linux-2.6.24/fs/reiser4/plugin/node/node40.h
---- linux-2.6.24.orig/fs/reiser4/plugin/node/node40.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/node/node40.h 2008-01-25 11:39:07.040234479 +0300
-@@ -0,0 +1,125 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined( __REISER4_NODE40_H__ )
-+#define __REISER4_NODE40_H__
-+
-+#include "../../forward.h"
-+#include "../../dformat.h"
-+#include "node.h"
-+
-+#include <linux/types.h>
-+
-+/* format of node header for 40 node layouts. Keep bloat out of this struct. */
-+typedef struct node40_header {
-+ /* identifier of node plugin. Must be located at the very beginning
-+ of a node. */
-+ common_node_header common_header; /* this is 16 bits */
-+ /* number of items. Should be first element in the node header,
-+ because we haven't yet finally decided whether it shouldn't go into
-+ common_header.
-+ */
-+/* NIKITA-FIXME-HANS: Create a macro such that if there is only one
-+ * node format at compile time, and it is this one, accesses do not function dereference when
-+ * accessing these fields (and otherwise they do). Probably 80% of users will only have one node format at a time throughout the life of reiser4. */
-+ d16 nr_items;
-+ /* free space in node measured in bytes */
-+ d16 free_space;
-+ /* offset to start of free space in node */
-+ d16 free_space_start;
-+ /* for reiser4_fsck. When information about what is a free
-+ block is corrupted, and we try to recover everything even
-+ if marked as freed, then old versions of data may
-+ duplicate newer versions, and this field allows us to
-+ restore the newer version. Also useful for when users
-+ who don't have the new trashcan installed on their linux distro
-+ delete the wrong files and send us desperate emails
-+ offering $25 for them back. */
-+
-+ /* magic field we need to tell formatted nodes NIKITA-FIXME-HANS: improve this comment */
-+ d32 magic;
-+ /* flushstamp is made of mk_id and write_counter. mk_id is an
-+ id generated randomly at mkreiserfs time. So we can just
-+ skip all nodes with different mk_id. write_counter is d64
-+ incrementing counter of writes on disk. It is used for
-+ choosing the newest data at fsck time. NIKITA-FIXME-HANS: why was field name changed but not comment? */
-+
-+ d32 mkfs_id;
-+ d64 flush_id;
-+ /* node flags to be used by fsck (reiser4ck or reiser4fsck?)
-+ and repacker NIKITA-FIXME-HANS: say more or reference elsewhere that says more */
-+ d16 flags;
-+
-+ /* 1 is leaf level, 2 is twig level, root is the numerically
-+ largest level */
-+ d8 level;
-+
-+ d8 pad;
-+} PACKED node40_header;
-+
-+/* item headers are not standard across all node layouts, pass
-+ pos_in_node to functions instead */
-+typedef struct item_header40 {
-+ /* key of item */
-+ /* 0 */ reiser4_key key;
-+ /* offset from start of a node measured in 8-byte chunks */
-+ /* 24 */ d16 offset;
-+ /* 26 */ d16 flags;
-+ /* 28 */ d16 plugin_id;
-+} PACKED item_header40;
-+
-+size_t item_overhead_node40(const znode * node, flow_t * aflow);
-+size_t free_space_node40(znode * node);
-+node_search_result lookup_node40(znode * node, const reiser4_key * key,
-+ lookup_bias bias, coord_t * coord);
-+int num_of_items_node40(const znode * node);
-+char *item_by_coord_node40(const coord_t * coord);
-+int length_by_coord_node40(const coord_t * coord);
-+item_plugin *plugin_by_coord_node40(const coord_t * coord);
-+reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key);
-+size_t estimate_node40(znode * node);
-+int check_node40(const znode * node, __u32 flags, const char **error);
-+int parse_node40(znode * node);
-+int init_node40(znode * node);
-+#ifdef GUESS_EXISTS
-+int guess_node40(const znode * node);
-+#endif
-+void change_item_size_node40(coord_t * coord, int by);
-+int create_item_node40(coord_t * target, const reiser4_key * key,
-+ reiser4_item_data * data, carry_plugin_info * info);
-+void update_item_key_node40(coord_t * target, const reiser4_key * key,
-+ carry_plugin_info * info);
-+int kill_node40(struct carry_kill_data *, carry_plugin_info *);
-+int cut_node40(struct carry_cut_data *, carry_plugin_info *);
-+int shift_node40(coord_t * from, znode * to, shift_direction pend,
-+ /* if @from->node becomes
-+ empty - it will be deleted from
-+ the tree if this is set to 1
-+ */
-+ int delete_child, int including_stop_coord,
-+ carry_plugin_info * info);
-+
-+int fast_insert_node40(const coord_t * coord);
-+int fast_paste_node40(const coord_t * coord);
-+int fast_cut_node40(const coord_t * coord);
-+int max_item_size_node40(void);
-+int prepare_removal_node40(znode * empty, carry_plugin_info * info);
-+int set_item_plugin_node40(coord_t * coord, item_id id);
-+int shrink_item_node40(coord_t * coord, int delta);
-+
-+#if REISER4_DEBUG
-+void *shift_check_prepare(const znode *left, const znode *right);
-+void shift_check(void *vp, const znode *left, const znode *right);
-+#endif
-+
-+/* __REISER4_NODE40_H__ */
-+#endif
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/node/node.c linux-2.6.24/fs/reiser4/plugin/node/node.c
---- linux-2.6.24.orig/fs/reiser4/plugin/node/node.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/node/node.c 2008-01-25 11:39:07.040234479 +0300
-@@ -0,0 +1,131 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Node plugin interface.
-+
-+ Description: The tree provides the abstraction of flows, which it
-+ internally fragments into items which it stores in nodes.
-+
-+ A key_atom is a piece of data bound to a single key.
-+
-+ For reasonable space efficiency to be achieved it is often
-+ necessary to store key_atoms in the nodes in the form of items, where
-+ an item is a sequence of key_atoms of the same or similar type. It is
-+ more space-efficient, because the item can implement (very)
-+ efficient compression of key_atom's bodies using internal knowledge
-+ about their semantics, and it can often avoid having a key for each
-+ key_atom. Each type of item has specific operations implemented by its
-+ item handler (see balance.c).
-+
-+ Rationale: the rest of the code (specifically balancing routines)
-+ accesses leaf level nodes through this interface. This way we can
-+ implement various block layouts and even combine various layouts
-+ within the same tree. Balancing/allocating algorithms should not
-+ care about peculiarities of splitting/merging specific item types,
-+ but rather should leave that to the item's item handler.
-+
-+ Items, including those that provide the abstraction of flows, have
-+ the property that if you move them in part or in whole to another
-+ node, the balancing code invokes their is_left_mergeable()
-+ item_operation to determine if they are mergeable with their new
-+ neighbor in the node you have moved them to. For some items the
-+ is_left_mergeable() function always returns null.
-+
-+ When moving the bodies of items from one node to another:
-+
-+ if a partial item is shifted to another node the balancing code invokes
-+ an item handler method to handle the item splitting.
-+
-+ if the balancing code needs to merge with an item in the node it
-+ is shifting to, it will invoke an item handler method to handle
-+ the item merging.
-+
-+ if it needs to move whole item bodies unchanged, the balancing code uses xmemcpy()
-+ adjusting the item headers after the move is done using the node handler.
-+*/
-+
-+#include "../../forward.h"
-+#include "../../debug.h"
-+#include "../../key.h"
-+#include "../../coord.h"
-+#include "../plugin_header.h"
-+#include "../item/item.h"
-+#include "node.h"
-+#include "../plugin.h"
-+#include "../../znode.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../../reiser4.h"
-+
-+/**
-+ * leftmost_key_in_node - get the smallest key in node
-+ * @node:
-+ * @key: store result here
-+ *
-+ * Stores the leftmost key of @node in @key.
-+ */
-+reiser4_key *leftmost_key_in_node(const znode *node, reiser4_key *key)
-+{
-+ assert("nikita-1634", node != NULL);
-+ assert("nikita-1635", key != NULL);
-+
-+ if (!node_is_empty(node)) {
-+ coord_t first_item;
-+
-+ coord_init_first_unit(&first_item, (znode *) node);
-+ item_key_by_coord(&first_item, key);
-+ } else
-+ *key = *reiser4_max_key();
-+ return key;
-+}
-+
-+node_plugin node_plugins[LAST_NODE_ID] = {
-+ [NODE40_ID] = {
-+ .h = {
-+ .type_id = REISER4_NODE_PLUGIN_TYPE,
-+ .id = NODE40_ID,
-+ .pops = NULL,
-+ .label = "unified",
-+ .desc = "unified node layout",
-+ .linkage = {NULL, NULL}
-+ },
-+ .item_overhead = item_overhead_node40,
-+ .free_space = free_space_node40,
-+ .lookup = lookup_node40,
-+ .num_of_items = num_of_items_node40,
-+ .item_by_coord = item_by_coord_node40,
-+ .length_by_coord = length_by_coord_node40,
-+ .plugin_by_coord = plugin_by_coord_node40,
-+ .key_at = key_at_node40,
-+ .estimate = estimate_node40,
-+ .check = check_node40,
-+ .parse = parse_node40,
-+ .init = init_node40,
-+#ifdef GUESS_EXISTS
-+ .guess = guess_node40,
-+#endif
-+ .change_item_size = change_item_size_node40,
-+ .create_item = create_item_node40,
-+ .update_item_key = update_item_key_node40,
-+ .cut_and_kill = kill_node40,
-+ .cut = cut_node40,
-+ .shift = shift_node40,
-+ .shrink_item = shrink_item_node40,
-+ .fast_insert = fast_insert_node40,
-+ .fast_paste = fast_paste_node40,
-+ .fast_cut = fast_cut_node40,
-+ .max_item_size = max_item_size_node40,
-+ .prepare_removal = prepare_removal_node40,
-+ .set_item_plugin = set_item_plugin_node40
-+ }
-+};
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/node/node.h linux-2.6.24/fs/reiser4/plugin/node/node.h
---- linux-2.6.24.orig/fs/reiser4/plugin/node/node.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/node/node.h 2008-01-25 11:39:07.044235509 +0300
-@@ -0,0 +1,272 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* We need a definition of the default node layout here. */
-+
-+/* Generally speaking, it is best to have free space in the middle of the
-+ node so that two sets of things can grow towards it, and to have the
-+ item bodies on the left so that the last one of them grows into free
-+ space. We optimize for the case where we append new items to the end
-+ of the node, or grow the last item, because it hurts nothing to so
-+ optimize and it is a common special case to do massive insertions in
-+ increasing key order (and one of cases more likely to have a real user
-+ notice the delay time for).
-+
-+ formatted leaf default layout: (leaf1)
-+
-+ |node header:item bodies:free space:key + pluginid + item offset|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys, and item offsets plus pluginids for the items
-+ corresponding to them are in increasing key order, and are fixed
-+ length. Item offsets are relative to start of node (16 bits creating
-+ a node size limit of 64k, 12 bits might be a better choice....). Item
-+ bodies are in decreasing key order. Item bodies have a variable size.
-+ There is a one to one to one mapping of keys to item offsets to item
-+ bodies. Item offsets consist of pointers to the zeroth byte of the
-+ item body. Item length equals the start of the next item minus the
-+ start of this item, except the zeroth item whose length equals the end
-+ of the node minus the start of that item (plus a byte). In other
-+ words, the item length is not recorded anywhere, and it does not need
-+ to be since it is computable.
-+
-+ Leaf variable length items and keys layout : (lvar)
-+
-+ |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys and item offsets for the items corresponding to them are
-+ in increasing key order, and keys are variable length. Item offsets
-+ are relative to start of node (16 bits). Item bodies are in
-+ decreasing key order. Item bodies have a variable size. There is a
-+ one to one to one mapping of keys to item offsets to item bodies.
-+ Item offsets consist of pointers to the zeroth byte of the item body.
-+ Item length equals the start of the next item's key minus the start of
-+ this item, except the zeroth item whose length equals the end of the
-+ node minus the start of that item (plus a byte).
-+
-+ leaf compressed keys layout: (lcomp)
-+
-+ |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies|
-+
-+ We grow towards the middle, optimizing layout for the case where we
-+ append new items to the end of the node. The node header is fixed
-+ length. Keys and item offsets for the items corresponding to them are
-+ in increasing key order, and keys are variable length. The "key
-+ inherit" field indicates how much of the key prefix is identical to
-+ the previous key (stem compression as described in "Managing
-+ Gigabytes" is used). key_inherit is a one byte integer. The
-+ intra-node searches performed through this layout are linear searches,
-+ and this is theorized to not hurt performance much due to the high
-+ cost of processor stalls on modern CPUs, and the small number of keys
-+ in a single node. Item offsets are relative to start of node (16
-+ bits). Item bodies are in decreasing key order. Item bodies have a
-+ variable size. There is a one to one to one mapping of keys to item
-+ offsets to item bodies. Item offsets consist of pointers to the
-+ zeroth byte of the item body. Item length equals the start of the
-+ next item minus the start of this item, except the zeroth item whose
-+ length equals the end of the node minus the start of that item (plus a
-+ byte). In other words, item length and key length is not recorded
-+ anywhere, and it does not need to be since it is computable.
-+
-+ internal node default layout: (idef1)
-+
-+ just like ldef1 except that item bodies are either blocknrs of
-+ children or extents, and moving them may require updating parent
-+ pointers in the nodes that they point to.
-+*/
-+
-+/* There is an inherent 3-way tradeoff between optimizing and
-+ exchanging disks between different architectures and code
-+ complexity. This is optimal and simple and inexchangeable.
-+ Someone else can do the code for exchanging disks and make it
-+ complex. It would not be that hard. Using other than the PAGE_SIZE
-+ might be suboptimal.
-+*/
-+
-+#if !defined( __REISER4_NODE_H__ )
-+#define __REISER4_NODE_H__
-+
-+#define LEAF40_NODE_SIZE PAGE_CACHE_SIZE
-+
-+#include "../../dformat.h"
-+#include "../plugin_header.h"
-+
-+#include <linux/types.h>
-+
-+typedef enum {
-+ NS_FOUND = 0,
-+ NS_NOT_FOUND = -ENOENT
-+} node_search_result;
-+
-+/* Maximal possible space overhead for creation of new item in a node */
-+#define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 )
-+
-+typedef enum {
-+ REISER4_NODE_DKEYS = (1 << 0),
-+ REISER4_NODE_TREE_STABLE = (1 << 1)
-+} reiser4_node_check_flag;
-+
-+/* cut and cut_and_kill have too long list of parameters. This structure is just to safe some space on stack */
-+struct cut_list {
-+ coord_t *from;
-+ coord_t *to;
-+ const reiser4_key *from_key;
-+ const reiser4_key *to_key;
-+ reiser4_key *smallest_removed;
-+ carry_plugin_info *info;
-+ __u32 flags;
-+ struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */
-+ lock_handle *left;
-+ lock_handle *right;
-+};
-+
-+struct carry_cut_data;
-+struct carry_kill_data;
-+
-+/* The responsibility of the node plugin is to store and give access
-+ to the sequence of items within the node. */
-+typedef struct node_plugin {
-+ /* generic plugin fields */
-+ plugin_header h;
-+
-+ /* calculates the amount of space that will be required to store an
-+ item which is in addition to the space consumed by the item body.
-+ (the space consumed by the item body can be gotten by calling
-+ item->estimate) */
-+ size_t(*item_overhead) (const znode * node, flow_t * f);
-+
-+ /* returns free space by looking into node (i.e., without using
-+ znode->free_space). */
-+ size_t(*free_space) (znode * node);
-+ /* search within the node for the one item which might
-+ contain the key, invoking item->search_within to search within
-+ that item to see if it is in there */
-+ node_search_result(*lookup) (znode * node, const reiser4_key * key,
-+ lookup_bias bias, coord_t * coord);
-+ /* number of items in node */
-+ int (*num_of_items) (const znode * node);
-+
-+ /* store information about item in @coord in @data */
-+ /* break into several node ops, don't add any more uses of this before doing so */
-+ /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */
-+ char *(*item_by_coord) (const coord_t * coord);
-+ int (*length_by_coord) (const coord_t * coord);
-+ item_plugin *(*plugin_by_coord) (const coord_t * coord);
-+
-+ /* store item key in @key */
-+ reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key);
-+ /* conservatively estimate whether unit of what size can fit
-+ into node. This estimation should be performed without
-+ actually looking into the node's content (free space is saved in
-+ znode). */
-+ size_t(*estimate) (znode * node);
-+
-+ /* performs every consistency check the node plugin author could
-+ imagine. Optional. */
-+ int (*check) (const znode * node, __u32 flags, const char **error);
-+
-+ /* Called when node is read into memory and node plugin is
-+ already detected. This should read some data into znode (like free
-+ space counter) and, optionally, check data consistency.
-+ */
-+ int (*parse) (znode * node);
-+ /* This method is called on a new node to initialise plugin specific
-+ data (header, etc.) */
-+ int (*init) (znode * node);
-+ /* Check whether @node content conforms to this plugin format.
-+ Probably only useful after support for old V3.x formats is added.
-+ Uncomment after 4.0 only.
-+ */
-+ /* int ( *guess )( const znode *node ); */
-+#if REISER4_DEBUG
-+ void (*print) (const char *prefix, const znode * node, __u32 flags);
-+#endif
-+ /* change size of @item by @by bytes. @item->node has enough free
-+ space. When @by > 0 - free space is appended to end of item. When
-+ @by < 0 - item is truncated - it is assumed that last @by bytes if
-+ the item are freed already */
-+ void (*change_item_size) (coord_t * item, int by);
-+
-+ /* create new item @length bytes long in coord @target */
-+ int (*create_item) (coord_t * target, const reiser4_key * key,
-+ reiser4_item_data * data, carry_plugin_info * info);
-+
-+ /* update key of item. */
-+ void (*update_item_key) (coord_t * target, const reiser4_key * key,
-+ carry_plugin_info * info);
-+
-+ int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *);
-+ int (*cut) (struct carry_cut_data *, carry_plugin_info *);
-+
-+ /*
-+ * shrink item pointed to by @coord by @delta bytes.
-+ */
-+ int (*shrink_item) (coord_t * coord, int delta);
-+
-+ /* copy as much as possible but not more than up to @stop from
-+ @stop->node to @target. If (pend == append) then data from beginning of
-+ @stop->node are copied to the end of @target. If (pend == prepend) then
-+ data from the end of @stop->node are copied to the beginning of
-+ @target. Copied data are removed from @stop->node. Information
-+ about what to do on upper level is stored in @todo */
-+ int (*shift) (coord_t * stop, znode * target, shift_direction pend,
-+ int delete_node, int including_insert_coord,
-+ carry_plugin_info * info);
-+ /* return true if this node allows skip carry() in some situations
-+ (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format
-+ emulation doesn't.
-+
-+ This will speedup insertions that doesn't require updates to the
-+ parent, by bypassing initialisation of carry() structures. It's
-+ believed that majority of insertions will fit there.
-+
-+ */
-+ int (*fast_insert) (const coord_t * coord);
-+ int (*fast_paste) (const coord_t * coord);
-+ int (*fast_cut) (const coord_t * coord);
-+ /* this limits max size of item which can be inserted into a node and
-+ number of bytes item in a node may be appended with */
-+ int (*max_item_size) (void);
-+ int (*prepare_removal) (znode * empty, carry_plugin_info * info);
-+ /* change plugin id of items which are in a node already. Currently it is Used in tail conversion for regular
-+ * files */
-+ int (*set_item_plugin) (coord_t * coord, item_id);
-+} node_plugin;
-+
-+typedef enum {
-+ /* standard unified node layout used for both leaf and internal
-+ nodes */
-+ NODE40_ID,
-+ LAST_NODE_ID
-+} reiser4_node_id;
-+
-+extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key);
-+#if REISER4_DEBUG
-+extern void print_node_content(const char *prefix, const znode * node,
-+ __u32 flags);
-+#endif
-+
-+extern void indent_znode(const znode * node);
-+
-+typedef struct common_node_header {
-+ /*
-+ * identifier of node plugin. Must be located at the very beginning of
-+ * a node.
-+ */
-+ __le16 plugin_id;
-+} common_node_header;
-+
-+/* __REISER4_NODE_H__ */
-+#endif
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/object.c linux-2.6.24/fs/reiser4/plugin/object.c
---- linux-2.6.24.orig/fs/reiser4/plugin/object.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/object.c 2008-01-25 11:39:07.044235509 +0300
-@@ -0,0 +1,531 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * Examples of object plugins: file, directory, symlink, special file.
-+ *
-+ * Plugins associated with inode:
-+ *
-+ * Plugin of inode is plugin referenced by plugin-id field of on-disk
-+ * stat-data. How we store this plugin in in-core inode is not
-+ * important. Currently pointers are used, another variant is to store offsets
-+ * and do array lookup on each access.
-+ *
-+ * Now, each inode has one selected plugin: object plugin that
-+ * determines what type of file this object is: directory, regular etc.
-+ *
-+ * This main plugin can use other plugins that are thus subordinated to
-+ * it. Directory instance of object plugin uses hash; regular file
-+ * instance uses tail policy plugin.
-+ *
-+ * Object plugin is either taken from id in stat-data or guessed from
-+ * i_mode bits. Once it is established we ask it to install its
-+ * subordinate plugins, by looking again in stat-data or inheriting them
-+ * from parent.
-+ *
-+ * How new inode is initialized during ->read_inode():
-+ * 1 read stat-data and initialize inode fields: i_size, i_mode,
-+ * i_generation, capabilities etc.
-+ * 2 read plugin id from stat data or try to guess plugin id
-+ * from inode->i_mode bits if plugin id is missing.
-+ * 3 Call ->init_inode() method of stat-data plugin to initialise inode fields.
-+ *
-+ * NIKITA-FIXME-HANS: can you say a little about 1 being done before 3? What
-+ * if stat data does contain i_size, etc., due to it being an unusual plugin?
-+ *
-+ * 4 Call ->activate() method of object's plugin. Plugin is either read from
-+ * from stat-data or guessed from mode bits
-+ * 5 Call ->inherit() method of object plugin to inherit as yet un initialized
-+ * plugins from parent.
-+ *
-+ * Easy induction proves that on last step all plugins of inode would be
-+ * initialized.
-+ *
-+ * When creating new object:
-+ * 1 obtain object plugin id (see next period)
-+ * NIKITA-FIXME-HANS: period?
-+ * 2 ->install() this plugin
-+ * 3 ->inherit() the rest from the parent
-+ *
-+ * We need some examples of creating an object with default and non-default
-+ * plugin ids. Nikita, please create them.
-+ */
-+
-+#include "../inode.h"
-+
-+static int _bugop(void)
-+{
-+ BUG_ON(1);
-+ return 0;
-+}
-+
-+#define bugop ((void *)_bugop)
-+
-+static int _dummyop(void)
-+{
-+ return 0;
-+}
-+
-+#define dummyop ((void *)_dummyop)
-+
-+static int change_file(struct inode *inode,
-+ reiser4_plugin * plugin,
-+ pset_member memb)
-+{
-+ /* cannot change object plugin of already existing object */
-+ if (memb == PSET_FILE)
-+ return RETERR(-EINVAL);
-+
-+ /* Change PSET_CREATE */
-+ return aset_set_unsafe(&reiser4_inode_data(inode)->pset, memb, plugin);
-+}
-+
-+static reiser4_plugin_ops file_plugin_ops = {
-+ .change = change_file
-+};
-+
-+static struct inode_operations null_i_ops = {.create = NULL};
-+static struct file_operations null_f_ops = {.owner = NULL};
-+static struct address_space_operations null_a_ops = {.writepage = NULL};
-+
-+/* VFS methods for regular files */
-+static struct inode_operations regular_file_i_ops = {
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr,
-+ .getattr = reiser4_getattr_common
-+};
-+static struct file_operations regular_file_f_ops = {
-+ .llseek = generic_file_llseek,
-+ .read = reiser4_read_careful,
-+ .write = reiser4_write_careful,
-+ .aio_read = generic_file_aio_read,
-+ .ioctl = reiser4_ioctl_careful,
-+ .mmap = reiser4_mmap_careful,
-+ .open = reiser4_open_careful,
-+ .release = reiser4_release_careful,
-+ .fsync = reiser4_sync_file_common,
-+ .splice_read = generic_file_splice_read,
-+ .splice_write = generic_file_splice_write
-+};
-+static struct address_space_operations regular_file_a_ops = {
-+ .writepage = reiser4_writepage,
-+ .readpage = reiser4_readpage,
-+ .sync_page = block_sync_page,
-+ .writepages = reiser4_writepages,
-+ .set_page_dirty = reiser4_set_page_dirty,
-+ .readpages = reiser4_readpages,
-+ .prepare_write = reiser4_prepare_write,
-+ .commit_write = reiser4_commit_write,
-+ .bmap = reiser4_bmap_careful,
-+ .invalidatepage = reiser4_invalidatepage,
-+ .releasepage = reiser4_releasepage
-+};
-+
-+/* VFS methods for symlink files */
-+static struct inode_operations symlink_file_i_ops = {
-+ .readlink = generic_readlink,
-+ .follow_link = reiser4_follow_link_common,
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+};
-+
-+/* VFS methods for special files */
-+static struct inode_operations special_file_i_ops = {
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+};
-+
-+/* VFS methods for directories */
-+static struct inode_operations directory_i_ops = {
-+ .create = reiser4_create_common,
-+ .lookup = reiser4_lookup_common,
-+ .link = reiser4_link_common,
-+ .unlink = reiser4_unlink_common,
-+ .symlink = reiser4_symlink_common,
-+ .mkdir = reiser4_mkdir_common,
-+ .rmdir = reiser4_unlink_common,
-+ .mknod = reiser4_mknod_common,
-+ .rename = reiser4_rename_common,
-+ .permission = reiser4_permission_common,
-+ .setattr = reiser4_setattr_common,
-+ .getattr = reiser4_getattr_common
-+};
-+static struct file_operations directory_f_ops = {
-+ .llseek = reiser4_llseek_dir_common,
-+ .read = generic_read_dir,
-+ .readdir = reiser4_readdir_common,
-+ .release = reiser4_release_dir_common,
-+ .fsync = reiser4_sync_common
-+};
-+static struct address_space_operations directory_a_ops = {
-+ .writepage = bugop,
-+ .sync_page = bugop,
-+ .writepages = dummyop,
-+ .set_page_dirty = bugop,
-+ .readpages = bugop,
-+ .prepare_write = bugop,
-+ .commit_write = bugop,
-+ .bmap = bugop,
-+ .invalidatepage = bugop,
-+ .releasepage = bugop
-+};
-+
-+/*
-+ * Definitions of object plugins.
-+ */
-+
-+file_plugin file_plugins[LAST_FILE_PLUGIN_ID] = {
-+ [UNIX_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = UNIX_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_REGULAR_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "reg",
-+ .desc = "regular file",
-+ .linkage = {NULL, NULL},
-+ },
-+ /*
-+ * invariant vfs ops
-+ */
-+ .inode_ops = &regular_file_i_ops,
-+ .file_ops = &regular_file_f_ops,
-+ .as_ops = &regular_file_a_ops,
-+ /*
-+ * private i_ops
-+ */
-+ .setattr = setattr_unix_file,
-+ .open = open_unix_file,
-+ .read = read_unix_file,
-+ .write = write_unix_file,
-+ .ioctl = ioctl_unix_file,
-+ .mmap = mmap_unix_file,
-+ .release = release_unix_file,
-+ /*
-+ * private f_ops
-+ */
-+ .readpage = readpage_unix_file,
-+ .readpages = readpages_unix_file,
-+ .writepages = writepages_unix_file,
-+ .prepare_write = prepare_write_unix_file,
-+ .commit_write = commit_write_unix_file,
-+ /*
-+ * private a_ops
-+ */
-+ .bmap = bmap_unix_file,
-+ /*
-+ * other private methods
-+ */
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = flow_by_inode_unix_file,
-+ .key_by_inode = key_by_inode_and_offset_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = delete_object_unix_file,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_unix_file,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_data_unix_file,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [DIRECTORY_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = DIRECTORY_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_DIRECTORY_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "dir",
-+ .desc = "directory",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = &null_i_ops,
-+ .file_ops = &null_f_ops,
-+ .as_ops = &null_a_ops,
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = bugop,
-+ .key_by_inode = bugop,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common_dir,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = reiser4_delete_dir_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = rem_link_common_dir,
-+ .owns_item = owns_item_common_dir,
-+ .can_add_link = can_add_link_common,
-+ .can_rem_link = can_rem_link_common_dir,
-+ .detach = reiser4_detach_common_dir,
-+ .bind = reiser4_bind_common_dir,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common_dir,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common_dir
-+ },
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ },
-+ [SYMLINK_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = SYMLINK_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_SYMLINK_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "symlink",
-+ .desc = "symbolic link",
-+ .linkage = {NULL,NULL}
-+ },
-+ .inode_ops = &symlink_file_i_ops,
-+ /* inode->i_fop of symlink is initialized
-+ by NULL in setup_inode_ops */
-+ .file_ops = &null_f_ops,
-+ .as_ops = &null_a_ops,
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_symlink,
-+ .delete_object = reiser4_delete_object_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .destroy_inode = destroy_inode_symlink,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [SPECIAL_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = SPECIAL_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_SPECIAL_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "special",
-+ .desc =
-+ "special: fifo, device or socket",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = &special_file_i_ops,
-+ /* file_ops of special files (sockets, block, char, fifo) are
-+ initialized by init_special_inode. */
-+ .file_ops = &null_f_ops,
-+ .as_ops = &null_a_ops,
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_common,
-+ .create_object = reiser4_create_object_common,
-+ .delete_object = reiser4_delete_object_common,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_ordering,
-+ .cut_tree_worker = cut_tree_worker_common,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ },
-+ [CRYPTCOMPRESS_FILE_PLUGIN_ID] = {
-+ .h = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .id = CRYPTCOMPRESS_FILE_PLUGIN_ID,
-+ .groups = (1 << REISER4_REGULAR_FILE),
-+ .pops = &file_plugin_ops,
-+ .label = "cryptcompress",
-+ .desc = "cryptcompress file",
-+ .linkage = {NULL, NULL}
-+ },
-+ .inode_ops = &regular_file_i_ops,
-+ .file_ops = &regular_file_f_ops,
-+ .as_ops = &regular_file_a_ops,
-+
-+ .setattr = setattr_cryptcompress,
-+ .open = open_cryptcompress,
-+ .read = read_cryptcompress,
-+ .write = write_cryptcompress,
-+ .ioctl = ioctl_cryptcompress,
-+ .mmap = mmap_cryptcompress,
-+ .release = release_cryptcompress,
-+
-+ .readpage = readpage_cryptcompress,
-+ .readpages = readpages_cryptcompress,
-+ .writepages = writepages_cryptcompress,
-+ .prepare_write = prepare_write_cryptcompress,
-+ .commit_write = commit_write_cryptcompress,
-+
-+ .bmap = bmap_cryptcompress,
-+
-+ .write_sd_by_inode = write_sd_by_inode_common,
-+ .flow_by_inode = flow_by_inode_cryptcompress,
-+ .key_by_inode = key_by_inode_cryptcompress,
-+ .set_plug_in_inode = set_plug_in_inode_common,
-+ .adjust_to_parent = adjust_to_parent_cryptcompress,
-+ .create_object = create_object_cryptcompress,
-+ .delete_object = delete_object_cryptcompress,
-+ .add_link = reiser4_add_link_common,
-+ .rem_link = reiser4_rem_link_common,
-+ .owns_item = owns_item_common,
-+ .can_add_link = can_add_link_common,
-+ .detach = dummyop,
-+ .bind = dummyop,
-+ .safelink = safelink_common,
-+ .estimate = {
-+ .create = estimate_create_common,
-+ .update = estimate_update_common,
-+ .unlink = estimate_unlink_common
-+ },
-+ .init_inode_data = init_inode_data_cryptcompress,
-+ .cut_tree_worker = cut_tree_worker_cryptcompress,
-+ .destroy_inode = destroy_inode_cryptcompress,
-+ .wire = {
-+ .write = wire_write_common,
-+ .read = wire_read_common,
-+ .get = wire_get_common,
-+ .size = wire_size_common,
-+ .done = wire_done_common
-+ }
-+ }
-+};
-+
-+/*
-+ * ->change() hook for directory plugins. The directory plugin of an object
-+ * that already exists cannot be replaced, so every request is refused with
-+ * -EINVAL; none of the arguments is inspected.
-+ */
-+static int change_dir(struct inode *inode, reiser4_plugin * plugin,
-+		      pset_member memb)
-+{
-+	return RETERR(-EINVAL);
-+}
-+
-+/* Plugin operations referenced through .h.pops by the directory plugins
-+ * defined below; only the ->change() hook is provided. */
-+static reiser4_plugin_ops dir_plugin_ops = {
-+ .change = change_dir
-+};
-+
-+/*
-+ * definition of directory plugins
-+ *
-+ * Table of builtin directory plugins, indexed by directory plugin id. The two
-+ * entries below install the same operation tables and the same methods, with
-+ * one exception: ->build_entry_key(). Apart from that they differ only in the
-+ * id, label and description stored in the plugin header (.h).
-+ */
-+
-+dir_plugin dir_plugins[LAST_DIR_ID] = {
-+ /* standard hashed directory plugin */
-+ [HASHED_DIR_PLUGIN_ID] = {
-+ /* plugin header */
-+ .h = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .id = HASHED_DIR_PLUGIN_ID,
-+ .pops = &dir_plugin_ops,
-+ .label = "dir",
-+ .desc = "hashed directory",
-+ .linkage = {NULL, NULL}
-+ },
-+ /* inode, file and address space operation tables */
-+ .inode_ops = &directory_i_ops,
-+ .file_ops = &directory_f_ops,
-+ .as_ops = &directory_a_ops,
-+
-+ /* directory plugin methods */
-+ .get_parent = get_parent_common,
-+ .is_name_acceptable = is_name_acceptable_common,
-+ .build_entry_key = build_entry_key_hashed,
-+ .build_readdir_key = build_readdir_key_common,
-+ .add_entry = reiser4_add_entry_common,
-+ .rem_entry = reiser4_rem_entry_common,
-+ .init = reiser4_dir_init_common,
-+ .done = reiser4_dir_done_common,
-+ .attach = reiser4_attach_common,
-+ .detach = reiser4_detach_common,
-+ .estimate = {
-+ .add_entry = estimate_add_entry_common,
-+ .rem_entry = estimate_rem_entry_common,
-+ .unlink = dir_estimate_unlink_common
-+ }
-+ },
-+ /* hashed directory for which seekdir/telldir are guaranteed to
-+ * work. Brain-damage. */
-+ [SEEKABLE_HASHED_DIR_PLUGIN_ID] = {
-+ /* plugin header */
-+ .h = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .id = SEEKABLE_HASHED_DIR_PLUGIN_ID,
-+ .pops = &dir_plugin_ops,
-+ .label = "dir32",
-+ .desc = "directory hashed with 31 bit hash",
-+ .linkage = {NULL, NULL}
-+ },
-+ /* inode, file and address space operation tables */
-+ .inode_ops = &directory_i_ops,
-+ .file_ops = &directory_f_ops,
-+ .as_ops = &directory_a_ops,
-+
-+ /* directory plugin methods; only ->build_entry_key() differs from
-+ * the standard hashed directory above */
-+ .get_parent = get_parent_common,
-+ .is_name_acceptable = is_name_acceptable_common,
-+ .build_entry_key = build_entry_key_seekable,
-+ .build_readdir_key = build_readdir_key_common,
-+ .add_entry = reiser4_add_entry_common,
-+ .rem_entry = reiser4_rem_entry_common,
-+ .init = reiser4_dir_init_common,
-+ .done = reiser4_dir_done_common,
-+ .attach = reiser4_attach_common,
-+ .detach = reiser4_detach_common,
-+ .estimate = {
-+ .add_entry = estimate_add_entry_common,
-+ .rem_entry = estimate_rem_entry_common,
-+ .unlink = dir_estimate_unlink_common
-+ }
-+ }
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/object.h linux-2.6.24/fs/reiser4/plugin/object.h
---- linux-2.6.24.orig/fs/reiser4/plugin/object.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/object.h 2008-01-25 11:39:07.044235509 +0300
-@@ -0,0 +1,121 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of object plugin functions. */
-+
-+/* include guard */
-+#if !defined( __FS_REISER4_PLUGIN_OBJECT_H__ )
-+#define __FS_REISER4_PLUGIN_OBJECT_H__
-+
-+/* NOTE(review): this is the only include here, while the prototypes below use
-+   types such as reiser4_key, coord_t and reiser4_block_nr - presumably the
-+   includer is expected to have pulled in their declarations; confirm. */
-+#include "../type_safe_hash.h"
-+
-+/* common implementations of inode operations */
-+int reiser4_create_common(struct inode *parent, struct dentry *dentry,
-+ int mode, struct nameidata *);
-+struct dentry * reiser4_lookup_common(struct inode *parent,
-+ struct dentry *dentry,
-+ struct nameidata *nameidata);
-+int reiser4_link_common(struct dentry *existing, struct inode *parent,
-+ struct dentry *newname);
-+int reiser4_unlink_common(struct inode *parent, struct dentry *victim);
-+int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, int mode);
-+int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
-+ const char *linkname);
-+int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
-+ int mode, dev_t rdev);
-+int reiser4_rename_common(struct inode *old_dir, struct dentry *old_name,
-+ struct inode *new_dir, struct dentry *new_name);
-+void *reiser4_follow_link_common(struct dentry *, struct nameidata *data);
-+int reiser4_permission_common(struct inode *, int mask,
-+ struct nameidata *nameidata);
-+int reiser4_setattr_common(struct dentry *, struct iattr *);
-+int reiser4_getattr_common(struct vfsmount *mnt, struct dentry *,
-+ struct kstat *);
-+
-+/* common implementations of file operations */
-+loff_t reiser4_llseek_dir_common(struct file *, loff_t off, int origin);
-+int reiser4_readdir_common(struct file *, void *dirent, filldir_t);
-+int reiser4_release_dir_common(struct inode *, struct file *);
-+int reiser4_sync_common(struct file *, struct dentry *, int datasync);
-+
-+/* common implementations of address space operations */
-+int prepare_write_common(struct file *, struct page *, unsigned from,
-+ unsigned to);
-+
-+/* file plugin operations: common implementations */
-+int write_sd_by_inode_common(struct inode *);
-+int key_by_inode_and_offset_common(struct inode *, loff_t, reiser4_key *);
-+int set_plug_in_inode_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int adjust_to_parent_common(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int adjust_to_parent_common_dir(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent,
-+ struct inode *root);
-+int reiser4_create_object_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int reiser4_delete_object_common(struct inode *);
-+int reiser4_delete_dir_common(struct inode *);
-+int reiser4_add_link_common(struct inode *object, struct inode *parent);
-+int reiser4_rem_link_common(struct inode *object, struct inode *parent);
-+int rem_link_common_dir(struct inode *object, struct inode *parent);
-+int owns_item_common(const struct inode *, const coord_t *);
-+int owns_item_common_dir(const struct inode *, const coord_t *);
-+int can_add_link_common(const struct inode *);
-+int can_rem_link_common_dir(const struct inode *);
-+int reiser4_detach_common_dir(struct inode *child, struct inode *parent);
-+int reiser4_bind_common_dir(struct inode *child, struct inode *parent);
-+int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value);
-+reiser4_block_nr estimate_create_common(const struct inode *);
-+reiser4_block_nr estimate_create_common_dir(const struct inode *);
-+reiser4_block_nr estimate_update_common(const struct inode *);
-+reiser4_block_nr estimate_unlink_common(const struct inode *,
-+ const struct inode *);
-+reiser4_block_nr estimate_unlink_common_dir(const struct inode *,
-+ const struct inode *);
-+/* methods installed in the .wire member of the file plugins */
-+char *wire_write_common(struct inode *, char *start);
-+char *wire_read_common(char *addr, reiser4_object_on_wire *);
-+struct dentry *wire_get_common(struct super_block *, reiser4_object_on_wire *);
-+int wire_size_common(struct inode *);
-+void wire_done_common(reiser4_object_on_wire *);
-+
-+/* dir plugin operations: common implementations.
-+   The dir_plugins[] table in plugin/object.c installs all of these except
-+   build_entry_key_common(); its ->build_entry_key() slots use
-+   build_entry_key_hashed() and build_entry_key_seekable(), which are not
-+   declared in this header. */
-+struct dentry *get_parent_common(struct inode *child);
-+int is_name_acceptable_common(const struct inode *, const char *name, int len);
-+void build_entry_key_common(const struct inode *,
-+ const struct qstr *qname, reiser4_key *);
-+int build_readdir_key_common(struct file *dir, reiser4_key *);
-+int reiser4_add_entry_common(struct inode *object, struct dentry *where,
-+ reiser4_object_create_data *, reiser4_dir_entry_desc *);
-+int reiser4_rem_entry_common(struct inode *object, struct dentry *where,
-+ reiser4_dir_entry_desc *);
-+int reiser4_dir_init_common(struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+int reiser4_dir_done_common(struct inode *);
-+int reiser4_attach_common(struct inode *child, struct inode *parent);
-+int reiser4_detach_common(struct inode *object, struct inode *parent);
-+reiser4_block_nr estimate_add_entry_common(const struct inode *);
-+reiser4_block_nr estimate_rem_entry_common(const struct inode *);
-+reiser4_block_nr dir_estimate_unlink_common(const struct inode *,
-+ const struct inode *);
-+
-+/* these are essential parts of common implementations, they are to make
-+ customized implementations easier */
-+int do_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-+
-+/* merely useful functions */
-+int lookup_sd(struct inode *, znode_lock_mode, coord_t *, lock_handle *,
-+ const reiser4_key *, int silent);
-+
-+/* __FS_REISER4_PLUGIN_OBJECT_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/plugin.c linux-2.6.24/fs/reiser4/plugin/plugin.c
---- linux-2.6.24.orig/fs/reiser4/plugin/plugin.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/plugin.c 2008-01-25 11:39:07.048236540 +0300
-@@ -0,0 +1,559 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Basic plugin infrastructure, lookup etc. */
-+
-+/* PLUGINS:
-+
-+ Plugins are internal Reiser4 "modules" or "objects" used to increase
-+ extensibility and allow external users to easily adapt reiser4 to
-+ their needs.
-+
-+ Plugins are classified into several disjoint "types". Plugins
-+ belonging to the particular plugin type are termed "instances" of
-+ this type. Existing types are listed by enum reiser4_plugin_type
-+ (see plugin/plugin_header.h)
-+
-+NIKITA-FIXME-HANS: update this list, and review this entire comment for currency
-+
-+ Object (file) plugin determines how given file-system object serves
-+ standard VFS requests for read, write, seek, mmap etc. Instances of
-+ file plugins are: regular file, directory, symlink. Another example
-+ of file plugin is audit plugin, that optionally records accesses to
-+ underlying object and forwards requests to it.
-+
-+ Hash plugins compute hashes used by reiser4 to store and locate
-+ files within directories. Instances of hash plugin type are: r5,
-+ tea, rupasov.
-+
-+ Tail plugins (or, more precisely, tail policy plugins) determine
-+ when last part of the file should be stored in a formatted item.
-+
-+ Scope and lookup:
-+
-+ Each plugin has a label such that pair ( type_label, plugin_label ) is unique. This
-+ pair is a globally persistent and user-visible plugin
-+ identifier. Internally kernel maintains plugins and plugin types in
-+ arrays using an index into those arrays as plugin and plugin type
-+ identifiers. File-system in turn, also maintains persistent
-+ "dictionary" which is mapping from plugin label to numerical
-+ identifier which is stored in file-system objects. That is, we
-+ store the offset into the plugin array for that plugin type as the
-+ plugin id in the stat data of the filesystem object.
-+
-+ Internal kernel plugin type identifier (index in plugins[] array) is
-+ of type reiser4_plugin_type. Set of available plugin types is
-+ currently static, but dynamic loading doesn't seem to pose
-+ insurmountable problems.
-+
-+ Within each type plugins are addressed by the identifiers of type
-+ reiser4_plugin_id (indices in reiser4_plugin_type_data.builtin[]).
-+ Such identifiers are only required to be unique within one type,
-+ not globally.
-+
-+ Thus, plugin in memory is uniquely identified by the pair (type_id,
-+ id).
-+
-+ Usage:
-+
-+ There exists only one instance of each plugin instance, but this
-+ single instance can be associated with many entities (file-system
-+ objects, items, nodes, transactions, file-descriptors etc.). Entity
-+ to which plugin of given type is attached is termed (due to the lack of
-+ imagination) "subject" of this plugin type and, by abuse of
-+ terminology, subject of particular instance of this type to which
-+ it's attached currently. For example, inode is subject of object
-+ plugin type. Inode representing directory is subject of directory
-+ plugin, hash plugin type and some particular instance of hash plugin
-+ type. Inode, representing regular file is subject of "regular file"
-+ plugin, tail-policy plugin type etc.
-+
-+ With each subject the plugin possibly stores some state. For example,
-+ the state of a directory plugin (instance of object plugin type) is pointer
-+ to hash plugin (if directories always use hashing that is).
-+
-+ Interface:
-+
-+ In addition to a scalar identifier, each plugin type and plugin
-+ proper has a "label": short string and a "description"---longer
-+ descriptive string. Labels and descriptions of plugin types are
-+ hard-coded into plugins[] array, declared and defined in
-+ plugin.c. Label and description of plugin are stored in .label and
-+ .desc fields of reiser4_plugin_header respectively. It's possible to
-+ locate plugin by the pair of labels.
-+
-+ Features (not implemented):
-+
-+ . user-level plugin manipulations:
-+ + reiser4("filename/..file_plugin<='audit'");
-+ + write(open("filename/..file_plugin"), "audit", 8);
-+
-+ . user level utilities lsplug and chplug to manipulate plugins.
-+ Utilities are not of primary priority. Possibly they will be not
-+ working on v4.0
-+
-+ NIKITA-FIXME-HANS: this should be a mkreiserfs option not a mount
-+ option, do you agree? I don't think that specifying it at mount time,
-+ and then changing it with each mount, is a good model for usage.
-+
-+ . mount option "plug" to set-up plugins of root-directory.
-+ "plug=foo:bar" will set "bar" as default plugin of type "foo".
-+
-+ Limitations:
-+
-+ . each plugin type has to provide at least one builtin
-+ plugin. This is technical limitation and it can be lifted in the
-+ future.
-+
-+ TODO:
-+
-+ New plugin types/plugins:
-+ Things we should be able to separately choose to inherit:
-+
-+ security plugins
-+
-+ stat data
-+
-+ file bodies
-+
-+ file plugins
-+
-+ dir plugins
-+
-+ . perm:acl
-+
-+ . audi---audit plugin intercepting and possibly logging all
-+ accesses to object. Requires to put stub functions in file_operations
-+ instead of generic_file_*.
-+
-+NIKITA-FIXME-HANS: why make overflows a plugin?
-+ . over---handle hash overflows
-+
-+ . sqnt---handle different access patterns and instruments read-ahead
-+
-+NIKITA-FIXME-HANS: describe the line below in more detail.
-+
-+ . hier---handle inheritance of plugins along file-system hierarchy
-+
-+ Different kinds of inheritance: on creation vs. on access.
-+ Compatible/incompatible plugins.
-+ Inheritance for multi-linked files.
-+ Layered plugins.
-+ Notion of plugin context is abandoned.
-+
-+Each file is associated
-+ with one plugin and dependent plugins (hash, etc.) are stored as
-+ main plugin state. Now, if we have plugins used for regular files
-+ but not for directories, how such plugins would be inherited?
-+ . always store them with directories also
-+
-+NIKITA-FIXME-HANS: Do the line above. It is not exclusive of doing
-+the line below which is also useful.
-+
-+ . use inheritance hierarchy, independent of file-system namespace
-+*/
-+
-+#include "../debug.h"
-+#include "../dformat.h"
-+#include "plugin_header.h"
-+#include "item/static_stat.h"
-+#include "node/node.h"
-+#include "security/perm.h"
-+#include "space/space_allocator.h"
-+#include "disk_format/disk_format.h"
-+#include "plugin.h"
-+#include "../reiser4.h"
-+#include "../jnode.h"
-+#include "../inode.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+/*
-+ * init_plugins - initialize plugin sub-system.
-+ *
-+ * Meant to be called once, as part of reiser4 module initialization. Walks
-+ * every plugin type in plugins[] and, for each builtin slot that has a label,
-+ * stores the slot index as the plugin id, runs the plugin's ->init() method
-+ * when one is provided and appends the plugin to the list of its type. Slots
-+ * without a label are treated as uninitialized and skipped.
-+ *
-+ * Returns 0 on success, or the first non-zero value returned by an ->init()
-+ * method; plugins after the failing one are not processed.
-+ */
-+int init_plugins(void)
-+{
-+	reiser4_plugin_type type_id;
-+
-+	for (type_id = 0; type_id < REISER4_PLUGIN_TYPES; ++type_id) {
-+		struct reiser4_plugin_type_data *ptype = &plugins[type_id];
-+		int slot;
-+
-+		assert("nikita-3508", ptype->label != NULL);
-+		assert("nikita-3509", ptype->type_id == type_id);
-+
-+		INIT_LIST_HEAD(&ptype->plugins_list);
-+/* NIKITA-FIXME-HANS: change builtin_num to some other name lacking the term builtin. */
-+		for (slot = 0; slot < ptype->builtin_num; ++slot) {
-+			reiser4_plugin *plugin = plugin_at(ptype, slot);
-+
-+			/* a slot without a label was never initialized */
-+			if (plugin->h.label != NULL) {
-+				assert("nikita-3445",
-+				       plugin->h.type_id == type_id);
-+				plugin->h.id = slot;
-+				if (plugin->h.pops != NULL &&
-+				    plugin->h.pops->init != NULL) {
-+					int err = plugin->h.pops->init(plugin);
-+
-+					if (err != 0)
-+						return err;
-+				}
-+				INIT_LIST_HEAD(&plugin->h.linkage);
-+				list_add_tail(&plugin->h.linkage,
-+					      &ptype->plugins_list);
-+			}
-+		}
-+	}
-+	return 0;
-+}
-+
-+/* Returns 1 if @type is a valid plugin type id, 0 otherwise. */
-+int is_plugin_type_valid(reiser4_plugin_type type)
-+{
-+	/* per the original note, "type" is unsigned, so only the upper bound
-+	   has to be checked */
-+	if (type >= REISER4_PLUGIN_TYPES)
-+		return 0;
-+	return 1;
-+}
-+
-+/*
-+ * True if @id addresses a slot in the builtin table of plugin type @type.
-+ *
-+ * @type must already be valid; this is asserted, not checked.
-+ *
-+ * Negative ids are rejected explicitly. NOTE(review): reiser4_plugin_id is
-+ * presumably a signed integer (ids are printed with "%i" in this file) -
-+ * confirm against plugin_header.h. If so, the upper-bound comparison alone
-+ * would accept a negative id and let plugin_by_unsafe_id() index before the
-+ * start of the table. If the type turns out to be unsigned the extra test is
-+ * merely redundant.
-+ */
-+int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id)
-+{
-+	assert("nikita-1653", is_plugin_type_valid(type));
-+	return id >= 0 && id < plugins[type].builtin_num;
-+}
-+
-+/*
-+ * Look up a plugin by an unchecked (@type, @id) pair.
-+ *
-+ * Both arguments are validated before the plugin table is touched. When the
-+ * type or the id is out of bounds a warning is logged and NULL is returned;
-+ * otherwise the plugin in slot @id of type @type is returned.
-+ *
-+ * NIKITA-FIXME-HANS: the original comment said this is "supposed to be called
-+ * from user-level"; it is unclear whether that means ids created in user
-+ * space and passed to the filesystem through method files.
-+ */
-+reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type,
-+				    reiser4_plugin_id id)
-+{
-+	if (!is_plugin_type_valid(type)) {
-+		/* type_id out of bounds */
-+		warning("nikita-2914", "Invalid type_id: %i", type);
-+		return NULL;
-+	}
-+	if (!is_plugin_id_valid(type, id)) {
-+		/* id out of bounds */
-+		warning("nikita-2913",
-+			"Invalid plugin id: [%i:%i]", type, id);
-+		return NULL;
-+	}
-+	return plugin_at(&plugins[type], id);
-+}
-+
-+/**
-+ * save_plugin_id - store plugin id in disk format
-+ * @plugin: plugin whose id is stored; must not be NULL
-+ * @area: destination of the stored id; must not be NULL
-+ *
-+ * Converts the id of @plugin with cpu_to_le16() and writes the result to
-+ * @area through put_unaligned(). Always returns 0.
-+ */
-+int save_plugin_id(reiser4_plugin *plugin, d16 *area)
-+{
-+	assert("nikita-1261", plugin != NULL);
-+	assert("nikita-1262", area != NULL);
-+
-+	put_unaligned(cpu_to_le16(plugin->h.id), area);
-+
-+	return 0;
-+}
-+
-+/* Returns the head of the list that links all plugins of type @type; @type
-+   must be a valid plugin type id (asserted). */
-+struct list_head *get_plugin_list(reiser4_plugin_type type)
-+{
-+	struct reiser4_plugin_type_data *ptype;
-+
-+	assert("nikita-1056", is_plugin_type_valid(type));
-+	ptype = &plugins[type];
-+	return &ptype->plugins_list;
-+}
-+
-+/*
-+ * Recompute bit @memb of info->plugin_mask.
-+ *
-+ * The bit is set when the pset member of @info differs from the same member
-+ * of the root directory's inode, or when @info is the root inode itself;
-+ * otherwise it is cleared. Nothing is done while the super block has no root
-+ * dentry. @memb must not be PSET_FILE (asserted).
-+ */
-+static void update_pset_mask(reiser4_inode * info, pset_member memb)
-+{
-+	struct dentry *rootdir;
-+	reiser4_inode *root;
-+	int differs;
-+
-+	assert("edward-1443", memb != PSET_FILE);
-+
-+	rootdir = inode_by_reiser4_inode(info)->i_sb->s_root;
-+	if (rootdir == NULL)
-+		return;
-+
-+	root = reiser4_inode_data(rootdir->d_inode);
-+	/*
-+	 * if inode is different from the default one, or we are
-+	 * changing plugin of root directory, update plugin_mask
-+	 */
-+	differs = aset_get(info->pset, memb) != aset_get(root->pset, memb);
-+	if (differs || info == root)
-+		info->plugin_mask |= (1 << memb);
-+	else
-+		info->plugin_mask &= ~(1 << memb);
-+}
-+
-+/*
-+ * Install pset member @memb of @self unless it is set already.
-+ *
-+ * With an @ancestor the plugin is taken from the ancestor's hset and, when
-+ * that slot is empty, from the ancestor's pset. Without an ancestor the
-+ * plugin returned by get_default_plugin() is used.
-+ *
-+ * After a successful set_plugin() the plugin mask of @self is refreshed,
-+ * except when an ancestor was given and @self is the root inode of its super
-+ * block.
-+ *
-+ * Returns 0 when the member was already set or has been installed, otherwise
-+ * the non-zero result of set_plugin().
-+ */
-+int grab_plugin_pset(struct inode *self, struct inode *ancestor,
-+		     pset_member memb)
-+{
-+	reiser4_inode *info = reiser4_inode_data(self);
-+	reiser4_plugin *plug;
-+	int result;
-+
-+	/* Do not grab if initialised already. */
-+	if (aset_get(info->pset, memb) != NULL)
-+		return 0;
-+
-+	if (ancestor == NULL) {
-+		plug = get_default_plugin(memb);
-+	} else {
-+		reiser4_inode *parent = reiser4_inode_data(ancestor);
-+
-+		/* hset takes precedence, pset is the fallback */
-+		plug = aset_get(parent->hset, memb);
-+		if (plug == NULL)
-+			plug = aset_get(parent->pset, memb);
-+	}
-+
-+	result = set_plugin(&info->pset, memb, plug);
-+	if (result != 0)
-+		return result;
-+
-+	if (ancestor == NULL || self->i_sb->s_root->d_inode != self)
-+		update_pset_mask(info, memb);
-+	return 0;
-+}
-+
-+/*
-+ * finish_pset - take missing pset members from the root inode
-+ * @inode: inode whose plugin set is completed
-+ *
-+ * For every pset member after PSET_DIR that is still unset in @inode, installs
-+ * the plugin found in the same slot of the root inode's pset. The loop stops
-+ * at the first set_plugin() failure.
-+ *
-+ * Returns 0 on success, otherwise the non-zero result of set_plugin(); in
-+ * that case a warning carrying the object id of @inode is logged.
-+ */
-+int finish_pset(struct inode *inode)
-+{
-+	reiser4_plugin *plug;
-+	reiser4_inode *root;
-+	reiser4_inode *info;
-+	pset_member memb;
-+	int result = 0;
-+
-+	root = reiser4_inode_data(inode->i_sb->s_root->d_inode);
-+	info = reiser4_inode_data(inode);
-+
-+	assert("edward-1455", root != NULL);
-+	assert("edward-1456", info != NULL);
-+
-+	/* file and directory plugins are already initialized. */
-+	for (memb = PSET_DIR + 1; memb < PSET_LAST; ++memb) {
-+
-+		/* Do not grab if initialised already. */
-+		if (aset_get(info->pset, memb) != NULL)
-+			continue;
-+
-+		plug = aset_get(root->pset, memb);
-+		result = set_plugin(&info->pset, memb, plug);
-+		if (result != 0)
-+			break;
-+	}
-+	if (result != 0) {
-+		/* the argument is cast to unsigned long long, so it has to be
-+		   printed with an unsigned conversion (%llu, not %lli) */
-+		warning("nikita-3447",
-+			"Cannot set up plugins for %llu",
-+			(unsigned long long)
-+			get_inode_oid(inode));
-+	}
-+	return result;
-+}
-+
-+/*
-+ * Replace pset member @memb of @self with @plug.
-+ *
-+ * Refused with -EINVAL when the super block has no root dentry or when @self
-+ * is the root inode. If @plug provides a ->change() method, that method is
-+ * called; otherwise the plugin is stored with aset_set_unsafe(). On success
-+ * the plugin mask of @self is refreshed, and if the mask changed the
-+ * REISER4_SDLEN_KNOWN flag of the inode is cleared.
-+ *
-+ * Returns 0 on success, otherwise the error from the step that failed.
-+ */
-+int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin * plug)
-+{
-+	struct dentry *rootdir = self->i_sb->s_root;
-+	reiser4_inode *info;
-+	__u16 mask_before;
-+	int result;
-+
-+	/* Changing pset in the root object. */
-+	if (rootdir == NULL || rootdir->d_inode == self)
-+		return RETERR(-EINVAL);
-+
-+	info = reiser4_inode_data(self);
-+	if (plug->h.pops == NULL || plug->h.pops->change == NULL)
-+		result = aset_set_unsafe(&info->pset, memb, plug);
-+	else
-+		result = plug->h.pops->change(self, plug, memb);
-+	if (result != 0)
-+		return result;
-+
-+	mask_before = info->plugin_mask;
-+	update_pset_mask(info, memb);
-+	if (info->plugin_mask != mask_before)
-+		reiser4_inode_clr_flag(self, REISER4_SDLEN_KNOWN);
-+	return 0;
-+}
-+
-+/*
-+ * Table of plugin types, indexed by plugin type id. For every type it records
-+ * the label and description of the type, the array of builtin plugins
-+ * (.builtin) together with its number of slots (.builtin_num), and the sizeof
-+ * of the plugin structure used by that type (.size). .plugins_list is
-+ * statically {NULL, NULL}; init_plugins() initializes it and links the
-+ * plugins of the type onto it.
-+ */
-+struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES] = {
-+ /* C99 designated initializers */
-+ [REISER4_FILE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FILE_PLUGIN_TYPE,
-+ .label = "file",
-+ .desc = "Object plugins",
-+ .builtin_num = sizeof_array(file_plugins),
-+ .builtin = file_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(file_plugin)
-+ },
-+ [REISER4_DIR_PLUGIN_TYPE] = {
-+ .type_id = REISER4_DIR_PLUGIN_TYPE,
-+ .label = "dir",
-+ .desc = "Directory plugins",
-+ .builtin_num = sizeof_array(dir_plugins),
-+ .builtin = dir_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(dir_plugin)
-+ },
-+ [REISER4_HASH_PLUGIN_TYPE] = {
-+ .type_id = REISER4_HASH_PLUGIN_TYPE,
-+ .label = "hash",
-+ .desc = "Directory hashes",
-+ .builtin_num = sizeof_array(hash_plugins),
-+ .builtin = hash_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(hash_plugin)
-+ },
-+ [REISER4_FIBRATION_PLUGIN_TYPE] = {
-+ .type_id =
-+ REISER4_FIBRATION_PLUGIN_TYPE,
-+ .label = "fibration",
-+ .desc = "Directory fibrations",
-+ .builtin_num = sizeof_array(fibration_plugins),
-+ .builtin = fibration_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(fibration_plugin)
-+ },
-+ [REISER4_CIPHER_PLUGIN_TYPE] = {
-+ .type_id = REISER4_CIPHER_PLUGIN_TYPE,
-+ .label = "cipher",
-+ .desc = "Cipher plugins",
-+ .builtin_num = sizeof_array(cipher_plugins),
-+ .builtin = cipher_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(cipher_plugin)
-+ },
-+ [REISER4_DIGEST_PLUGIN_TYPE] = {
-+ .type_id = REISER4_DIGEST_PLUGIN_TYPE,
-+ .label = "digest",
-+ .desc = "Digest plugins",
-+ .builtin_num = sizeof_array(digest_plugins),
-+ .builtin = digest_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(digest_plugin)
-+ },
-+ [REISER4_COMPRESSION_PLUGIN_TYPE] = {
-+ .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .label = "compression",
-+ .desc = "Compression plugins",
-+ .builtin_num = sizeof_array(compression_plugins),
-+ .builtin = compression_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(compression_plugin)
-+ },
-+ [REISER4_FORMATTING_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .label = "formatting",
-+ .desc = "Tail inlining policies",
-+ .builtin_num = sizeof_array(formatting_plugins),
-+ .builtin = formatting_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(formatting_plugin)
-+ },
-+ [REISER4_PERM_PLUGIN_TYPE] = {
-+ .type_id = REISER4_PERM_PLUGIN_TYPE,
-+ .label = "perm",
-+ .desc = "Permission checks",
-+ .builtin_num = sizeof_array(perm_plugins),
-+ .builtin = perm_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(perm_plugin)
-+ },
-+ [REISER4_ITEM_PLUGIN_TYPE] = {
-+ .type_id = REISER4_ITEM_PLUGIN_TYPE,
-+ .label = "item",
-+ .desc = "Item handlers",
-+ .builtin_num = sizeof_array(item_plugins),
-+ .builtin = item_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(item_plugin)
-+ },
-+ [REISER4_NODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_NODE_PLUGIN_TYPE,
-+ .label = "node",
-+ .desc = "node layout handlers",
-+ .builtin_num = sizeof_array(node_plugins),
-+ .builtin = node_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(node_plugin)
-+ },
-+ [REISER4_SD_EXT_PLUGIN_TYPE] = {
-+ .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
-+ .label = "sd_ext",
-+ .desc = "Parts of stat-data",
-+ .builtin_num = sizeof_array(sd_ext_plugins),
-+ .builtin = sd_ext_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(sd_ext_plugin)
-+ },
-+ [REISER4_FORMAT_PLUGIN_TYPE] = {
-+ .type_id = REISER4_FORMAT_PLUGIN_TYPE,
-+ .label = "disk_layout",
-+ .desc = "defines filesystem on disk layout",
-+ .builtin_num = sizeof_array(format_plugins),
-+ .builtin = format_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(disk_format_plugin)
-+ },
-+ [REISER4_JNODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_JNODE_PLUGIN_TYPE,
-+ .label = "jnode",
-+ .desc = "defines kind of jnode",
-+ .builtin_num = sizeof_array(jnode_plugins),
-+ .builtin = jnode_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(jnode_plugin)
-+ },
-+ [REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = {
-+ .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .label = "compression_mode",
-+ .desc = "Defines compression mode",
-+ .builtin_num = sizeof_array(compression_mode_plugins),
-+ .builtin = compression_mode_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(compression_mode_plugin)
-+ },
-+ [REISER4_CLUSTER_PLUGIN_TYPE] = {
-+ .type_id = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .label = "cluster",
-+ .desc = "Defines cluster size",
-+ .builtin_num = sizeof_array(cluster_plugins),
-+ .builtin = cluster_plugins,
-+ .plugins_list = {NULL, NULL},
-+ .size = sizeof(cluster_plugin)
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/plugin.h linux-2.6.24/fs/reiser4/plugin/plugin.h
---- linux-2.6.24.orig/fs/reiser4/plugin/plugin.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/plugin.h 2008-01-25 11:39:07.052237570 +0300
-@@ -0,0 +1,937 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Basic plugin data-types.
-+ see fs/reiser4/plugin/plugin.c for details */
-+
-+#if !defined( __FS_REISER4_PLUGIN_TYPES_H__ )
-+#define __FS_REISER4_PLUGIN_TYPES_H__
-+
-+#include "../forward.h"
-+#include "../debug.h"
-+#include "../dformat.h"
-+#include "../key.h"
-+#include "compress/compress.h"
-+#include "crypto/cipher.h"
-+#include "plugin_header.h"
-+#include "item/static_stat.h"
-+#include "item/internal.h"
-+#include "item/sde.h"
-+#include "item/cde.h"
-+#include "item/item.h"
-+#include "node/node.h"
-+#include "node/node40.h"
-+#include "security/perm.h"
-+#include "fibration.h"
-+
-+#include "space/bitmap.h"
-+#include "space/space_allocator.h"
-+
-+#include "disk_format/disk_format40.h"
-+#include "disk_format/disk_format.h"
-+
-+#include <linux/fs.h> /* for struct super_block, address_space */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/buffer_head.h> /* for struct buffer_head */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/types.h>
-+#include <linux/crypto.h>
-+
-+typedef struct reiser4_object_on_wire reiser4_object_on_wire;
-+
-+/*
-+ * File plugin. Defines the set of methods that file plugins implement, some
-+ * of which are optional.
-+ *
-+ * A file plugin offers to the caller an interface for IO ( writing to and/or
-+ * reading from) to what the caller sees as one sequence of bytes. An IO to it
-+ * may affect more than one physical sequence of bytes, or no physical sequence
-+ * of bytes, it may affect sequences of bytes offered by other file plugins to
-+ * the semantic layer, and the file plugin may invoke other plugins and
-+ * delegate work to them, but its interface is structured for offering the
-+ * caller the ability to read and/or write what the caller sees as being a
-+ * single sequence of bytes.
-+ *
-+ * The file plugin must present a sequence of bytes to the caller, but it does
-+ * not necessarily have to store a sequence of bytes, it does not necessarily
-+ * have to support efficient tree traversal to any offset in the sequence of
-+ * bytes (tail and extent items, whose keys contain offsets, do however provide
-+ * efficient non-sequential lookup of any offset in the sequence of bytes).
-+ *
-+ * Directory plugins provide methods for selecting file plugins by resolving a
-+ * name for them.
-+ *
-+ * The functionality other filesystems call an attribute, and rigidly tie
-+ * together, we decompose into orthogonal selectable features of files. Using
-+ * the terminology we will define next, an attribute is a perhaps constrained,
-+ * perhaps static length, file whose parent has a uni-count-intra-link to it,
-+ * which might be grandparent-major-packed, and whose parent has a deletion
-+ * method that deletes it.
-+ *
-+ * File plugins can implement constraints.
-+ *
-+ * Files can be of variable length (e.g. regular unix files), or of static
-+ * length (e.g. static sized attributes).
-+ *
-+ * An object may have many sequences of bytes, and many file plugins, but, it
-+ * has exactly one objectid. It is usually desirable that an object has a
-+ * deletion method which deletes every item with that objectid. Items cannot
-+ * in general be found by just their objectids. This means that an object must
-+ * have either a method built into its deletion plugin method for knowing what
-+ * items need to be deleted, or links stored with the object that provide the
-+ * plugin with a method for finding those items. Deleting a file within an
-+ * object may or may not have the effect of deleting the entire object,
-+ * depending on the file plugin's deletion method.
-+ *
-+ * LINK TAXONOMY:
-+ *
-+ * Many objects have a reference count, and when the reference count reaches 0
-+ * the object's deletion method is invoked. Some links embody a reference
-+ * count increase ("countlinks"), and others do not ("nocountlinks").
-+ *
-+ * Some links are bi-directional links ("bilinks"), and some are
-+ * uni-directional("unilinks").
-+ *
-+ * Some links are between parts of the same object ("intralinks"), and some are
-+ * between different objects ("interlinks").
-+ *
-+ * PACKING TAXONOMY:
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their object's objectid (e.g. unix directory items in plan A), and these are
-+ * called "self-major-packed".
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their semantic parent object's objectid (e.g. unix file bodies in plan A),
-+ * and these are called "parent-major-packed".
-+ *
-+ * Some items of an object are stored with a major packing locality based on
-+ * their semantic grandparent, and these are called "grandparent-major-packed".
-+ * Now carefully notice that we run into trouble with key length if we have to
-+ * store a 8 byte major+minor grandparent based packing locality, an 8 byte
-+ * parent objectid, an 8 byte attribute objectid, and an 8 byte offset, all in
-+ * a 24 byte key. One of these fields must be sacrificed if an item is to be
-+ * grandparent-major-packed, and which to sacrifice is left to the item author
-+ * choosing to make the item grandparent-major-packed. You cannot make tail
-+ * items and extent items grandparent-major-packed, though you could make them
-+ * self-major-packed (usually they are parent-major-packed).
-+ *
-+ * In the case of ACLs (which are composed of fixed length ACEs which consist
-+ * of {subject-type, subject, and permission bitmask} triples), it makes sense
-+ * to not have an offset field in the ACE item key, and to allow duplicate keys
-+ * for ACEs. Thus, the set of ACES for a given file is found by looking for a
-+ * key consisting of the objectid of the grandparent (thus grouping all ACLs in
-+ * a directory together), the minor packing locality of ACE, the objectid of
-+ * the file, and 0.
-+ *
-+ * IO involves moving data from one location to another, which means that two
-+ * locations must be specified, source and destination.
-+ *
-+ * This source and destination can be in the filesystem, or they can be a
-+ * pointer in the user process address space plus a byte count.
-+ *
-+ * If both source and destination are in the filesystem, then at least one of
-+ * them must be representable as a pure stream of bytes (which we call a flow,
-+ * and define as a struct containing a key, a data pointer, and a length).
-+ * This may mean converting one of them into a flow. We provide a generic
-+ * cast_into_flow() method, which will work for any plugin supporting
-+ * read_flow(), though it is inefficiently implemented in that it temporarily
-+ * stores the flow in a buffer (Question: what to do with huge flows that
-+ * cannot fit into memory? Answer: we must not convert them all at once. )
-+ *
-+ * Performing a write requires resolving the write request into a flow defining
-+ * the source, and a method that performs the write, and a key that defines
-+ * where in the tree the write is to go.
-+ *
-+ * Performing a read requires resolving the read request into a flow defining
-+ * the target, and a method that performs the read, and a key that defines
-+ * where in the tree the read is to come from.
-+ *
-+ * There will exist file plugins which have no pluginid stored on the disk for
-+ * them, and which are only invoked by other plugins.
-+ */
-+
-+/* This should be incremented with each new contributed
-+ pair (plugin type, plugin id).
-+ NOTE: Make sure there is a release of reiser4progs
-+ with the corresponding version number */
-+#define PLUGIN_LIBRARY_VERSION 0
-+
-+ /* enumeration of fields within plugin_set */
-+typedef enum {
-+ PSET_FILE,
-+ PSET_DIR, /* PSET_FILE and PSET_DIR should be first elements:
-+ * inode.c:read_inode() depends on this. */
-+ PSET_PERM,
-+ PSET_FORMATTING,
-+ PSET_HASH,
-+ PSET_FIBRATION,
-+ PSET_SD,
-+ PSET_DIR_ITEM,
-+ PSET_CIPHER,
-+ PSET_DIGEST,
-+ PSET_COMPRESSION,
-+ PSET_COMPRESSION_MODE,
-+ PSET_CLUSTER,
-+ PSET_CREATE,
-+ PSET_LAST
-+} pset_member;
-+
-+/* builtin file-plugins */
-+typedef enum {
-+ /* regular file */
-+ UNIX_FILE_PLUGIN_ID,
-+ /* directory */
-+ DIRECTORY_FILE_PLUGIN_ID,
-+ /* symlink */
-+ SYMLINK_FILE_PLUGIN_ID,
-+ /* for objects completely handled by the VFS: fifos, devices,
-+ sockets */
-+ SPECIAL_FILE_PLUGIN_ID,
-+ /* regular cryptcompress file */
-+ CRYPTCOMPRESS_FILE_PLUGIN_ID,
-+ /* number of file plugins. Used as size of arrays to hold
-+ file plugins. */
-+ LAST_FILE_PLUGIN_ID
-+} reiser4_file_id;
-+
-+typedef struct file_plugin {
-+
-+ /* generic fields */
-+ plugin_header h;
-+
-+ /* VFS methods.
-+ * Must be invariant with respect to plugin conversion.
-+ * It can be achieved by using "common" methods, which
-+ * are the same for all plugins that take participation in
-+ * conversion, or by using "generic" or "careful" methods,
-+ * which provide automatic redirection to proper private
-+ * plugin methods ("careful" are the same as "generic",
-+ * but with protection of pset and other disk structures
-+ * from being rebuilt during conversion.
-+ */
-+ struct inode_operations * inode_ops;
-+ struct file_operations * file_ops;
-+ struct address_space_operations * as_ops;
-+ /**
-+ * Private methods. These are optional. If used they will allow you
-+ * to minimize the amount of code needed to implement a deviation
-+ * from some other method that also uses them.
-+ */
-+ /*
-+ * private inode_ops
-+ */
-+ int (*setattr)(struct dentry *, struct iattr *);
-+ /*
-+ * private file_ops
-+ */
-+ /* do whatever is necessary to do when object is opened */
-+ int (*open) (struct inode * inode, struct file * file);
-+ ssize_t (*read) (struct file *, char __user *buf, size_t read_amount,
-+ loff_t *off);
-+ /* write as much as possible bytes from nominated @write_amount
-+ * before plugin scheduling is occurred. Save scheduling state
-+ * in @cont */
-+ ssize_t (*write) (struct file *, const char __user *buf,
-+ size_t write_amount, loff_t * off,
-+ struct psched_context * cont);
-+ int (*ioctl) (struct inode *inode, struct file *filp,
-+ unsigned int cmd, unsigned long arg);
-+ int (*mmap) (struct file *, struct vm_area_struct *);
-+ int (*release) (struct inode *, struct file *);
-+ /*
-+ * private a_ops
-+ */
-+ int (*readpage) (struct file *file, struct page *page);
-+ int (*readpages)(struct file *file, struct address_space *mapping,
-+ struct list_head *pages, unsigned nr_pages);
-+ int (*writepages)(struct address_space *mapping,
-+ struct writeback_control *wbc);
-+ int (*prepare_write)(struct file *file, struct page *page,
-+ unsigned from, unsigned to);
-+ int (*commit_write)(struct file *file, struct page *page,
-+ unsigned from, unsigned to);
-+ sector_t (*bmap) (struct address_space * mapping, sector_t lblock);
-+ /* other private methods */
-+ /* save inode cached stat-data onto disk. It was called
-+ reiserfs_update_sd() in 3.x */
-+ int (*write_sd_by_inode) (struct inode *);
-+ /*
-+ * Construct flow into @flow according to user-supplied data.
-+ *
-+ * This is used by read/write methods to construct a flow to
-+ * write/read. ->flow_by_inode() is plugin method, rather than single
-+ * global implementation, because key in a flow used by plugin may
-+ * depend on data in a @buf.
-+ *
-+ * NIKITA-FIXME-HANS: please create statistics on what functions are
-+ * dereferenced how often for the mongo benchmark. You can supervise
-+ * Elena doing this for you if that helps. Email me the list of the
-+ * top 10, with their counts, and an estimate of the total number of
-+ * CPU cycles spent dereferencing as a percentage of CPU cycles spent
-+ * processing (non-idle processing). If the total percent is, say,
-+ * less than 1%, it will make our coding discussions much easier, and
-+ * keep me from questioning whether functions like the below are too
-+ * frequently called to be dereferenced. If the total percent is more
-+ * than 1%, perhaps private methods should be listed in a "required"
-+ * comment at the top of each plugin (with stern language about how if
-+ * the comment is missing it will not be accepted by the maintainer),
-+ * and implemented using macros not dereferenced functions. How about
-+ * replacing this whole private methods part of the struct with a
-+ * thorough documentation of what the standard helper functions are for
-+ * use in constructing plugins? I think users have been asking for
-+ * that, though not in so many words.
-+ */
-+ int (*flow_by_inode) (struct inode *, const char __user *buf,
-+ int user, loff_t size,
-+ loff_t off, rw_op op, flow_t *);
-+ /*
-+ * Return the key used to retrieve an offset of a file. It is used by
-+ * default implementation of ->flow_by_inode() method
-+ * (common_build_flow()) and, among other things, to get to the extent
-+ * from jnode of unformatted node.
-+ */
-+ int (*key_by_inode) (struct inode *, loff_t off, reiser4_key *);
-+
-+ /* NIKITA-FIXME-HANS: this comment is not as clear to others as you think.... */
-+ /*
-+ * set the plugin for a file. Called during file creation in creat()
-+ * but not reiser4() unless an inode already exists for the file.
-+ */
-+ int (*set_plug_in_inode) (struct inode *inode, struct inode *parent,
-+ reiser4_object_create_data *);
-+
-+ /* NIKITA-FIXME-HANS: comment and name seem to say different things,
-+ * are you setting up the object itself also or just adjusting the
-+ * parent?.... */
-+ /* set up plugins for new @object created in @parent. @root is root
-+ directory. */
-+ int (*adjust_to_parent) (struct inode *object, struct inode *parent,
-+ struct inode *root);
-+ /*
-+ * this does whatever is necessary to do when object is created. For
-+ * instance, for unix files stat data is inserted. It is supposed to be
-+ * called by create of struct inode_operations.
-+ */
-+ int (*create_object) (struct inode *object, struct inode *parent,
-+ reiser4_object_create_data *);
-+ /*
-+ * this method should check REISER4_NO_SD and set REISER4_NO_SD on
-+ * success. Deletion of an object usually includes removal of items
-+ * building file body (for directories this is removal of "." and "..")
-+ * and removal of stat-data item.
-+ */
-+ int (*delete_object) (struct inode *);
-+
-+ /* add link from @parent to @object */
-+ int (*add_link) (struct inode *object, struct inode *parent);
-+
-+ /* remove link from @parent to @object */
-+ int (*rem_link) (struct inode *object, struct inode *parent);
-+
-+ /*
-+ * return true if item addressed by @coord belongs to @inode. This is
-+ * used by read/write to properly slice flow into items in presence of
-+ * multiple key assignment policies, because items of a file are not
-+ * necessarily contiguous in a key space, for example, in a plan-b.
-+ */
-+ int (*owns_item) (const struct inode *, const coord_t *);
-+
-+ /* checks whether yet another hard links to this object can be
-+ added */
-+ int (*can_add_link) (const struct inode *);
-+
-+ /* checks whether hard links to this object can be removed */
-+ int (*can_rem_link) (const struct inode *);
-+
-+ /* not empty for DIRECTORY_FILE_PLUGIN_ID only currently. It calls
-+ detach of directory plugin to remove ".." */
-+ int (*detach) (struct inode * child, struct inode * parent);
-+
-+ /* called when @child was just looked up in the @parent. It is not
-+ empty for DIRECTORY_FILE_PLUGIN_ID only where it calls attach of
-+ directory plugin */
-+ int (*bind) (struct inode * child, struct inode * parent);
-+
-+ /* process safe-link during mount */
-+ int (*safelink) (struct inode * object, reiser4_safe_link_t link,
-+ __u64 value);
-+
-+ /* The couple of estimate methods for all file operations */
-+ struct {
-+ reiser4_block_nr(*create) (const struct inode *);
-+ reiser4_block_nr(*update) (const struct inode *);
-+ reiser4_block_nr(*unlink) (const struct inode *,
-+ const struct inode *);
-+ } estimate;
-+
-+ /*
-+ * reiser4 specific part of inode has a union of structures which are
-+ * specific to a plugin. This method is called when inode is read
-+ * (read_inode) and when file is created (common_create_child) so that
-+ * file plugin could initialize its inode data
-+ */
-+ void (*init_inode_data) (struct inode *, reiser4_object_create_data *,
-+ int);
-+
-+ /*
-+ * This method performs progressive deletion of items and whole nodes
-+ * from right to left.
-+ *
-+ * @tap: the point deletion process begins from,
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ *
-+ * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree
-+ * operation was interrupted for allowing atom commit .
-+ */
-+ int (*cut_tree_worker) (tap_t *, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed, struct inode *,
-+ int, int *);
-+
-+ /* called from ->destroy_inode() */
-+ void (*destroy_inode) (struct inode *);
-+
-+ /*
-+ * methods to serialize object identify. This is used, for example, by
-+ * reiser4_{en,de}code_fh().
-+ */
-+ struct {
-+ /* store object's identity at @area */
-+ char *(*write) (struct inode * inode, char *area);
-+ /* parse object from wire to the @obj */
-+ char *(*read) (char *area, reiser4_object_on_wire * obj);
-+ /* given object identity in @obj, find or create its dentry */
-+ struct dentry *(*get) (struct super_block * s,
-+ reiser4_object_on_wire * obj);
-+ /* how many bytes ->wire.write() consumes */
-+ int (*size) (struct inode * inode);
-+ /* finish with object identify */
-+ void (*done) (reiser4_object_on_wire * obj);
-+ } wire;
-+} file_plugin;
-+
-+extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
-+
-+struct reiser4_object_on_wire {
-+ file_plugin *plugin;
-+ union {
-+ struct {
-+ obj_key_id key_id;
-+ } std;
-+ void *generic;
-+ } u;
-+};
-+
-+/* builtin dir-plugins */
-+typedef enum {
-+ HASHED_DIR_PLUGIN_ID,
-+ SEEKABLE_HASHED_DIR_PLUGIN_ID,
-+ LAST_DIR_ID
-+} reiser4_dir_id;
-+
-+typedef struct dir_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+
-+ struct inode_operations * inode_ops;
-+ struct file_operations * file_ops;
-+ struct address_space_operations * as_ops;
-+
-+ /*
-+ * private methods: These are optional. If used they will allow you to
-+ * minimize the amount of code needed to implement a deviation from
-+ * some other method that uses them. You could logically argue that
-+ * they should be a separate type of plugin.
-+ */
-+
-+ struct dentry *(*get_parent) (struct inode * childdir);
-+
-+ /*
-+ * check whether "name" is acceptable name to be inserted into this
-+ * object. Optionally implemented by directory-like objects. Can check
-+ * for maximal length, reserved symbols etc
-+ */
-+ int (*is_name_acceptable) (const struct inode * inode, const char *name,
-+ int len);
-+
-+ void (*build_entry_key) (const struct inode * dir /* directory where
-+ * entry is (or will
-+ * be) in.*/ ,
-+ const struct qstr * name /* name of file
-+ * referenced by this
-+ * entry */ ,
-+ reiser4_key * result /* resulting key of
-+ * directory entry */ );
-+ int (*build_readdir_key) (struct file * dir, reiser4_key * result);
-+ int (*add_entry) (struct inode * object, struct dentry * where,
-+ reiser4_object_create_data * data,
-+ reiser4_dir_entry_desc * entry);
-+ int (*rem_entry) (struct inode * object, struct dentry * where,
-+ reiser4_dir_entry_desc * entry);
-+
-+ /*
-+ * initialize directory structure for newly created object. For normal
-+ * unix directories, insert dot and dotdot.
-+ */
-+ int (*init) (struct inode * object, struct inode * parent,
-+ reiser4_object_create_data * data);
-+
-+ /* destroy directory */
-+ int (*done) (struct inode * child);
-+
-+ /* called when @subdir was just looked up in the @dir */
-+ int (*attach) (struct inode * subdir, struct inode * dir);
-+ int (*detach) (struct inode * subdir, struct inode * dir);
-+
-+ struct {
-+ reiser4_block_nr(*add_entry) (const struct inode *);
-+ reiser4_block_nr(*rem_entry) (const struct inode *);
-+ reiser4_block_nr(*unlink) (const struct inode *,
-+ const struct inode *);
-+ } estimate;
-+} dir_plugin;
-+
-+extern dir_plugin dir_plugins[LAST_DIR_ID];
-+
-+typedef struct formatting_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* returns non-zero iff file's tail has to be stored
-+ in a direct item. */
-+ int (*have_tail) (const struct inode * inode, loff_t size);
-+} formatting_plugin;
-+
-+typedef struct hash_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* computes hash of the given name */
-+ __u64(*hash) (const unsigned char *name, int len);
-+} hash_plugin;
-+
-+typedef struct cipher_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ struct crypto_blkcipher * (*alloc) (void);
-+ void (*free) (struct crypto_blkcipher * tfm);
-+ /* Offset translator. For each offset this returns (k * offset), where
-+ k (k >= 1) is an expansion factor of the cipher algorithm.
-+ For all symmetric algorithms k == 1. For asymmetric algorithms (which
-+ inflate data) offset translation guarantees that all disk cluster's
-+ units will have keys smaller then next cluster's one.
-+ */
-+ loff_t(*scale) (struct inode * inode, size_t blocksize, loff_t src);
-+ /* Cipher algorithms can accept data only by chunks of cipher block
-+ size. This method is to align any flow up to cipher block size when
-+ we pass it to cipher algorithm. To align means to append padding of
-+ special format specific to the cipher algorithm */
-+ int (*align_stream) (__u8 * tail, int clust_size, int blocksize);
-+ /* low-level key manager (check, install, etc..) */
-+ int (*setkey) (struct crypto_tfm * tfm, const __u8 * key,
-+ unsigned int keylen);
-+ /* main text processing procedures */
-+ void (*encrypt) (__u32 * expkey, __u8 * dst, const __u8 * src);
-+ void (*decrypt) (__u32 * expkey, __u8 * dst, const __u8 * src);
-+} cipher_plugin;
-+
-+typedef struct digest_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* fingerprint size in bytes */
-+ int fipsize;
-+ struct crypto_hash * (*alloc) (void);
-+ void (*free) (struct crypto_hash * tfm);
-+} digest_plugin;
-+
-+typedef struct compression_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init) (void);
-+ /* the maximum number of bytes the size of the "compressed" data can
-+ * exceed the uncompressed data. */
-+ int (*overrun) (unsigned src_len);
-+ coa_t(*alloc) (tfm_action act);
-+ void (*free) (coa_t coa, tfm_action act);
-+ /* minimal size of the flow we still try to compress */
-+ int (*min_size_deflate) (void);
-+ __u32(*checksum) (char *data, __u32 length);
-+ /* main transform procedures */
-+ void (*compress) (coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len);
-+ void (*decompress) (coa_t coa, __u8 * src_first, unsigned src_len,
-+ __u8 * dst_first, unsigned *dst_len);
-+} compression_plugin;
-+
-+typedef struct compression_mode_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* this is called when estimating compressibility
-+ of a logical cluster by its content */
-+ int (*should_deflate) (struct inode * inode, cloff_t index);
-+ /* this is called when results of compression should be saved */
-+ int (*accept_hook) (struct inode * inode, cloff_t index);
-+ /* this is called when results of compression should be discarded */
-+ int (*discard_hook) (struct inode * inode, cloff_t index);
-+} compression_mode_plugin;
-+
-+typedef struct cluster_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int shift;
-+} cluster_plugin;
-+
-+typedef struct sd_ext_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*present) (struct inode * inode, char **area, int *len);
-+ int (*absent) (struct inode * inode);
-+ int (*save_len) (struct inode * inode);
-+ int (*save) (struct inode * inode, char **area);
-+ /* alignment requirement for this stat-data part */
-+ int alignment;
-+} sd_ext_plugin;
-+
-+/* this plugin contains methods to allocate objectid for newly created files,
-+ to deallocate objectid when file gets removed, to report number of used and
-+ free objectids */
-+typedef struct oid_allocator_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init_oid_allocator) (reiser4_oid_allocator * map, __u64 nr_files,
-+ __u64 oids);
-+ /* used to report statfs->f_files */
-+ __u64(*oids_used) (reiser4_oid_allocator * map);
-+ /* get next oid to use */
-+ __u64(*next_oid) (reiser4_oid_allocator * map);
-+ /* used to report statfs->f_ffree */
-+ __u64(*oids_free) (reiser4_oid_allocator * map);
-+ /* allocate new objectid */
-+ int (*allocate_oid) (reiser4_oid_allocator * map, oid_t *);
-+ /* release objectid */
-+ int (*release_oid) (reiser4_oid_allocator * map, oid_t);
-+ /* how many pages to reserve in transaction for allocation of new
-+ objectid */
-+ int (*oid_reserve_allocate) (reiser4_oid_allocator * map);
-+ /* how many pages to reserve in transaction for freeing of an
-+ objectid */
-+ int (*oid_reserve_release) (reiser4_oid_allocator * map);
-+ void (*print_info) (const char *, reiser4_oid_allocator *);
-+} oid_allocator_plugin;
-+
-+/* disk layout plugin: this specifies super block, journal, bitmap (if there
-+ are any) locations, etc */
-+typedef struct disk_format_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* replay journal, initialize super_info_data, etc */
-+ int (*init_format) (struct super_block *, void *data);
-+
-+ /* key of root directory stat data */
-+ const reiser4_key *(*root_dir_key) (const struct super_block *);
-+
-+ int (*release) (struct super_block *);
-+ jnode *(*log_super) (struct super_block *);
-+ int (*check_open) (const struct inode * object);
-+ int (*version_update) (struct super_block *);
-+} disk_format_plugin;
-+
-+struct jnode_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ int (*init) (jnode * node);
-+ int (*parse) (jnode * node);
-+ struct address_space *(*mapping) (const jnode * node);
-+ unsigned long (*index) (const jnode * node);
-+ jnode *(*clone) (jnode * node);
-+};
-+
-+/* plugin instance. */
-+/* */
-+/* This is "wrapper" union for all types of plugins. Most of the code uses */
-+/* plugins of particular type (file_plugin, dir_plugin, etc.) rather than */
-+/* operates with pointers to reiser4_plugin. This union is only used in */
-+/* some generic code in plugin/plugin.c that operates on all */
-+/* plugins. Technically speaking purpose of this union is to add type */
-+/* safety to said generic code: each plugin type (file_plugin, for */
-+/* example), contains plugin_header as its first memeber. This first member */
-+/* is located at the same place in memory as .h member of */
-+/* reiser4_plugin. Generic code, obtains pointer to reiser4_plugin and */
-+/* looks in the .h which is header of plugin type located in union. This */
-+/* allows to avoid type-casts. */
-+union reiser4_plugin {
-+ /* generic fields */
-+ plugin_header h;
-+ /* file plugin */
-+ file_plugin file;
-+ /* directory plugin */
-+ dir_plugin dir;
-+ /* hash plugin, used by directory plugin */
-+ hash_plugin hash;
-+ /* fibration plugin used by directory plugin */
-+ fibration_plugin fibration;
-+ /* cipher transform plugin, used by file plugin */
-+ cipher_plugin cipher;
-+ /* digest transform plugin, used by file plugin */
-+ digest_plugin digest;
-+ /* compression transform plugin, used by file plugin */
-+ compression_plugin compression;
-+ /* tail plugin, used by file plugin */
-+ formatting_plugin formatting;
-+ /* permission plugin */
-+ perm_plugin perm;
-+ /* node plugin */
-+ node_plugin node;
-+ /* item plugin */
-+ item_plugin item;
-+ /* stat-data extension plugin */
-+ sd_ext_plugin sd_ext;
-+ /* disk layout plugin */
-+ disk_format_plugin format;
-+ /* object id allocator plugin */
-+ oid_allocator_plugin oid_allocator;
-+ /* plugin for different jnode types */
-+ jnode_plugin jnode;
-+ /* compression mode plugin, used by object plugin */
-+ compression_mode_plugin compression_mode;
-+ /* cluster plugin, used by object plugin */
-+ cluster_plugin clust;
-+ /* place-holder for new plugin types that can be registered
-+ dynamically, and used by other dynamically loaded plugins. */
-+ void *generic;
-+};
-+
-+struct reiser4_plugin_ops {
-+ /* called when plugin is initialized */
-+ int (*init) (reiser4_plugin * plugin);
-+ /* called when plugin is unloaded */
-+ int (*done) (reiser4_plugin * plugin);
-+ /* load given plugin from disk */
-+ int (*load) (struct inode * inode,
-+ reiser4_plugin * plugin, char **area, int *len);
-+ /* how many space is required to store this plugin's state
-+ in stat-data */
-+ int (*save_len) (struct inode * inode, reiser4_plugin * plugin);
-+ /* save persistent plugin-data to disk */
-+ int (*save) (struct inode * inode, reiser4_plugin * plugin,
-+ char **area);
-+ /* alignment requirement for on-disk state of this plugin
-+ in number of bytes */
-+ int alignment;
-+ /* install itself into given inode. This can return error
-+ (e.g., you cannot change hash of non-empty directory). */
-+ int (*change) (struct inode * inode, reiser4_plugin * plugin,
-+ pset_member memb);
-+ /* install itself into given inode. This can return error
-+ (e.g., you cannot change hash of non-empty directory). */
-+ int (*inherit) (struct inode * inode, struct inode * parent,
-+ reiser4_plugin * plugin);
-+};
-+
-+/* functions implemented in fs/reiser4/plugin/plugin.c */
-+
-+/* stores plugin reference in reiser4-specific part of inode */
-+extern int set_object_plugin(struct inode *inode, reiser4_plugin_id id);
-+extern int init_plugins(void);
-+
-+/* builtin plugins */
-+
-+/* builtin hash-plugins */
-+
-+typedef enum {
-+ RUPASOV_HASH_ID,
-+ R5_HASH_ID,
-+ TEA_HASH_ID,
-+ FNV1_HASH_ID,
-+ DEGENERATE_HASH_ID,
-+ LAST_HASH_ID
-+} reiser4_hash_id;
-+
-+/* builtin cipher plugins */
-+
-+typedef enum {
-+ NONE_CIPHER_ID,
-+ LAST_CIPHER_ID
-+} reiser4_cipher_id;
-+
-+/* builtin digest plugins */
-+
-+typedef enum {
-+ SHA256_32_DIGEST_ID,
-+ LAST_DIGEST_ID
-+} reiser4_digest_id;
-+
-+/* builtin compression mode plugins */
-+typedef enum {
-+ NONE_COMPRESSION_MODE_ID,
-+ LATTD_COMPRESSION_MODE_ID,
-+ ULTIM_COMPRESSION_MODE_ID,
-+ FORCE_COMPRESSION_MODE_ID,
-+ CONVX_COMPRESSION_MODE_ID,
-+ LAST_COMPRESSION_MODE_ID
-+} reiser4_compression_mode_id;
-+
-+/* builtin cluster plugins */
-+typedef enum {
-+ CLUSTER_64K_ID,
-+ CLUSTER_32K_ID,
-+ CLUSTER_16K_ID,
-+ CLUSTER_8K_ID,
-+ CLUSTER_4K_ID,
-+ LAST_CLUSTER_ID
-+} reiser4_cluster_id;
-+
-+/* builtin tail-plugins */
-+
-+typedef enum {
-+ NEVER_TAILS_FORMATTING_ID,
-+ ALWAYS_TAILS_FORMATTING_ID,
-+ SMALL_FILE_FORMATTING_ID,
-+ LAST_TAIL_FORMATTING_ID
-+} reiser4_formatting_id;
-+
-+/* data type used to pack parameters that we pass to vfs object creation
-+ function create_object() */
-+struct reiser4_object_create_data {
-+ /* plugin to control created object */
-+ reiser4_file_id id;
-+ /* mode of regular file, directory or special file */
-+/* what happens if some other sort of perm plugin is in use? */
-+ int mode;
-+ /* rdev of special file */
-+ dev_t rdev;
-+ /* symlink target */
-+ const char *name;
-+ /* add here something for non-standard objects you invent, like
-+ query for interpolation file etc. */
-+
-+ struct reiser4_crypto_info * crypto;
-+
-+ struct inode *parent;
-+ struct dentry *dentry;
-+};
-+
-+/* description of directory entry being created/destroyed/sought for
-+
-+ It is passed down to the directory plugin and farther to the
-+ directory item plugin methods. Creation of new directory is done in
-+ several stages: first we search for an entry with the same name, then
-+ create new one. reiser4_dir_entry_desc is used to store some information
-+ collected at some stage of this process and required later: key of
-+ item that we want to insert/delete and pointer to an object that will
-+ be bound by the new directory entry. Probably some more fields will
-+ be added there.
-+
-+*/
-+struct reiser4_dir_entry_desc {
-+ /* key of directory entry */
-+ reiser4_key key;
-+ /* object bound by this entry. */
-+ struct inode *obj;
-+};
-+
-+#define MAX_PLUGIN_TYPE_LABEL_LEN 32
-+#define MAX_PLUGIN_PLUG_LABEL_LEN 32
-+
-+#define PLUGIN_BY_ID(TYPE,ID,FIELD) \
-+static inline TYPE *TYPE ## _by_id( reiser4_plugin_id id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_id ( ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline TYPE *TYPE ## _by_disk_id( reiser4_tree *tree, d16 *id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_disk_id ( tree, ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline TYPE *TYPE ## _by_unsafe_id( reiser4_plugin_id id ) \
-+{ \
-+ reiser4_plugin *plugin = plugin_by_unsafe_id ( ID, id ); \
-+ return plugin ? & plugin -> FIELD : NULL; \
-+} \
-+static inline reiser4_plugin* TYPE ## _to_plugin( TYPE* plugin ) \
-+{ \
-+ return ( reiser4_plugin * ) plugin; \
-+} \
-+static inline reiser4_plugin_id TYPE ## _id( TYPE* plugin ) \
-+{ \
-+ return TYPE ## _to_plugin (plugin) -> h.id; \
-+} \
-+typedef struct { int foo; } TYPE ## _plugin_dummy
-+
-+PLUGIN_BY_ID(item_plugin, REISER4_ITEM_PLUGIN_TYPE, item);
-+PLUGIN_BY_ID(file_plugin, REISER4_FILE_PLUGIN_TYPE, file);
-+PLUGIN_BY_ID(dir_plugin, REISER4_DIR_PLUGIN_TYPE, dir);
-+PLUGIN_BY_ID(node_plugin, REISER4_NODE_PLUGIN_TYPE, node);
-+PLUGIN_BY_ID(sd_ext_plugin, REISER4_SD_EXT_PLUGIN_TYPE, sd_ext);
-+PLUGIN_BY_ID(perm_plugin, REISER4_PERM_PLUGIN_TYPE, perm);
-+PLUGIN_BY_ID(hash_plugin, REISER4_HASH_PLUGIN_TYPE, hash);
-+PLUGIN_BY_ID(fibration_plugin, REISER4_FIBRATION_PLUGIN_TYPE, fibration);
-+PLUGIN_BY_ID(cipher_plugin, REISER4_CIPHER_PLUGIN_TYPE, cipher);
-+PLUGIN_BY_ID(digest_plugin, REISER4_DIGEST_PLUGIN_TYPE, digest);
-+PLUGIN_BY_ID(compression_plugin, REISER4_COMPRESSION_PLUGIN_TYPE, compression);
-+PLUGIN_BY_ID(formatting_plugin, REISER4_FORMATTING_PLUGIN_TYPE, formatting);
-+PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format);
-+PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode);
-+PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ compression_mode);
-+PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust);
-+
-+extern int save_plugin_id(reiser4_plugin * plugin, d16 * area);
-+
-+extern struct list_head *get_plugin_list(reiser4_plugin_type type_id);
-+
-+#define for_all_plugins(ptype, plugin) \
-+for (plugin = list_entry(get_plugin_list(ptype)->next, reiser4_plugin, h.linkage); \
-+ get_plugin_list(ptype) != &plugin->h.linkage; \
-+ plugin = list_entry(plugin->h.linkage.next, reiser4_plugin, h.linkage))
-+
-+
-+extern int grab_plugin_pset(struct inode *self, struct inode *ancestor, pset_member memb);
-+extern int force_plugin_pset(struct inode *self, pset_member memb, reiser4_plugin *plug);
-+extern int finish_pset(struct inode *inode);
-+
-+/* defined in fs/reiser4/plugin/object.c */
-+extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
-+/* defined in fs/reiser4/plugin/object.c */
-+extern dir_plugin dir_plugins[LAST_DIR_ID];
-+/* defined in fs/reiser4/plugin/item/static_stat.c */
-+extern sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION];
-+/* defined in fs/reiser4/plugin/hash.c */
-+extern hash_plugin hash_plugins[LAST_HASH_ID];
-+/* defined in fs/reiser4/plugin/fibration.c */
-+extern fibration_plugin fibration_plugins[LAST_FIBRATION_ID];
-+/* defined in fs/reiser4/plugin/crypt.c */
-+extern cipher_plugin cipher_plugins[LAST_CIPHER_ID];
-+/* defined in fs/reiser4/plugin/digest.c */
-+extern digest_plugin digest_plugins[LAST_DIGEST_ID];
-+/* defined in fs/reiser4/plugin/compress/compress.c */
-+extern compression_plugin compression_plugins[LAST_COMPRESSION_ID];
-+/* defined in fs/reiser4/plugin/compress/compression_mode.c */
-+extern compression_mode_plugin
-+compression_mode_plugins[LAST_COMPRESSION_MODE_ID];
-+/* defined in fs/reiser4/plugin/cluster.c */
-+extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID];
-+/* defined in fs/reiser4/plugin/tail.c */
-+extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID];
-+/* defined in fs/reiser4/plugin/security/security.c */
-+extern perm_plugin perm_plugins[LAST_PERM_ID];
-+/* defined in fs/reiser4/plugin/item/item.c */
-+extern item_plugin item_plugins[LAST_ITEM_ID];
-+/* defined in fs/reiser4/plugin/node/node.c */
-+extern node_plugin node_plugins[LAST_NODE_ID];
-+/* defined in fs/reiser4/plugin/disk_format/disk_format.c */
-+extern disk_format_plugin format_plugins[LAST_FORMAT_ID];
-+
-+/* __FS_REISER4_PLUGIN_TYPES_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/plugin_header.h linux-2.6.24/fs/reiser4/plugin/plugin_header.h
---- linux-2.6.24.orig/fs/reiser4/plugin/plugin_header.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/plugin_header.h 2008-01-25 11:39:07.052237570 +0300
-@@ -0,0 +1,155 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* plugin header. Data structures required by all plugin types. */
-+
-+#if !defined( __PLUGIN_HEADER_H__ )
-+#define __PLUGIN_HEADER_H__
-+
-+/* plugin data-types and constants */
-+
-+#include "../debug.h"
-+#include "../dformat.h"
-+
-+/* Every plugin type can be considered as a class of virtual objects
-+ {(type, i) | i = 0, 1, ...}, which has one the following categories
-+ of virtualization:
-+ A - no virtualization;
-+ F - per-file virtualization;
-+ S - per-superblock virtualization;
-+ FIXME-EDWARD: Define every such category */
-+
-+/* Supported plugin types: (id, (virtualization category), short description) */
-+typedef enum {
-+ REISER4_FILE_PLUGIN_TYPE, /* (F) service VFS entry-points */
-+ REISER4_DIR_PLUGIN_TYPE, /* (F) service VFS entry-points */
-+ REISER4_ITEM_PLUGIN_TYPE, /* (F) manage items */
-+ REISER4_NODE_PLUGIN_TYPE, /* (S) manage formatted nodes */
-+ REISER4_HASH_PLUGIN_TYPE, /* (F) compute hash */
-+ REISER4_FIBRATION_PLUGIN_TYPE, /* (F) directory fibrations */
-+ REISER4_FORMATTING_PLUGIN_TYPE, /* (F) tail-packing policy */
-+ REISER4_PERM_PLUGIN_TYPE, /* stub (vacancy) */
-+ REISER4_SD_EXT_PLUGIN_TYPE, /* (A) stat-data extensions */
-+ REISER4_FORMAT_PLUGIN_TYPE, /* (S) specify disk format */
-+ REISER4_JNODE_PLUGIN_TYPE, /* (A) in-memory node headers */
-+ REISER4_CIPHER_PLUGIN_TYPE, /* (F) cipher transform algs */
-+ REISER4_DIGEST_PLUGIN_TYPE, /* (F) digest transform algs */
-+ REISER4_COMPRESSION_PLUGIN_TYPE, /* (F) compression tfm algs */
-+ REISER4_COMPRESSION_MODE_PLUGIN_TYPE, /* (F) compression heuristic */
-+ REISER4_CLUSTER_PLUGIN_TYPE, /* (F) size of logical cluster */
-+ REISER4_PLUGIN_TYPES /* number of plugin types; dimensions plugins[] */
-+} reiser4_plugin_type;
-+
-+/* Supported plugin groups */
-+typedef enum {
-+ REISER4_DIRECTORY_FILE,
-+ REISER4_REGULAR_FILE,
-+ REISER4_SYMLINK_FILE,
-+ REISER4_SPECIAL_FILE,
-+} file_plugin_group;
-+
-+struct reiser4_plugin_ops;
-+/* generic plugin operations, supported by each
-+ plugin type. */
-+typedef struct reiser4_plugin_ops reiser4_plugin_ops;
-+
-+/* the common part of all plugin instances. */
-+typedef struct plugin_header {
-+ /* plugin type */
-+ reiser4_plugin_type type_id;
-+ /* id of this plugin */
-+ reiser4_plugin_id id;
-+ /* bitmask of groups the plugin belongs to. */
-+ reiser4_plugin_groups groups;
-+ /* plugin operations */
-+ reiser4_plugin_ops *pops;
-+/* NIKITA-FIXME-HANS: usage of and access to label and desc is not commented and defined. */
-+ /* short label of this plugin */
-+ const char *label;
-+ /* descriptive string.. */
-+ const char *desc;
-+ /* list linkage */
-+ struct list_head linkage;
-+} plugin_header;
-+/* non-zero iff bit @group is set in @plug->h.groups; arguments are parenthesized so any expression may be passed */
-+#define plugin_of_group(plug, group) ((plug)->h.groups & (1 << (group)))
-+
-+/* PRIVATE INTERFACES */
-+/* NIKITA-FIXME-HANS: what is this for and why does it duplicate what is in plugin_header? */
-+/* plugin type representation. */
-+struct reiser4_plugin_type_data {
-+ /* internal plugin type identifier. Should coincide with
-+ index of this item in plugins[] array. */
-+ reiser4_plugin_type type_id;
-+ /* short symbolic label of this plugin type. Should be no longer
-+ than MAX_PLUGIN_TYPE_LABEL_LEN characters including '\0'. */
-+ const char *label;
-+ /* plugin type description longer than .label */
-+ const char *desc;
-+
-+/* NIKITA-FIXME-HANS: define built-in */
-+ /* number of built-in plugin instances of this type */
-+ int builtin_num;
-+ /* array of built-in plugins */
-+ void *builtin;
-+ struct list_head plugins_list;
-+ size_t size;
-+};
-+
-+extern struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES];
-+
-+int is_plugin_type_valid(reiser4_plugin_type type);
-+int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id);
-+
-+/* address of element @i of the @ptype->builtin array, whose elements are @ptype->size bytes apart */
-+static inline reiser4_plugin *plugin_at(struct reiser4_plugin_type_data * ptype,
-+					int i)
-+{
-+	char *first = ptype->builtin;
-+
-+	return (reiser4_plugin *) &first[i * ptype->size];
-+}
-+
-+/* return the built-in plugin number @id of plugin type @type; both arguments are checked by assert() only */
-+static inline reiser4_plugin *plugin_by_id(reiser4_plugin_type type,
-+ reiser4_plugin_id id)
-+{
-+ assert("nikita-1651", is_plugin_type_valid(type));
-+ assert("nikita-1652", is_plugin_id_valid(type, id));
-+ return plugin_at(&plugins[type], id);
-+}
-+
-+extern reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type_id,
-+ reiser4_plugin_id id);
-+
-+/**
-+ * plugin_by_disk_id - get reiser4_plugin
-+ * @type_id: plugin type id
-+ * @plugin_id: plugin id in disk format (little-endian 16-bit value)
-+ *
-+ * Returns reiser4_plugin by plugin type id and plugin id. @tree is not used.
-+ */
-+static inline reiser4_plugin *plugin_by_disk_id(reiser4_tree * tree UNUSED_ARG,
-+ reiser4_plugin_type type_id,
-+ __le16 *plugin_id)
-+{
-+ /*
-+ * what we should do properly is to maintain within each file-system a
-+ * dictionary that maps on-disk plugin ids to "universal" ids. This
-+ * dictionary will be resolved on mount time, so that this function
-+ * will perform just one additional array lookup.
-+ */
-+ return plugin_by_unsafe_id(type_id, le16_to_cpu(*plugin_id));
-+}
-+
-+/* __PLUGIN_HEADER_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/plugin_set.c linux-2.6.24/fs/reiser4/plugin/plugin_set.c
---- linux-2.6.24.orig/fs/reiser4/plugin/plugin_set.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/plugin_set.c 2008-01-25 11:39:07.052237570 +0300
-@@ -0,0 +1,379 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* This file contains Reiser4 plugin set operations */
-+
-+/* plugin sets
-+ *
-+ * Each file in reiser4 is controlled by a whole set of plugins (file plugin,
-+ * directory plugin, hash plugin, tail policy plugin, security plugin, etc.)
-+ * assigned (inherited, deduced from mode bits, etc.) at creation time. This
-+ * set of plugins (so called pset) is described by structure plugin_set (see
-+ * plugin/plugin_set.h), which contains pointers to all required plugins.
-+ *
-+ * Children can inherit some pset members from their parent, however sometimes
-+ * it is useful to specify members different from parent ones. Since object's
-+ * pset can not be easily changed without fatal consequences, we use for this
-+ * purpose another special plugin table (so called hset, or heir set) described
-+ * by the same structure.
-+ *
-+ * An inode only stores pointers to its pset and hset. Different inodes with
-+ * the same set of pset (hset) members point to the same pset (hset). This is
-+ * achieved by storing psets and hsets in a global hash table. Races are avoided
-+ * by the simple (and efficient so far) solution of never recycling psets, even
-+ * when the last inode pointing to one is destroyed.
-+ */
-+
-+#include "../debug.h"
-+#include "../super.h"
-+#include "plugin_set.h"
-+
-+#include <linux/slab.h>
-+#include <linux/stddef.h>
-+
-+/* slab for plugin sets */
-+static struct kmem_cache *plugin_set_slab;
-+
-+static spinlock_t plugin_set_lock[8] __cacheline_aligned_in_smp = {
-+ [0 ... 7] = SPIN_LOCK_UNLOCKED
-+};
-+
-+/* hash table support */
-+
-+#define PS_TABLE_SIZE (32)
-+
-+static inline plugin_set *cast_to(const unsigned long *a)
-+{
-+ return container_of(a, plugin_set, hashval);
-+}
-+
-+static inline int pseq(const unsigned long *a1, const unsigned long *a2)
-+{
-+ plugin_set *set1;
-+ plugin_set *set2;
-+
-+ /* make sure fields are not missed in the code below */
-+ cassert(sizeof *set1 ==
-+ sizeof set1->hashval +
-+ sizeof set1->link +
-+ sizeof set1->file +
-+ sizeof set1->dir +
-+ sizeof set1->perm +
-+ sizeof set1->formatting +
-+ sizeof set1->hash +
-+ sizeof set1->fibration +
-+ sizeof set1->sd +
-+ sizeof set1->dir_item +
-+ sizeof set1->cipher +
-+ sizeof set1->digest +
-+ sizeof set1->compression +
-+ sizeof set1->compression_mode +
-+ sizeof set1->cluster +
-+ sizeof set1->create);
-+
-+ set1 = cast_to(a1);
-+ set2 = cast_to(a2);
-+ return
-+ set1->hashval == set2->hashval &&
-+ set1->file == set2->file &&
-+ set1->dir == set2->dir &&
-+ set1->perm == set2->perm &&
-+ set1->formatting == set2->formatting &&
-+ set1->hash == set2->hash &&
-+ set1->fibration == set2->fibration &&
-+ set1->sd == set2->sd &&
-+ set1->dir_item == set2->dir_item &&
-+ set1->cipher == set2->cipher &&
-+ set1->digest == set2->digest &&
-+ set1->compression == set2->compression &&
-+ set1->compression_mode == set2->compression_mode &&
-+ set1->cluster == set2->cluster &&
-+ set1->create == set2->create;
-+}
-+/* add the value of pointer member @field of @set, shifted right by 2, to the accumulator @hash */
-+#define HASH_FIELD(hash, set, field) \
-+({ \
-+ (hash) += (unsigned long)(set)->field >> 2; \
-+})
-+/* hash of @set: HASH_FIELD sum over the 14 plugin pointers (file ... create), masked with PS_TABLE_SIZE - 1 */
-+static inline unsigned long calculate_hash(const plugin_set * set)
-+{
-+ unsigned long result;
-+
-+ result = 0;
-+ HASH_FIELD(result, set, file);
-+ HASH_FIELD(result, set, dir);
-+ HASH_FIELD(result, set, perm);
-+ HASH_FIELD(result, set, formatting);
-+ HASH_FIELD(result, set, hash);
-+ HASH_FIELD(result, set, fibration);
-+ HASH_FIELD(result, set, sd);
-+ HASH_FIELD(result, set, dir_item);
-+ HASH_FIELD(result, set, cipher);
-+ HASH_FIELD(result, set, digest);
-+ HASH_FIELD(result, set, compression);
-+ HASH_FIELD(result, set, compression_mode);
-+ HASH_FIELD(result, set, cluster);
-+ HASH_FIELD(result, set, create);
-+ return result & (PS_TABLE_SIZE - 1);
-+}
-+
-+static inline unsigned long
-+pshash(ps_hash_table * table, const unsigned long *a)
-+{
-+ return *a;
-+}
-+
-+/* The hash table definition */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(ps, plugin_set, unsigned long, hashval, link, pshash,
-+ pseq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+static ps_hash_table ps_table;
-+static plugin_set empty_set = {
-+ .hashval = 0,
-+ .file = NULL,
-+ .dir = NULL,
-+ .perm = NULL,
-+ .formatting = NULL,
-+ .hash = NULL,
-+ .fibration = NULL,
-+ .sd = NULL,
-+ .dir_item = NULL,
-+ .cipher = NULL,
-+ .digest = NULL,
-+ .compression = NULL,
-+ .compression_mode = NULL,
-+ .cluster = NULL,
-+ .create = NULL,
-+ .link = {NULL}
-+};
-+
-+plugin_set *plugin_set_get_empty(void)
-+{
-+ return &empty_set;
-+}
-+
-+void plugin_set_put(plugin_set * set)
-+{
-+}
-+
-+static inline unsigned long *pset_field(plugin_set * set, int offset)
-+{
-+ return (unsigned long *)(((char *)set) + offset);
-+}
-+
-+/* Point *@set at the shared pset that equals the current one except that the word at byte
-+ * @offset is @val. Returns 0, or RETERR(-ENOMEM) (with *@set untouched) if allocation fails. */
-+static int plugin_set_field(plugin_set ** set, const unsigned long val,
-+			    const int offset)
-+{
-+	spinlock_t *lock;
-+	plugin_set replica;
-+	plugin_set *twin;
-+	plugin_set *psal;
-+
-+	assert("nikita-2902", set != NULL);
-+	assert("nikita-2904", *set != NULL);
-+
-+	if (unlikely(*pset_field(*set, offset) == val))
-+		return 0;
-+
-+	replica = **set;
-+	*pset_field(&replica, offset) = val;
-+	replica.hashval = calculate_hash(&replica);
-+	rcu_read_lock();
-+	twin = ps_hash_find(&ps_table, &replica.hashval);
-+	if (unlikely(twin == NULL)) {
-+		rcu_read_unlock();
-+		psal = kmem_cache_alloc(plugin_set_slab, reiser4_ctx_gfp_mask_get());
-+		if (psal == NULL)
-+			return RETERR(-ENOMEM);
-+		*psal = replica;
-+		lock = &plugin_set_lock[replica.hashval & 7];
-+		spin_lock(lock);
-+		/* look again under the lock: a twin may have been inserted meanwhile */
-+		twin = ps_hash_find(&ps_table, &replica.hashval);
-+		if (likely(twin == NULL)) {
-+			*set = psal;
-+			ps_hash_insert_rcu(&ps_table, psal);
-+		} else {
-+			*set = twin;
-+			kmem_cache_free(plugin_set_slab, psal);
-+		}
-+		spin_unlock(lock);
-+	} else {
-+		rcu_read_unlock();
-+		*set = twin;
-+	}
-+	return 0;
-+}
-+
-+static struct {
-+ int offset;
-+ reiser4_plugin_groups groups;
-+ reiser4_plugin_type type;
-+} pset_descr[PSET_LAST] = {
-+ [PSET_FILE] = {
-+ .offset = offsetof(plugin_set, file),
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_DIR] = {
-+ .offset = offsetof(plugin_set, dir),
-+ .type = REISER4_DIR_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_PERM] = {
-+ .offset = offsetof(plugin_set, perm),
-+ .type = REISER4_PERM_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_FORMATTING] = {
-+ .offset = offsetof(plugin_set, formatting),
-+ .type = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_HASH] = {
-+ .offset = offsetof(plugin_set, hash),
-+ .type = REISER4_HASH_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_FIBRATION] = {
-+ .offset = offsetof(plugin_set, fibration),
-+ .type = REISER4_FIBRATION_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_SD] = {
-+ .offset = offsetof(plugin_set, sd),
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .groups = (1 << STAT_DATA_ITEM_TYPE)
-+ },
-+ [PSET_DIR_ITEM] = {
-+ .offset = offsetof(plugin_set, dir_item),
-+ .type = REISER4_ITEM_PLUGIN_TYPE,
-+ .groups = (1 << DIR_ENTRY_ITEM_TYPE)
-+ },
-+ [PSET_CIPHER] = {
-+ .offset = offsetof(plugin_set, cipher),
-+ .type = REISER4_CIPHER_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_DIGEST] = {
-+ .offset = offsetof(plugin_set, digest),
-+ .type = REISER4_DIGEST_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_COMPRESSION] = {
-+ .offset = offsetof(plugin_set, compression),
-+ .type = REISER4_COMPRESSION_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_COMPRESSION_MODE] = {
-+ .offset = offsetof(plugin_set, compression_mode),
-+ .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_CLUSTER] = {
-+ .offset = offsetof(plugin_set, cluster),
-+ .type = REISER4_CLUSTER_PLUGIN_TYPE,
-+ .groups = 0
-+ },
-+ [PSET_CREATE] = {
-+ .offset = offsetof(plugin_set, create),
-+ .type = REISER4_FILE_PLUGIN_TYPE,
-+ .groups = (1 << REISER4_REGULAR_FILE)
-+ }
-+};
-+/* DEFINE_PSET_OPS(PREFIX): define PREFIX_member_to_type_unsafe(), PREFIX_set_unsafe() and PREFIX_get(), all driven by pset_descr[] */
-+#define DEFINE_PSET_OPS(PREFIX) \
-+ reiser4_plugin_type PREFIX##_member_to_type_unsafe(pset_member memb) \
-+{ \
-+ if (memb >= PSET_LAST) /* pset_descr[] has only PSET_LAST entries */ \
-+ return REISER4_PLUGIN_TYPES; \
-+ return pset_descr[memb].type; \
-+} \
-+ /* store @plugin in member @memb of *@set; -EINVAL if it is in none of the member's groups */ \
-+int PREFIX##_set_unsafe(plugin_set ** set, pset_member memb, \
-+ reiser4_plugin * plugin) \
-+{ \
-+ assert("nikita-3492", set != NULL); \
-+ assert("nikita-3493", *set != NULL); \
-+ assert("nikita-3494", plugin != NULL); \
-+ assert("nikita-3495", 0 <= memb && memb < PSET_LAST); \
-+ assert("nikita-3496", plugin->h.type_id == pset_descr[memb].type); \
-+ \
-+ if (pset_descr[memb].groups) \
-+ if (!(pset_descr[memb].groups & plugin->h.groups)) \
-+ return -EINVAL; \
-+ \
-+ return plugin_set_field(set, \
-+ (unsigned long)plugin, pset_descr[memb].offset); \
-+} \
-+ /* fetch the plugin pointer stored in member @memb of @set */ \
-+reiser4_plugin *PREFIX##_get(plugin_set * set, pset_member memb) \
-+{ \
-+ assert("nikita-3497", set != NULL); \
-+ assert("nikita-3498", 0 <= memb && memb < PSET_LAST); \
-+ \
-+ return *(reiser4_plugin **) (((char *)set) + pset_descr[memb].offset); \
-+}
-+
-+DEFINE_PSET_OPS(aset);
-+/* store @plugin in member @memb of *@set via plugin_set_field(); unlike aset_set_unsafe() no assertion or group check is made */
-+int set_plugin(plugin_set ** set, pset_member memb, reiser4_plugin * plugin) {
-+ return plugin_set_field(set,
-+ (unsigned long)plugin, pset_descr[memb].offset);
-+}
-+
-+/**
-+ * init_plugin_set - create plugin set cache and hash table
-+ *
-+ * Initializes slab cache of plugin_set-s and their hash table. It is part of
-+ * reiser4 module initialization. Returns 0, the ps_hash_init() error, or
-+ * -ENOMEM if the slab cache cannot be created (the hash table is then released).
-+ */
-+int init_plugin_set(void)
-+{
-+	int result = ps_hash_init(&ps_table, PS_TABLE_SIZE);
-+
-+	if (result != 0)
-+		return result;
-+	plugin_set_slab = kmem_cache_create("plugin_set", sizeof(plugin_set), 0,
-+					    SLAB_HWCACHE_ALIGN, NULL);
-+	if (plugin_set_slab == NULL) {
-+		ps_hash_done(&ps_table);	/* do not leak the hash table */
-+		return RETERR(-ENOMEM);
-+	}
-+	return 0;
-+}
-+
-+/**
-+ * done_plugin_set - delete plugin_set cache and plugin_set hash table
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_plugin_set(void)
-+{
-+ plugin_set *cur, *next;
-+
-+ for_all_in_htable(&ps_table, ps, cur, next) { /* unhash and free every pset still in the table */
-+ ps_hash_remove(&ps_table, cur);
-+ kmem_cache_free(plugin_set_slab, cur);
-+ }
-+ destroy_reiser4_cache(&plugin_set_slab); /* helper defined elsewhere; presumably destroys the slab cache */
-+ ps_hash_done(&ps_table); /* tear down the hash table set up by ps_hash_init() */
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/plugin_set.h linux-2.6.24/fs/reiser4/plugin/plugin_set.h
---- linux-2.6.24.orig/fs/reiser4/plugin/plugin_set.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/plugin_set.h 2008-01-25 11:39:07.056238601 +0300
-@@ -0,0 +1,77 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Reiser4 plugin set definition.
-+ See fs/reiser4/plugin/plugin_set.c for details */
-+
-+#if !defined( __PLUGIN_SET_H__ )
-+#define __PLUGIN_SET_H__
-+
-+#include "../type_safe_hash.h"
-+#include "plugin.h"
-+
-+#include <linux/rcupdate.h>
-+
-+struct plugin_set;
-+typedef struct plugin_set plugin_set;
-+
-+TYPE_SAFE_HASH_DECLARE(ps, plugin_set);
-+
-+struct plugin_set {
-+ unsigned long hashval;
-+ /* plugin of file */
-+ file_plugin *file;
-+ /* plugin of dir */
-+ dir_plugin *dir;
-+ /* perm plugin for this file */
-+ perm_plugin *perm;
-+ /* tail policy plugin. Only meaningful for regular files */
-+ formatting_plugin *formatting;
-+ /* hash plugin. Only meaningful for directories. */
-+ hash_plugin *hash;
-+ /* fibration plugin. Only meaningful for directories. */
-+ fibration_plugin *fibration;
-+ /* plugin of stat-data */
-+ item_plugin *sd;
-+ /* plugin of items a directory is built of */
-+ item_plugin *dir_item;
-+ /* cipher plugin */
-+ cipher_plugin *cipher;
-+ /* digest plugin */
-+ digest_plugin *digest;
-+ /* compression plugin */
-+ compression_plugin *compression;
-+ /* compression mode plugin */
-+ compression_mode_plugin *compression_mode;
-+ /* cluster plugin */
-+ cluster_plugin *cluster;
-+ /* this specifies file plugin of regular children.
-+ only meaningful for directories */
-+ file_plugin *create;
-+ ps_hash_link link;
-+};
-+
-+extern plugin_set *plugin_set_get_empty(void);
-+extern void plugin_set_put(plugin_set * set);
-+
-+extern int init_plugin_set(void);
-+extern void done_plugin_set(void);
-+
-+extern reiser4_plugin *aset_get(plugin_set * set, pset_member memb);
-+extern int set_plugin(plugin_set ** set, pset_member memb,
-+ reiser4_plugin * plugin);
-+extern int aset_set_unsafe(plugin_set ** set, pset_member memb,
-+ reiser4_plugin * plugin);
-+extern reiser4_plugin_type aset_member_to_type_unsafe(pset_member memb);
-+
-+/* __PLUGIN_SET_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/security/Makefile linux-2.6.24/fs/reiser4/plugin/security/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/security/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/security/Makefile 2008-01-25 11:39:07.056238601 +0300
-@@ -0,0 +1,4 @@
-+obj-$(CONFIG_REISER4_FS) += security_plugins.o
-+
-+security_plugins-objs := \
-+ perm.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/security/perm.c linux-2.6.24/fs/reiser4/plugin/security/perm.c
---- linux-2.6.24.orig/fs/reiser4/plugin/security/perm.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/security/perm.c 2008-01-25 11:39:07.056238601 +0300
-@@ -0,0 +1,33 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/*
-+ * This file contains implementation of permission plugins.
-+ * See the comments in perm.h
-+ */
-+
-+#include "../plugin.h"
-+#include "../plugin_header.h"
-+#include "../../debug.h"
-+
-+perm_plugin perm_plugins[LAST_PERM_ID] = {
-+ [NULL_PERM_ID] = {
-+ .h = {
-+ .type_id = REISER4_PERM_PLUGIN_TYPE,
-+ .id = NULL_PERM_ID,
-+ .pops = NULL,
-+ .label = "null",
-+ .desc = "stub permission plugin",
-+ .linkage = {NULL, NULL}
-+ }
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/security/perm.h linux-2.6.24/fs/reiser4/plugin/security/perm.h
---- linux-2.6.24.orig/fs/reiser4/plugin/security/perm.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/security/perm.h 2008-01-25 11:39:07.060239631 +0300
-@@ -0,0 +1,38 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Perm (short for "permissions") plugins common stuff. */
-+
-+#if !defined( __REISER4_PERM_H__ )
-+#define __REISER4_PERM_H__
-+
-+#include "../../forward.h"
-+#include "../plugin_header.h"
-+
-+#include <linux/types.h>
-+
-+/* Definition of permission plugin */
-+/* NIKITA-FIXME-HANS: define what this is targeted for.
-+ It does not seem to be intended for use with sys_reiser4. Explain. */
-+
-+/* NOTE-EDWARD: This seems to be intended for deprecated sys_reiser4.
-+ Consider it a temporary "seam" and a reserved pset member.
-+ If you have something useful to add, then rename this plugin and add it here */
-+typedef struct perm_plugin {
-+ /* generic plugin fields */
-+ plugin_header h;
-+} perm_plugin;
-+
-+typedef enum { NULL_PERM_ID, LAST_PERM_ID } reiser4_perm_id;
-+
-+/* __REISER4_PERM_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/space/bitmap.c linux-2.6.24/fs/reiser4/plugin/space/bitmap.c
---- linux-2.6.24.orig/fs/reiser4/plugin/space/bitmap.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/space/bitmap.c 2008-01-25 11:39:07.064240661 +0300
-@@ -0,0 +1,1585 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#include "../../debug.h"
-+#include "../../dformat.h"
-+#include "../../txnmgr.h"
-+#include "../../jnode.h"
-+#include "../../block_alloc.h"
-+#include "../../tree.h"
-+#include "../../super.h"
-+#include "../plugin.h"
-+#include "space_allocator.h"
-+#include "bitmap.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mutex.h>
-+#include <asm/div64.h>
-+
-+/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap
-+ * blocks
-+
-+ A useful optimization of reiser4 bitmap handling would be dynamic bitmap
-+ blocks loading/unloading which is different from v3.x where all bitmap
-+ blocks are loaded at mount time.
-+
-+ To implement bitmap blocks unloading we need to count bitmap block usage
-+ and detect currently unused blocks allowing them to be unloaded. It is not
-+ a simple task since we allow several threads to modify one bitmap block
-+ simultaneously.
-+
-+ Briefly speaking, the following schema is proposed: we count in special
-+ variable associated with each bitmap block. That is for counting of block
-+ alloc/dealloc operations on that bitmap block. With a deferred block
-+ deallocation feature of reiser4 all those operation will be represented in
-+ atom dirty/deleted lists as jnodes for freshly allocated or deleted
-+ nodes.
-+
-+ So, we increment usage counter for each new node allocated or deleted, and
-+ decrement it at atom commit one time for each node from the dirty/deleted
-+ atom's list. Of course, freshly allocated node deletion and node reusing
-+ from atom deleted (if we do so) list should decrement bitmap usage counter
-+ also.
-+
-+ This schema seems to be working but that reference counting is
-+ not easy to debug. I think we should agree with Hans and do not implement
-+ it in v4.0. Current code implements "on-demand" bitmap blocks loading only.
-+
-+ For simplicity, either all bitmap nodes (both commit and working bitmap
-+ blocks) are loaded into memory at fs mount time, or each bitmap node is loaded
-+ at the first access to it; the "dont_load_bitmap" mount option controls whether
-+ bitmap nodes should be loaded at mount time. Dynamic unloading of bitmap
-+ nodes is currently not supported. */
-+
-+#define CHECKSUM_SIZE 4
-+
-+#define BYTES_PER_LONG (sizeof(long))
-+
-+#if BITS_PER_LONG == 64
-+# define LONG_INT_SHIFT (6)
-+#else
-+# define LONG_INT_SHIFT (5)
-+#endif
-+
-+#define LONG_INT_MASK (BITS_PER_LONG - 1UL)
-+
-+typedef unsigned long ulong_t;
-+
-+#define bmap_size(blocksize) ((blocksize) - CHECKSUM_SIZE)
-+#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3)
-+
-+/* Block allocation/deallocation are done through special bitmap objects which
-+ are allocated in an array at fs mount. */
-+struct bitmap_node {
-+ struct mutex mutex; /* long term lock object */
-+
-+ jnode *wjnode; /* j-nodes for WORKING ... */
-+ jnode *cjnode; /* ... and COMMIT bitmap blocks */
-+
-+ bmap_off_t first_zero_bit; /* for skip_busy option implementation */
-+
-+ atomic_t loaded; /* a flag which shows that bnode is loaded
-+ * already */
-+};
-+
-+/* start of the WORKING bitmap bits of @bnode: wjnode data past the CHECKSUM_SIZE-byte prefix */
-+static inline char *bnode_working_data(struct bitmap_node *bnode)
-+{
-+	char *data = jdata(bnode->wjnode);
-+
-+	assert("zam-429", data != NULL);
-+
-+	return &data[CHECKSUM_SIZE];
-+}
-+
-+/* start of the COMMIT bitmap bits of @bnode: cjnode data past the CHECKSUM_SIZE-byte prefix */
-+static inline char *bnode_commit_data(const struct bitmap_node *bnode)
-+{
-+	char *data = jdata(bnode->cjnode);
-+
-+	assert("zam-430", data != NULL);
-+
-+	return &data[CHECKSUM_SIZE];
-+}
-+
-+/* read the little-endian 32-bit value stored at the very start of @bnode's COMMIT block data */
-+static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode)
-+{
-+	char *data = jdata(bnode->cjnode);
-+
-+	assert("vpf-261", data != NULL);
-+
-+	return le32_to_cpu(get_unaligned((d32 *)data));
-+}
-+
-+/* store @crc as a little-endian 32-bit value at the very start of @bnode's COMMIT block data */
-+static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc)
-+{
-+	char *data = jdata(bnode->cjnode);
-+
-+	assert("vpf-261", data != NULL);
-+
-+	put_unaligned(cpu_to_le32(crc), (d32 *)data);
-+}
-+
-+/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having
-+ * written the code, does this added abstraction still have */
-+/* ANSWER(Zam): No, the abstractions is in the level above (exact place is the
-+ * reiser4_space_allocator structure) */
-+/* ZAM-FIXME-HANS: I don't understand your english in comment above. */
-+/* FIXME-HANS(Zam): I don't understand the questions like "might be a union
-+ * someday?". What they about? If there is a reason to have a union, it should
-+ * be a union, if not, it should not be a union. "..might be someday" means no
-+ * reason. */
-+struct bitmap_allocator_data {
-+ /* an array for bitmap blocks direct access */
-+ struct bitmap_node *bitmap;
-+};
-+/* the bitmap_node array kept in the space allocator's generic data of @super */
-+#define get_barray(super) \
-+(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic)) -> bitmap)
-+/* address of bitmap_node number @i of @super; @i is parenthesized so any expression may be passed */
-+#define get_bnode(super, i) (get_barray(super) + (i))
-+
-+/* get a jnode from jalloc() and initialize it as JNODE_BITMAP in current_tree; NULL if jalloc() gave none */
-+static jnode *bnew(void)
-+{
-+	jnode *node = jalloc();
-+
-+	if (node == NULL)
-+		return NULL;
-+	jnode_init(node, current_tree, JNODE_BITMAP);
-+	return node;
-+}
-+
-+/* this file contains:
-+ - bitmap based implementation of space allocation plugin
-+ - all the helper functions like set bit, find_first_zero_bit, etc */
-+
-+/* Audited by: green(2002.06.12) */
-+/* index of the first zero bit of @word at or above @start_bit (expected in
-+ * [0, BITS_PER_LONG)); BITS_PER_LONG if every such bit is set */
-+static int find_next_zero_bit_in_word(ulong_t word, int start_bit)
-+{
-+	int bit;
-+
-+	for (bit = start_bit; bit < BITS_PER_LONG; ++bit) {
-+		if ((word & (1UL << bit)) == 0)
-+			break;
-+	}
-+
-+	return bit;
-+}
-+
-+#include <linux/bitops.h>
-+
-+#if BITS_PER_LONG == 64
-+
-+#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3)
-+#define BASE(addr) ((ulong_t*) ((ulong_t)(addr) & ~(BYTES_PER_LONG - 1)))
-+
-+static inline void reiser4_set_bit(int nr, void *addr)
-+{
-+ ext2_set_bit(nr + OFF(addr), BASE(addr));
-+}
-+
-+static inline void reiser4_clear_bit(int nr, void *addr)
-+{
-+ ext2_clear_bit(nr + OFF(addr), BASE(addr));
-+}
-+
-+static inline int reiser4_test_bit(int nr, void *addr)
-+{
-+ return ext2_test_bit(nr + OFF(addr), BASE(addr));
-+}
-+static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset,
-+ int offset)
-+{
-+ int off = OFF(addr);
-+
-+ return ext2_find_next_zero_bit(BASE(addr), maxoffset + off,
-+ offset + off) - off;
-+}
-+
-+#else
-+
-+#define reiser4_set_bit(nr, addr) ext2_set_bit(nr, addr)
-+#define reiser4_clear_bit(nr, addr) ext2_clear_bit(nr, addr)
-+#define reiser4_test_bit(nr, addr) ext2_test_bit(nr, addr)
-+
-+#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \
-+ext2_find_next_zero_bit(addr, maxoffset, offset)
-+#endif
-+
-+/* Search for a set bit in the bit array [@start_offset, @max_offset[, offsets
-+ * are counted from @addr, return the offset of the first set bit found, @max_offset if the scanned words
-+ * hold none. NOTE(review): a hit is returned unclamped, so it may be >= @max_offset - confirm callers cope. */
-+static bmap_off_t __reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
-+ bmap_off_t start_offset)
-+{
-+ ulong_t *base = addr;
-+ /* start_offset is in bits, convert it to the index of the word holding that bit. */
-+ int word_nr = start_offset >> LONG_INT_SHIFT;
-+ /* bit number within that word. */
-+ int bit_nr = start_offset & LONG_INT_MASK;
-+ int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT;
-+
-+ assert("zam-387", max_offset != 0);
-+
-+ /* Unaligned @start_offset case. */
-+ if (bit_nr != 0) {
-+ bmap_nr_t nr;
-+
-+ nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr); /* zero bit of the complement == set bit of the word */
-+
-+ if (nr < BITS_PER_LONG)
-+ return (word_nr << LONG_INT_SHIFT) + nr;
-+
-+ ++word_nr;
-+ }
-+
-+ /* Fast scan through aligned words. */
-+ while (word_nr <= max_word_nr) {
-+ if (base[word_nr] != 0) {
-+ return (word_nr << LONG_INT_SHIFT)
-+ + find_next_zero_bit_in_word(~(base[word_nr]), 0);
-+ }
-+
-+ ++word_nr;
-+ }
-+
-+ return max_offset;
-+}
-+
-+#if BITS_PER_LONG == 64
-+
-+/* 64-bit flavour of the forward set-bit search: the bit offsets are shifted
-+ * by OFF(@addr) and the scan starts from BASE(@addr); the shift is removed
-+ * from the result again. OFF() and BASE() are defined outside this chunk -
-+ * presumably they split @addr into a long-aligned base and a bit offset
-+ * (TODO confirm). */
-+static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
-+					    bmap_off_t start_offset)
-+{
-+	bmap_off_t shift = OFF(addr);
-+	bmap_off_t found;
-+
-+	found = __reiser4_find_next_set_bit(BASE(addr), max_offset + shift,
-+					    start_offset + shift);
-+	return found - shift;
-+}
-+
-+#else
-+/* Otherwise the generic implementation is used directly. */
-+#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \
-+	__reiser4_find_next_set_bit(addr, max_offset, start_offset)
-+#endif
-+
-+/* Search a single word downwards, from bit @start_bit to bit 0, for a set
-+ * bit. Returns the number of the highest set bit at or below @start_bit, or
-+ * BITS_PER_LONG when no such bit exists. */
-+static int find_last_set_bit_in_word(ulong_t word, int start_bit)
-+{
-+	ulong_t probe;
-+	int pos;
-+
-+	assert("zam-965", start_bit < BITS_PER_LONG);
-+	assert("zam-966", start_bit >= 0);
-+
-+	/* the probe mask walks towards bit 0 and becomes 0 once shifted out */
-+	for (probe = (1UL << start_bit), pos = start_bit; probe != 0;
-+	     probe >>= 1, pos--) {
-+		if (word & probe)
-+			return pos;
-+	}
-+	return BITS_PER_LONG;
-+}
-+
-+/* Search bitmap for a set bit in backward direction from the end to the
-+ * beginning of given region
-+ *
-+ * @result: result offset of the last set bit
-+ * @addr: base memory address,
-+ * @low_off: low end of the search region, edge bit included into the region,
-+ * @high_off: high end of the search region, edge bit included into the region,
-+ *
-+ * @return: 0 - set bit was found, -1 otherwise.
-+ *
-+ * NOTE: only the word index of @low_off bounds the scan; a bit found inside
-+ * the word that contains @low_off may therefore lie below @low_off.
-+ */
-+static int
-+reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
-+ bmap_off_t high_off)
-+{
-+ ulong_t *base = addr;
-+ int last_word;
-+ int first_word;
-+ int last_bit;
-+ int nr;
-+
-+ assert("zam-962", high_off >= low_off);
-+
-+ /* split the bit offsets into word index / bit-in-word */
-+ last_word = high_off >> LONG_INT_SHIFT;
-+ last_bit = high_off & LONG_INT_MASK;
-+ first_word = low_off >> LONG_INT_SHIFT;
-+
-+ /* scan the (possibly partial) topmost word first, from @last_bit down */
-+ if (last_bit < BITS_PER_LONG) {
-+ nr = find_last_set_bit_in_word(base[last_word], last_bit);
-+ if (nr < BITS_PER_LONG) {
-+ *result = (last_word << LONG_INT_SHIFT) + nr;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+ /* then walk whole words downwards, skipping words that are all zero */
-+ while (last_word >= first_word) {
-+ if (base[last_word] != 0x0) {
-+ last_bit =
-+ find_last_set_bit_in_word(base[last_word],
-+ BITS_PER_LONG - 1);
-+ assert("zam-972", last_bit < BITS_PER_LONG);
-+ *result = (last_word << LONG_INT_SHIFT) + last_bit;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+
-+ return -1; /* set bit not found */
-+}
-+
-+/* Search bitmap for a clear bit in backward direction from the end to the
-+ * beginning of given region
-+ *
-+ * @result: result offset of the last clear bit
-+ * @addr: base memory address,
-+ * @low_off: low end of the search region,
-+ * @high_off: high end of the search region, the scan starts at this bit,
-+ *
-+ * @return: 0 - clear bit was found, -1 otherwise.
-+ *
-+ * Implemented as a set-bit search over the complemented words. As only the
-+ * word index of @low_off bounds the scan, a bit found inside the word that
-+ * contains @low_off may lie below @low_off. */
-+static int
-+reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
-+ bmap_off_t high_off)
-+{
-+ ulong_t *base = addr;
-+ int last_word;
-+ int first_word;
-+ int last_bit;
-+ int nr;
-+
-+ /* split the bit offsets into word index / bit-in-word */
-+ last_word = high_off >> LONG_INT_SHIFT;
-+ last_bit = high_off & LONG_INT_MASK;
-+ first_word = low_off >> LONG_INT_SHIFT;
-+
-+ /* scan the (possibly partial) topmost word first, from @last_bit down */
-+ if (last_bit < BITS_PER_LONG) {
-+ nr = find_last_set_bit_in_word(~base[last_word], last_bit);
-+ if (nr < BITS_PER_LONG) {
-+ *result = (last_word << LONG_INT_SHIFT) + nr;
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+ /* then walk whole words downwards, skipping words that are all ones */
-+ while (last_word >= first_word) {
-+ if (base[last_word] != (ulong_t) (-1)) {
-+ *result = (last_word << LONG_INT_SHIFT) +
-+ find_last_set_bit_in_word(~base[last_word],
-+ BITS_PER_LONG - 1);
-+ return 0;
-+ }
-+ --last_word;
-+ }
-+
-+ return -1; /* zero bit not found */
-+}
-+
-+/* Audited by: green(2002.06.12) */
-+/* Clear the bits [@start, @end[ of the byte array @addr. Whole bytes strictly
-+ * between the first and the last affected byte are zeroed with memset(); the
-+ * two edge bytes are AND-ed with masks that keep the bits outside the range. */
-+static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end)
-+{
-+ int first_byte;
-+ int last_byte;
-+
-+ unsigned char first_byte_mask = 0xFF;
-+ unsigned char last_byte_mask = 0xFF;
-+
-+ assert("zam-410", start < end);
-+
-+ /* byte indices of the first and of the last bit of the range */
-+ first_byte = start >> 3;
-+ last_byte = (end - 1) >> 3;
-+
-+ if (last_byte > first_byte + 1)
-+ memset(addr + first_byte + 1, 0,
-+ (size_t) (last_byte - first_byte - 1));
-+
-+ /* keep the bits below @start in the first byte ... */
-+ first_byte_mask >>= 8 - (start & 0x7);
-+ /* ... and the bits above (@end - 1) in the last byte */
-+ last_byte_mask <<= ((end - 1) & 0x7) + 1;
-+
-+ if (first_byte == last_byte) {
-+ /* range lies within one byte: keep what either mask keeps */
-+ addr[first_byte] &= (first_byte_mask | last_byte_mask);
-+ } else {
-+ addr[first_byte] &= first_byte_mask;
-+ addr[last_byte] &= last_byte_mask;
-+ }
-+}
-+
-+/* Audited by: green(2002.06.12) */
-+/* ZAM-FIXME-HANS: comment this */
-+/* Set the bits [@start, @end[ of the byte array @addr. Whole bytes strictly
-+ * between the first and the last affected byte are filled with 0xFF by
-+ * memset(); the two edge bytes are OR-ed with masks that select only the bits
-+ * inside the range. */
-+static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end)
-+{
-+ int first_byte;
-+ int last_byte;
-+
-+ unsigned char first_byte_mask = 0xFF;
-+ unsigned char last_byte_mask = 0xFF;
-+
-+ assert("zam-386", start < end);
-+
-+ /* byte indices of the first and of the last bit of the range */
-+ first_byte = start >> 3;
-+ last_byte = (end - 1) >> 3;
-+
-+ if (last_byte > first_byte + 1)
-+ memset(addr + first_byte + 1, 0xFF,
-+ (size_t) (last_byte - first_byte - 1));
-+
-+ /* select the bits from @start upwards in the first byte ... */
-+ first_byte_mask <<= start & 0x7;
-+ /* ... and the bits up to (@end - 1) in the last byte */
-+ last_byte_mask >>= 7 - ((end - 1) & 0x7);
-+
-+ if (first_byte == last_byte) {
-+ /* range lies within one byte: set what both masks select */
-+ addr[first_byte] |= (first_byte_mask & last_byte_mask);
-+ } else {
-+ addr[first_byte] |= first_byte_mask;
-+ addr[last_byte] |= last_byte_mask;
-+ }
-+}
-+
-+#define ADLER_BASE 65521
-+#define ADLER_NMAX 5552
-+
-+/* Calculates the adler32 checksum for the data pointed by `data` of the
-+ length `len`. This function was originally taken from zlib, version 1.1.3,
-+ July 9th, 1998.
-+
-+ Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
-+
-+ This software is provided 'as-is', without any express or implied
-+ warranty. In no event will the authors be held liable for any damages
-+ arising from the use of this software.
-+
-+ Permission is granted to anyone to use this software for any purpose,
-+ including commercial applications, and to alter it and redistribute it
-+ freely, subject to the following restrictions:
-+
-+ 1. The origin of this software must not be misrepresented; you must not
-+ claim that you wrote the original software. If you use this software
-+ in a product, an acknowledgment in the product documentation would be
-+ appreciated but is not required.
-+ 2. Altered source versions must be plainly marked as such, and must not be
-+ misrepresented as being the original software.
-+ 3. This notice may not be removed or altered from any source distribution.
-+
-+ Jean-loup Gailly Mark Adler
-+ jloup@gzip.org madler@alumni.caltech.edu
-+
-+ The above comment applies only to the reiser4_adler32 function.
-+*/
-+
-+/* Returns the adler32 checksum of the @len bytes starting at @data. The two
-+ * running sums are reduced modulo ADLER_BASE after every chunk of at most
-+ * ADLER_NMAX bytes. */
-+__u32 reiser4_adler32(char *data, __u32 len)
-+{
-+	/* the bytes are summed as unsigned values; the explicit cast replaces
-+	 * an implicit char * -> unsigned char * conversion */
-+	unsigned char *t = (unsigned char *)data;
-+	__u32 s1 = 1;
-+	__u32 s2 = 0;
-+	int k;
-+
-+	while (len > 0) {
-+		/* size of the next chunk */
-+		k = len < ADLER_NMAX ? len : ADLER_NMAX;
-+		len -= k;
-+
-+		while (k--) {
-+			s1 += *t++;
-+			s2 += s1;
-+		}
-+
-+		s1 %= ADLER_BASE;
-+		s2 %= ADLER_BASE;
-+	}
-+	/* s2 goes to the high 16 bits, s1 to the low 16 bits */
-+	return (s2 << 16) | s1;
-+}
-+
-+/* Super block reached through the tree of @bnode's working jnode. The macro
-+ * argument is parenthesised so that any pointer expression can be passed. */
-+#define sb_by_bnode(bnode) \
-+	((struct super_block *)jnode_get_tree((bnode)->wjnode)->super)
-+
-+/* Compute the adler32 checksum of @bnode's commit bitmap data. @size is passed
-+ * through bmap_size() here, so callers hand in the un-reduced size
-+ * (bmap_size() is defined outside this chunk; presumably it subtracts the
-+ * checksum bytes from a block size - TODO confirm). */
-+static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size)
-+{
-+ return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size));
-+}
-+
-+/* Recompute the checksum of @bnode's commit bitmap (@size is handed to
-+ * bnode_calc_crc() unchanged) and compare it with the stored one.
-+ * Returns 0 when they match, otherwise logs a warning and returns -EIO. */
-+static int
-+bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size)
-+{
-+	if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) {
-+		bmap_nr_t bmap;
-+
-+		/* index of @bnode in the bnode array of its super block */
-+		bmap = bnode - get_bnode(sb_by_bnode(bnode), 0);
-+
-+		/* cast explicitly so the argument always matches %llu */
-+		warning("vpf-263",
-+			"Checksum for the bitmap block %llu is incorrect",
-+			(unsigned long long)bmap);
-+
-+		return RETERR(-EIO);
-+	}
-+
-+	return 0;
-+}
-+
-+/* Compile-time switch for the checksum verification done by the commit hooks;
-+ * with the value 0 bnode_check_crc() collapses to the constant 0. */
-+#define REISER4_CHECK_BMAP_CRC (0)
-+
-+#if REISER4_CHECK_BMAP_CRC
-+/* Verify the checksum of @bnode's commit bitmap. The raw block size is passed
-+ * on because bnode_calc_crc() applies bmap_size() itself; wrapping it here as
-+ * well would apply bmap_size() twice and checksum a shorter region than the
-+ * one covered when the checksum is stored. */
-+static int bnode_check_crc(const struct bitmap_node *bnode)
-+{
-+	return bnode_check_adler32(bnode, sb_by_bnode(bnode)->s_blocksize);
-+}
-+
-+/* REISER4_CHECK_BMAP_CRC */
-+#else
-+
-+#define bnode_check_crc(bnode) (0)
-+
-+/* REISER4_CHECK_BMAP_CRC */
-+#endif
-+
-+/* Incrementally updates an adler32 checksum after a single byte changed.
-+   adler - checksum before the change
-+   old_data, data - previous and new value of the changed byte
-+   tail - (chunk length - offset of the changed byte), i.e. the number of
-+   bytes from the changed byte to the end of the checksummed chunk.
-+   Lets callers avoid re-reading the whole chunk.
-+*/
-+
-+static __u32
-+adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data,
-+	       __u32 tail)
-+{
-+	__u32 low = adler & 0xffff;
-+	__u32 high = (adler >> 16) & 0xffff;
-+	/* biased by 2 * ADLER_BASE so that a negative byte difference does
-+	 * not wrap below zero */
-+	__u32 diff = data - old_data + 2 * ADLER_BASE;
-+
-+	low = (diff + low) % ADLER_BASE;
-+	high = (diff * tail + high) % ADLER_BASE;
-+
-+	return (high << 16) | low;
-+}
-+
-+/* Clamp @val from above: yields @boundary when @val exceeds it, @val
-+ * otherwise. Both arguments are evaluated more than once. */
-+#define LIMIT(val, boundary) ((val) > (boundary) ? (boundary) : (val))
-+
-+/**
-+ * get_nr_bmap - calculate number of bitmap blocks
-+ * @super: super block with initialized blocksize and block count
-+ *
-+ * Calculates number of bitmap blocks of a filesystem which uses bitmaps to
-+ * maintain free disk space. It assumes that each bitmap addresses the same
-+ * number of blocks, which is calculated by the bmap_bit_count macro. The
-+ * number of blocks in the filesystem has to be initialized in reiser4
-+ * private data of super block already so that it can be obtained via
-+ * reiser4_block_count(). Unfortunately, number of blocks addressed by a bitmap
-+ * is not power of 2 because 4 bytes are used for checksum. Therefore, we have
-+ * to use special function to divide and modulo 64bits filesystem block
-+ * counters.
-+ *
-+ * Example: suppose filesystem have 32768 blocks. Blocksize is 4096. Each bitmap
-+ * block addresses (4096 - 4) * 8 = 32736 blocks. Number of bitmaps to address
-+ * all 32768 blocks is calculated as (32768 - 1) / 32736 + 1 = 2.
-+ */
-+static bmap_nr_t get_nr_bmap(const struct super_block *super)
-+{
-+ u64 quotient;
-+
-+ assert("zam-393", reiser4_block_count(super) != 0);
-+
-+ /* (block count - 1) / (blocks per bitmap) + 1, see the example above */
-+ quotient = reiser4_block_count(super) - 1;
-+ do_div(quotient, bmap_bit_count(super->s_blocksize));
-+ return quotient + 1;
-+}
-+
-+/**
-+ * parse_blocknr - calculate bitmap number and offset in it by block number
-+ * @block: pointer to block number to calculate location in bitmap of
-+ * @bmap: pointer where to store bitmap block number
-+ * @offset: pointer where to store offset within bitmap block
-+ *
-+ * Calculates location of bit which is responsible for allocation/freeing of
-+ * block @*block. That location is represented by bitmap block number and offset
-+ * within that bitmap block. The super block is taken from the current reiser4
-+ * context.
-+ */
-+static void
-+parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap,
-+ bmap_off_t *offset)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ u64 quotient = *block;
-+
-+ /* do_div() leaves the quotient in its first argument and its value is
-+ * stored as the offset (the remainder of the division) */
-+ *offset = do_div(quotient, bmap_bit_count(super->s_blocksize));
-+ *bmap = quotient;
-+
-+ assert("zam-433", *bmap < get_nr_bmap(super));
-+ /* NOTE(review): this assertion has an empty label */
-+ assert("", *offset < bmap_bit_count(super->s_blocksize));
-+}
-+
-+#if REISER4_DEBUG
-+/* Audited by: green(2002.06.12) */
-+/* Debug-only sanity check of a block range: @start must be a non-zero,
-+ * non-fake block number below the block count of the current super block;
-+ * if @len is given it must be non-zero and the range must end within the
-+ * block count. */
-+static void
-+check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+
-+ assert("zam-436", sb != NULL);
-+
-+ assert("zam-455", start != NULL);
-+ assert("zam-437", *start != 0);
-+ assert("zam-541", !reiser4_blocknr_is_fake(start));
-+ assert("zam-441", *start < reiser4_block_count(sb));
-+
-+ if (len != NULL) {
-+ assert("zam-438", *len != 0);
-+ assert("zam-442", *start + *len <= reiser4_block_count(sb));
-+ }
-+}
-+
-+/* Debug-only check that both jnodes of @bnode (working and commit) have pages
-+ * attached and are loaded. */
-+static void check_bnode_loaded(const struct bitmap_node *bnode)
-+{
-+ assert("zam-485", bnode != NULL);
-+ assert("zam-483", jnode_page(bnode->wjnode) != NULL);
-+ assert("zam-484", jnode_page(bnode->cjnode) != NULL);
-+ assert("nikita-2820", jnode_is_loaded(bnode->wjnode));
-+ assert("nikita-2821", jnode_is_loaded(bnode->cjnode));
-+}
-+
-+#else
-+
-+/* without REISER4_DEBUG both checks compile to empty statements */
-+# define check_block_range(start, len) do { /* nothing */} while(0)
-+# define check_bnode_loaded(bnode) do { /* nothing */} while(0)
-+
-+#endif
-+
-+/* Lower bnode->first_zero_bit to @offset when bits in front of the current
-+   value have been freed; the caller is expected to hold the bnode lock. */
-+static inline void
-+adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset)
-+{
-+	if (bnode->first_zero_bit > offset)
-+		bnode->first_zero_bit = offset;
-+}
-+
-+/* return a physical disk address for logical bitmap number @bmap */
-+/* FIXME-VS: this is somehow related to disk layout? */
-+/* ZAM-FIXME-HANS: your answer is? Use not more than one function dereference
-+ * per block allocation so that performance is not affected. Probably this
-+ * whole file should be considered part of the disk layout plugin, and other
-+ * disk layouts can use other defines and efficiency will not be significantly
-+ * affected. */
-+
-+/* block number used for bitmap 0, see get_bitmap_blocknr() */
-+#define REISER4_FIRST_BITMAP_BLOCK \
-+ ((REISER4_MASTER_OFFSET / PAGE_CACHE_SIZE) + 2)
-+
-+/* Audited by: green(2002.06.12) */
-+/* Stores in @bnr the on-disk block number of bitmap @bmap: bitmap 0 lives at
-+ * REISER4_FIRST_BITMAP_BLOCK, every other bitmap at block
-+ * @bmap * bmap_bit_count(blocksize). With CONFIG_REISER4_BADBLOCKS a value
-+ * found in the diskmap takes precedence. */
-+static void
-+get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap,
-+ reiser4_block_nr * bnr)
-+{
-+
-+ assert("zam-390", bmap < get_nr_bmap(super));
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff))
-+ /* Check if the diskmap have this already, first. */
-+ if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0)
-+ return; /* Found it in diskmap */
-+#endif
-+ /* FIXME_ZAM: before discussing of disk layouts and disk format
-+ plugins I implement bitmap location scheme which is close to scheme
-+ used in reiser 3.6 */
-+ if (bmap == 0) {
-+ *bnr = REISER4_FIRST_BITMAP_BLOCK;
-+ } else {
-+ *bnr = bmap * bmap_bit_count(super->s_blocksize);
-+ }
-+}
-+
-+/* construct a fake block number for shadow bitmap (WORKING BITMAP) block */
-+/* Audited by: green(2002.06.12) */
-+/* The bits of @bmap covered by REISER4_BLOCKNR_STATUS_BIT_MASK are cleared and
-+ * REISER4_BITMAP_BLOCKS_STATUS_VALUE is OR-ed in; the result is stored in
-+ * @bnr. */
-+static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr)
-+{
-+ *bnr =
-+ (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) |
-+ REISER4_BITMAP_BLOCKS_STATUS_VALUE);
-+}
-+
-+/* Bring @bnode into its initial state: all fields zeroed, mutex initialized,
-+ * "loaded" flag cleared. @super and @bmap are not used. */
-+static void
-+init_bnode(struct bitmap_node *bnode,
-+	   struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG)
-+{
-+	memset(bnode, 0, sizeof(*bnode));
-+
-+	mutex_init(&bnode->mutex);
-+	atomic_set(&bnode->loaded, 0);
-+}
-+
-+/* Give up a bitmap jnode: release its data with jrelse(), flag it with
-+ * JNODE_HEARD_BANSHEE and drop a reference with jput(). */
-+static void release(jnode * node)
-+{
-+ jrelse(node);
-+ JF_SET(node, JNODE_HEARD_BANSHEE);
-+ jput(node);
-+}
-+
-+/* Tear down @bnode: clear its "loaded" flag and release whichever of its two
-+   jnodes are attached. Meant for use inside bitmap.c only, since it assumes
-+   the jnodes are fully controlled by the calling thread. A NULL @bnode is
-+   ignored. */
-+static void done_bnode(struct bitmap_node *bnode)
-+{
-+	if (bnode == NULL)
-+		return;
-+
-+	atomic_set(&bnode->loaded, 0);
-+
-+	if (bnode->wjnode != NULL)
-+		release(bnode->wjnode);
-+	if (bnode->cjnode != NULL)
-+		release(bnode->cjnode);
-+
-+	bnode->cjnode = NULL;
-+	bnode->wjnode = NULL;
-+}
-+
-+/* ZAM-FIXME-HANS: comment this. Called only by load_and_lock_bnode()*/
-+/* Allocates the working and the commit jnode for @bnode, assigns their block
-+ * numbers, takes a reference on each, loads the commit bitmap and allocates
-+ * the working bitmap block. On success returns 0 with the jnodes stored in
-+ * @cjnode_ret and @wjnode_ret. On a load/init failure both output pointers
-+ * are reset to NULL after the references are dropped.
-+ *
-+ * NOTE(review): when the second bnew() fails, *wjnode_ret is left pointing to
-+ * the already allocated working jnode, which at that point has neither been
-+ * referenced nor loaded - confirm that the caller's cleanup is valid for it. */
-+static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret,
-+ jnode **wjnode_ret)
-+{
-+ struct super_block *super;
-+ jnode *cjnode;
-+ jnode *wjnode;
-+ bmap_nr_t bmap;
-+ int ret;
-+
-+ super = reiser4_get_current_sb();
-+
-+ *wjnode_ret = wjnode = bnew();
-+ if (wjnode == NULL) {
-+ *cjnode_ret = NULL;
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ *cjnode_ret = cjnode = bnew();
-+ if (cjnode == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ /* index of @bnode in the bnode array of the super block */
-+ bmap = bnode - get_bnode(super, 0);
-+
-+ get_working_bitmap_blocknr(bmap, &wjnode->blocknr);
-+ get_bitmap_blocknr(super, bmap, &cjnode->blocknr);
-+
-+ jref(cjnode);
-+ jref(wjnode);
-+
-+ /* load commit bitmap */
-+ ret = jload_gfp(cjnode, GFP_NOFS, 1);
-+
-+ if (ret)
-+ goto error;
-+
-+ /* allocate memory for working bitmap block. Note that for
-+ * bitmaps jinit_new() doesn't actually modifies node content,
-+ * so parallel calls to this are ok. */
-+ ret = jinit_new(wjnode, GFP_NOFS);
-+
-+ if (ret != 0) {
-+ /* undo the successful jload_gfp() of the commit jnode */
-+ jrelse(cjnode);
-+ goto error;
-+ }
-+
-+ return 0;
-+
-+ error:
-+ /* drop the references taken by jref() above */
-+ jput(cjnode);
-+ jput(wjnode);
-+ *wjnode_ret = *cjnode_ret = NULL;
-+ return ret;
-+
-+}
-+
-+/* Validate a bitmap block that has just been read: its checksum must match
-+ * and its very first bit must be set. Returns 0 when both checks pass, the
-+ * error of bnode_check_adler32() or -EINVAL otherwise. */
-+static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize)
-+{
-+	void *bits;
-+	int err;
-+
-+	/* Check CRC */
-+	err = bnode_check_adler32(bnode, blksize);
-+	if (err)
-+		return err;
-+
-+	/* the bitmap itself follows the checksum bytes */
-+	bits = jdata(bnode->cjnode) + CHECKSUM_SIZE;
-+
-+	/* Check the very first bit -- it must be busy. */
-+	if (reiser4_test_bit(0, bits))
-+		return 0;
-+
-+	warning("vpf-1362", "The allocator block %llu is not marked "
-+		"as used.", (unsigned long long)bnode->cjnode->blocknr);
-+
-+	return -EINVAL;
-+}
-+
-+/* load bitmap blocks "on-demand" */
-+/* Makes sure the bitmap behind @bnode is loaded and returns with bnode->mutex
-+ * held. Returns 0 on success; on failure the error of prepare_bnode() or
-+ * check_struct_bnode() is returned and the mutex is NOT held.
-+ *
-+ * The jnodes prepared by this call are only ever attached to or detached
-+ * from @bnode under the mutex. When another thread won the race to load the
-+ * bitmap, the locally prepared jnodes are released without touching
-+ * bnode->wjnode / bnode->cjnode, which belong to the winner. */
-+static int load_and_lock_bnode(struct bitmap_node *bnode)
-+{
-+	int ret;
-+
-+	jnode *cjnode;
-+	jnode *wjnode;
-+
-+	assert("nikita-3040", reiser4_schedulable());
-+
-+/* ZAM-FIXME-HANS: since bitmaps are never unloaded, this does not
-+ * need to be atomic, right? Just leave a comment that if bitmaps were
-+ * unloadable, this would need to be atomic. */
-+	if (atomic_read(&bnode->loaded)) {
-+		/* bitmap is already loaded, nothing to do */
-+		check_bnode_loaded(bnode);
-+		mutex_lock(&bnode->mutex);
-+		assert("nikita-2827", atomic_read(&bnode->loaded));
-+		return 0;
-+	}
-+
-+	ret = prepare_bnode(bnode, &cjnode, &wjnode);
-+	if (ret == 0) {
-+		mutex_lock(&bnode->mutex);
-+
-+		if (!atomic_read(&bnode->loaded)) {
-+			assert("nikita-2822", cjnode != NULL);
-+			assert("nikita-2823", wjnode != NULL);
-+			assert("nikita-2824", jnode_is_loaded(cjnode));
-+			assert("nikita-2825", jnode_is_loaded(wjnode));
-+
-+			bnode->wjnode = wjnode;
-+			bnode->cjnode = cjnode;
-+
-+			ret = check_struct_bnode(bnode, current_blocksize);
-+			if (!ret) {
-+				/* ownership moved to @bnode, nothing is left
-+				 * to release below */
-+				cjnode = wjnode = NULL;
-+				atomic_set(&bnode->loaded, 1);
-+				/* working bitmap is initialized by on-disk
-+				 * commit bitmap. This should be performed
-+				 * under mutex. */
-+				memcpy(bnode_working_data(bnode),
-+				       bnode_commit_data(bnode),
-+				       bmap_size(current_blocksize));
-+			} else {
-+				/* detach the rejected jnodes while the mutex
-+				 * is still held; they are released below */
-+				bnode->wjnode = NULL;
-+				bnode->cjnode = NULL;
-+				mutex_unlock(&bnode->mutex);
-+			}
-+		} else
-+			/* race: someone already loaded bitmap while we were
-+			 * busy initializing data. */
-+			check_bnode_loaded(bnode);
-+	}
-+
-+	/* release whatever this call prepared but did not hand over to @bnode;
-+	 * the fields of @bnode are deliberately left alone here */
-+	if (wjnode != NULL)
-+		release(wjnode);
-+	if (cjnode != NULL)
-+		release(cjnode);
-+
-+	return ret;
-+}
-+
-+/* Counterpart of load_and_lock_bnode(): drops bnode->mutex. The bitmap stays
-+ * loaded; nothing is released here despite the name. */
-+static void release_and_unlock_bnode(struct bitmap_node *bnode)
-+{
-+ check_bnode_loaded(bnode);
-+ mutex_unlock(&bnode->mutex);
-+}
-+
-+/* This function does all block allocation work but only for one bitmap
-+ block.*/
-+/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap
-+ block responsibility zone boundaries. This had no sense in v3.6 but may
-+ have it in v4.x */
-+/* ZAM-FIXME-HANS: do you mean search one bitmap block forward? */
-+/* Searches the working bitmap @bmap from *@offset towards @max_offset for a
-+ run of at least @min_len and at most @max_len clear bits and marks it as
-+ used. Returns the length of the allocated run with *@offset set to its
-+ start, 0 if no suitable run was found, or the (negative) error of
-+ load_and_lock_bnode(). */
-+static int
-+search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset,
-+ bmap_off_t max_offset, int min_len, int max_len)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ struct bitmap_node *bnode = get_bnode(super, bmap);
-+
-+ char *data;
-+
-+ bmap_off_t search_end;
-+ bmap_off_t start;
-+ bmap_off_t end;
-+
-+ int set_first_zero_bit = 0;
-+
-+ int ret;
-+
-+ assert("zam-364", min_len > 0);
-+ assert("zam-365", max_len >= min_len);
-+ assert("zam-366", *offset <= max_offset);
-+
-+ ret = load_and_lock_bnode(bnode);
-+
-+ if (ret)
-+ return ret;
-+
-+ /* from here on ret == 0, which is also the "nothing found" result */
-+ data = bnode_working_data(bnode);
-+
-+ start = *offset;
-+
-+ /* skip ahead to the cached first zero bit and remember to refresh the
-+ * cache with the first zero bit actually found */
-+ if (bnode->first_zero_bit >= start) {
-+ start = bnode->first_zero_bit;
-+ set_first_zero_bit = 1;
-+ }
-+
-+ /* NOTE(review): with "<" a free run of exactly @min_len bits that ends
-+ * at @max_offset does not enter the loop - confirm this is intended */
-+ while (start + min_len < max_offset) {
-+
-+ start =
-+ reiser4_find_next_zero_bit((long *)data, max_offset, start);
-+ if (set_first_zero_bit) {
-+ bnode->first_zero_bit = start;
-+ set_first_zero_bit = 0;
-+ }
-+ if (start >= max_offset)
-+ break;
-+
-+ /* the run may not be longer than @max_len bits */
-+ search_end = LIMIT(start + max_len, max_offset);
-+ end =
-+ reiser4_find_next_set_bit((long *)data, search_end, start);
-+ if (end >= start + min_len) {
-+ /* we can't trust find_next_set_bit result if set bit
-+ was not found, result may be bigger than
-+ max_offset */
-+ if (end > search_end)
-+ end = search_end;
-+
-+ ret = end - start;
-+ *offset = start;
-+
-+ reiser4_set_bits(data, start, end);
-+
-+ /* FIXME: we may advance first_zero_bit if [start,
-+ end] region overlaps the first_zero_bit point */
-+
-+ break;
-+ }
-+
-+ /* the free run was too short, continue behind the set bit */
-+ start = end + 1;
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+
-+ return ret;
-+}
-+
-+/* Backward counterpart of search_one_bitmap_forward(): searches the working
-+ * bitmap @bmap from *@start_offset down to @end_offset for a run of at least
-+ * @min_len and at most @max_len clear bits and marks it as used. Returns the
-+ * length of the allocated run with *@start_offset set to its lowest offset,
-+ * 0 if no suitable run was found, or the (negative) error of
-+ * load_and_lock_bnode(). */
-+static int
-+search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset,
-+ bmap_off_t end_offset, int min_len, int max_len)
-+{
-+ struct super_block *super = get_current_context()->super;
-+ struct bitmap_node *bnode = get_bnode(super, bmap);
-+ char *data;
-+ bmap_off_t start;
-+ int ret;
-+
-+ assert("zam-958", min_len > 0);
-+ assert("zam-959", max_len >= min_len);
-+ assert("zam-960", *start_offset >= end_offset);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ if (ret)
-+ return ret;
-+
-+ /* from here on ret == 0, which is also the "nothing found" result */
-+ data = bnode_working_data(bnode);
-+ start = *start_offset;
-+
-+ while (1) {
-+ bmap_off_t end, search_end;
-+
-+ /* Find the beginning of the zero filled region */
-+ if (reiser4_find_last_zero_bit(&start, data, end_offset, start))
-+ break;
-+ /* Is there more than `min_len' bits from `start' to
-+ * `end_offset'? */
-+ if (start < end_offset + min_len - 1)
-+ break;
-+
-+ /* Do not search to `end_offset' if we need to find less than
-+ * `max_len' zero bits. */
-+ if (end_offset + max_len - 1 < start)
-+ search_end = start - max_len + 1;
-+ else
-+ search_end = end_offset;
-+
-+ /* `end' becomes the lowest clear bit of the run: either the
-+ * search boundary or the bit right above the set bit found */
-+ if (reiser4_find_last_set_bit(&end, data, search_end, start))
-+ end = search_end;
-+ else
-+ end++;
-+
-+ if (end + min_len <= start + 1) {
-+ /* the set bit may have been found below the boundary */
-+ if (end < search_end)
-+ end = search_end;
-+ ret = start - end + 1;
-+ *start_offset = end; /* `end' is lowest offset */
-+ assert("zam-987",
-+ reiser4_find_next_set_bit(data, start + 1,
-+ end) >= start + 1);
-+ reiser4_set_bits(data, end, start + 1);
-+ break;
-+ }
-+
-+ if (end <= end_offset)
-+ /* left search boundary reached. */
-+ break;
-+ /* the run was too short, continue below the set bit */
-+ start = end - 1;
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+ return ret;
-+}
-+
-+/* allocate contiguous range of blocks in bitmap */
-+/* Searches forward from block *@start up to (not including) block *@end,
-+ * one bitmap at a time, for @min_len..@max_len free blocks. Returns the
-+ * result of the first search_one_bitmap_forward() call that is non-zero
-+ * (a length or a negative error), or that of the search in the last bitmap.
-+ * *@start is rewritten from the final bitmap number and offset in every
-+ * case, including failure. */
-+static int bitmap_alloc_forward(reiser4_block_nr * start,
-+ const reiser4_block_nr * end, int min_len,
-+ int max_len)
-+{
-+ bmap_nr_t bmap, end_bmap;
-+ bmap_off_t offset, end_offset;
-+ int len;
-+
-+ reiser4_block_nr tmp;
-+
-+ struct super_block *super = get_current_context()->super;
-+ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
-+
-+ parse_blocknr(start, &bmap, &offset);
-+
-+ /* locate the last block of the range, then turn its offset back into
-+ * an exclusive upper bound */
-+ tmp = *end - 1;
-+ parse_blocknr(&tmp, &end_bmap, &end_offset);
-+ ++end_offset;
-+
-+ assert("zam-358", end_bmap >= bmap);
-+ assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset));
-+
-+ /* bitmaps before the last one are searched up to their end */
-+ for (; bmap < end_bmap; bmap++, offset = 0) {
-+ len =
-+ search_one_bitmap_forward(bmap, &offset, max_offset,
-+ min_len, max_len);
-+ if (len != 0)
-+ goto out;
-+ }
-+
-+ /* the last bitmap is searched up to @end_offset only */
-+ len =
-+ search_one_bitmap_forward(bmap, &offset, end_offset, min_len,
-+ max_len);
-+ out:
-+ *start = bmap * max_offset + offset;
-+ return len;
-+}
-+
-+/* allocate contiguous range of blocks in bitmap (from @start to @end in
-+ * backward direction) */
-+/* Returns the result of the first search_one_bitmap_backward() call that is
-+ * non-zero (a length or a negative error), or that of the search in the
-+ * lowest bitmap. *@start is rewritten from the final bitmap number and offset
-+ * in every case, including failure. */
-+static int bitmap_alloc_backward(reiser4_block_nr * start,
-+ const reiser4_block_nr * end, int min_len,
-+ int max_len)
-+{
-+ bmap_nr_t bmap, end_bmap;
-+ bmap_off_t offset, end_offset;
-+ int len;
-+ struct super_block *super = get_current_context()->super;
-+ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
-+
-+ parse_blocknr(start, &bmap, &offset);
-+ parse_blocknr(end, &end_bmap, &end_offset);
-+
-+ assert("zam-961", end_bmap <= bmap);
-+ /* NOTE(review): the label "zam-962" is also used by an assertion in
-+ * reiser4_find_last_set_bit() */
-+ assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset));
-+
-+ /* bitmaps above the lowest one are searched down to their first bit */
-+ for (; bmap > end_bmap; bmap--, offset = max_offset - 1) {
-+ len =
-+ search_one_bitmap_backward(bmap, &offset, 0, min_len,
-+ max_len);
-+ if (len != 0)
-+ goto out;
-+ }
-+
-+ /* the lowest bitmap is searched down to @end_offset only */
-+ len =
-+ search_one_bitmap_backward(bmap, &offset, end_offset, min_len,
-+ max_len);
-+ out:
-+ *start = bmap * max_offset + offset;
-+ return len;
-+}
-+
-+/* Forward allocation helper of reiser4_alloc_blocks_bitmap(). Tries to
-+ * allocate up to @needed blocks starting at @hint->blk. On success 0 is
-+ * returned and the allocated extent is stored in @start / @len; -ENOSPC is
-+ * returned when nothing was found, other negative values are errors of the
-+ * bitmap search. */
-+static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed,
-+				reiser4_block_nr *start, reiser4_block_nr *len)
-+{
-+	struct super_block *super = get_current_context()->super;
-+	reiser4_block_nr search_start;
-+	reiser4_block_nr search_end;
-+	int actual_len;
-+
-+	assert("zam-398", super != NULL);
-+	assert("zam-412", hint != NULL);
-+	assert("zam-397", hint->blk <= reiser4_block_count(super));
-+
-+	/* The search runs from @hint->blk to the end of the disk, or only
-+	   within @hint->max_dist blocks if that limit is not zero */
-+	if (hint->max_dist != 0)
-+		search_end =
-+		    LIMIT(hint->blk + hint->max_dist,
-+			  reiser4_block_count(super));
-+	else
-+		search_end = reiser4_block_count(super);
-+
-+	search_start = hint->blk;
-+
-+	actual_len =
-+	    bitmap_alloc_forward(&search_start, &search_end, 1, needed);
-+
-+	/* A second pass, from block 0 up to search_start, is made only when
-+	   the first pass found nothing, no max_dist was given and the first
-+	   pass did not already start at the beginning of the bitmap. */
-+	if (actual_len == 0 && hint->max_dist == 0 && search_start != 0) {
-+		search_end = search_start;
-+		search_start = 0;
-+		actual_len =
-+		    bitmap_alloc_forward(&search_start, &search_end, 1, needed);
-+	}
-+
-+	if (actual_len < 0)
-+		return RETERR(actual_len);
-+	if (actual_len == 0)
-+		return RETERR(-ENOSPC);
-+
-+	*len = actual_len;
-+	*start = search_start;
-+	return 0;
-+}
-+
-+/* Backward allocation helper of reiser4_alloc_blocks_bitmap(). Tries to
-+ * allocate up to @needed blocks searching from @hint->blk towards lower block
-+ * numbers, at most @hint->max_dist blocks far if that limit is not zero.
-+ * On success 0 is returned and the allocated extent is stored in @start /
-+ * @len; -ENOSPC is returned when nothing was found, other negative values are
-+ * errors of the bitmap search. */
-+static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len)
-+{
-+ reiser4_block_nr search_start;
-+ reiser4_block_nr search_end;
-+ int actual_len;
-+
-+ /* @super is needed by the assertions only */
-+ ON_DEBUG(struct super_block *super = reiser4_get_current_sb());
-+
-+ assert("zam-969", super != NULL);
-+ assert("zam-970", hint != NULL);
-+ assert("zam-971", hint->blk <= reiser4_block_count(super));
-+
-+ search_start = hint->blk;
-+ /* no limit, or the limit reaches past block 0: search down to 0 */
-+ if (hint->max_dist == 0 || search_start <= hint->max_dist)
-+ search_end = 0;
-+ else
-+ search_end = search_start - hint->max_dist;
-+
-+ actual_len =
-+ bitmap_alloc_backward(&search_start, &search_end, 1, needed);
-+ if (actual_len == 0)
-+ return RETERR(-ENOSPC);
-+ if (actual_len < 0)
-+ return RETERR(actual_len);
-+ *len = actual_len;
-+ *start = search_start;
-+ return 0;
-+}
-+
-+/* plugin->u.space_allocator.alloc_blocks() */
-+/* Dispatches to the backward or the forward allocator depending on
-+ * @hint->backward; @allocator is not used. */
-+int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator,
-+				reiser4_blocknr_hint * hint, int needed,
-+				reiser4_block_nr * start, reiser4_block_nr * len)
-+{
-+	if (!hint->backward)
-+		return alloc_blocks_forward(hint, needed, start, len);
-+
-+	return alloc_blocks_backward(hint, needed, start, len);
-+}
-+
-+/* plugin->u.space_allocator.dealloc_blocks(). */
-+/* It just frees blocks in WORKING BITMAP. Usually formatted and unformatted
-+ nodes deletion is deferred until transaction commit. However, deallocation
-+ of temporary objects like wandered blocks and transaction commit records
-+ requires immediate node deletion from WORKING BITMAP.*/
-+/* The range [@start, @start + @len[ has to lie within a single bitmap block;
-+ @allocator is not used.
-+ NOTE(review): a failure of load_and_lock_bnode() is only caught by an
-+ assertion; the code after it proceeds regardless - confirm that this is
-+ acceptable for builds where assertions are compiled out. */
-+void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
-+ reiser4_block_nr start, reiser4_block_nr len)
-+{
-+ struct super_block *super = reiser4_get_current_sb();
-+
-+ bmap_nr_t bmap;
-+ bmap_off_t offset;
-+
-+ struct bitmap_node *bnode;
-+ int ret;
-+
-+ assert("zam-468", len != 0);
-+ check_block_range(&start, &len);
-+
-+ parse_blocknr(&start, &bmap, &offset);
-+
-+ assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize));
-+
-+ bnode = get_bnode(super, bmap);
-+
-+ assert("zam-470", bnode != NULL);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ assert("zam-481", ret == 0);
-+
-+ reiser4_clear_bits(bnode_working_data(bnode), offset,
-+ (bmap_off_t) (offset + len));
-+
-+ /* the freed range may start below the cached first zero bit */
-+ adjust_first_zero_bit(bnode, offset);
-+
-+ release_and_unlock_bnode(bnode);
-+}
-+
-+/* plugin->u.space_allocator.check_blocks(). */
-+/* Debug-only consistency check (the body is empty without REISER4_DEBUG):
-+ asserts that every bit of the range [*@start, *@start + *@len[ in the
-+ WORKING BITMAP is set when @desired is non-zero, or clear when @desired is
-+ zero. The range has to lie within a single bitmap block. */
-+void reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
-+ const reiser4_block_nr * len, int desired)
-+{
-+#if REISER4_DEBUG
-+ struct super_block *super = reiser4_get_current_sb();
-+
-+ bmap_nr_t bmap;
-+ bmap_off_t start_offset;
-+ bmap_off_t end_offset;
-+
-+ struct bitmap_node *bnode;
-+ int ret;
-+
-+ assert("zam-622", len != NULL);
-+ check_block_range(start, len);
-+ parse_blocknr(start, &bmap, &start_offset);
-+
-+ end_offset = start_offset + *len;
-+ assert("nikita-2214", end_offset <= bmap_bit_count(super->s_blocksize));
-+
-+ bnode = get_bnode(super, bmap);
-+
-+ assert("nikita-2215", bnode != NULL);
-+
-+ ret = load_and_lock_bnode(bnode);
-+ assert("zam-626", ret == 0);
-+
-+ assert("nikita-2216", jnode_is_loaded(bnode->wjnode));
-+
-+ if (desired) {
-+ /* no zero bit may be found inside the range */
-+ assert("zam-623",
-+ reiser4_find_next_zero_bit(bnode_working_data(bnode),
-+ end_offset, start_offset)
-+ >= end_offset);
-+ } else {
-+ /* no set bit may be found inside the range */
-+ assert("zam-624",
-+ reiser4_find_next_set_bit(bnode_working_data(bnode),
-+ end_offset, start_offset)
-+ >= end_offset);
-+ }
-+
-+ release_and_unlock_bnode(bnode);
-+#endif
-+}
-+
-+/* conditional insertion of @node into atom's overwrite set if it was not there */
-+/* A node that belongs to no atom yet is flagged JNODE_OVRWR and put on the
-+ * overwrite list of @atom; otherwise it is only asserted that the node already
-+ * belongs to @atom. The atom spin lock is taken before the jnode spin lock. */
-+static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node)
-+{
-+ assert("zam-546", atom != NULL);
-+ assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT);
-+ assert("zam-548", node != NULL);
-+
-+ spin_lock_atom(atom);
-+ spin_lock_jnode(node);
-+
-+ if (node->atom == NULL) {
-+ JF_SET(node, JNODE_OVRWR);
-+ insert_into_atom_ovrwr_list(atom, node);
-+ } else {
-+ assert("zam-549", node->atom == atom);
-+ }
-+
-+ spin_unlock_jnode(node);
-+ spin_unlock_atom(atom);
-+}
-+
-+/* an actor which applies delete set to COMMIT bitmap pages and link modified
-+   pages in a single-linked list */
-+/* @atom: atom being committed, has to be in ASTAGE_PRE_COMMIT
-+   @start: first block of the freed range
-+   @len: length of the range, or NULL for a single block
-+   @data: points to a long long counter of freed blocks, which is increased
-+
-+   Returns 0 on success or the error of load_and_lock_bnode() /
-+   bnode_check_crc(). The bnode mutex is dropped on every return path. */
-+static int
-+apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start,
-+			  const reiser4_block_nr * len, void *data)
-+{
-+
-+	bmap_nr_t bmap;
-+	bmap_off_t offset;
-+	int ret;
-+
-+	long long *blocks_freed_p = data;
-+
-+	struct bitmap_node *bnode;
-+
-+	struct super_block *sb = reiser4_get_current_sb();
-+
-+	check_block_range(start, len);
-+
-+	parse_blocknr(start, &bmap, &offset);
-+
-+	/* FIXME-ZAM: we assume that all block ranges are allocated by this
-+	   bitmap-based allocator and each block range can't go over a zone of
-+	   responsibility of one bitmap block; same assumption is used in
-+	   other journal hooks in bitmap code. */
-+	bnode = get_bnode(sb, bmap);
-+	assert("zam-448", bnode != NULL);
-+
-+	/* it is safe to unlock atom with is in ASTAGE_PRE_COMMIT */
-+	assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT);
-+	ret = load_and_lock_bnode(bnode);
-+	if (ret)
-+		return ret;
-+
-+	/* put bnode into atom's overwrite set */
-+	cond_add_to_overwrite_set(atom, bnode->cjnode);
-+
-+	/* from here on @data refers to the commit bitmap */
-+	data = bnode_commit_data(bnode);
-+
-+	ret = bnode_check_crc(bnode);
-+	if (ret != 0) {
-+		/* do not leave with the bnode mutex held */
-+		release_and_unlock_bnode(bnode);
-+		return ret;
-+	}
-+
-+	if (len != NULL) {
-+		/* FIXME-ZAM: a check that all bits are set should be there */
-+		assert("zam-443",
-+		       offset + *len <= bmap_bit_count(sb->s_blocksize));
-+		reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len));
-+
-+		(*blocks_freed_p) += *len;
-+	} else {
-+		reiser4_clear_bit(offset, data);
-+		(*blocks_freed_p)++;
-+	}
-+
-+	/* the bitmap changed, store a fresh checksum */
-+	bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize));
-+
-+	release_and_unlock_bnode(bnode);
-+
-+	return 0;
-+}
-+
-+/* plugin->u.space_allocator.pre_commit_hook(). */
-+/* It just applies transaction changes to fs-wide COMMIT BITMAP, hoping the
-+   rest is done by transaction manager (allocate wandered locations for COMMIT
-+   BITMAP blocks, copy COMMIT BITMAP blocks data). */
-+/* Only one instance of this function can be running at one given time, because
-+   only one transaction can be committed a time, therefore it is safe to access
-+   some global variables without any locking */
-+/* Returns 0 on success, or the error of load_and_lock_bnode() /
-+   bnode_check_crc() for the first bitmap that fails. All accesses to a commit
-+   bitmap, including both checksum checks, are made with the bnode mutex held,
-+   and the mutex is dropped on every return path.
-+   NOTE(review): the value returned by blocknr_set_iterator() is not
-+   examined. */
-+
-+int reiser4_pre_commit_hook_bitmap(void)
-+{
-+	struct super_block *super = reiser4_get_current_sb();
-+	txn_atom *atom;
-+
-+	long long blocks_freed = 0;
-+
-+	atom = get_current_atom_locked();
-+	assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT);
-+	spin_unlock_atom(atom);
-+
-+	{			/* scan atom's captured list and find all freshly allocated nodes,
-+				 * mark corresponded bits in COMMIT BITMAP as used */
-+		struct list_head *head = ATOM_CLEAN_LIST(atom);
-+		jnode *node = list_entry(head->next, jnode, capture_link);
-+
-+		while (head != &node->capture_link) {
-+			/* we detect freshly allocated jnodes */
-+			if (JF_ISSET(node, JNODE_RELOC)) {
-+				int ret;
-+				bmap_nr_t bmap;
-+
-+				bmap_off_t offset;
-+				bmap_off_t index;
-+				struct bitmap_node *bn;
-+				__u32 size = bmap_size(super->s_blocksize);
-+				__u32 crc;
-+				char byte;
-+
-+				assert("zam-559", !JF_ISSET(node, JNODE_OVRWR));
-+				assert("zam-460",
-+				       !reiser4_blocknr_is_fake(&node->blocknr));
-+
-+				parse_blocknr(&node->blocknr, &bmap, &offset);
-+				bn = get_bnode(super, bmap);
-+
-+				/* byte of the bitmap that holds the bit */
-+				index = offset >> 3;
-+				assert("vpf-276", index < size);
-+
-+				check_bnode_loaded(bn);
-+				ret = load_and_lock_bnode(bn);
-+				if (ret != 0)
-+					return ret;
-+
-+				/* verify the stored checksum before the bitmap
-+				 * is modified */
-+				ret = bnode_check_crc(bn);
-+				if (ret != 0) {
-+					release_and_unlock_bnode(bn);
-+					return ret;
-+				}
-+
-+				/* remember the old byte, set the bit and update
-+				 * the checksum for this one-byte change */
-+				byte = *(bnode_commit_data(bn) + index);
-+				reiser4_set_bit(offset, bnode_commit_data(bn));
-+
-+				crc = adler32_recalc(bnode_commit_crc(bn), byte,
-+						     *(bnode_commit_data(bn) +
-+						       index),
-+						     size - index);
-+				bnode_set_commit_crc(bn, crc);
-+
-+				/* verify the recalculated checksum while the
-+				 * bitmap is still locked */
-+				ret = bnode_check_crc(bn);
-+
-+				release_and_unlock_bnode(bn);
-+
-+				if (ret != 0)
-+					return ret;
-+
-+				/* working of this depends on how it inserts
-+				   new j-node into clean list, because we are
-+				   scanning the same list now. It is OK, if
-+				   insertion is done to the list front */
-+				cond_add_to_overwrite_set(atom, bn->cjnode);
-+			}
-+
-+			node = list_entry(node->capture_link.next, jnode, capture_link);
-+		}
-+	}
-+
-+	/* clear the bits of all blocks in the atom's delete set */
-+	blocknr_set_iterator(atom, &atom->delete_set, apply_dset_to_commit_bmap,
-+			     &blocks_freed, 0);
-+
-+	/* net change of committed free blocks for this transaction */
-+	blocks_freed -= atom->nr_blocks_allocated;
-+
-+	{
-+		reiser4_super_info_data *sbinfo;
-+
-+		sbinfo = get_super_private(super);
-+
-+		spin_lock_reiser4_super(sbinfo);
-+		sbinfo->blocks_free_committed += blocks_freed;
-+		spin_unlock_reiser4_super(sbinfo);
-+	}
-+
-+	return 0;
-+}
-+
-+/* plugin->u.space_allocator.init_allocator
-+   Constructor of the reiser4_space_allocator object; it is called on fs mount.
-+
-+   Allocates the allocator private data and the array of bitmap nodes,
-+   initializes every bitmap node and stores the private data in
-+   @allocator->u.generic. Unless REISER4_DONT_LOAD_BITMAP is set in the super
-+   block's fs_flags, every bitmap block is then loaded once.
-+
-+   Returns 0 on success, RETERR(-ENOMEM) if an allocation fails, or the error
-+   returned by load_and_lock_bnode() (the allocator is destroyed first). */
-+int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator,
-+				  struct super_block *super, void *arg)
-+{
-+	struct bitmap_allocator_data *priv = NULL;
-+	bmap_nr_t nr_bmaps;
-+	bmap_nr_t idx;
-+	__u64 start_time, elapsed_time;
-+
-+	assert("nikita-3039", reiser4_schedulable());
-+
-+	/* private data holder of the bitmap allocator */
-+	priv = kmalloc(sizeof(struct bitmap_allocator_data),
-+		       reiser4_ctx_gfp_mask_get());
-+	if (priv == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	/* one bitmap node per bitmap block */
-+	nr_bmaps = get_nr_bmap(super);
-+
-+	/* FIXME-ZAM: it is not clear what to do with huge number of bitmaps
-+	   which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17,
-+	   may I never meet someone who still uses the ia32 architecture when
-+	   storage devices of that size enter the market, and wants to use ia32
-+	   with that storage device, much less reiser4. ;-) -Hans). Kmalloc is
-+	   not possible and, probably, another dynamic data structure should
-+	   replace a static array of bnodes. */
-+	priv->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) * nr_bmaps);
-+	if (priv->bitmap == NULL) {
-+		kfree(priv);
-+		return RETERR(-ENOMEM);
-+	}
-+
-+	idx = 0;
-+	while (idx < nr_bmaps) {
-+		init_bnode(priv->bitmap + idx, super, idx);
-+		idx++;
-+	}
-+
-+	allocator->u.generic = priv;
-+
-+#if REISER4_DEBUG
-+	get_super_private(super)->min_blocks_used += nr_bmaps;
-+#endif
-+
-+	/* Nothing more to do when loading bitmaps at mount time is disabled. */
-+	if (test_bit(REISER4_DONT_LOAD_BITMAP,
-+		     &get_super_private(super)->fs_flags))
-+		return 0;
-+
-+	/* Load all bitmap blocks at mount time. */
-+	if (REISER4_DEBUG)
-+		printk(KERN_INFO "loading reiser4 bitmap...");
-+	start_time = jiffies;
-+
-+	for (idx = 0; idx < nr_bmaps; idx++) {
-+		struct bitmap_node *bnode = priv->bitmap + idx;
-+		int ret = load_and_lock_bnode(bnode);
-+
-+		if (ret) {
-+			/* frees the bnode array and the private data */
-+			reiser4_destroy_allocator_bitmap(allocator, super);
-+			return ret;
-+		}
-+		release_and_unlock_bnode(bnode);
-+	}
-+
-+	elapsed_time = jiffies - start_time;
-+	if (REISER4_DEBUG)
-+		printk("...done (%llu jiffies)\n",
-+		       (unsigned long long)elapsed_time);
-+
-+	return 0;
-+}
-+
-+/* plugin->u.space_allocator.destroy_allocator
-+   Destructor; it is called on fs unmount (and by the constructor when loading
-+   a bitmap block fails).
-+
-+   Finalizes every bitmap node under its mutex, frees the array of bitmap
-+   nodes and the allocator private data, and clears @allocator->u.generic.
-+   Always returns 0. */
-+int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator,
-+				     struct super_block *super)
-+{
-+	bmap_nr_t bitmap_blocks_nr;
-+	bmap_nr_t i;
-+
-+	struct bitmap_allocator_data *data = allocator->u.generic;
-+
-+	assert("zam-414", data != NULL);
-+	assert("zam-376", data->bitmap != NULL);
-+
-+	bitmap_blocks_nr = get_nr_bmap(super);
-+
-+	for (i = 0; i < bitmap_blocks_nr; i++) {
-+		struct bitmap_node *bnode = data->bitmap + i;
-+
-+		mutex_lock(&bnode->mutex);
-+
-+#if REISER4_DEBUG
-+		if (atomic_read(&bnode->loaded)) {
-+			jnode *wj = bnode->wjnode;
-+			jnode *cj = bnode->cjnode;
-+
-+			assert("zam-480", jnode_page(cj) != NULL);
-+			assert("zam-633", jnode_page(wj) != NULL);
-+
-+			/* working and commit bitmaps are expected to be
-+			 * identical here; compare wj against cj (comparing wj
-+			 * with itself could never fail) */
-+			assert("zam-634",
-+			       memcmp(jdata(wj), jdata(cj),
-+				      bmap_size(super->s_blocksize)) == 0);
-+
-+		}
-+#endif
-+		done_bnode(bnode);
-+		mutex_unlock(&bnode->mutex);
-+	}
-+
-+	/* data->bitmap comes from reiser4_vmalloc(), data from kmalloc() */
-+	vfree(data->bitmap);
-+	kfree(data);
-+
-+	allocator->u.generic = NULL;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * scroll-step: 1
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/space/bitmap.h linux-2.6.24/fs/reiser4/plugin/space/bitmap.h
---- linux-2.6.24.orig/fs/reiser4/plugin/space/bitmap.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/space/bitmap.h 2008-01-25 11:39:07.068241692 +0300
-@@ -0,0 +1,47 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__REISER4_PLUGIN_SPACE_BITMAP_H__)
-+#define __REISER4_PLUGIN_SPACE_BITMAP_H__
-+
-+#include "../../dformat.h"
-+#include "../../block_alloc.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+/* EDWARD-FIXME-HANS: write something as informative as the below for every .h file lacking it. */
-+/* declarations of functions implementing methods of space allocator plugin for
-+ bitmap based allocator. The functions themselves are in bitmap.c */
-+extern int reiser4_init_allocator_bitmap(reiser4_space_allocator *,
-+ struct super_block *, void *);
-+extern int reiser4_destroy_allocator_bitmap(reiser4_space_allocator *,
-+ struct super_block *);
-+extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *,
-+ reiser4_blocknr_hint *, int needed,
-+ reiser4_block_nr * start,
-+ reiser4_block_nr * len);
-+extern void reiser4_check_blocks_bitmap(const reiser4_block_nr *,
-+ const reiser4_block_nr *, int);
-+extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *,
-+ reiser4_block_nr,
-+ reiser4_block_nr);
-+extern int reiser4_pre_commit_hook_bitmap(void);
-+
-+/* The bitmap allocator has nothing to do for these hooks: each macro expands
-+ to an empty statement and its arguments are not evaluated. */
-+#define reiser4_post_commit_hook_bitmap() do{}while(0)
-+#define reiser4_post_write_back_hook_bitmap() do{}while(0)
-+#define reiser4_print_info_bitmap(pref, al) do{}while(0)
-+
-+/* bmap_nr_t: 64-bit number (index) of a bitmap block;
-+ bmap_off_t: 32-bit bit offset inside one bitmap block. */
-+typedef __u64 bmap_nr_t;
-+typedef __u32 bmap_off_t;
-+
-+#endif /* __REISER4_PLUGIN_SPACE_BITMAP_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/space/Makefile linux-2.6.24/fs/reiser4/plugin/space/Makefile
---- linux-2.6.24.orig/fs/reiser4/plugin/space/Makefile 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/space/Makefile 2008-01-25 11:39:07.068241692 +0300
-@@ -0,0 +1,4 @@
-+# Kbuild fragment for the space allocator plugins: space_plugins.o is appended
-+# to the obj-<value of CONFIG_REISER4_FS> list and is composed of the objects
-+# listed in space_plugins-objs (currently only the bitmap allocator).
-+obj-$(CONFIG_REISER4_FS) += space_plugins.o
-+
-+space_plugins-objs := \
-+ bitmap.o
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/space/space_allocator.h linux-2.6.24/fs/reiser4/plugin/space/space_allocator.h
---- linux-2.6.24.orig/fs/reiser4/plugin/space/space_allocator.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/space/space_allocator.h 2008-01-25 11:39:07.068241692 +0300
-@@ -0,0 +1,80 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __SPACE_ALLOCATOR_H__
-+#define __SPACE_ALLOCATOR_H__
-+
-+#include "../../forward.h"
-+#include "bitmap.h"
-+/* NIKITA-FIXME-HANS: surely this could use a comment. Something about how bitmap is the only space allocator for now,
-+ * but... */
-+/* DEF_SPACE_ALLOCATOR(allocator) defines a set of static inline sa_*()
-+ * wrappers, each of which forwards its arguments to the function or macro
-+ * named reiser4_<method>_<allocator>. The macro is instantiated once below,
-+ * for the bitmap allocator.
-+ *
-+ * NOTE(review): sa_destroy_allocator() and sa_pre_commit_hook() are declared
-+ * void, while bitmap.h declares reiser4_destroy_allocator_bitmap() and
-+ * reiser4_pre_commit_hook_bitmap() as returning int, so those return values
-+ * are discarded by the wrappers. */
-+#define DEF_SPACE_ALLOCATOR(allocator) \
-+ \
-+static inline int sa_init_allocator (reiser4_space_allocator * al, struct super_block *s, void * opaque) \
-+{ \
-+ return reiser4_init_allocator_##allocator (al, s, opaque); \
-+} \
-+ \
-+static inline void sa_destroy_allocator (reiser4_space_allocator *al, struct super_block *s) \
-+{ \
-+ reiser4_destroy_allocator_##allocator (al, s); \
-+} \
-+ \
-+static inline int sa_alloc_blocks (reiser4_space_allocator *al, reiser4_blocknr_hint * hint, \
-+ int needed, reiser4_block_nr * start, reiser4_block_nr * len) \
-+{ \
-+ return reiser4_alloc_blocks_##allocator (al, hint, needed, start, len); \
-+} \
-+static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_block_nr start, reiser4_block_nr len) \
-+{ \
-+ reiser4_dealloc_blocks_##allocator (al, start, len); \
-+} \
-+ \
-+static inline void sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
-+{ \
-+ reiser4_check_blocks_##allocator (start, end, desired); \
-+} \
-+ \
-+static inline void sa_pre_commit_hook (void) \
-+{ \
-+ reiser4_pre_commit_hook_##allocator (); \
-+} \
-+ \
-+static inline void sa_post_commit_hook (void) \
-+{ \
-+ reiser4_post_commit_hook_##allocator (); \
-+} \
-+ \
-+static inline void sa_post_write_back_hook (void) \
-+{ \
-+ reiser4_post_write_back_hook_##allocator(); \
-+} \
-+ \
-+static inline void sa_print_info(const char * prefix, reiser4_space_allocator * al) \
-+{ \
-+ reiser4_print_info_##allocator (prefix, al); \
-+}
-+
-+/* instantiate the sa_*() wrappers for the bitmap allocator */
-+DEF_SPACE_ALLOCATOR(bitmap)
-+
-+/* this object is part of reiser4 private in-core super block */
-+/* It only carries an opaque pointer to the private data of the allocator
-+ * implementation in use. */
-+struct reiser4_space_allocator {
-+ union {
-+ /* space allocators might use this pointer to reference their
-+ * data. */
-+ void *generic;
-+ } u;
-+};
-+
-+/* __SPACE_ALLOCATOR_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/plugin/tail_policy.c linux-2.6.24/fs/reiser4/plugin/tail_policy.c
---- linux-2.6.24.orig/fs/reiser4/plugin/tail_policy.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/plugin/tail_policy.c 2008-01-25 11:39:07.068241692 +0300
-@@ -0,0 +1,113 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Formatting policy plugins */
-+
-+/*
-+ * Formatting policy plugin is used by object plugin (of regular file) to
-+ * convert file between two representations.
-+ *
-+ * Currently following policies are implemented:
-+ * never store file in formatted nodes
-+ * always store file in formatted nodes
-+ * store file in formatted nodes if file is smaller than 4 blocks (default)
-+ */
-+
-+#include "../tree.h"
-+#include "../inode.h"
-+#include "../super.h"
-+#include "object.h"
-+#include "plugin.h"
-+#include "node/node.h"
-+#include "plugin_header.h"
-+
-+#include <linux/pagemap.h>
-+#include <linux/fs.h> /* For struct inode */
-+
-+/**
-+ * have_formatting_never - "never" formatting policy
-+ * @inode: inode to operate on (not used)
-+ * @size: new object size (not used)
-+ *
-+ * Never store file's tail as direct item: always returns 0.
-+ */
-+/* Audited by: green(2002.06.12) */
-+static int
-+have_formatting_never(const struct inode *inode UNUSED_ARG,
-+		      loff_t size UNUSED_ARG)
-+{
-+	return 0;
-+}
-+
-+/**
-+ * have_formatting_always - "always" formatting policy
-+ * @inode: inode to operate on (not used)
-+ * @size: new object size (not used)
-+ *
-+ * Always store file's tail as direct item: always returns 1.
-+ */
-+/* Audited by: green(2002.06.12) */
-+static int have_formatting_always(const struct inode *inode UNUSED_ARG,
-+				  loff_t size UNUSED_ARG)
-+{
-+	return 1;
-+}
-+
-+/**
-+ * have_formatting_default - default ("4blocks") formatting policy
-+ * @inode: inode to operate on; its super block supplies the block size
-+ * @size: new object size
-+ *
-+ * Decides whether the file denoted by @inode is stored as tails only or as
-+ * extents only. Returns 0 when @size is greater than four blocks
-+ * (s_blocksize * 4), 1 otherwise.
-+ */
-+static int have_formatting_default(const struct inode *inode UNUSED_ARG,
-+				   loff_t size)
-+{
-+	assert("umka-1253", inode != NULL);
-+
-+	return (size > inode->i_sb->s_blocksize * 4) ? 0 : 1;
-+}
-+
-+/* tail plugins */
-+/* Table of formatting (tail) policy plugins, indexed by formatting policy id.
-+ * Each entry's ->have_tail method is one of the have_formatting_*() predicates
-+ * defined above. */
-+formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID] = {
-+ /* "never": have_tail always returns 0 */
-+ [NEVER_TAILS_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = NEVER_TAILS_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "never",
-+ .desc = "Never store file's tail",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_never
-+ },
-+ /* "always": have_tail always returns 1 */
-+ [ALWAYS_TAILS_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = ALWAYS_TAILS_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "always",
-+ .desc = "Always store file's tail",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_always
-+ },
-+ /* "4blocks": have_tail returns 1 unless the size exceeds 4 blocks */
-+ [SMALL_FILE_FORMATTING_ID] = {
-+ .h = {
-+ .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
-+ .id = SMALL_FILE_FORMATTING_ID,
-+ .pops = NULL,
-+ .label = "4blocks",
-+ .desc = "store files shorter than 4 blocks in tail items",
-+ .linkage = {NULL, NULL}
-+ },
-+ .have_tail = have_formatting_default
-+ }
-+};
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/pool.c linux-2.6.24/fs/reiser4/pool.c
---- linux-2.6.24.orig/fs/reiser4/pool.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/pool.c 2008-01-25 11:39:07.072242722 +0300
-@@ -0,0 +1,231 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Fast pool allocation.
-+
-+ There are situations when some sub-system normally asks memory allocator
-+ for only few objects, but under some circumstances could require much
-+ more. Typical and actually motivating example is tree balancing. It needs
-+ to keep track of nodes that were involved into it, and it is well-known
-+ that in reasonable packed balanced tree most (92.938121%) percent of all
-+ balancings end up after working with only few nodes (3.141592 on
-+ average). But in rare cases balancing can involve much more nodes
-+ (3*tree_height+1 in extremal situation).
-+
-+ On the one hand, we don't want to resort to dynamic allocation (slab,
-+ malloc(), etc.) to allocate data structures required to keep track of
-+ nodes during balancing. On the other hand, we cannot statically allocate
-+ required amount of space on the stack, because first: it is useless wastage
-+ of precious resource, and second: this amount is unknown in advance (tree
-+ height can change).
-+
-+ Pools, implemented in this file are solution for this problem:
-+
-+ - some configurable amount of objects is statically preallocated on the
-+ stack
-+
-+ - if this preallocated pool is exhausted and more objects is requested
-+ they are allocated dynamically.
-+
-+ Pools encapsulate distinction between statically and dynamically allocated
-+ objects. Both allocation and recycling look exactly the same.
-+
-+ To keep track of dynamically allocated objects, pool adds its own linkage
-+ to each object.
-+
-+ NOTE-NIKITA This linkage also contains some balancing-specific data. This
-+ is not perfect. On the other hand, balancing is currently the only client
-+ of pool code.
-+
-+ NOTE-NIKITA Another desirable feature is to rewrite all pool manipulation
-+ functions in the style of tslist/tshash, i.e., make them unreadable, but
-+ type-safe.
-+
-+*/
-+
-+#include "debug.h"
-+#include "pool.h"
-+#include "super.h"
-+
-+#include <linux/types.h>
-+#include <linux/err.h>
-+
-+/* Prepare pool object header @hdr for use: each of its three list linkages
-+   (usage, level, extra) becomes an empty list. */
-+static void reiser4_init_pool_obj(struct reiser4_pool_header * h)
-+{
-+	struct reiser4_pool_header *hdr = h;
-+
-+	INIT_LIST_HEAD(&hdr->extra_linkage);
-+	INIT_LIST_HEAD(&hdr->level_linkage);
-+	INIT_LIST_HEAD(&hdr->usage_linkage);
-+}
-+
-+/* Initialize a new pool.
-+
-+   @pool        - pool to initialize;
-+   @obj_size    - size of objects in @pool;
-+   @num_of_objs - number of preallocated objects;
-+   @data        - area for preallocated objects.
-+
-+   @pool and the @data area are zeroed, and a pool header is set up at the
-+   start of each of the @num_of_objs slots of @data and queued on the pool's
-+   free list in slot order. */
-+void reiser4_init_pool(struct reiser4_pool * pool, size_t obj_size,
-+		       int num_of_objs, char *data)
-+{
-+	char *slot;
-+	int left;
-+
-+	assert("nikita-955", pool != NULL);
-+	assert("nikita-1044", obj_size > 0);
-+	assert("nikita-956", num_of_objs >= 0);
-+	assert("nikita-957", data != NULL);
-+
-+	memset(pool, 0, sizeof *pool);
-+	pool->obj_size = obj_size;
-+	pool->data = data;
-+	INIT_LIST_HEAD(&pool->free);
-+	INIT_LIST_HEAD(&pool->used);
-+	INIT_LIST_HEAD(&pool->extra);
-+	memset(data, 0, obj_size * num_of_objs);
-+
-+	/* walk the preallocated area one object slot at a time */
-+	for (slot = data, left = num_of_objs; left > 0;
-+	     --left, slot += obj_size) {
-+		struct reiser4_pool_header *hdr;
-+
-+		hdr = (struct reiser4_pool_header *)slot;
-+		reiser4_init_pool_obj(hdr);
-+		/* add pool header to the end of pool's free list */
-+		list_add_tail(&hdr->usage_linkage, &pool->free);
-+	}
-+}
-+
-+/* release pool resources
-+
-+ Release all resources acquired by this pool, specifically, dynamically
-+ allocated objects.
-+
-+ NOTE(review): the body is currently empty, so nothing is released here;
-+ dynamically allocated ("extra") objects are kfree()d one by one in
-+ reiser4_pool_free().
-+
-+*/
-+void reiser4_done_pool(struct reiser4_pool * pool UNUSED_ARG)
-+{
-+}
-+
-+/* Allocate a carry object from @pool.
-+
-+   A preallocated object is taken from the head of the free list when one is
-+   available; otherwise the object is kmalloc()ed and linked into the pool's
-+   "extra" list. Either way the object is put at the front of the "used"
-+   list, the pool's object counter is incremented and the part of the object
-+   that follows the pool header is zeroed.
-+
-+   Returns the object, or ERR_PTR(RETERR(-ENOMEM)) if kmalloc() fails. */
-+static void *reiser4_pool_alloc(struct reiser4_pool * pool)
-+{
-+	struct reiser4_pool_header *obj;
-+
-+	assert("nikita-959", pool != NULL);
-+
-+	if (list_empty(&pool->free)) {
-+		/* pool is empty. Extra allocations don't deserve dedicated
-+		   slab to be served from, as they are expected to be rare. */
-+		obj = kmalloc(pool->obj_size, reiser4_ctx_gfp_mask_get());
-+		if (obj == NULL)
-+			return ERR_PTR(RETERR(-ENOMEM));
-+
-+		reiser4_init_pool_obj(obj);
-+		list_add(&obj->extra_linkage, &pool->extra);
-+		BUG_ON(!list_empty(&obj->usage_linkage) ||
-+		       !list_empty(&obj->level_linkage));
-+	} else {
-+		/* detach the first preallocated object from the free list */
-+		obj = list_entry(pool->free.next, struct reiser4_pool_header,
-+				 usage_linkage);
-+		list_del(&obj->usage_linkage);
-+		INIT_LIST_HEAD(&obj->usage_linkage);
-+		BUG_ON(!list_empty(&obj->level_linkage) ||
-+		       !list_empty(&obj->extra_linkage));
-+	}
-+
-+	pool->objs++;
-+	list_add(&obj->usage_linkage, &pool->used);
-+	/* clear everything behind the header */
-+	memset(obj + 1, 0, pool->obj_size - sizeof *obj);
-+	return obj;
-+}
-+
-+/* Return object @h back to @pool.
-+
-+   The object is unlinked from its usage and level lists and the pool's
-+   object counter is decremented. An "extra" (dynamically allocated) object
-+   is then removed from the extra list and kfree()d; a preallocated one is
-+   pushed onto the free list. */
-+void reiser4_pool_free(struct reiser4_pool * pool,
-+		       struct reiser4_pool_header * h)
-+{
-+	assert("nikita-961", h != NULL);
-+	assert("nikita-962", pool != NULL);
-+
-+	pool->objs--;
-+	assert("nikita-963", pool->objs >= 0);
-+
-+	list_del_init(&h->usage_linkage);
-+	list_del_init(&h->level_linkage);
-+
-+	if (!list_empty(&h->extra_linkage)) {
-+		/* remove pool header from pool's extra list and kfree it */
-+		list_del(&h->extra_linkage);
-+		kfree(h);
-+		return;
-+	}
-+
-+	/*
-+	 * pool header is not an extra one. Push it onto free list
-+	 * using usage_linkage
-+	 */
-+	list_add(&h->usage_linkage, &pool->free);
-+}
-+
-+/* add new object to the carry level list
-+
-+   Carry level is FIFO most of the time, but not always. Complications arise
-+   when make_space() function tries to go to the left neighbor and thus adds
-+   carry node before existing nodes, and also, when updating delimiting keys
-+   after moving data between two nodes, we want left node to be locked before
-+   right node.
-+
-+   Latter case is confusing at the first glance. Problem is that COP_UPDATE
-+   operation that updates delimiting keys is sometimes called with two nodes
-+   (when data are moved between two nodes) and sometimes with only one node
-+   (when leftmost item is deleted in a node). In any case operation is
-+   supplied with at least node whose left delimiting key is to be updated
-+   (that is "right" node).
-+
-+   @pool - from which to allocate new object;
-+   @list - where to add object (used for POOLO_LAST and POOLO_FIRST);
-+   @order - where to put the new object;
-+   @reference - after (or before) which existing object to add (used for
-+   POOLO_AFTER and POOLO_BEFORE).
-+
-+   Returns the new object, or the ERR_PTR() value produced by
-+   reiser4_pool_alloc() when allocation fails.
-+*/
-+struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool * pool,
-+					    struct list_head *list,
-+					    pool_ordering order,
-+					    struct reiser4_pool_header * reference)
-+{
-+	struct reiser4_pool_header *fresh;
-+
-+	assert("nikita-972", pool != NULL);
-+
-+	fresh = reiser4_pool_alloc(pool);
-+	if (IS_ERR(fresh))
-+		return fresh;
-+
-+	assert("nikita-973", fresh != NULL);
-+
-+	switch (order) {
-+	case POOLO_BEFORE:
-+		/* link between @reference's predecessor and @reference */
-+		list_add_tail(&fresh->level_linkage,
-+			      &reference->level_linkage);
-+		break;
-+	case POOLO_AFTER:
-+		/* link between @reference and its successor */
-+		list_add(&fresh->level_linkage, &reference->level_linkage);
-+		break;
-+	case POOLO_LAST:
-+		list_add_tail(&fresh->level_linkage, list);
-+		break;
-+	case POOLO_FIRST:
-+		list_add(&fresh->level_linkage, list);
-+		break;
-+	default:
-+		wrong_return_value("nikita-927", "order");
-+	}
-+	return fresh;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/pool.h linux-2.6.24/fs/reiser4/pool.h
---- linux-2.6.24.orig/fs/reiser4/pool.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/pool.h 2008-01-25 11:39:07.072242722 +0300
-@@ -0,0 +1,56 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Fast pool allocation */
-+
-+#ifndef __REISER4_POOL_H__
-+#define __REISER4_POOL_H__
-+
-+#include <linux/types.h>
-+
-+/* A pool of equally sized objects: a preallocated area plus lists tracking
-+ * free, used and dynamically allocated ("extra") objects. */
-+struct reiser4_pool {
-+ /* size of one object in this pool */
-+ size_t obj_size;
-+ /* number of objects currently handed out */
-+ int objs;
-+ /* area holding the preallocated objects */
-+ char *data;
-+ /* preallocated objects available for allocation */
-+ struct list_head free;
-+ /* objects currently in use */
-+ struct list_head used;
-+ /* objects that were allocated dynamically */
-+ struct list_head extra;
-+};
-+
-+/* Header placed at the start of every pool object; the caller's payload
-+ * follows it. */
-+struct reiser4_pool_header {
-+ /* object is either on free or "used" lists */
-+ struct list_head usage_linkage;
-+ /* linkage into the list passed to (or implied by) reiser4_add_obj() */
-+ struct list_head level_linkage;
-+ /* linkage into the pool's "extra" list; empty for preallocated objects */
-+ struct list_head extra_linkage;
-+};
-+
-+/* Where reiser4_add_obj() places a new object. */
-+typedef enum {
-+ /* immediately before the reference object */
-+ POOLO_BEFORE,
-+ /* immediately after the reference object */
-+ POOLO_AFTER,
-+ /* at the tail of the list */
-+ POOLO_LAST,
-+ /* at the head of the list */
-+ POOLO_FIRST
-+} pool_ordering;
-+
-+/* pool manipulation functions */
-+
-+extern void reiser4_init_pool(struct reiser4_pool * pool, size_t obj_size,
-+ int num_of_objs, char *data);
-+extern void reiser4_done_pool(struct reiser4_pool * pool);
-+extern void reiser4_pool_free(struct reiser4_pool * pool,
-+ struct reiser4_pool_header * h);
-+struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool * pool,
-+ struct list_head * list,
-+ pool_ordering order,
-+ struct reiser4_pool_header *reference);
-+
-+/* __REISER4_POOL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/readahead.c linux-2.6.24/fs/reiser4/readahead.c
---- linux-2.6.24.orig/fs/reiser4/readahead.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/readahead.c 2008-01-25 11:39:07.072242722 +0300
-@@ -0,0 +1,138 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "forward.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "inode.h"
-+#include "key.h"
-+#include "znode.h"
-+
-+#include <linux/swap.h> /* for totalram_pages */
-+
-+/* Reset readahead info @rai: its stop key becomes a copy of the minimal key
-+   returned by reiser4_min_key(). */
-+void reiser4_init_ra_info(ra_info_t * rai)
-+{
-+	const reiser4_key *min_key = reiser4_min_key();
-+
-+	rai->key_to_stop = *min_key;
-+}
-+
-+/* Check the RA_ADJACENT_ONLY bit of @flags, the global formatted node
-+   readahead parameter (it can be set by mount option -o readahead:NUM:1).
-+   Returns non-zero when the bit is set. */
-+static inline int ra_adjacent_only(int flags)
-+{
-+	const int adjacent_bit = flags & RA_ADJACENT_ONLY;
-+
-+	return adjacent_bit;
-+}
-+
-+/* Used by formatted_readahead() to decide whether a read for the right
-+   neighbor of @node is to be issued. Under the tree's dk read lock, the right
-+   delimiting key of @node is compared with the readahead stop key of @info;
-+   the result of keyle() (key less than or equal to the stop key) is
-+   returned. */
-+static int should_readahead_neighbor(znode * node, ra_info_t * info)
-+{
-+	int within_window;
-+
-+	read_lock_dk(znode_get_tree(node));
-+	within_window = keyle(znode_get_rd_key(node), &info->key_to_stop);
-+	read_unlock_dk(znode_get_tree(node));
-+
-+	return within_window;
-+}
-+
-+/* percentage of total RAM below which the system counts as low on memory */
-+#define LOW_MEM_PERCENTAGE (5)
-+
-+/* Returns non-zero when the number of free pages is below
-+   LOW_MEM_PERCENTAGE percent of totalram_pages. */
-+static int low_on_memory(void)
-+{
-+	unsigned int freepages = nr_free_pages();
-+
-+	if (freepages < (totalram_pages * LOW_MEM_PERCENTAGE / 100))
-+		return 1;
-+	return 0;
-+}
-+
-+/* start read for @node and for a few of its right neighbors */
-+/* @node: node to start i/o for; @info: supplies the stop key that bounds how
-+ far to the right readahead may go. Right neighbors are only visited when
-+ @node is on the leaf level and memory is not low, and at most
-+ ra_params->max of them are started. */
-+void formatted_readahead(znode * node, ra_info_t * info)
-+{
-+ struct formatted_ra_params *ra_params;
-+ znode *cur;
-+ int i;
-+ int grn_flags;
-+ lock_handle next_lh;
-+
-+ /* do nothing if node block number has not been assigned to node (which means it is still in cache). */
-+ if (reiser4_blocknr_is_fake(znode_get_block(node)))
-+ return;
-+
-+ ra_params = get_current_super_ra_params();
-+
-+ /* start i/o for @node itself if it has no page yet */
-+ if (znode_page(node) == NULL)
-+ jstartio(ZJNODE(node));
-+
-+ if (znode_get_level(node) != LEAF_LEVEL)
-+ return;
-+
-+ /* don't waste memory for read-ahead when low on memory */
-+ if (low_on_memory())
-+ return;
-+
-+ /* We can have locked nodes on upper tree levels, in this situation lock
-+ priorities do not help to resolve deadlocks, we have to use TRY_LOCK
-+ here. */
-+ grn_flags = (GN_CAN_USE_UPPER_LEVELS | GN_TRY_LOCK);
-+
-+ i = 0;
-+ /* every zref() of cur is paired with a zput(): inside the loop when
-+ advancing to the next node, and once more after the loop */
-+ cur = zref(node);
-+ init_lh(&next_lh);
-+ while (i < ra_params->max) {
-+ const reiser4_block_nr *nextblk;
-+
-+ /* stop once the stop key has been passed */
-+ if (!should_readahead_neighbor(cur, info))
-+ break;
-+
-+ /* a non-zero result means the right neighbor was not obtained */
-+ if (reiser4_get_right_neighbor
-+ (&next_lh, cur, ZNODE_READ_LOCK, grn_flags))
-+ break;
-+
-+ /* stop at a neighbor without a real block number or, in
-+ adjacent-only mode, at one whose block does not directly follow
-+ cur's block; next_lh is released after the loop in that case */
-+ nextblk = znode_get_block(next_lh.node);
-+ if (reiser4_blocknr_is_fake(nextblk) ||
-+ (ra_adjacent_only(ra_params->flags)
-+ && *nextblk != *znode_get_block(cur) + 1)) {
-+ break;
-+ }
-+
-+ /* advance: reference the neighbor before dropping its lock handle */
-+ zput(cur);
-+ cur = zref(next_lh.node);
-+ done_lh(&next_lh);
-+ if (znode_page(cur) == NULL)
-+ jstartio(ZJNODE(cur));
-+ else
-+ /* Do not scan read-ahead window if pages already
-+ * allocated (and i/o already started). */
-+ break;
-+
-+ i++;
-+ }
-+ zput(cur);
-+ done_lh(&next_lh);
-+}
-+
-+/* Initialize readdir readahead information of @tap for directory @dir: the
-+   stop key is built so that readahead includes stat data of all files of the
-+   directory. Locality is the oid of @dir, the type is KEY_SD_MINOR, and the
-+   ordering, objectid and offset are taken from the maximal key. */
-+void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap)
-+{
-+	reiser4_key *stop;
-+	const reiser4_key *max;
-+
-+	assert("nikita-3542", dir != NULL);
-+	assert("nikita-3543", tap != NULL);
-+
-+	stop = &tap->ra_info.key_to_stop;
-+	max = reiser4_max_key();
-+
-+	set_key_locality(stop, get_inode_oid(dir));
-+	set_key_type(stop, KEY_SD_MINOR);
-+	set_key_ordering(stop, get_key_ordering(max));
-+	set_key_objectid(stop, get_key_objectid(max));
-+	set_key_offset(stop, get_key_offset(max));
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/readahead.h linux-2.6.24/fs/reiser4/readahead.h
---- linux-2.6.24.orig/fs/reiser4/readahead.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/readahead.h 2008-01-25 11:39:07.072242722 +0300
-@@ -0,0 +1,51 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#ifndef __READAHEAD_H__
-+#define __READAHEAD_H__
-+
-+#include "key.h"
-+
-+/* Bits of the "flags" field of struct formatted_ra_params. */
-+typedef enum {
-+ RA_ADJACENT_ONLY = 1, /* only requests nodes which are adjacent.
-+ Default is NO (not only adjacent) */
-+} ra_global_flags;
-+
-+/* reiser4 super block has a field of this type.
-+ It controls readahead during tree traversals */
-+struct formatted_ra_params {
-+ unsigned long max; /* request not more than this amount of nodes.
-+ Default is totalram_pages / 4 */
-+ int flags; /* bit mask of ra_global_flags values */
-+};
-+
-+/* Per-traversal readahead state: formatted_readahead() stops going right once
-+ a node's right delimiting key is greater than key_to_stop. */
-+typedef struct {
-+ reiser4_key key_to_stop;
-+} ra_info_t;
-+
-+void formatted_readahead(znode *, ra_info_t *);
-+void reiser4_init_ra_info(ra_info_t * rai);
-+
-+/* File readahead window state.
-+ NOTE(review): nothing in the visible part of this patch uses this structure;
-+ the field meanings below are taken from the original comments only. */
-+struct reiser4_file_ra_state {
-+ loff_t start; /* Current window */
-+ loff_t size;
-+ loff_t next_size; /* Next window size */
-+ loff_t ahead_start; /* Ahead window */
-+ loff_t ahead_size;
-+ loff_t max_window_size; /* Maximum readahead window */
-+ loff_t slow_start; /* enlarging r/a size algorithm. */
-+};
-+
-+extern void reiser4_readdir_readahead_init(struct inode *dir, tap_t * tap);
-+
-+/* __READAHEAD_H__ */
-+#endif
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/README linux-2.6.24/fs/reiser4/README
---- linux-2.6.24.orig/fs/reiser4/README 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/README 2008-01-25 11:39:07.076243753 +0300
-@@ -0,0 +1,128 @@
-+[LICENSING]
-+
-+Reiser4 is hereby licensed under the GNU General
-+Public License version 2.
-+
-+Source code files that contain the phrase "licensing governed by
-+reiser4/README" are "governed files" throughout this file. Governed
-+files are licensed under the GPL. The portions of them owned by Hans
-+Reiser, or authorized to be licensed by him, have been in the past,
-+and likely will be in the future, licensed to other parties under
-+other licenses. If you add your code to governed files, and don't
-+want it to be owned by Hans Reiser, put your copyright label on that
-+code so the poor blight and his customers can keep things straight.
-+All portions of governed files not labeled otherwise are owned by Hans
-+Reiser, and by adding your code to it, widely distributing it to
-+others or sending us a patch, and leaving the sentence in stating that
-+licensing is governed by the statement in this file, you accept this.
-+It will be a kindness if you identify whether Hans Reiser is allowed
-+to license code labeled as owned by you on your behalf other than
-+under the GPL, because he wants to know if it is okay to do so and put
-+a check in the mail to you (for non-trivial improvements) when he
-+makes his next sale. He makes no guarantees as to the amount if any,
-+though he feels motivated to motivate contributors, and you can surely
-+discuss this with him before or after contributing. You have the
-+right to decline to allow him to license your code contribution other
-+than under the GPL.
-+
-+Further licensing options are available for commercial and/or other
-+interests directly from Hans Reiser: reiser@namesys.com. If you interpret
-+the GPL as not allowing those additional licensing options, you read
-+it wrongly, and Richard Stallman agrees with me, when carefully read
-+you can see that those restrictions on additional terms do not apply
-+to the owner of the copyright, and my interpretation of this shall
-+govern for this license.
-+
-+[END LICENSING]
-+
-+Reiser4 is a file system based on dancing tree algorithms, and is
-+described at http://www.namesys.com
-+
-+mkfs.reiser4 and other utilities are on our webpage or wherever your
-+Linux provider put them. You really want to be running the latest
-+version off the website if you use fsck.
-+
-+Yes, if you update your reiser4 kernel module you do have to
-+recompile your kernel, most of the time. The errors you get will be
-+quite cryptic if you forget to do so.
-+
-+Hideous Commercial Pitch: Spread your development costs across other OS
-+vendors. Select from the best in the world, not the best in your
-+building, by buying from third party OS component suppliers. Leverage
-+the software component development power of the internet. Be the most
-+aggressive in taking advantage of the commercial possibilities of
-+decentralized internet development, and add value through your branded
-+integration that you sell as an operating system. Let your competitors
-+be the ones to compete against the entire internet by themselves. Be
-+hip, get with the new economic trend, before your competitors do. Send
-+email to reiser@namesys.com
-+
-+Hans Reiser was the primary architect of Reiser4, but a whole team
-+chipped their ideas in. He invested everything he had into Namesys
-+for 5.5 dark years of no money before Reiser3 finally started to work well
-+enough to bring in money. He owns the copyright.
-+
-+DARPA was the primary sponsor of Reiser4. DARPA does not endorse
-+Reiser4, it merely sponsors it. DARPA is, in solely Hans's personal
-+opinion, unique in its willingness to invest into things more
-+theoretical than the VC community can readily understand, and more
-+longterm than allows them to be sure that they will be the ones to
-+extract the economic benefits from. DARPA also integrated us into a
-+security community that transformed our security worldview.
-+
-+Vladimir Saveliev is our lead programmer, with us from the beginning,
-+and he worked long hours writing the cleanest code. This is why he is
-+now the lead programmer after years of commitment to our work. He
-+always made the effort to be the best he could be, and to make his
-+code the best that it could be. What resulted was quite remarkable. I
-+don't think that money can ever motivate someone to work the way he
-+did, he is one of the most selfless men I know.
-+
-+Alexander Lyamin was our sysadmin, and helped to educate us in
-+security issues. Moscow State University and IMT were very generous
-+in the internet access they provided us, and in lots of other little
-+ways that a generous institution can be.
-+
-+Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the
-+locking code, the block allocator, and finished the flushing code.
-+His code is always crystal clean and well structured.
-+
-+Nikita Danilov wrote the core of the balancing code, the core of the
-+plugins code, and the directory code. He worked a steady pace of long
-+hours that produced a whole lot of well abstracted code. He is our
-+senior computer scientist.
-+
-+Vladimir Demidov wrote the parser. Writing an in kernel parser is
-+something very few persons have the skills for, and it is thanks to
-+him that we can say that the parser is really not so big compared to
-+various bits of our other code, and making a parser work in the kernel
-+was not so complicated as everyone would imagine mainly because it was
-+him doing it...
-+
-+Joshua McDonald wrote the transaction manager, and the flush code.
-+The flush code unexpectedly turned out to be extremely hairy for reasons
-+you can read about on our web page, and he did a great job on an
-+extremely difficult task.
-+
-+Nina Reiser handled our accounting, government relations, and much
-+more.
-+
-+Ramon Reiser developed our website.
-+
-+Beverly Palmer drew our graphics.
-+
-+Vitaly Fertman developed librepair, userspace plugins repair code, fsck
-+and worked with Umka on developing libreiser4 and userspace plugins.
-+
-+Yury Umanets (aka Umka) developed libreiser4, userspace plugins and
-+userspace tools (reiser4progs).
-+
-+Oleg Drokin (aka Green) is the release manager who fixes everything.
-+It is so nice to have someone like that on the team. He (plus Chris
-+and Jeff) make it possible for the entire rest of the Namesys team to
-+focus on Reiser4, and he fixed a whole lot of Reiser4 bugs also. It
-+is just amazing to watch his talent for spotting bugs in action.
-+
-+Edward Shishkin wrote cryptcompress file plugin (which manages files
-+built of encrypted and(or) compressed bodies) and other plugins related
-+to transparent encryption and compression support.
-diff -urN linux-2.6.24.orig/fs/reiser4/reiser4.h linux-2.6.24/fs/reiser4/reiser4.h
---- linux-2.6.24.orig/fs/reiser4/reiser4.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/reiser4.h 2008-01-25 12:25:01.861363382 +0300
-@@ -0,0 +1,270 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* definitions of common constants used by reiser4 */
-+
-+#if !defined( __REISER4_H__ )
-+#define __REISER4_H__
-+
-+#include <asm/param.h> /* for HZ */
-+#include <linux/errno.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/hardirq.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * reiser4 compilation options.
-+ */
-+
-+#if defined(CONFIG_REISER4_DEBUG)
-+/* turn on assertion checks */
-+#define REISER4_DEBUG (1)
-+#else
-+#define REISER4_DEBUG (0)
-+#endif
-+
-+#if defined(CONFIG_ZLIB_INFLATE)
-+/* turn on zlib */
-+#define REISER4_ZLIB (1)
-+#else
-+#define REISER4_ZLIB (0)
-+#endif
-+
-+#if defined(CONFIG_CRYPTO_SHA256)
-+#define REISER4_SHA256 (1)
-+#else
-+#define REISER4_SHA256 (0)
-+#endif
-+
-+/*
-+ * Turn on large keys mode. In this mode (which is default), reiser4 key has 4
-+ * 8-byte components. In the old "small key" mode, it's 3 8-byte
-+ * components. Additional component, referred to as "ordering" is used to
-+ * order items from which given object is composed of. As such, ordering is
-+ * placed between locality and objectid. For directory item ordering contains
-+ * initial prefix of the file name this item is for. This sorts all directory
-+ * items within given directory lexicographically (but see
-+ * fibration.[ch]). For file body and stat-data, ordering contains initial
-+ * prefix of the name file was initially created with. In the common case
-+ * (files with single name) this allows to order file bodies and stat-datas in
-+ * the same order as their respective directory entries, thus speeding up
-+ * readdir.
-+ *
-+ * Note, that kernel can only mount file system with the same key size as one
-+ * it is compiled for, so flipping this option may render your data
-+ * inaccessible.
-+ */
-+#define REISER4_LARGE_KEY (1)
-+/*#define REISER4_LARGE_KEY (0)*/
-+
-+/*#define GUESS_EXISTS 1*/
-+
-+/*
-+ * PLEASE update fs/reiser4/kattr.c:show_options() when adding new compilation
-+ * option
-+ */
-+
-+extern const char *REISER4_SUPER_MAGIC_STRING;
-+extern const int REISER4_MAGIC_OFFSET; /* offset to magic string from the
-+ * beginning of device */
-+
-+/* here go tunable parameters that are not worth special entry in kernel
-+ configuration */
-+
-+/* default number of slots in coord-by-key caches */
-+#define CBK_CACHE_SLOTS (16)
-+/* how many elementary tree operation to carry on the next level */
-+#define CARRIES_POOL_SIZE (5)
-+/* size of pool of preallocated nodes for carry process. */
-+#define NODES_LOCKED_POOL_SIZE (5)
-+
-+#define REISER4_NEW_NODE_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
-+#define REISER4_NEW_EXTENT_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
-+#define REISER4_PASTE_FLAGS (COPI_GO_LEFT)
-+#define REISER4_INSERT_FLAGS (COPI_GO_LEFT)
-+
-+/* we are supporting reservation of disk space on uid basis */
-+#define REISER4_SUPPORT_UID_SPACE_RESERVATION (0)
-+/* we are supporting reservation of disk space for groups */
-+#define REISER4_SUPPORT_GID_SPACE_RESERVATION (0)
-+/* we are supporting reservation of disk space for root */
-+#define REISER4_SUPPORT_ROOT_SPACE_RESERVATION (0)
-+/* we use rapid flush mode, see flush.c for comments. */
-+#define REISER4_USE_RAPID_FLUSH (1)
-+
-+/*
-+ * set this to 0 if you don't want to use wait-for-flush in ->writepage().
-+ */
-+#define REISER4_USE_ENTD (1)
-+
-+/* key allocation is Plan-A */
-+#define REISER4_PLANA_KEY_ALLOCATION (1)
-+/* key allocation follows good old 3.x scheme */
-+#define REISER4_3_5_KEY_ALLOCATION (0)
-+
-+/* size of hash-table for znodes */
-+#define REISER4_ZNODE_HASH_TABLE_SIZE (1 << 13)
-+
-+/* number of buckets in lnode hash-table */
-+#define LNODE_HTABLE_BUCKETS (1024)
-+
-+/* some ridiculously high maximal limit on height of znode tree. This
-+ is used in declaration of various per level arrays and
-+ to allocate statistics gathering array for per-level stats. */
-+#define REISER4_MAX_ZTREE_HEIGHT (8)
-+
-+#define REISER4_PANIC_MSG_BUFFER_SIZE (1024)
-+
-+/* If array contains less than REISER4_SEQ_SEARCH_BREAK elements then,
-+ sequential search is on average faster than binary. This is because
-+ of better optimization and because sequential search is more CPU
-+ cache friendly. This number (25) was found by experiments on dual AMD
-+ Athlon(tm), 1400MHz.
-+
-+ NOTE: testing in kernel has shown that binary search is more effective than
-+ implied by results of the user level benchmarking. Probably because in the
-+ node keys are separated by other data. So value was adjusted after few
-+ tests. More thorough tuning is needed.
-+*/
-+#define REISER4_SEQ_SEARCH_BREAK (3)
-+
-+/* don't allow tree to be lower than this */
-+#define REISER4_MIN_TREE_HEIGHT (TWIG_LEVEL)
-+
-+/* NOTE NIKITA this is no longer used: maximal atom size is auto-adjusted to
-+ * available memory. */
-+/* Default value of maximal atom size. Can be overwritten by
-+ tmgr.atom_max_size mount option. By default infinity. */
-+#define REISER4_ATOM_MAX_SIZE ((unsigned)(~0))
-+
-+/* Default value of maximal atom age (in jiffies). After reaching this age
-+ atom will be forced to commit, either synchronously or asynchronously. Can
-+ be overwritten by tmgr.atom_max_age mount option. */
-+#define REISER4_ATOM_MAX_AGE (600 * HZ)
-+
-+/* sleeping period for ktxnmrgd */
-+#define REISER4_TXNMGR_TIMEOUT (5 * HZ)
-+
-+/* timeout to wait for ent thread in writepage. Default: 3 milliseconds. */
-+#define REISER4_ENTD_TIMEOUT (3 * HZ / 1000)
-+
-+/* start complaining after that many restarts in coord_by_key().
-+
-+ This either means incredibly heavy contention for this part of a tree, or
-+ some corruption or bug.
-+*/
-+#define REISER4_CBK_ITERATIONS_LIMIT (100)
-+
-+/* return -EIO after that many iterations in coord_by_key().
-+
-+ I have witnessed more than 800 iterations (in 30 thread test) before cbk
-+ finished. --nikita
-+*/
-+#define REISER4_MAX_CBK_ITERATIONS 500000
-+
-+/* put a per-inode limit on maximal number of directory entries with identical
-+ keys in hashed directory.
-+
-+ Disable this until inheritance interfaces stabilize: we need some way to
-+ set per directory limit.
-+*/
-+#define REISER4_USE_COLLISION_LIMIT (0)
-+
-+/* If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty leaf-level blocks it
-+ will force them to be relocated. */
-+#define FLUSH_RELOCATE_THRESHOLD 64
-+/* If flush can find a block allocation closer than at most FLUSH_RELOCATE_DISTANCE
-+ from the preceder it will relocate to that position. */
-+#define FLUSH_RELOCATE_DISTANCE 64
-+
-+/* If we have written this much or more blocks before encountering busy jnode
-+ in flush list - abort flushing hoping that next time we get called
-+ this jnode will be clean already, and we will save some seeks. */
-+#define FLUSH_WRITTEN_THRESHOLD 50
-+
-+/* The maximum number of nodes to scan left on a level during flush. */
-+#define FLUSH_SCAN_MAXNODES 10000
-+
-+/* per-atom limit of flushers */
-+#define ATOM_MAX_FLUSHERS (1)
-+
-+/* default tracing buffer size */
-+#define REISER4_TRACE_BUF_SIZE (1 << 15)
-+
-+/* what size units of IO we would like cp, etc., to use, in writing to
-+ reiser4. In bytes.
-+
-+ Can be overwritten by optimal_io_size mount option.
-+*/
-+#define REISER4_OPTIMAL_IO_SIZE (64 * 1024)
-+
-+/* see comments in inode.c:oid_to_uino() */
-+#define REISER4_UINO_SHIFT (1 << 30)
-+
-+/* Mark function argument as unused to avoid compiler warnings. */
-+#define UNUSED_ARG __attribute__((unused))
-+
-+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
-+#define NONNULL __attribute__((nonnull))
-+#else
-+#define NONNULL
-+#endif
-+
-+/* master super block offset in bytes.*/
-+#define REISER4_MASTER_OFFSET 65536
-+
-+/* size of VFS block */
-+#define VFS_BLKSIZE 512
-+/* number of bits in size of VFS block (512==2^9) */
-+#define VFS_BLKSIZE_BITS 9
-+
-+#define REISER4_I reiser4_inode_data
-+
-+/* implication */
-+#define ergo( antecedent, consequent ) ( !( antecedent ) || ( consequent ) )
-+/* logical equivalence */
-+#define equi( p1, p2 ) ( ergo( ( p1 ), ( p2 ) ) && ergo( ( p2 ), ( p1 ) ) )
-+
-+#define sizeof_array(x) ((int) (sizeof(x) / sizeof(x[0])))
-+
-+#define NOT_YET (0)
-+
-+/** Reiser4 specific error codes **/
-+
-+#define REISER4_ERROR_CODE_BASE 10000
-+
-+/* Neighbor is not available (side neighbor or parent) */
-+#define E_NO_NEIGHBOR (REISER4_ERROR_CODE_BASE)
-+
-+/* Node was not found in cache */
-+#define E_NOT_IN_CACHE (REISER4_ERROR_CODE_BASE + 1)
-+
-+/* node has no free space enough for completion of balancing operation */
-+#define E_NODE_FULL (REISER4_ERROR_CODE_BASE + 2)
-+
-+/* repeat operation */
-+#define E_REPEAT (REISER4_ERROR_CODE_BASE + 3)
-+
-+/* deadlock happens */
-+#define E_DEADLOCK (REISER4_ERROR_CODE_BASE + 4)
-+
-+/* operation cannot be performed, because it would block and non-blocking mode
-+ * was requested. */
-+#define E_BLOCK (REISER4_ERROR_CODE_BASE + 5)
-+
-+/* wait some event (depends on context), then repeat */
-+#define E_WAIT (REISER4_ERROR_CODE_BASE + 6)
-+
-+#endif /* __REISER4_H__ */
-+
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/safe_link.c linux-2.6.24/fs/reiser4/safe_link.c
---- linux-2.6.24.orig/fs/reiser4/safe_link.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/safe_link.c 2008-01-25 11:39:07.076243753 +0300
-@@ -0,0 +1,352 @@
-+/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Safe-links. */
-+
-+/*
-+ * Safe-links are used to maintain file system consistency during operations
-+ * that spawn multiple transactions. For example:
-+ *
-+ * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files
-+ * without user-visible names in the file system, but still opened by some
-+ * active process. What happens here is that unlink proper (i.e., removal
-+ * of the last file name) and file deletion (truncate of file body to zero
-+ * and deletion of stat-data, that happens when last file descriptor is
-+ * closed), may belong to different transactions T1 and T2. If a crash
-+ * happens after T1 commit, but before T2 commit, on-disk file system has
-+ * a file without name, that is, disk space leak.
-+ *
-+ * 2. Truncate. Truncate of large file may spawn multiple transactions. If
-+ * system crashes while truncate was in-progress, file is left partially
-+ * truncated, which violates "atomicity guarantees" of reiser4, viz. that
-+ * every system call is atomic.
-+ *
-+ * Safe-links address both above cases. Basically, safe-link is a way to post
-+ * some operation to be executed during commit of some other transaction than
-+ * current one. (Another way to look at the safe-link is to interpret it as a
-+ * logical logging.)
-+ *
-+ * Specifically, at the beginning of unlink safe-link is inserted in the
-+ * tree. This safe-link is normally removed by file deletion code (during
-+ * transaction T2 in the above terms). Truncate also inserts safe-link that is
-+ * normally removed when truncate operation is finished.
-+ *
-+ * This means, that in the case of "clean umount" there are no safe-links in
-+ * the tree. If safe-links are observed during mount, it means that (a) system
-+ * was terminated abnormally, and (b) safe-links correspond to the "pending"
-+ * (i.e., not finished) operations that were in-progress during system
-+ * termination. Each safe-link records enough information to complete
-+ * corresponding operation, and mount simply "replays" them (hence, the
-+ * analogy with the logical logging).
-+ *
-+ * Safe-links are implemented as blackbox items (see
-+ * plugin/item/blackbox.[ch]).
-+ *
-+ * For the reference: ext3 also has similar mechanism, it's called "an orphan
-+ * list" there.
-+ */
-+
-+#include "safe_link.h"
-+#include "debug.h"
-+#include "inode.h"
-+
-+#include "plugin/item/blackbox.h"
-+
-+#include <linux/fs.h>
-+
-+/*
-+ * On-disk format of safe-link.
-+ */
-+typedef struct safelink {
-+ reiser4_key sdkey; /* key of stat-data for the file safe-link is
-+ * for */
-+ d64 size; /* size to which file should be truncated */
-+} safelink_t;
-+
-+/*
-+ * locality where safe-link items are stored. Next to the objectid of root
-+ * directory.
-+ */
-+static oid_t safe_link_locality(reiser4_tree * tree)
-+{
-+ return get_key_objectid(get_super_private(tree->super)->df_plug->
-+ root_dir_key(tree->super)) + 1;
-+}
-+
-+/*
-+ Construct a key for the safe-link. Key has the following format:
-+
-+| 60 | 4 | 64 | 4 | 60 | 64 |
-++---------------+---+------------------+---+---------------+------------------+
-+| locality | 0 | 0 | 0 | objectid | link type |
-++---------------+---+------------------+---+---------------+------------------+
-+| | | | |
-+| 8 bytes | 8 bytes | 8 bytes | 8 bytes |
-+
-+ This is in large keys format. In small keys format second 8 byte chunk is
-+ out. Locality is a constant returned by safe_link_locality(). objectid is
-+ an oid of a file on which operation protected by this safe-link is
-+ performed. link-type is used to distinguish safe-links for different
-+ operations.
-+
-+ */
-+static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid,
-+ reiser4_safe_link_t link, reiser4_key * key)
-+{
-+ reiser4_key_init(key);
-+ set_key_locality(key, safe_link_locality(tree));
-+ set_key_objectid(key, oid);
-+ set_key_offset(key, link);
-+ return key;
-+}
-+
-+/*
-+ * how much disk space is necessary to insert and remove (in the
-+ * error-handling path) safe-link.
-+ */
-+static __u64 safe_link_tograb(reiser4_tree * tree)
-+{
-+ return
-+ /* insert safe link */
-+ estimate_one_insert_item(tree) +
-+ /* remove safe link */
-+ estimate_one_item_removal(tree) +
-+ /* drill to the leaf level during insertion */
-+ 1 + estimate_one_insert_item(tree) +
-+ /*
-+ * possible update of existing safe-link. Actually, if
-+ * safe-link existed already (we failed to remove it), then no
-+ * insertion is necessary, so this term is already "covered",
-+ * but for simplicity let's left it.
-+ */
-+ 1;
-+}
-+
-+/*
-+ * grab enough disk space to insert and remove (in the error-handling path)
-+ * safe-link.
-+ */
-+int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags)
-+{
-+ int result;
-+
-+ grab_space_enable();
-+ /* The sbinfo->delete_mutex can be taken here.
-+ * safe_link_release() should be called before leaving reiser4
-+ * context. */
-+ result =
-+ reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags);
-+ grab_space_enable();
-+ return result;
-+}
-+
-+/*
-+ * release unused disk space reserved by safe_link_grab().
-+ */
-+void safe_link_release(reiser4_tree * tree)
-+{
-+ reiser4_release_reserved(tree->super);
-+}
-+
-+/*
-+ * insert into tree safe-link for operation @link on inode @inode.
-+ */
-+int safe_link_add(struct inode *inode, reiser4_safe_link_t link)
-+{
-+ reiser4_key key;
-+ safelink_t sl;
-+ int length;
-+ int result;
-+ reiser4_tree *tree;
-+
-+ build_sd_key(inode, &sl.sdkey);
-+ length = sizeof sl.sdkey;
-+
-+ if (link == SAFE_TRUNCATE) {
-+ /*
-+ * for truncate we have to store final file length also,
-+ * expand item.
-+ */
-+ length += sizeof(sl.size);
-+ put_unaligned(cpu_to_le64(inode->i_size), &sl.size);
-+ }
-+ tree = reiser4_tree_by_inode(inode);
-+ build_link_key(tree, get_inode_oid(inode), link, &key);
-+
-+ result = store_black_box(tree, &key, &sl, length);
-+ if (result == -EEXIST)
-+ result = update_black_box(tree, &key, &sl, length);
-+ return result;
-+}
-+
-+/*
-+ * remove safe-link corresponding to the operation @link on the object with
-+ * objectid @oid from @tree.
-+ */
-+int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link)
-+{
-+ reiser4_key key;
-+
-+ return kill_black_box(tree, build_link_key(tree, oid, link, &key));
-+}
-+
-+/*
-+ * in-memory structure to keep information extracted from safe-link. This is
-+ * used to iterate over all safe-links.
-+ */
-+struct safe_link_context {
-+ reiser4_tree *tree; /* internal tree */
-+ reiser4_key key; /* safe-link key */
-+ reiser4_key sdkey; /* key of object stat-data */
-+ reiser4_safe_link_t link; /* safe-link type */
-+ oid_t oid; /* object oid */
-+ __u64 size; /* final size for truncate */
-+};
-+
-+/*
-+ * start iterating over all safe-links.
-+ */
-+static void safe_link_iter_begin(reiser4_tree * tree,
-+ struct safe_link_context * ctx)
-+{
-+ ctx->tree = tree;
-+ reiser4_key_init(&ctx->key);
-+ set_key_locality(&ctx->key, safe_link_locality(tree));
-+ set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key()));
-+ set_key_offset(&ctx->key, get_key_offset(reiser4_max_key()));
-+}
-+
-+/*
-+ * return next safe-link.
-+ */
-+static int safe_link_iter_next(struct safe_link_context * ctx)
-+{
-+ int result;
-+ safelink_t sl;
-+
-+ result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0);
-+ if (result == 0) {
-+ ctx->oid = get_key_objectid(&ctx->key);
-+ ctx->link = get_key_offset(&ctx->key);
-+ ctx->sdkey = sl.sdkey;
-+ if (ctx->link == SAFE_TRUNCATE)
-+ ctx->size = le64_to_cpu(get_unaligned(&sl.size));
-+ }
-+ return result;
-+}
-+
-+/*
-+ * check whether there are any more safe-links left in the tree.
-+ */
-+static int safe_link_iter_finished(struct safe_link_context * ctx)
-+{
-+ return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree);
-+}
-+
-+/*
-+ * finish safe-link iteration.
-+ */
-+static void safe_link_iter_end(struct safe_link_context * ctx)
-+{
-+ /* nothing special */
-+}
-+
-+/*
-+ * process single safe-link.
-+ */
-+static int process_safelink(struct super_block *super, reiser4_safe_link_t link,
-+ reiser4_key * sdkey, oid_t oid, __u64 size)
-+{
-+ struct inode *inode;
-+ int result;
-+
-+ /*
-+ * obtain object inode by reiser4_iget(), then call object plugin
-+ * ->safelink() method to do actual work, then delete safe-link on
-+ * success.
-+ */
-+ inode = reiser4_iget(super, sdkey, 1);
-+ if (!IS_ERR(inode)) {
-+ file_plugin *fplug;
-+
-+ fplug = inode_file_plugin(inode);
-+ assert("nikita-3428", fplug != NULL);
-+ assert("", oid == get_inode_oid(inode));
-+ if (fplug->safelink != NULL) {
-+ /* reiser4_txn_restart_current is not necessary because
-+ * mounting is signle thread. However, without it
-+ * deadlock detection code will complain (see
-+ * nikita-3361). */
-+ reiser4_txn_restart_current();
-+ result = fplug->safelink(inode, link, size);
-+ } else {
-+ warning("nikita-3430",
-+ "Cannot handle safelink for %lli",
-+ (unsigned long long)oid);
-+ reiser4_print_key("key", sdkey);
-+ result = 0;
-+ }
-+ if (result != 0) {
-+ warning("nikita-3431",
-+ "Error processing safelink for %lli: %i",
-+ (unsigned long long)oid, result);
-+ }
-+ reiser4_iget_complete(inode);
-+ iput(inode);
-+ if (result == 0) {
-+ result = safe_link_grab(reiser4_get_tree(super), BA_CAN_COMMIT);
-+ if (result == 0)
-+ result =
-+ safe_link_del(reiser4_get_tree(super), oid, link);
-+ safe_link_release(reiser4_get_tree(super));
-+ /*
-+ * restart transaction: if there was large number of
-+ * safe-links, their processing may fail to fit into
-+ * single transaction.
-+ */
-+ if (result == 0)
-+ reiser4_txn_restart_current();
-+ }
-+ } else
-+ result = PTR_ERR(inode);
-+ return result;
-+}
-+
-+/*
-+ * iterate over all safe-links in the file-system processing them one by one.
-+ */
-+int process_safelinks(struct super_block *super)
-+{
-+ struct safe_link_context ctx;
-+ int result;
-+
-+ if (rofs_super(super))
-+ /* do nothing on the read-only file system */
-+ return 0;
-+ safe_link_iter_begin(&get_super_private(super)->tree, &ctx);
-+ result = 0;
-+ do {
-+ result = safe_link_iter_next(&ctx);
-+ if (safe_link_iter_finished(&ctx) || result == -ENOENT) {
-+ result = 0;
-+ break;
-+ }
-+ if (result == 0)
-+ result = process_safelink(super, ctx.link,
-+ &ctx.sdkey, ctx.oid,
-+ ctx.size);
-+ } while (result == 0);
-+ safe_link_iter_end(&ctx);
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/safe_link.h linux-2.6.24/fs/reiser4/safe_link.h
---- linux-2.6.24.orig/fs/reiser4/safe_link.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/safe_link.h 2008-01-25 11:39:07.080244783 +0300
-@@ -0,0 +1,29 @@
-+/* Copyright 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Safe-links. See safe_link.c for details. */
-+
-+#if !defined( __FS_SAFE_LINK_H__ )
-+#define __FS_SAFE_LINK_H__
-+
-+#include "tree.h"
-+
-+int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags);
-+void safe_link_release(reiser4_tree * tree);
-+int safe_link_add(struct inode *inode, reiser4_safe_link_t link);
-+int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link);
-+
-+int process_safelinks(struct super_block *super);
-+
-+/* __FS_SAFE_LINK_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/seal.c linux-2.6.24/fs/reiser4/seal.c
---- linux-2.6.24.orig/fs/reiser4/seal.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/seal.c 2008-01-25 11:39:07.080244783 +0300
-@@ -0,0 +1,218 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+/* Seals implementation. */
-+/* Seals are "weak" tree pointers. They are analogous to tree coords in
-+ allowing to bypass tree traversal. But normal usage of coords implies that
-+ node pointed to by coord is locked, whereas seals don't keep a lock (or
-+ even a reference) to znode. Instead, each znode contains a version number,
-+ increased on each znode modification. This version number is copied into a
-+ seal when seal is created. Later, one can "validate" seal by calling
-+ reiser4_seal_validate(). If znode is in cache and its version number is
-+ still the same, seal is "pristine" and coord associated with it can be
-+ re-used immediately.
-+
-+ If, on the other hand, znode is out of cache, or it is obviously different
-+ one from the znode seal was initially attached to (for example, it is on
-+ the different level, or is being removed from the tree), seal is
-+ irreparably invalid ("burned") and tree traversal has to be repeated.
-+
-+ Otherwise, there is some hope, that while znode was modified (and seal was
-+ "broken" as a result), key attached to the seal is still in the node. This
-+ is checked by first comparing this key with delimiting keys of node and, if
-+ key is ok, doing intra-node lookup.
-+
-+ Znode version is maintained in the following way:
-+
-+ there is reiser4_tree.znode_epoch counter. Whenever new znode is created,
-+ znode_epoch is incremented and its new value is stored in ->version field
-+ of new znode. Whenever znode is dirtied (which means it was probably
-+ modified), znode_epoch is also incremented and its new value is stored in
-+ znode->version. This is done so, because just incrementing znode->version
-+ on each update is not enough: it may so happen, that znode get deleted, new
-+ znode is allocated for the same disk block and gets the same version
-+ counter, tricking seal code into false positive.
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "super.h"
-+
-+static znode *seal_node(const seal_t * seal);
-+static int seal_matches(const seal_t * seal, znode * node);
-+
-+/* initialise seal. This can be called several times on the same seal. @coord
-+ and @key can be NULL. */
-+void reiser4_seal_init(seal_t * seal /* seal to initialise */ ,
-+ const coord_t * coord /* coord @seal will be
-+ * attached to */ ,
-+ const reiser4_key * key UNUSED_ARG /* key @seal will be
-+ * attached to */ )
-+{
-+ assert("nikita-1886", seal != NULL);
-+ memset(seal, 0, sizeof *seal);
-+ if (coord != NULL) {
-+ znode *node;
-+
-+ node = coord->node;
-+ assert("nikita-1987", node != NULL);
-+ spin_lock_znode(node);
-+ seal->version = node->version;
-+ assert("nikita-1988", seal->version != 0);
-+ seal->block = *znode_get_block(node);
-+#if REISER4_DEBUG
-+ seal->coord1 = *coord;
-+ if (key != NULL)
-+ seal->key = *key;
-+#endif
-+ spin_unlock_znode(node);
-+ }
-+}
-+
-+/* finish with seal */
-+void reiser4_seal_done(seal_t * seal /* seal to clear */ )
-+{
-+ assert("nikita-1887", seal != NULL);
-+ seal->version = 0;
-+}
-+
-+/* true if seal was initialised */
-+int reiser4_seal_is_set(const seal_t * seal /* seal to query */ )
-+{
-+ assert("nikita-1890", seal != NULL);
-+ return seal->version != 0;
-+}
-+
-+#if REISER4_DEBUG
-+/* helper function for reiser4_seal_validate(). It checks that item at @coord
-+ * has expected key. This is to detect cases where node was modified but wasn't
-+ * marked dirty. */
-+static inline int check_seal_match(const coord_t * coord /* coord to check */ ,
-+ const reiser4_key * k /* expected key */ )
-+{
-+ reiser4_key ukey;
-+
-+ return (coord->between != AT_UNIT) ||
-+ /* FIXME-VS: we only can compare keys for items whose units
-+ represent exactly one key */
-+ ((coord_is_existing_unit(coord))
-+ && (item_is_extent(coord)
-+ || keyeq(k, unit_key_by_coord(coord, &ukey))))
-+ || ((coord_is_existing_unit(coord)) && (item_is_ctail(coord))
-+ && keyge(k, unit_key_by_coord(coord, &ukey)));
-+}
-+#endif
-+
-+/* this is used by reiser4_seal_validate. It accepts return value of
-+ * longterm_lock_znode and returns 1 if it can be interpreted as seal
-+ * validation failure. For instance, when longterm_lock_znode returns -EINVAL,
-+ * reiser4_seal_validate returns -E_REPEAT and caller will call tree search.
-+ * We cannot do this in longterm_lock_znode(), because sometimes we want to
-+ * distinguish between -EINVAL and -E_REPEAT. */
-+static int should_repeat(int return_code)
-+{
-+ return return_code == -EINVAL;
-+}
-+
-+/* (re-)validate seal.
-+
-+ Checks whether seal is pristine, and try to revalidate it if possible.
-+
-+ If seal was burned, or broken irreparably, return -E_REPEAT.
-+
-+ NOTE-NIKITA currently reiser4_seal_validate() returns -E_REPEAT if key we are
-+ looking for is in range of keys covered by the sealed node, but item wasn't
-+ found by node ->lookup() method. Alternative is to return -ENOENT in this
-+ case, but this would complicate callers logic.
-+
-+*/
-+int reiser4_seal_validate(seal_t * seal /* seal to validate */,
-+ coord_t * coord /* coord to validate against */,
-+ const reiser4_key * key /* key to validate against */,
-+ lock_handle * lh /* resulting lock handle */,
-+ znode_lock_mode mode /* lock node */,
-+ znode_lock_request request /* locking priority */)
-+{
-+ znode *node;
-+ int result;
-+
-+ assert("nikita-1889", seal != NULL);
-+ assert("nikita-1881", reiser4_seal_is_set(seal));
-+ assert("nikita-1882", key != NULL);
-+ assert("nikita-1883", coord != NULL);
-+ assert("nikita-1884", lh != NULL);
-+ assert("nikita-1885", keyeq(&seal->key, key));
-+ assert("nikita-1989", coords_equal(&seal->coord1, coord));
-+
-+ /* obtain znode by block number */
-+ node = seal_node(seal);
-+ if (node != NULL) {
-+ /* znode was in cache, lock it */
-+ result = longterm_lock_znode(lh, node, mode, request);
-+ zput(node);
-+ if (result == 0) {
-+ if (seal_matches(seal, node)) {
-+ /* if seal version and znode version
-+ coincide */
-+ ON_DEBUG(coord_update_v(coord));
-+ assert("nikita-1990",
-+ node == seal->coord1.node);
-+ assert("nikita-1898",
-+ WITH_DATA_RET(coord->node, 1,
-+ check_seal_match(coord,
-+ key)));
-+ } else
-+ result = RETERR(-E_REPEAT);
-+ }
-+ if (result != 0) {
-+ if (should_repeat(result))
-+ result = RETERR(-E_REPEAT);
-+ /* unlock node on failure */
-+ done_lh(lh);
-+ }
-+ } else {
-+ /* znode wasn't in cache */
-+ result = RETERR(-E_REPEAT);
-+ }
-+ return result;
-+}
-+
-+/* helpers functions */
-+
-+/* obtain reference to znode seal points to, if in cache */
-+static znode *seal_node(const seal_t * seal /* seal to query */ )
-+{
-+ assert("nikita-1891", seal != NULL);
-+ return zlook(current_tree, &seal->block);
-+}
-+
-+/* true if @seal version and @node version coincide */
-+static int seal_matches(const seal_t * seal /* seal to check */ ,
-+ znode * node /* node to check */ )
-+{
-+ int result;
-+
-+ assert("nikita-1991", seal != NULL);
-+ assert("nikita-1993", node != NULL);
-+
-+ spin_lock_znode(node);
-+ result = (seal->version == node->version);
-+ spin_unlock_znode(node);
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/seal.h linux-2.6.24/fs/reiser4/seal.h
---- linux-2.6.24.orig/fs/reiser4/seal.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/seal.h 2008-01-25 11:39:07.080244783 +0300
-@@ -0,0 +1,49 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Declaration of seals: "weak" tree pointers. See seal.c for comments. */
-+
-+#ifndef __SEAL_H__
-+#define __SEAL_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+
-+/* for __u?? types */
-+/*#include <linux/types.h>*/
-+
-+/* seal. See comment at the top of seal.c */
-+typedef struct seal_s {
-+ /* version of znode recorded at the time of seal creation */
-+ __u64 version;
-+ /* block number of znode attached to this seal */
-+ reiser4_block_nr block;
-+#if REISER4_DEBUG
-+ /* the two fields below exist only in debugging builds */
-+ /* coord this seal is attached to. For debugging. */
-+ coord_t coord1;
-+ /* key this seal is attached to. For debugging. */
-+ reiser4_key key;
-+#endif
-+} seal_t;
-+
-+extern void reiser4_seal_init(seal_t *, const coord_t *, const reiser4_key *);
-+extern void reiser4_seal_done(seal_t *);
-+extern int reiser4_seal_is_set(const seal_t *);
-+extern int reiser4_seal_validate(seal_t *, coord_t *,
-+ const reiser4_key *, lock_handle *,
-+ znode_lock_mode mode, znode_lock_request request);
-+
-+/* __SEAL_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/search.c linux-2.6.24/fs/reiser4/search.c
---- linux-2.6.24.orig/fs/reiser4/search.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/search.c 2008-01-25 11:39:07.084245813 +0300
-@@ -0,0 +1,1611 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "reiser4.h"
-+#include "super.h"
-+#include "inode.h"
-+
-+#include <linux/slab.h>
-+
-+static const char *bias_name(lookup_bias bias);
-+
-+/* tree searching algorithm, intranode searching algorithms are in
-+ plugin/node/ */
-+
-+/* tree lookup cache
-+ *
-+ * The coord by key cache consists of small list of recently accessed nodes
-+ * maintained according to the LRU discipline. Before doing real top-to-down
-+ * tree traversal this cache is scanned for nodes that can contain key
-+ * requested.
-+ *
-+ * The efficiency of coord cache depends heavily on locality of reference for
-+ * tree accesses. Our user level simulations show reasonably good hit ratios
-+ * for coord cache under most loads so far.
-+ */
-+
-+/* Bring one coord cache slot into its initial state: no node attached and
-+   an initialized ->lru list head */
-+static void cbk_cache_init_slot(cbk_cache_slot *slot)
-+{
-+	assert("nikita-345", slot != NULL);
-+
-+	slot->node = NULL;
-+	INIT_LIST_HEAD(&slot->lru);
-+}
-+
-+/* Initialize coord cache
-+
-+   Allocates array of @cache->nr_slots slots (->nr_slots is read here, so it
-+   has to be set by the caller beforehand), initializes every slot, links all
-+   of them into @cache->lru and initializes @cache->guard.
-+
-+   Returns 0 on success, -ENOMEM if the slot array cannot be allocated. */
-+int cbk_cache_init(cbk_cache *cache /* cache to init */ )
-+{
-+	int i;
-+
-+	assert("nikita-346", cache != NULL);
-+
-+	/* kcalloc() instead of kmalloc(size * nr): the multiplication is
-+	   checked for overflow there, so an absurdly large ->nr_slots results
-+	   in -ENOMEM rather than in an undersized array. */
-+	cache->slot = kcalloc(cache->nr_slots, sizeof(cbk_cache_slot),
-+			      reiser4_ctx_gfp_mask_get());
-+	if (cache->slot == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	INIT_LIST_HEAD(&cache->lru);
-+	for (i = 0; i < cache->nr_slots; ++i) {
-+		cbk_cache_init_slot(cache->slot + i);
-+		/* all slots start out unused, order among them is arbitrary */
-+		list_add_tail(&((cache->slot + i)->lru), &cache->lru);
-+	}
-+	rwlock_init(&cache->guard);
-+	return 0;
-+}
-+
-+/* release slot array of cbk cache. Nothing is done if the array pointer is
-+   NULL; after release the pointer is reset to NULL. */
-+void cbk_cache_done(cbk_cache * cache /* cache to release */ )
-+{
-+	assert("nikita-2493", cache != NULL);
-+
-+	if (cache->slot == NULL)
-+		return;
-+
-+	kfree(cache->slot);
-+	cache->slot = NULL;
-+}
-+
-+/* macro to iterate over all cbk cache slots, following @cache->lru from its
-+   first entry. Every use of @slot is parenthesized, so that any lvalue
-+   expression can be passed as iterator. */
-+#define for_all_slots(cache, slot)						\
-+	for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru);	\
-+	     &(cache)->lru != &(slot)->lru;					\
-+	     (slot) = list_entry((slot)->lru.next, cbk_cache_slot, lru))
-+
-+#if REISER4_DEBUG
-+/* this function assures that [cbk-cache-invariant] invariant holds:
-+
-+   - in LRU list all `used' slots (->node != NULL) go first, followed by
-+     `unused' ones;
-+   - no node is cached in more than one slot.
-+
-+   Returns 1 if invariant holds (trivially so when cache has no slots), 0
-+   otherwise. Takes @cache->guard for reading while scanning. */
-+static int cbk_cache_invariant(const cbk_cache *cache)
-+{
-+	cbk_cache_slot *slot;
-+	int result;
-+	int unused;
-+
-+	/* check the pointer before it is dereferenced for the first time */
-+	assert("nikita-2469", cache != NULL);
-+
-+	if (cache->nr_slots == 0)
-+		return 1;
-+
-+	unused = 0;
-+	result = 1;
-+	/* lock functions take non-const pointer, hence the cast */
-+	read_lock(&((cbk_cache *)cache)->guard);
-+	for_all_slots(cache, slot) {
-+		/* in LRU first go all `used' slots followed by `unused' */
-+		if (unused && (slot->node != NULL))
-+			result = 0;
-+		if (slot->node == NULL)
-+			unused = 1;
-+		else {
-+			cbk_cache_slot *scan;
-+
-+			/* all cached nodes are different: compare this slot
-+			   against every slot after it in the list */
-+			scan = slot;
-+			while (result) {
-+				scan = list_entry(scan->lru.next,
-+						  cbk_cache_slot, lru);
-+				if (&cache->lru == &scan->lru)
-+					break;
-+				if (slot->node == scan->node)
-+					result = 0;
-+			}
-+		}
-+		if (!result)
-+			break;
-+	}
-+	read_unlock(&((cbk_cache *)cache)->guard);
-+	return result;
-+}
-+
-+#endif
-+
-+/* Drop @node from the coord cache of @tree, if it is there: the slot that
-+   holds it is marked unused and moved to the tail of the LRU list. At most
-+   one slot is touched. */
-+void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
-+			  reiser4_tree * tree /* tree to remove node from */ )
-+{
-+	cbk_cache *cache;
-+	int idx;
-+
-+	assert("nikita-350", node != NULL);
-+	assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
-+
-+	cache = &tree->cbk_cache;
-+	assert("nikita-2470", cbk_cache_invariant(cache));
-+
-+	write_lock(&(cache->guard));
-+	for (idx = 0; idx < cache->nr_slots; ++idx) {
-+		cbk_cache_slot *cur;
-+
-+		cur = &cache->slot[idx];
-+		if (cur->node != node)
-+			continue;
-+		/* found it: unused slots live at the end of the LRU list */
-+		cur->node = NULL;
-+		list_move_tail(&cur->lru, &cache->lru);
-+		break;
-+	}
-+	write_unlock(&(cache->guard));
-+	assert("nikita-2471", cbk_cache_invariant(cache));
-+}
-+
-+/* remember @node in the cbk-cache of its tree. If @node already occupies a
-+   slot, that slot is simply moved to the head of the LRU list; otherwise the
-+   slot at the tail of the LRU list is taken over for @node. Does nothing
-+   when the cache has no slots. */
-+static void cbk_cache_add(const znode *node /* node to add to the cache */ )
-+{
-+	cbk_cache *cache;
-+	cbk_cache_slot *target;
-+	int idx;
-+
-+	assert("nikita-352", node != NULL);
-+
-+	cache = &znode_get_tree(node)->cbk_cache;
-+	assert("nikita-2472", cbk_cache_invariant(cache));
-+
-+	if (cache->nr_slots == 0)
-+		return;
-+
-+	write_lock(&(cache->guard));
-+
-+	/* is this node in the cache already? */
-+	target = NULL;
-+	for (idx = 0; idx < cache->nr_slots; ++idx) {
-+		if (cache->slot[idx].node == node) {
-+			target = &cache->slot[idx];
-+			break;
-+		}
-+	}
-+	if (target == NULL) {
-+		/* no: reuse least recently used slot */
-+		target = list_entry(cache->lru.prev, cbk_cache_slot, lru);
-+		target->node = (znode *) node;
-+	}
-+	/* either way, the slot becomes the most recently used one */
-+	list_move(&target->lru, &cache->lru);
-+
-+	write_unlock(&(cache->guard));
-+	assert("nikita-2473", cbk_cache_invariant(cache));
-+}
-+
-+static int setup_delimiting_keys(cbk_handle * h);
-+static lookup_result coord_by_handle(cbk_handle * handle);
-+static lookup_result traverse_tree(cbk_handle * h);
-+static int cbk_cache_search(cbk_handle * h);
-+
-+static level_lookup_result cbk_level_lookup(cbk_handle * h);
-+static level_lookup_result cbk_node_lookup(cbk_handle * h);
-+
-+/* helper functions */
-+
-+static void update_stale_dk(reiser4_tree * tree, znode * node);
-+
-+/* release parent node during traversal */
-+static void put_parent(cbk_handle * h);
-+/* check consistency of fields */
-+static int sanity_check(cbk_handle * h);
-+/* release resources in handle */
-+static void hput(cbk_handle * h);
-+
-+static level_lookup_result search_to_left(cbk_handle * h);
-+
-+/* bundle the numerous arguments of coord_by_key() into a single cbk_handle.
-+ *
-+ * @handle is zeroed first, so every field that is not assigned here starts
-+ * out as 0/NULL. CBK_TRUST_DK and CBK_USE_CRABLOCK are always or-ed into
-+ * @flags (see comment in tree.h:cbk_flags). Returns @handle. */
-+static cbk_handle *cbk_pack(cbk_handle * handle,
-+			    reiser4_tree * tree,
-+			    const reiser4_key * key,
-+			    coord_t * coord,
-+			    lock_handle * active_lh,
-+			    lock_handle * parent_lh,
-+			    znode_lock_mode lock_mode,
-+			    lookup_bias bias,
-+			    tree_level lock_level,
-+			    tree_level stop_level,
-+			    __u32 flags, ra_info_t * info)
-+{
-+	memset(handle, 0, sizeof(*handle));
-+
-+	/* what is searched for, and where the answer goes */
-+	handle->tree = tree;
-+	handle->key = key;
-+	handle->coord = coord;
-+	handle->bias = bias;
-+
-+	/* locking parameters */
-+	handle->lock_mode = lock_mode;
-+	handle->lock_level = lock_level;
-+	handle->stop_level = stop_level;
-+	handle->active_lh = active_lh;
-+	handle->parent_lh = parent_lh;
-+
-+	/* search flags and readahead hints */
-+	handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
-+	handle->ra_info = info;
-+
-+	return handle;
-+}
-+
-+/* main tree lookup procedure
-+
-+   Packs arguments into cbk_handle and passes it to coord_by_handle(), which
-+   checks coord cache first and does real tree traversal if key is not found
-+   there.
-+
-+   As we have extents on the twig level, @lock_level and @stop_level can
-+   be different from LEAF_LEVEL and each other.
-+
-+   Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
-+   long term locks) while calling this.
-+
-+   Arguments:
-+
-+   @tree: tree to perform search in. Usually this tree is part of
-+   file-system super-block.
-+
-+   @key: key to look for.
-+
-+   @coord: where to store found position in a tree. Fields in "coord" are
-+   only valid if coord_by_key() returned "CBK_COORD_FOUND".
-+
-+   @lh: resulting lock handle. It is (re-)initialized here.
-+
-+   @lock_mode: type of lookup we want on node. Pass ZNODE_READ_LOCK here if
-+   you only want to read item found and ZNODE_WRITE_LOCK if you want to
-+   modify it.
-+
-+   @bias: what to return if coord with exactly the @key is not in the tree.
-+
-+   @lock_level: tree level where to start taking @lock type of locks.
-+
-+   @stop_level: tree level to stop. Pass LEAF_LEVEL or TWIG_LEVEL here. Item
-+   being looked for has to be between @lock_level and @stop_level,
-+   inclusive.
-+
-+   @flags: search flags.
-+
-+   @info: information about desired tree traversal readahead.
-+*/
-+lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key,
-+			   coord_t * coord, lock_handle * lh,
-+			   znode_lock_mode lock_mode, lookup_bias bias,
-+			   tree_level lock_level, tree_level stop_level,
-+			   __u32 flags, ra_info_t * info)
-+{
-+	lock_handle parent_lh;
-+	cbk_handle handle;
-+	lookup_result result;
-+
-+	init_lh(lh);
-+	init_lh(&parent_lh);
-+
-+	assert("nikita-3023", reiser4_schedulable());
-+
-+	assert("nikita-353", tree != NULL);
-+	assert("nikita-354", key != NULL);
-+	assert("nikita-355", coord != NULL);
-+	assert("nikita-356", (bias == FIND_EXACT)
-+	       || (bias == FIND_MAX_NOT_MORE_THAN));
-+	assert("nikita-357", stop_level >= LEAF_LEVEL);
-+	/* no locks can be held during tree traversal */
-+	assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
-+
-+	cbk_pack(&handle, tree, key, coord, lh, &parent_lh,
-+		 lock_mode, bias, lock_level, stop_level, flags, info);
-+
-+	result = coord_by_handle(&handle);
-+	/* on anything but an error, @coord and @lh refer to the same node */
-+	assert("nikita-3247",
-+	       ergo(!IS_CBKERR(result), coord->node == lh->node));
-+	return result;
-+}
-+
-+/* like coord_by_key(), but the handle also carries @object, which lets
-+ * traversal start from vroot of @object rather than from tree root. Tree to
-+ * search in is the tree @object belongs to, or current tree if @object is
-+ * NULL. */
-+lookup_result reiser4_object_lookup(struct inode * object,
-+				    const reiser4_key * key,
-+				    coord_t * coord,
-+				    lock_handle * lh,
-+				    znode_lock_mode lock_mode,
-+				    lookup_bias bias,
-+				    tree_level lock_level,
-+				    tree_level stop_level, __u32 flags,
-+				    ra_info_t * info)
-+{
-+	lock_handle parent_lh;
-+	cbk_handle handle;
-+	reiser4_tree *tree;
-+	lookup_result result;
-+
-+	init_lh(lh);
-+	init_lh(&parent_lh);
-+
-+	assert("nikita-3023", reiser4_schedulable());
-+
-+	assert("nikita-354", key != NULL);
-+	assert("nikita-355", coord != NULL);
-+	assert("nikita-356", (bias == FIND_EXACT)
-+	       || (bias == FIND_MAX_NOT_MORE_THAN));
-+	assert("nikita-357", stop_level >= LEAF_LEVEL);
-+	/* no locks can be held during tree search by key */
-+	assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
-+
-+	if (object != NULL)
-+		tree = reiser4_tree_by_inode(object);
-+	else
-+		tree = current_tree;
-+
-+	cbk_pack(&handle, tree, key, coord, lh, &parent_lh,
-+		 lock_mode, bias, lock_level, stop_level, flags, info);
-+	/* cbk_pack() zeroed the handle, ->object has to be set afterwards */
-+	handle.object = object;
-+
-+	result = coord_by_handle(&handle);
-+	assert("nikita-3247",
-+	       ergo(!IS_CBKERR(result), coord->node == lh->node));
-+	return result;
-+}
-+
-+/* lookup by cbk_handle. Common part of coord_by_key() and
-+   reiser4_object_lookup().
-+
-+   cbk_cache (look-aside cache of recent lookups for our tree) is consulted
-+   first. If cbk_cache_search() returns 0, the answer it left in
-+   @handle->result is returned; otherwise real tree traversal is started. */
-+static lookup_result coord_by_handle(cbk_handle * handle)
-+{
-+	if (cbk_cache_search(handle) != 0)
-+		/* cache was of no help */
-+		return traverse_tree(handle);
-+
-+	return handle->result;
-+}
-+
-+/* Execute actor for each item (or unit, depending on @through_units_p),
-+ starting from @coord, right-ward, until either:
-+
-+ - end of the tree is reached
-+ - unformatted node is met
-+ - error occurred
-+ - @actor returns 0 or less
-+
-+ Error code, or last actor return value is returned.
-+
-+ -ENOENT is returned, without calling @actor, if @coord is not set to an
-+ existing unit on entry.
-+
-+ While scanning, node @coord is in is kept loaded (zload()); when scan moves
-+ to the right neighbor, @lh is switched to the lock on that neighbor. On
-+ every return path a node that was successfully loaded here has been
-+ released by zrelse().
-+
-+ This is used by plugin/dir/hashe_dir.c:reiser4_find_entry() to move through
-+ sequence of entries with identical keys and alikes.
-+*/
-+int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
-+ coord_t * coord /* coord to start from */ ,
-+ lock_handle * lh /* lock handle to start with and to
-+ * update along the way */ ,
-+ tree_iterate_actor_t actor /* function to call on each
-+ * item/unit */ ,
-+ void *arg /* argument to pass to @actor */ ,
-+ znode_lock_mode mode /* lock mode on scanned nodes */ ,
-+ int through_units_p /* call @actor on each item or on
-+ * each unit */ )
-+{
-+ int result;
-+
-+ assert("nikita-1143", tree != NULL);
-+ assert("nikita-1145", coord != NULL);
-+ assert("nikita-1146", lh != NULL);
-+ assert("nikita-1147", actor != NULL);
-+
-+ result = zload(coord->node);
-+ /* note: item plugin cached in @coord is cleared whether or not load
-+ succeeded */
-+ coord_clear_iplug(coord);
-+ if (result != 0)
-+ return result;
-+ if (!coord_is_existing_unit(coord)) {
-+ zrelse(coord->node);
-+ return -ENOENT;
-+ }
-+ while ((result = actor(tree, coord, lh, arg)) > 0) {
-+ /* move further */
-+ /* non-zero from coord_next_{unit,item}() is taken to mean that
-+ there is nothing more in this node */
-+ if ((through_units_p && coord_next_unit(coord)) ||
-+ (!through_units_p && coord_next_item(coord))) {
-+ /* loop to skip over empty nodes */
-+ do {
-+ lock_handle couple;
-+
-+ /* move to the next node */
-+ init_lh(&couple);
-+ result =
-+ reiser4_get_right_neighbor(&couple,
-+ coord->node,
-+ (int)mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ /* current node is unloaded here in any case; it
-+ remains locked through @lh */
-+ zrelse(coord->node);
-+ if (result == 0) {
-+
-+ result = zload(couple.node);
-+ if (result != 0) {
-+ /* neither node is loaded at this
-+ point, only the neighbor lock
-+ has to be dropped */
-+ done_lh(&couple);
-+ return result;
-+ }
-+
-+ coord_init_first_unit(coord,
-+ couple.node);
-+ /* drop lock on the old node and let @lh
-+ take over the lock on the neighbor */
-+ done_lh(lh);
-+ move_lh(lh, &couple);
-+ } else
-+ return result;
-+ } while (node_is_empty(coord->node));
-+ }
-+
-+ assert("nikita-1149", coord_is_existing_unit(coord));
-+ }
-+ /* @actor asked to stop (result <= 0): unload the node we stopped in */
-+ zrelse(coord->node);
-+ return result;
-+}
-+
-+/* take long term lock of mode @mode and priority @pri on uber znode of
-+   @tree, using lock handle @lh. Result of longterm_lock_znode() is returned
-+   as is. */
-+int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
-+		   znode_lock_request pri, lock_handle * lh)
-+{
-+	return longterm_lock_znode(lh, tree->uber, mode, pri);
-+}
-+
-+/* true if @key is strictly within @node
-+
-+   @key has to be below right delimiting key of @node in any case. As for
-+   left delimiting key: if @isunique is set, comparing it with @key must not
-+   give GREATER_THAN; otherwise the comparison must give LESS_THAN, because
-+   for possibly non-unique key an item at the very edge of @node may have
-+   its twins in the neighbor.
-+*/
-+static int znode_contains_key_strict(znode * node /* node to check key
-+						    * against */ ,
-+				     const reiser4_key *
-+				     key /* key to check */ ,
-+				     int isunique)
-+{
-+	int ld_vs_key;
-+
-+	assert("nikita-1760", node != NULL);
-+	assert("nikita-1722", key != NULL);
-+
-+	/* at or beyond the right edge: not ours */
-+	if (keyge(key, &node->rd_key))
-+		return 0;
-+
-+	ld_vs_key = keycmp(&node->ld_key, key);
-+
-+	return isunique ? (ld_vs_key != GREATER_THAN)
-+			: (ld_vs_key == LESS_THAN);
-+}
-+
-+/*
-+ * Virtual Root (vroot) code.
-+ *
-+ * For given file system object (e.g., regular file or directory) let's
-+ * define its "virtual root" as lowest in the tree (that is, furthest
-+ * from the tree root) node such that all body items of said object are
-+ * located in a tree rooted at this node.
-+ *
-+ * Once vroot of object is found all tree lookups for items within body of
-+ * this object ("object lookups") can be started from its vroot rather
-+ * than from real root. This has following advantages:
-+ *
-+ * 1. amount of nodes traversed during lookup (and, hence, amount of
-+ * key comparisons made) decreases, and
-+ *
-+ * 2. contention on tree root is decreased. This latter was actually
-+ * motivating reason behind vroot, because spin lock of root node,
-+ * which is taken when acquiring long-term lock on root node is the
-+ * hottest lock in the reiser4.
-+ *
-+ * How to find vroot.
-+ *
-+ * When vroot of object F is not yet determined, all object lookups start
-+ * from the root of the tree. At each tree level during traversal we have
-+ * a node N such that a key we are looking for (which is the key inside
-+ * object's body) is located within N. In function handle_vroot() called
-+ * from cbk_level_lookup() we check whether N is possible vroot for
-+ * F. Check is trivial---if neither leftmost nor rightmost item of N
-+ * belongs to F (and we already have helpful ->owns_item() method of
-+ * object plugin for this), then N is possible vroot of F. This, of
-+ * course, relies on the assumption that each object occupies contiguous
-+ * range of keys in the tree.
-+ *
-+ * Thus, traversing tree downward and checking each node as we go, we can
-+ * find lowest such node, which, by definition, is vroot.
-+ *
-+ * How to track vroot.
-+ *
-+ * Nohow. If actual vroot changes, next object lookup will just restart
-+ * from the actual tree root, refreshing object's vroot along the way.
-+ *
-+ */
-+
-+/*
-+ * Check whether @node is possible vroot of @object, and if so, record it in
-+ * @object.
-+ *
-+ * @node qualifies only if it is not empty and neither its leftmost nor its
-+ * rightmost item belongs to @object (as reported by ->owns_item() of
-+ * @object's file plugin): if an edge item belongs to @object, some items of
-+ * @object are probably in the sub-tree rooted at the neighbor on that side.
-+ */
-+static void handle_vroot(struct inode *object, znode * node)
-+{
-+	file_plugin *fplug;
-+	coord_t edge;
-+
-+	fplug = inode_file_plugin(object);
-+	assert("nikita-3353", fplug != NULL);
-+	assert("nikita-3354", fplug->owns_item != NULL);
-+
-+	if (unlikely(node_is_empty(node)))
-+		return;
-+
-+	/* leftmost item first ... */
-+	coord_init_first_unit(&edge, node);
-+	if (!fplug->owns_item(object, &edge)) {
-+		/* ... then, mutatis mutandis, the rightmost one */
-+		coord_init_last_unit(&edge, node);
-+		if (!fplug->owns_item(object, &edge))
-+			/* @node is possible vroot of @object */
-+			inode_set_vroot(object, node);
-+	}
-+}
-+
-+/*
-+ * helper function used by traverse tree to start tree traversal not from the
-+ * tree root, but from @h->object's vroot, if possible.
-+ *
-+ * Returns:
-+ *
-+ * LOOKUP_CONT: continue traversal. Either @h->object has no known vroot (in
-+ * which case nothing was locked here), or key was looked up in vroot and
-+ * search has to go down from it; in the latter case lock on vroot has been
-+ * moved into @h->parent_lh.
-+ *
-+ * LOOKUP_REST: vroot could not be used (it could not be locked or loaded,
-+ * key is not inside it, it is being removed, or the active lock handle no
-+ * longer points to it after the lookup). Handle has been released by
-+ * hput(); caller restarts.
-+ *
-+ * otherwise: whatever cbk_node_lookup() returned, with outcome in
-+ * @h->result.
-+ */
-+static int prepare_object_lookup(cbk_handle * h)
-+{
-+ znode *vroot;
-+ int result;
-+
-+ vroot = inode_get_vroot(h->object);
-+ if (vroot == NULL) {
-+ /*
-+ * object doesn't have known vroot, start from real tree root.
-+ */
-+ return LOOKUP_CONT;
-+ }
-+
-+ h->level = znode_get_level(vroot);
-+ /* take a long-term lock on vroot */
-+ h->result = longterm_lock_znode(h->active_lh, vroot,
-+ cbk_lock_mode(h->level, h),
-+ ZNODE_LOCK_LOPRI);
-+ /* pessimistic default: every failure below ends up as a restart */
-+ result = LOOKUP_REST;
-+ if (h->result == 0) {
-+ int isunique;
-+ int inside;
-+
-+ isunique = h->flags & CBK_UNIQUE;
-+ /* check that key is inside vroot */
-+ /* delimiting keys are read under dk lock */
-+ read_lock_dk(h->tree);
-+ inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
-+ !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
-+ read_unlock_dk(h->tree);
-+ if (inside) {
-+ h->result = zload(vroot);
-+ if (h->result == 0) {
-+ /* search for key in vroot. */
-+ result = cbk_node_lookup(h);
-+ zrelse(vroot); /*h->active_lh->node); */
-+ if (h->active_lh->node != vroot) {
-+ /* lookup left active lock handle on
-+ some other node: do not trust the
-+ outcome, restart */
-+ result = LOOKUP_REST;
-+ } else if (result == LOOKUP_CONT) {
-+ /* vroot becomes parent for the next
-+ level of descent; CBK_DKSET is
-+ cleared so that cbk_level_lookup()
-+ takes delimiting keys of the child
-+ from this parent */
-+ move_lh(h->parent_lh, h->active_lh);
-+ h->flags &= ~CBK_DKSET;
-+ }
-+ }
-+ }
-+ }
-+
-+ /* presumably drops the reference obtained by inode_get_vroot() ---
-+ TODO confirm against inode_get_vroot() */
-+ zput(vroot);
-+
-+ if (IS_CBKERR(h->result) || result == LOOKUP_REST)
-+ hput(h);
-+ return result;
-+}
-+
-+/* main function that handles common parts of tree traversal: starting
-+ (fake znode handling), restarts, error handling, completion
-+
-+ Returns @h->result, with one exception: if uber znode cannot be locked,
-+ the error returned by get_uber_znode() is returned directly. */
-+static lookup_result traverse_tree(cbk_handle * h /* search handle */ )
-+{
-+ int done;
-+ int iterations;
-+ int vroot_used;
-+
-+ assert("nikita-365", h != NULL);
-+ assert("nikita-366", h->tree != NULL);
-+ assert("nikita-367", h->key != NULL);
-+ assert("nikita-368", h->coord != NULL);
-+ assert("nikita-369", (h->bias == FIND_EXACT)
-+ || (h->bias == FIND_MAX_NOT_MORE_THAN));
-+ assert("nikita-370", h->stop_level >= LEAF_LEVEL);
-+ assert("nikita-2949", !(h->flags & CBK_DKSET));
-+ assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
-+
-+ done = 0;
-+ iterations = 0;
-+ vroot_used = 0;
-+
-+ /* loop for restarts */
-+ restart:
-+
-+ assert("nikita-3024", reiser4_schedulable());
-+
-+ /* per-attempt state is reset on every restart */
-+ h->result = CBK_COORD_FOUND;
-+ /* connect_znode() needs it */
-+ h->ld_key = *reiser4_min_key();
-+ h->rd_key = *reiser4_max_key();
-+ h->flags |= CBK_DKSET;
-+ h->error = NULL;
-+
-+ /* vroot short-cut is tried at most once per call: @vroot_used is set
-+ before the attempt, so restarts go through the real root */
-+ if (!vroot_used && h->object != NULL) {
-+ vroot_used = 1;
-+ done = prepare_object_lookup(h);
-+ if (done == LOOKUP_REST) {
-+ goto restart;
-+ } else if (done == LOOKUP_DONE)
-+ return h->result;
-+ }
-+ /* no parent locked yet (vroot did not provide one): start from the
-+ uber znode */
-+ if (h->parent_lh->node == NULL) {
-+ done =
-+ get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
-+ h->parent_lh);
-+
-+ assert("nikita-1637", done != -E_DEADLOCK);
-+
-+ h->block = h->tree->root_block;
-+ h->level = h->tree->height;
-+ h->coord->node = h->parent_lh->node;
-+
-+ if (done != 0)
-+ return done;
-+ }
-+
-+ /* loop descending a tree */
-+ while (!done) {
-+
-+ if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
-+ IS_POW(iterations))) {
-+ warning("nikita-1481", "Too many iterations: %i",
-+ iterations);
-+ reiser4_print_key("key", h->key);
-+ /* presumably bumps the counter past the value just
-+ reported, so that the same warning is not printed
-+ again while descending without restarts */
-+ ++iterations;
-+ } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
-+ h->error =
-+ "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
-+ h->result = RETERR(-EIO);
-+ break;
-+ }
-+ switch (cbk_level_lookup(h)) {
-+ case LOOKUP_CONT:
-+ /* one level down: node just processed becomes parent */
-+ move_lh(h->parent_lh, h->active_lh);
-+ continue;
-+ default:
-+ wrong_return_value("nikita-372", "cbk_level");
-+ /* fall through: unexpected value is treated as
-+ LOOKUP_DONE */
-+ case LOOKUP_DONE:
-+ done = 1;
-+ break;
-+ case LOOKUP_REST:
-+ hput(h);
-+ /* deadlock avoidance is normal case. */
-+ /* that is, such restarts do not count against the
-+ iteration limits */
-+ if (h->result != -E_DEADLOCK)
-+ ++iterations;
-+ reiser4_preempt_point();
-+ goto restart;
-+ }
-+ }
-+ /* that's all. The rest is error handling */
-+ if (unlikely(h->error != NULL)) {
-+ warning("nikita-373", "%s: level: %i, "
-+ "lock_level: %i, stop_level: %i "
-+ "lock_mode: %s, bias: %s",
-+ h->error, h->level, h->lock_level, h->stop_level,
-+ lock_mode_name(h->lock_mode), bias_name(h->bias));
-+ reiser4_print_address("block", &h->block);
-+ reiser4_print_key("key", h->key);
-+ print_coord_content("coord", h->coord);
-+ }
-+ /* `unlikely' error case */
-+ if (unlikely(IS_CBKERR(h->result))) {
-+ /* failure. do cleanup */
-+ hput(h);
-+ } else {
-+ /* exact match found in a non-empty node must point to an
-+ existing item */
-+ assert("nikita-1605", WITH_DATA_RET
-+ (h->coord->node, 1,
-+ ergo((h->result == CBK_COORD_FOUND) &&
-+ (h->bias == FIND_EXACT) &&
-+ (!node_is_empty(h->coord->node)),
-+ coord_is_existing_item(h->coord))));
-+ }
-+ return h->result;
-+}
-+
-+/* find delimiting keys of child
-+
-+   Determine left and right delimiting keys for child pointed to by
-+   @parent_coord.
-+
-+   @parent: parent znode, passed locked.
-+   @parent_coord: coord where pointer to child is stored.
-+   @ld: where to store left delimiting key.
-+   @rd: where to store right delimiting key.
-+
-+   Each key is the key of the unit found by stepping from @parent_coord to
-+   the respective side. If there is no unit to the right, right delimiting
-+   key of @parent is used. Absence of a unit to the left is not expected
-+   (there is an assertion); left delimiting key of @parent is the fallback.
-+
-+   Tree's dk lock has to be held by the caller.
-+*/
-+static void find_child_delimiting_keys(znode * parent,
-+				       const coord_t * parent_coord,
-+				       reiser4_key * ld, reiser4_key * rd)
-+{
-+	coord_t neighbor;
-+
-+	assert("nikita-1484", parent != NULL);
-+	assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
-+
-+	/* left delimiting key */
-+	coord_dup(&neighbor, parent_coord);
-+	if (neighbor.between == AT_UNIT)
-+		/* imitate item ->lookup() behavior. */
-+		neighbor.between = AFTER_UNIT;
-+	if (coord_set_to_left(&neighbor) != 0) {
-+		assert("nikita-14851", 0);
-+		*ld = *znode_get_ld_key(parent);
-+	} else
-+		unit_key_by_coord(&neighbor, ld);
-+
-+	/* right delimiting key: start over from a fresh copy of the coord */
-+	coord_dup(&neighbor, parent_coord);
-+	if (neighbor.between == AT_UNIT)
-+		neighbor.between = AFTER_UNIT;
-+	if (coord_set_to_right(&neighbor) != 0)
-+		*rd = *znode_get_rd_key(parent);
-+	else
-+		unit_key_by_coord(&neighbor, rd);
-+}
-+
-+/*
-+ * setup delimiting keys for a child
-+ *
-+ * @parent parent node
-+ *
-+ * @coord location in @parent where pointer to @child is
-+ *
-+ * @child child node
-+ *
-+ * Delimiting keys are only computed if @child does not have JNODE_DKSET yet;
-+ * once they are stored in @child, JNODE_DKSET is set on it.
-+ *
-+ * Returns 0 if @child already had JNODE_DKSET when checked without the lock,
-+ * and 1 otherwise. Note that 1 is returned even when the re-check under dk
-+ * write lock finds that keys have been set in the meantime.
-+ */
-+int
-+set_child_delimiting_keys(znode * parent, const coord_t * coord, znode * child)
-+{
-+ reiser4_tree *tree;
-+
-+ assert("nikita-2952",
-+ znode_get_level(parent) == znode_get_level(coord->node));
-+
-+ /* fast check without taking dk lock. This is safe, because
-+ * JNODE_DKSET is never cleared once set. */
-+ if (!ZF_ISSET(child, JNODE_DKSET)) {
-+ tree = znode_get_tree(parent);
-+ write_lock_dk(tree);
-+ /* re-check under the lock: flag may have been set since the
-+ * unlocked test above */
-+ if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
-+ find_child_delimiting_keys(parent, coord,
-+ &child->ld_key,
-+ &child->rd_key);
-+ /* debugging only: stamp both keys with fresh values of
-+ * the global delimiting key version counter */
-+ ON_DEBUG(child->ld_key_version =
-+ atomic_inc_return(&delim_key_version);
-+ child->rd_key_version =
-+ atomic_inc_return(&delim_key_version););
-+ ZF_SET(child, JNODE_DKSET);
-+ }
-+ write_unlock_dk(tree);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+/* Perform tree lookup at one level. This is called from traverse_tree()
-+ function that drives lookup through tree and calls cbk_node_lookup() to
-+ perform lookup within one node.
-+
-+ Node to process is identified by @h->block and @h->level; its parent is
-+ expected to be locked in @h->parent_lh. Node is locked into
-+ @h->active_lh.
-+
-+ Returns LOOKUP_REST if traversal has to be restarted (deadlock avoidance,
-+ or key is no longer in this node, or node is being removed), LOOKUP_DONE
-+ if traversal is over (outcome, possibly an error, is in @h->result), or
-+ the result of cbk_node_lookup().
-+
-+ See comments in a code.
-+*/
-+static level_lookup_result cbk_level_lookup(cbk_handle * h /* search handle */ )
-+{
-+ int ret;
-+ int setdk;
-+ int ldkeyset = 0;
-+ reiser4_key ldkey;
-+ reiser4_key key;
-+ znode *active;
-+
-+ assert("nikita-3025", reiser4_schedulable());
-+
-+ /* acquire reference to @active node */
-+ active =
-+ zget(h->tree, &h->block, h->parent_lh->node, h->level,
-+ reiser4_ctx_gfp_mask_get());
-+
-+ if (IS_ERR(active)) {
-+ h->result = PTR_ERR(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* lock @active */
-+ h->result = longterm_lock_znode(h->active_lh,
-+ active,
-+ cbk_lock_mode(h->level, h),
-+ ZNODE_LOCK_LOPRI);
-+ /* longterm_lock_znode() acquires additional reference to znode (which
-+ will be later released by longterm_unlock_znode()). Release
-+ reference acquired by zget().
-+ */
-+ zput(active);
-+ if (unlikely(h->result != 0))
-+ goto fail_or_restart;
-+
-+ setdk = 0;
-+ /* if @active is accessed for the first time, setup delimiting keys on
-+ it. Delimiting keys are taken from the parent node. See
-+ setup_delimiting_keys() for details.
-+ */
-+ if (h->flags & CBK_DKSET) {
-+ /* delimiting keys are already in the handle */
-+ setdk = setup_delimiting_keys(h);
-+ h->flags &= ~CBK_DKSET;
-+ } else {
-+ znode *parent;
-+
-+ parent = h->parent_lh->node;
-+ h->result = zload(parent);
-+ if (unlikely(h->result != 0))
-+ goto fail_or_restart;
-+
-+ if (!ZF_ISSET(active, JNODE_DKSET))
-+ setdk = set_child_delimiting_keys(parent,
-+ h->coord, active);
-+ else {
-+ /* @active has delimiting keys already. Remember what
-+ left delimiting key parent implies, to compare it
-+ with leftmost key in @active later. @key only
-+ receives the right delimiting key here, which is
-+ not used. */
-+ read_lock_dk(h->tree);
-+ find_child_delimiting_keys(parent, h->coord, &ldkey,
-+ &key);
-+ read_unlock_dk(h->tree);
-+ ldkeyset = 1;
-+ }
-+ zrelse(parent);
-+ }
-+
-+ /* this is ugly kludge. Reminder: this is necessary, because
-+ ->lookup() method returns coord with ->between field probably set
-+ to something different from AT_UNIT.
-+ */
-+ h->coord->between = AT_UNIT;
-+
-+ if (znode_just_created(active) && (h->coord->node != NULL)) {
-+ write_lock_tree(h->tree);
-+ /* if we are going to load znode right now, setup
-+ ->in_parent: coord where pointer to this node is stored in
-+ parent.
-+ */
-+ coord_to_parent_coord(h->coord, &active->in_parent);
-+ write_unlock_tree(h->tree);
-+ }
-+
-+ /* check connectedness without holding tree lock---false negatives
-+ * will be re-checked by connect_znode(), and false positives are
-+ * impossible---@active cannot suddenly turn into unconnected
-+ * state. */
-+ if (!znode_is_connected(active)) {
-+ h->result = connect_znode(h->coord, active);
-+ if (unlikely(h->result != 0)) {
-+ put_parent(h);
-+ goto fail_or_restart;
-+ }
-+ }
-+
-+ jload_prefetch(ZJNODE(active));
-+
-+ if (setdk)
-+ update_stale_dk(h->tree, active);
-+
-+ /* put_parent() cannot be called earlier, because connect_znode()
-+ assumes parent node is referenced; */
-+ put_parent(h);
-+
-+ if ((!znode_contains_key_lock(active, h->key) &&
-+ (h->flags & CBK_TRUST_DK))
-+ || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
-+ /* 1. key was moved out of this node while this thread was
-+ waiting for the lock. Restart. More elaborate solution is
-+ to determine where key moved (to the left, or to the right)
-+ and try to follow it through sibling pointers.
-+
-+ 2. or, node itself is going to be removed from the
-+ tree. Release lock and restart.
-+ */
-+ h->result = -E_REPEAT;
-+ }
-+ if (h->result == -E_REPEAT)
-+ return LOOKUP_REST;
-+
-+ /* load @active, passing readahead hints from the handle */
-+ h->result = zload_ra(active, h->ra_info);
-+ if (h->result) {
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* sanity checks */
-+ if (sanity_check(h)) {
-+ zrelse(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* check that key of leftmost item in the @active is the same as in
-+ * its parent */
-+ /* @key is reused as buffer for the leftmost key of @active */
-+ if (ldkeyset && !node_is_empty(active) &&
-+ !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
-+ warning("vs-3533", "Keys are inconsistent. Fsck?");
-+ reiser4_print_key("inparent", &ldkey);
-+ reiser4_print_key("inchild", &key);
-+ h->result = RETERR(-EIO);
-+ zrelse(active);
-+ return LOOKUP_DONE;
-+ }
-+
-+ /* object lookup: see whether @active can serve as vroot of object */
-+ if (h->object != NULL)
-+ handle_vroot(h->object, active);
-+
-+ ret = cbk_node_lookup(h);
-+
-+ /* h->active_lh->node might change, but active is yet to be zrelsed */
-+ zrelse(active);
-+
-+ return ret;
-+
-+ fail_or_restart:
-+ /* only deadlock avoidance leads to restart, any other error ends
-+ traversal with that error in @h->result */
-+ if (h->result == -E_DEADLOCK)
-+ return LOOKUP_REST;
-+ return LOOKUP_DONE;
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging check of delimiting keys of @node: left key must not be greater
-+   than right key, and keys have to agree with those of the neighbors that
-+   @node is connected to. Tree and dk locks of current tree are held for
-+   reading during the check. */
-+void check_dkeys(znode * node)
-+{
-+	znode *lnb;
-+	znode *rnb;
-+
-+	read_lock_tree(current_tree);
-+	read_lock_dk(current_tree);
-+
-+	assert("vs-1710", znode_is_any_locked(node));
-+	assert("vs-1197",
-+	       !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
-+
-+	lnb = node->left;
-+	rnb = node->right;
-+
-+	/* neighbors are only compared when @node itself has its keys set */
-+	if (ZF_ISSET(node, JNODE_DKSET)) {
-+		if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && lnb != NULL &&
-+		    ZF_ISSET(lnb, JNODE_DKSET)) {
-+			/* check left neighbor. Note that left neighbor is
-+			   not locked, so it might get wrong delimiting keys
-+			   therefore */
-+			assert("vs-1198",
-+			       (keyeq(znode_get_rd_key(lnb),
-+				      znode_get_ld_key(node))
-+				|| ZF_ISSET(lnb, JNODE_HEARD_BANSHEE)));
-+		}
-+
-+		if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && rnb != NULL &&
-+		    ZF_ISSET(rnb, JNODE_DKSET)) {
-+			/* check right neighbor. Note that right neighbor is
-+			   not locked, so it might get wrong delimiting keys
-+			   therefore */
-+			assert("vs-1199",
-+			       (keyeq(znode_get_rd_key(node),
-+				      znode_get_ld_key(rnb))
-+				|| ZF_ISSET(rnb, JNODE_HEARD_BANSHEE)));
-+		}
-+	}
-+
-+	read_unlock_dk(current_tree);
-+	read_unlock_tree(current_tree);
-+}
-+#endif
-+
-+/* non-zero if @key is equal to left delimiting key of @node. Delimiting key
-+   is read under dk lock of @node's tree. */
-+static int key_is_ld(znode * node, const reiser4_key * key)
-+{
-+	reiser4_tree *tree;
-+	int is_left_edge;
-+
-+	assert("nikita-1716", node != NULL);
-+	assert("nikita-1758", key != NULL);
-+
-+	tree = znode_get_tree(node);
-+
-+	read_lock_dk(tree);
-+	assert("nikita-1759", znode_contains_key(node, key));
-+	is_left_edge = keyeq(znode_get_ld_key(node), key);
-+	read_unlock_dk(tree);
-+
-+	return is_left_edge;
-+}
-+
-+/* Process one node during tree traversal.
-+
-+   This is called by cbk_level_lookup() and by prepare_object_lookup(). Node
-+   to search in is the one locked in @h->active_lh.
-+
-+   ->lookup() method of node plugin is run for @h->key with bias @h->bias,
-+   result position is left in @h->coord. After that, either search is
-+   finished (error, or stop level is reached), or @h is prepared for descent
-+   to the child: @h->block is set by ->down_link() of the internal item
-+   found, and @h->level is decremented.
-+
-+   Returns LOOKUP_DONE (outcome is in @h->result), LOOKUP_CONT (continue one
-+   level down), or whatever search_to_left() or handle_eottl() decided. */
-+static level_lookup_result cbk_node_lookup(cbk_handle * h /* search handle */ )
-+{
-+	/* node plugin of @active */
-+	node_plugin *nplug;
-+	/* item plugin of item that was found */
-+	item_plugin *iplug;
-+	/* node we are operating upon */
-+	znode *active;
-+	/* result */
-+	int result;
-+
-+	assert("nikita-379", h != NULL);
-+
-+	active = h->active_lh->node;
-+
-+	nplug = active->nplug;
-+	assert("nikita-380", nplug != NULL);
-+
-+	ON_DEBUG(check_dkeys(active));
-+
-+	/* return item from "active" node with maximal key not greater than
-+	   "key" */
-+	result = nplug->lookup(active, h->key, h->bias, h->coord);
-+	if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
-+		/* error occurred */
-+		h->result = result;
-+		return LOOKUP_DONE;
-+	}
-+	if (h->level == h->stop_level) {
-+		/* welcome to the stop level */
-+		assert("nikita-381", h->coord->node == active);
-+		if (result == NS_FOUND) {
-+			/* success of tree lookup */
-+			if (!(h->flags & CBK_UNIQUE)
-+			    && key_is_ld(active, h->key)) {
-+				/* non-unique key found at the left edge of
-+				   the node: items with this key may also be
-+				   in the left neighbor */
-+				return search_to_left(h);
-+			} else
-+				h->result = CBK_COORD_FOUND;
-+		} else {
-+			h->result = CBK_COORD_NOTFOUND;
-+		}
-+		/* remember the node for later lookups, unless this lookup
-+		   was itself served from the cache */
-+		if (!(h->flags & CBK_IN_CACHE))
-+			cbk_cache_add(active);
-+		return LOOKUP_DONE;
-+	}
-+
-+	if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
-+		h->error = "not found on internal node";
-+		h->result = result;
-+		return LOOKUP_DONE;
-+	}
-+
-+	assert("vs-361", h->level > h->stop_level);
-+
-+	/* non-zero return means handle_eottl() took over and put the value
-+	   to return into @result */
-+	if (handle_eottl(h, &result)) {
-+		assert("vs-1674", (result == LOOKUP_DONE ||
-+				   result == LOOKUP_REST));
-+		return result;
-+	}
-+
-+	/* go down to next level */
-+	check_me("vs-12", zload(h->coord->node) == 0);
-+	assert("nikita-2116", item_is_internal(h->coord));
-+	iplug = item_plugin_by_coord(h->coord);
-+	iplug->s.internal.down_link(h->coord, h->key, &h->block);
-+	zrelse(h->coord->node);
-+	--h->level;
-+	return LOOKUP_CONT;	/* continue */
-+}
-+
-+/* scan cbk_cache slots looking for a match for @h */
-+static int cbk_cache_scan_slots(cbk_handle * h /* cbk handle */ )
-+{
-+ level_lookup_result llr;
-+ znode *node;
-+ reiser4_tree *tree;
-+ cbk_cache_slot *slot;
-+ cbk_cache *cache;
-+ tree_level level;
-+ int isunique;
-+ const reiser4_key *key;
-+ int result;
-+
-+ assert("nikita-1317", h != NULL);
-+ assert("nikita-1315", h->tree != NULL);
-+ assert("nikita-1316", h->key != NULL);
-+
-+ tree = h->tree;
-+ cache = &tree->cbk_cache;
-+ if (cache->nr_slots == 0)
-+ /* size of cbk cache was set to 0 by mount time option. */
-+ return RETERR(-ENOENT);
-+
-+ assert("nikita-2474", cbk_cache_invariant(cache));
-+ node = NULL; /* to keep gcc happy */
-+ level = h->level;
-+ key = h->key;
-+ isunique = h->flags & CBK_UNIQUE;
-+ result = RETERR(-ENOENT);
-+
-+ /*
-+ * this is time-critical function and dragons had, hence, been settled
-+ * here.
-+ *
-+ * Loop below scans cbk cache slots trying to find matching node with
-+ * suitable range of delimiting keys and located at the h->level.
-+ *
-+ * Scan is done under cbk cache spin lock that protects slot->node
-+ * pointers. If suitable node is found we want to pin it in
-+ * memory. But slot->node can point to the node with x_count 0
-+ * (unreferenced). Such node can be recycled at any moment, or can
-+ * already be in the process of being recycled (within jput()).
-+ *
-+ * As we found node in the cbk cache, it means that jput() hasn't yet
-+ * called cbk_cache_invalidate().
-+ *
-+ * We acquire reference to the node without holding tree lock, and
-+ * later, check node's RIP bit. This avoids races with jput().
-+ */
-+
-+ rcu_read_lock();
-+ read_lock(&((cbk_cache *)cache)->guard);
-+
-+ slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
-+ slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
-+ BUG_ON(&slot->lru != &cache->lru);/*????*/
-+ while (1) {
-+
-+ slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
-+
-+ if (&cache->lru != &slot->lru)
-+ node = slot->node;
-+ else
-+ node = NULL;
-+
-+ if (unlikely(node == NULL))
-+ break;
-+
-+ /*
-+ * this is (hopefully) the only place in the code where we are
-+ * working with delimiting keys without holding dk lock. This
-+ * is fine here, because this is only "guess" anyway---keys
-+ * are rechecked under dk lock below.
-+ */
-+ if (znode_get_level(node) == level &&
-+ /* reiser4_min_key < key < reiser4_max_key */
-+ znode_contains_key_strict(node, key, isunique)) {
-+ zref(node);
-+ result = 0;
-+ spin_lock_prefetch(&tree->tree_lock);
-+ break;
-+ }
-+ }
-+ read_unlock(&((cbk_cache *)cache)->guard);
-+
-+ assert("nikita-2475", cbk_cache_invariant(cache));
-+
-+ if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
-+ result = -ENOENT;
-+
-+ rcu_read_unlock();
-+
-+ if (result != 0) {
-+ h->result = CBK_COORD_NOTFOUND;
-+ return RETERR(-ENOENT);
-+ }
-+
-+ result =
-+ longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
-+ ZNODE_LOCK_LOPRI);
-+ zput(node);
-+ if (result != 0)
-+ return result;
-+ result = zload(node);
-+ if (result != 0)
-+ return result;
-+
-+ /* recheck keys */
-+ read_lock_dk(tree);
-+ result = (znode_contains_key_strict(node, key, isunique) &&
-+ !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ read_unlock_dk(tree);
-+ if (result) {
-+ /* do lookup inside node */
-+ llr = cbk_node_lookup(h);
-+ /* if cbk_node_lookup() wandered to another node (due to eottl
-+ or non-unique keys), adjust @node */
-+ /*node = h->active_lh->node; */
-+
-+ if (llr != LOOKUP_DONE) {
-+ /* restart or continue on the next level */
-+ result = RETERR(-ENOENT);
-+ } else if (IS_CBKERR(h->result))
-+ /* io or oom */
-+ result = RETERR(-ENOENT);
-+ else {
-+ /* good. Either item found or definitely not found. */
-+ result = 0;
-+
-+ write_lock(&(cache->guard));
-+ if (slot->node == h->active_lh->node /*node */ ) {
-+ /* if this node is still in cbk cache---move
-+ its slot to the head of the LRU list. */
-+ list_move(&slot->lru, &cache->lru);
-+ }
-+ write_unlock(&(cache->guard));
-+ }
-+ } else {
-+ /* race. While this thread was waiting for the lock, node was
-+ rebalanced and item we are looking for, shifted out of it
-+ (if it ever was here).
-+
-+ Continuing scanning is almost hopeless: node key range was
-+ moved to, is almost certainly at the beginning of the LRU
-+ list at this time, because it's hot, but restarting
-+ scanning from the very beginning is complex. Just return,
-+ so that cbk() will be performed. This is not that
-+ important, because such races should be rare. Are they?
-+ */
-+ result = RETERR(-ENOENT); /* -ERAUGHT */
-+ }
-+ zrelse(node);
-+ assert("nikita-2476", cbk_cache_invariant(cache));
-+ return result;
-+}
-+
-+/* look for item with given key in the coord cache
-+
-+ This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
-+ which is a small LRU list of znodes accessed lately. For each znode in
-+ znode in this list, it checks whether key we are looking for fits into key
-+ range covered by this node. If so, and in addition, node lies at allowed
-+ level (this is to handle extents on a twig level), node is locked, and
-+ lookup inside it is performed.
-+
-+ we need a measurement of the cost of this cache search compared to the cost
-+ of coord_by_key.
-+
-+*/
-+static int cbk_cache_search(cbk_handle * h /* cbk handle */ )
-+{
-+ int result = 0;
-+ tree_level level;
-+
-+ /* add CBK_IN_CACHE to the handle flags. This means that
-+ * cbk_node_lookup() assumes that cbk_cache is scanned and would add
-+ * found node to the cache. */
-+ h->flags |= CBK_IN_CACHE;
-+ for (level = h->stop_level; level <= h->lock_level; ++level) {
-+ h->level = level;
-+ result = cbk_cache_scan_slots(h);
-+ if (result != 0) {
-+ done_lh(h->active_lh);
-+ done_lh(h->parent_lh);
-+ } else {
-+ assert("nikita-1319", !IS_CBKERR(h->result));
-+ break;
-+ }
-+ }
-+ h->flags &= ~CBK_IN_CACHE;
-+ return result;
-+}
-+
-+/* type of lock we want to obtain during tree traversal. On stop level
-+ we want type of lock user asked for, on upper levels: read lock. */
-+znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
-+{
-+ assert("nikita-382", h != NULL);
-+
-+ return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
-+}
-+
-+/* update outdated delimiting keys */
-+static void stale_dk(reiser4_tree * tree, znode * node)
-+{
-+ znode *right;
-+
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ right = node->right;
-+
-+ if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ right && ZF_ISSET(right, JNODE_DKSET) &&
-+ !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
-+ znode_set_rd_key(node, znode_get_ld_key(right));
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/* check for possibly outdated delimiting keys, and update them if
-+ * necessary. */
-+static void update_stale_dk(reiser4_tree * tree, znode * node)
-+{
-+ znode *right;
-+ reiser4_key rd;
-+
-+ read_lock_tree(tree);
-+ read_lock_dk(tree);
-+ rd = *znode_get_rd_key(node);
-+ right = node->right;
-+ if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
-+ right && ZF_ISSET(right, JNODE_DKSET) &&
-+ !keyeq(&rd, znode_get_ld_key(right)))) {
-+ assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
-+ read_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+ stale_dk(tree, node);
-+ return;
-+ }
-+ read_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+}
-+
-+/*
-+ * handle searches a the non-unique key.
-+ *
-+ * Suppose that we are looking for an item with possibly non-unique key 100.
-+ *
-+ * Root node contains two pointers: one to a node with left delimiting key 0,
-+ * and another to a node with left delimiting key 100. Item we interested in
-+ * may well happen in the sub-tree rooted at the first pointer.
-+ *
-+ * To handle this search_to_left() is called when search reaches stop
-+ * level. This function checks it is _possible_ that item we are looking for
-+ * is in the left neighbor (this can be done by comparing delimiting keys) and
-+ * if so, tries to lock left neighbor (this is low priority lock, so it can
-+ * deadlock, tree traversal is just restarted if it did) and then checks
-+ * whether left neighbor actually contains items with our key.
-+ *
-+ * Note that this is done on the stop level only. It is possible to try such
-+ * left-check on each level, but as duplicate keys are supposed to be rare
-+ * (very unlikely that more than one node is completely filled with items with
-+ * duplicate keys), it sis cheaper to scan to the left on the stop level once.
-+ *
-+ */
-+static level_lookup_result search_to_left(cbk_handle * h /* search handle */ )
-+{
-+ level_lookup_result result;
-+ coord_t *coord;
-+ znode *node;
-+ znode *neighbor;
-+
-+ lock_handle lh;
-+
-+ assert("nikita-1761", h != NULL);
-+ assert("nikita-1762", h->level == h->stop_level);
-+
-+ init_lh(&lh);
-+ coord = h->coord;
-+ node = h->active_lh->node;
-+ assert("nikita-1763", coord_is_leftmost_unit(coord));
-+
-+ h->result =
-+ reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ neighbor = NULL;
-+ switch (h->result) {
-+ case -E_DEADLOCK:
-+ result = LOOKUP_REST;
-+ break;
-+ case 0:{
-+ node_plugin *nplug;
-+ coord_t crd;
-+ lookup_bias bias;
-+
-+ neighbor = lh.node;
-+ h->result = zload(neighbor);
-+ if (h->result != 0) {
-+ result = LOOKUP_DONE;
-+ break;
-+ }
-+
-+ nplug = neighbor->nplug;
-+
-+ coord_init_zero(&crd);
-+ bias = h->bias;
-+ h->bias = FIND_EXACT;
-+ h->result =
-+ nplug->lookup(neighbor, h->key, h->bias, &crd);
-+ h->bias = bias;
-+
-+ if (h->result == NS_NOT_FOUND) {
-+ case -E_NO_NEIGHBOR:
-+ h->result = CBK_COORD_FOUND;
-+ if (!(h->flags & CBK_IN_CACHE))
-+ cbk_cache_add(node);
-+ default: /* some other error */
-+ result = LOOKUP_DONE;
-+ } else if (h->result == NS_FOUND) {
-+ read_lock_dk(znode_get_tree(neighbor));
-+ h->rd_key = *znode_get_ld_key(node);
-+ leftmost_key_in_node(neighbor, &h->ld_key);
-+ read_unlock_dk(znode_get_tree(neighbor));
-+ h->flags |= CBK_DKSET;
-+
-+ h->block = *znode_get_block(neighbor);
-+ /* clear coord -> node so that cbk_level_lookup()
-+ wouldn't overwrite parent hint in neighbor.
-+
-+ Parent hint was set up by
-+ reiser4_get_left_neighbor()
-+ */
-+ /* FIXME: why do we have to spinlock here? */
-+ write_lock_tree(znode_get_tree(neighbor));
-+ h->coord->node = NULL;
-+ write_unlock_tree(znode_get_tree(neighbor));
-+ result = LOOKUP_CONT;
-+ } else {
-+ result = LOOKUP_DONE;
-+ }
-+ if (neighbor != NULL)
-+ zrelse(neighbor);
-+ }
-+ }
-+ done_lh(&lh);
-+ return result;
-+}
-+
-+/* debugging aid: return symbolic name of search bias */
-+static const char *bias_name(lookup_bias bias /* bias to get name of */ )
-+{
-+ if (bias == FIND_EXACT)
-+ return "exact";
-+ else if (bias == FIND_MAX_NOT_MORE_THAN)
-+ return "left-slant";
-+/* else if( bias == RIGHT_SLANT_BIAS ) */
-+/* return "right-bias"; */
-+ else {
-+ static char buf[30];
-+
-+ sprintf(buf, "unknown: %i", bias);
-+ return buf;
-+ }
-+}
-+
-+#if REISER4_DEBUG
-+/* debugging aid: print human readable information about @p */
-+void print_coord_content(const char *prefix /* prefix to print */ ,
-+ coord_t * p /* coord to print */ )
-+{
-+ reiser4_key key;
-+
-+ if (p == NULL) {
-+ printk("%s: null\n", prefix);
-+ return;
-+ }
-+ if ((p->node != NULL) && znode_is_loaded(p->node)
-+ && coord_is_existing_item(p))
-+ printk("%s: data: %p, length: %i\n", prefix,
-+ item_body_by_coord(p), item_length_by_coord(p));
-+ if (znode_is_loaded(p->node)) {
-+ item_key_by_coord(p, &key);
-+ reiser4_print_key(prefix, &key);
-+ }
-+}
-+
-+/* debugging aid: print human readable information about @block */
-+void reiser4_print_address(const char *prefix /* prefix to print */ ,
-+ const reiser4_block_nr * block /* block number to print */ )
-+{
-+ printk("%s: %s\n", prefix, sprint_address(block));
-+}
-+#endif
-+
-+/* return string containing human readable representation of @block */
-+char *sprint_address(const reiser4_block_nr *
-+ block /* block number to print */ )
-+{
-+ static char address[30];
-+
-+ if (block == NULL)
-+ sprintf(address, "null");
-+ else if (reiser4_blocknr_is_fake(block))
-+ sprintf(address, "%llx", (unsigned long long)(*block));
-+ else
-+ sprintf(address, "%llu", (unsigned long long)(*block));
-+ return address;
-+}
-+
-+/* release parent node during traversal */
-+static void put_parent(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-383", h != NULL);
-+ if (h->parent_lh->node != NULL) {
-+ longterm_unlock_znode(h->parent_lh);
-+ }
-+}
-+
-+/* helper function used by coord_by_key(): release reference to parent znode
-+ stored in handle before processing its child. */
-+static void hput(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-385", h != NULL);
-+ done_lh(h->parent_lh);
-+ done_lh(h->active_lh);
-+}
-+
-+/* Helper function used by cbk(): update delimiting keys of child node (stored
-+ in h->active_lh->node) using key taken from parent on the parent level. */
-+static int setup_delimiting_keys(cbk_handle * h /* search handle */ )
-+{
-+ znode *active;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-1088", h != NULL);
-+
-+ active = h->active_lh->node;
-+
-+ /* fast check without taking dk lock. This is safe, because
-+ * JNODE_DKSET is never cleared once set. */
-+ if (!ZF_ISSET(active, JNODE_DKSET)) {
-+ tree = znode_get_tree(active);
-+ write_lock_dk(tree);
-+ if (!ZF_ISSET(active, JNODE_DKSET)) {
-+ znode_set_ld_key(active, &h->ld_key);
-+ znode_set_rd_key(active, &h->rd_key);
-+ ZF_SET(active, JNODE_DKSET);
-+ }
-+ write_unlock_dk(tree);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+/* true if @block makes sense for the @tree. Used to detect corrupted node
-+ * pointers */
-+static int
-+block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
-+ reiser4_tree * tree /* tree to check against */ )
-+{
-+ assert("nikita-757", block != NULL);
-+ assert("nikita-758", tree != NULL);
-+
-+ /* check to see if it exceeds the size of the device. */
-+ return reiser4_blocknr_is_sane_for(tree->super, block);
-+}
-+
-+/* check consistency of fields */
-+static int sanity_check(cbk_handle * h /* search handle */ )
-+{
-+ assert("nikita-384", h != NULL);
-+
-+ if (h->level < h->stop_level) {
-+ h->error = "Buried under leaves";
-+ h->result = RETERR(-EIO);
-+ return LOOKUP_DONE;
-+ } else if (!block_nr_is_correct(&h->block, h->tree)) {
-+ h->error = "bad block number";
-+ h->result = RETERR(-EIO);
-+ return LOOKUP_DONE;
-+ } else
-+ return 0;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/status_flags.c linux-2.6.24/fs/reiser4/status_flags.c
---- linux-2.6.24.orig/fs/reiser4/status_flags.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/status_flags.c 2008-01-25 11:54:46.665843146 +0300
-@@ -0,0 +1,170 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Functions that deal with reiser4 status block, query status and update it, if needed */
-+
-+#include <linux/bio.h>
-+#include <linux/highmem.h>
-+#include <linux/fs.h>
-+#include <linux/blkdev.h>
-+#include "debug.h"
-+#include "dformat.h"
-+#include "status_flags.h"
-+#include "super.h"
-+
-+/* This is our end I/O handler that marks page uptodate if IO was successful. It also
-+ unconditionally unlocks the page, so we can see that io was done.
-+ We do not free bio, because we hope to reuse that. */
-+static void reiser4_status_endio(struct bio *bio, int err)
-+{
-+ if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-+ SetPageUptodate(bio->bi_io_vec->bv_page);
-+ } else {
-+ ClearPageUptodate(bio->bi_io_vec->bv_page);
-+ SetPageError(bio->bi_io_vec->bv_page);
-+ }
-+ unlock_page(bio->bi_io_vec->bv_page);
-+}
-+
-+/* Initialise status code. This is expected to be called from the disk format
-+ code. block paremeter is where status block lives. */
-+int reiser4_status_init(reiser4_block_nr block)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+ struct reiser4_status *statuspage;
-+ struct bio *bio;
-+ struct page *page;
-+
-+ get_super_private(sb)->status_page = NULL;
-+ get_super_private(sb)->status_bio = NULL;
-+
-+ page = alloc_pages(reiser4_ctx_gfp_mask_get(), 0);
-+ if (!page)
-+ return -ENOMEM;
-+
-+ bio = bio_alloc(reiser4_ctx_gfp_mask_get(), 1);
-+ if (bio != NULL) {
-+ bio->bi_sector = block * (sb->s_blocksize >> 9);
-+ bio->bi_bdev = sb->s_bdev;
-+ bio->bi_io_vec[0].bv_page = page;
-+ bio->bi_io_vec[0].bv_len = sb->s_blocksize;
-+ bio->bi_io_vec[0].bv_offset = 0;
-+ bio->bi_vcnt = 1;
-+ bio->bi_size = sb->s_blocksize;
-+ bio->bi_end_io = reiser4_status_endio;
-+ } else {
-+ __free_pages(page, 0);
-+ return -ENOMEM;
-+ }
-+ lock_page(page);
-+ submit_bio(READ, bio);
-+ blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping);
-+ wait_on_page_locked(page);
-+ if (!PageUptodate(page)) {
-+ warning("green-2007",
-+ "I/O error while tried to read status page\n");
-+ return -EIO;
-+ }
-+
-+ statuspage = (struct reiser4_status *)kmap_atomic(page, KM_USER0);
-+ if (memcmp
-+ (statuspage->magic, REISER4_STATUS_MAGIC,
-+ sizeof(REISER4_STATUS_MAGIC))) {
-+ /* Magic does not match. */
-+ kunmap_atomic((char *)statuspage, KM_USER0);
-+ warning("green-2008", "Wrong magic in status block\n");
-+ __free_pages(page, 0);
-+ bio_put(bio);
-+ return -EINVAL;
-+ }
-+ kunmap_atomic((char *)statuspage, KM_USER0);
-+
-+ get_super_private(sb)->status_page = page;
-+ get_super_private(sb)->status_bio = bio;
-+ return 0;
-+}
-+
-+/* Query the status of fs. Returns if the FS can be safely mounted.
-+ Also if "status" and "extended" parameters are given, it will fill
-+ actual parts of status from disk there. */
-+int reiser4_status_query(u64 * status, u64 * extended)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+ struct reiser4_status *statuspage;
-+ int retval;
-+
-+ if (!get_super_private(sb)->status_page) { // No status page?
-+ return REISER4_STATUS_MOUNT_UNKNOWN;
-+ }
-+ statuspage = (struct reiser4_status *)
-+ kmap_atomic(get_super_private(sb)->status_page, KM_USER0);
-+ switch ((long)le64_to_cpu(get_unaligned(&statuspage->status))) { // FIXME: this cast is a hack for 32 bit arches to work.
-+ case REISER4_STATUS_OK:
-+ retval = REISER4_STATUS_MOUNT_OK;
-+ break;
-+ case REISER4_STATUS_CORRUPTED:
-+ retval = REISER4_STATUS_MOUNT_WARN;
-+ break;
-+ case REISER4_STATUS_DAMAGED:
-+ case REISER4_STATUS_DESTROYED:
-+ case REISER4_STATUS_IOERROR:
-+ retval = REISER4_STATUS_MOUNT_RO;
-+ break;
-+ default:
-+ retval = REISER4_STATUS_MOUNT_UNKNOWN;
-+ break;
-+ }
-+
-+ if (status)
-+ *status = le64_to_cpu(get_unaligned(&statuspage->status));
-+ if (extended)
-+ *extended = le64_to_cpu(get_unaligned(&statuspage->extended_status));
-+
-+ kunmap_atomic((char *)statuspage, KM_USER0);
-+ return retval;
-+}
-+
-+/* This function should be called when something bad happens (e.g. from reiser4_panic).
-+ It fills the status structure and tries to push it to disk. */
-+int reiser4_status_write(__u64 status, __u64 extended_status, char *message)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+ struct reiser4_status *statuspage;
-+ struct bio *bio = get_super_private(sb)->status_bio;
-+
-+ if (!get_super_private(sb)->status_page) { // No status page?
-+ return -1;
-+ }
-+ statuspage = (struct reiser4_status *)
-+ kmap_atomic(get_super_private(sb)->status_page, KM_USER0);
-+
-+ put_unaligned(cpu_to_le64(status), &statuspage->status);
-+ put_unaligned(cpu_to_le64(extended_status), &statuspage->extended_status);
-+ strncpy(statuspage->texterror, message, REISER4_TEXTERROR_LEN);
-+
-+ kunmap_atomic((char *)statuspage, KM_USER0);
-+ bio->bi_bdev = sb->s_bdev;
-+ bio->bi_io_vec[0].bv_page = get_super_private(sb)->status_page;
-+ bio->bi_io_vec[0].bv_len = sb->s_blocksize;
-+ bio->bi_io_vec[0].bv_offset = 0;
-+ bio->bi_vcnt = 1;
-+ bio->bi_size = sb->s_blocksize;
-+ bio->bi_end_io = reiser4_status_endio;
-+ lock_page(get_super_private(sb)->status_page); // Safe as nobody should touch our page.
-+ /* We can block now, but we have no other choice anyway */
-+ submit_bio(WRITE, bio);
-+ blk_run_address_space(reiser4_get_super_fake(sb)->i_mapping);
-+ return 0; // We do not wait for io to finish.
-+}
-+
-+/* Frees the page with status and bio structure. Should be called by disk format at umount time */
-+int reiser4_status_finish(void)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+
-+ __free_pages(get_super_private(sb)->status_page, 0);
-+ get_super_private(sb)->status_page = NULL;
-+ bio_put(get_super_private(sb)->status_bio);
-+ get_super_private(sb)->status_bio = NULL;
-+ return 0;
-+}
-diff -urN linux-2.6.24.orig/fs/reiser4/status_flags.h linux-2.6.24/fs/reiser4/status_flags.h
---- linux-2.6.24.orig/fs/reiser4/status_flags.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/status_flags.h 2008-01-25 11:39:07.088246844 +0300
-@@ -0,0 +1,43 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Here we declare structures and flags that store reiser4 status on disk.
-+ The status that helps us to find out if the filesystem is valid or if it
-+ contains some critical, or not so critical errors */
-+
-+#if !defined( __REISER4_STATUS_FLAGS_H__ )
-+#define __REISER4_STATUS_FLAGS_H__
-+
-+#include "dformat.h"
-+/* These are major status flags */
-+#define REISER4_STATUS_OK 0
-+#define REISER4_STATUS_CORRUPTED 0x1
-+#define REISER4_STATUS_DAMAGED 0x2
-+#define REISER4_STATUS_DESTROYED 0x4
-+#define REISER4_STATUS_IOERROR 0x8
-+
-+/* Return values for reiser4_status_query() */
-+#define REISER4_STATUS_MOUNT_OK 0
-+#define REISER4_STATUS_MOUNT_WARN 1
-+#define REISER4_STATUS_MOUNT_RO 2
-+#define REISER4_STATUS_MOUNT_UNKNOWN -1
-+
-+#define REISER4_TEXTERROR_LEN 256
-+
-+#define REISER4_STATUS_MAGIC "ReiSeR4StATusBl"
-+/* We probably need to keep its size under sector size which is 512 bytes */
-+struct reiser4_status {
-+ char magic[16];
-+ d64 status; /* Current FS state */
-+ d64 extended_status; /* Any additional info that might have sense in addition to "status". E.g.
-+ last sector where io error happened if status is "io error encountered" */
-+ d64 stacktrace[10]; /* Last ten functional calls made (addresses) */
-+ char texterror[REISER4_TEXTERROR_LEN]; /* Any error message if appropriate, otherwise filled with zeroes */
-+};
-+
-+int reiser4_status_init(reiser4_block_nr block);
-+int reiser4_status_query(u64 * status, u64 * extended);
-+int reiser4_status_write(u64 status, u64 extended_status, char *message);
-+int reiser4_status_finish(void);
-+
-+#endif
-diff -urN linux-2.6.24.orig/fs/reiser4/super.c linux-2.6.24/fs/reiser4/super.c
---- linux-2.6.24.orig/fs/reiser4/super.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/super.c 2008-01-25 11:39:07.088246844 +0300
-@@ -0,0 +1,316 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Super-block manipulations. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/space/space_allocator.h"
-+#include "plugin/plugin.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+
-+static __u64 reserved_for_gid(const struct super_block *super, gid_t gid);
-+static __u64 reserved_for_uid(const struct super_block *super, uid_t uid);
-+static __u64 reserved_for_root(const struct super_block *super);
-+
-+/* Return reiser4-specific part of super block */
-+reiser4_super_info_data *get_super_private_nocheck(const struct super_block *super /* super block
-+ * queried */ )
-+{
-+ return (reiser4_super_info_data *) super->s_fs_info;
-+}
-+
-+/* Return reiser4 fstype: value that is returned in ->f_type field by statfs() */
-+long reiser4_statfs_type(const struct super_block *super UNUSED_ARG)
-+{
-+ assert("nikita-448", super != NULL);
-+ assert("nikita-449", is_reiser4_super(super));
-+ return (long)REISER4_SUPER_MAGIC;
-+}
-+
-+/* functions to read/modify fields of reiser4_super_info_data */
-+
-+/* get number of blocks in file system */
-+__u64 reiser4_block_count(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("vs-494", super != NULL);
-+ assert("vs-495", is_reiser4_super(super));
-+ return get_super_private(super)->block_count;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * number of blocks in the current file system
-+ */
-+__u64 reiser4_current_block_count(void)
-+{
-+ return get_current_super_private()->block_count;
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* set number of block in filesystem */
-+void reiser4_set_block_count(const struct super_block *super, __u64 nr)
-+{
-+ assert("vs-501", super != NULL);
-+ assert("vs-502", is_reiser4_super(super));
-+ get_super_private(super)->block_count = nr;
-+ /*
-+ * The proper calculation of the reserved space counter (%5 of device
-+ * block counter) we need a 64 bit division which is missing in Linux
-+ * on i386 platform. Because we do not need a precise calculation here
-+ * we can replace a div64 operation by this combination of
-+ * multiplication and shift: 51. / (2^10) == .0498 .
-+ * FIXME: this is a bug. It comes up only for very small filesystems
-+ * which probably are never used. Nevertheless, it is a bug. Number of
-+ * reserved blocks must be not less than maximal number of blocks which
-+ * get grabbed with BA_RESERVED.
-+ */
-+ get_super_private(super)->blocks_reserved = ((nr * 51) >> 10);
-+}
-+
-+/* amount of blocks used (allocated for data) in file system */
-+__u64 reiser4_data_blocks(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("nikita-452", super != NULL);
-+ assert("nikita-453", is_reiser4_super(super));
-+ return get_super_private(super)->blocks_used;
-+}
-+
-+/* set number of block used in filesystem */
-+void reiser4_set_data_blocks(const struct super_block *super, __u64 nr)
-+{
-+ assert("vs-503", super != NULL);
-+ assert("vs-504", is_reiser4_super(super));
-+ get_super_private(super)->blocks_used = nr;
-+}
-+
-+/* amount of free blocks in file system */
-+__u64 reiser4_free_blocks(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("nikita-454", super != NULL);
-+ assert("nikita-455", is_reiser4_super(super));
-+ return get_super_private(super)->blocks_free;
-+}
-+
-+/* set number of blocks free in filesystem */
-+void reiser4_set_free_blocks(const struct super_block *super, __u64 nr)
-+{
-+ assert("vs-505", super != NULL);
-+ assert("vs-506", is_reiser4_super(super));
-+ get_super_private(super)->blocks_free = nr;
-+}
-+
-+/* get mkfs unique identifier */
-+__u32 reiser4_mkfs_id(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("vpf-221", super != NULL);
-+ assert("vpf-222", is_reiser4_super(super));
-+ return get_super_private(super)->mkfs_id;
-+}
-+
-+/* amount of free blocks in file system */
-+__u64 reiser4_free_committed_blocks(const struct super_block *super)
-+{
-+ assert("vs-497", super != NULL);
-+ assert("vs-498", is_reiser4_super(super));
-+ return get_super_private(super)->blocks_free_committed;
-+}
-+
-+/* amount of blocks in the file system reserved for @uid and @gid */
-+long reiser4_reserved_blocks(const struct super_block *super /* super block
-+ queried */ ,
-+ uid_t uid /* user id */ ,
-+ gid_t gid /* group id */ )
-+{
-+ long reserved;
-+
-+ assert("nikita-456", super != NULL);
-+ assert("nikita-457", is_reiser4_super(super));
-+
-+ reserved = 0;
-+ if (REISER4_SUPPORT_GID_SPACE_RESERVATION)
-+ reserved += reserved_for_gid(super, gid);
-+ if (REISER4_SUPPORT_UID_SPACE_RESERVATION)
-+ reserved += reserved_for_uid(super, uid);
-+ if (REISER4_SUPPORT_ROOT_SPACE_RESERVATION && (uid == 0))
-+ reserved += reserved_for_root(super);
-+ return reserved;
-+}
-+
-+/* get/set value of/to grabbed blocks counter */
-+__u64 reiser4_grabbed_blocks(const struct super_block * super)
-+{
-+ assert("zam-512", super != NULL);
-+ assert("zam-513", is_reiser4_super(super));
-+
-+ return get_super_private(super)->blocks_grabbed;
-+}
-+
-+__u64 reiser4_flush_reserved(const struct super_block * super)
-+{
-+ assert("vpf-285", super != NULL);
-+ assert("vpf-286", is_reiser4_super(super));
-+
-+ return get_super_private(super)->blocks_flush_reserved;
-+}
-+
-+/* get/set value of/to counter of fake allocated formatted blocks */
-+__u64 reiser4_fake_allocated(const struct super_block * super)
-+{
-+ assert("zam-516", super != NULL);
-+ assert("zam-517", is_reiser4_super(super));
-+
-+ return get_super_private(super)->blocks_fake_allocated;
-+}
-+
-+/* get/set value of/to counter of fake allocated unformatted blocks */
-+__u64 reiser4_fake_allocated_unformatted(const struct super_block * super)
-+{
-+ assert("zam-516", super != NULL);
-+ assert("zam-517", is_reiser4_super(super));
-+
-+ return get_super_private(super)->blocks_fake_allocated_unformatted;
-+}
-+
-+/* get/set value of/to counter of clustered blocks */
-+__u64 reiser4_clustered_blocks(const struct super_block * super)
-+{
-+ assert("edward-601", super != NULL);
-+ assert("edward-602", is_reiser4_super(super));
-+
-+ return get_super_private(super)->blocks_clustered;
-+}
-+
-+/* space allocator used by this file system */
-+reiser4_space_allocator * reiser4_get_space_allocator(const struct super_block
-+ *super)
-+{
-+ assert("nikita-1965", super != NULL);
-+ assert("nikita-1966", is_reiser4_super(super));
-+ return &get_super_private(super)->space_allocator;
-+}
-+
-+/* return fake inode used to bind formatted nodes in the page cache */
-+struct inode *reiser4_get_super_fake(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("nikita-1757", super != NULL);
-+ return get_super_private(super)->fake;
-+}
-+
-+/* return fake inode used to bind copied on capture nodes in the page cache */
-+struct inode *reiser4_get_cc_fake(const struct super_block *super /* super block
-+ queried */ )
-+{
-+ assert("nikita-1757", super != NULL);
-+ return get_super_private(super)->cc;
-+}
-+
-+/* return fake inode used to bind bitmaps and journlal heads */
-+struct inode *reiser4_get_bitmap_fake(const struct super_block *super)
-+{
-+ assert("nikita-17571", super != NULL);
-+ return get_super_private(super)->bitmap;
-+}
-+
-+/* tree used by this file system */
-+reiser4_tree *reiser4_get_tree(const struct super_block * super /* super block
-+ * queried */ )
-+{
-+ assert("nikita-460", super != NULL);
-+ assert("nikita-461", is_reiser4_super(super));
-+ return &get_super_private(super)->tree;
-+}
-+
-+/* Check that @super is (looks like) reiser4 super block. This is mainly for
-+ use in assertions. */
-+int is_reiser4_super(const struct super_block *super /* super block
-+ * queried */ )
-+{
-+ return
-+ super != NULL &&
-+ get_super_private(super) != NULL &&
-+ super->s_op == &(get_super_private(super)->ops.super);
-+}
-+
-+int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f)
-+{
-+ return test_bit((int)f, &get_super_private(super)->fs_flags);
-+}
-+
-+/* amount of blocks reserved for given group in file system */
-+static __u64 reserved_for_gid(const struct super_block *super UNUSED_ARG /* super
-+ * block
-+ * queried */ ,
-+ gid_t gid UNUSED_ARG /* group id */ )
-+{
-+ return 0;
-+}
-+
-+/* amount of blocks reserved for given user in file system */
-+static __u64 reserved_for_uid(const struct super_block *super UNUSED_ARG /* super
-+ block
-+ queried */ ,
-+ uid_t uid UNUSED_ARG /* user id */ )
-+{
-+ return 0;
-+}
-+
-+/* amount of blocks reserved for super user in file system */
-+static __u64 reserved_for_root(const struct super_block *super UNUSED_ARG /* super
-+ block
-+ queried */ )
-+{
-+ return 0;
-+}
-+
-+/*
-+ * true if block number @blk makes sense for the file system at @super.
-+ */
-+int
-+reiser4_blocknr_is_sane_for(const struct super_block *super,
-+ const reiser4_block_nr * blk)
-+{
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("nikita-2957", super != NULL);
-+ assert("nikita-2958", blk != NULL);
-+
-+ if (reiser4_blocknr_is_fake(blk))
-+ return 1;
-+
-+ sbinfo = get_super_private(super);
-+ return *blk < sbinfo->block_count;
-+}
-+
-+#if REISER4_DEBUG
-+/*
-+ * true, if block number @blk makes sense for the current file system
-+ */
-+int reiser4_blocknr_is_sane(const reiser4_block_nr * blk)
-+{
-+ return reiser4_blocknr_is_sane_for(reiser4_get_current_sb(), blk);
-+}
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/super.h linux-2.6.24/fs/reiser4/super.h
---- linux-2.6.24.orig/fs/reiser4/super.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/super.h 2008-01-25 11:39:07.088246844 +0300
-@@ -0,0 +1,466 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Super-block functions. See super.c for details. */
-+
-+#if !defined( __REISER4_SUPER_H__ )
-+#define __REISER4_SUPER_H__
-+
-+#include <linux/exportfs.h>
-+
-+#include "tree.h"
-+#include "entd.h"
-+#include "wander.h"
-+#include "fsdata.h"
-+#include "plugin/object.h"
-+#include "plugin/space/space_allocator.h"
-+
-+/*
-+ * Flush algorithms parameters.
-+ */
-+struct flush_params {
-+ unsigned relocate_threshold;
-+ unsigned relocate_distance;
-+ unsigned written_threshold;
-+ unsigned scan_maxnodes;
-+};
-+
-+typedef enum {
-+ /*
-+ * True if this file system doesn't support hard-links (multiple names)
-+ * for directories: this is default UNIX behavior.
-+ *
-+ * If hard-links on directories are not allowed, file system is Acyclic
-+ * Directed Graph (modulo dot, and dotdot, of course).
-+ *
-+ * This is used by reiser4_link().
-+ */
-+ REISER4_ADG = 0,
-+ /*
-+ * set if all nodes in internal tree have the same node layout plugin.
-+ * If so, znode_guess_plugin() will return tree->node_plugin instead
-+ * of guessing plugin by plugin id stored in the node.
-+ */
-+ REISER4_ONE_NODE_PLUGIN = 1,
-+ /* if set, bsd gid assignment is supported. */
-+ REISER4_BSD_GID = 2,
-+ /* [mac]_time are 32 bit in inode */
-+ REISER4_32_BIT_TIMES = 3,
-+ /* presumably: do NOT load all bitmap blocks at mount time (TODO confirm) */
-+ REISER4_DONT_LOAD_BITMAP = 5,
-+ /* enforce atomicity during write(2) */
-+ REISER4_ATOMIC_WRITE = 6,
-+ /* don't use write barriers in the log writer code. */
-+ REISER4_NO_WRITE_BARRIER = 7
-+} reiser4_fs_flag;
-+
-+/*
-+ * VFS related operation vectors.
-+ */
-+struct object_ops {
-+ struct super_operations super;
-+ struct dentry_operations dentry;
-+ struct export_operations export;
-+};
-+
-+/* reiser4-specific part of super block
-+
-+ Locking
-+
-+ Fields immutable after mount:
-+
-+ ->oid*
-+ ->space*
-+ ->default_[ug]id
-+ ->mkfs_id
-+ ->trace_flags
-+ ->debug_flags
-+ ->fs_flags
-+ ->df_plug
-+ ->optimal_io_size
-+ ->plug
-+ ->flush
-+ ->u (bad name)
-+ ->txnmgr
-+ ->ra_params
-+ ->fsuid
-+ ->journal_header
-+ ->journal_footer
-+
-+ Fields protected by ->lnode_guard
-+
-+ ->lnode_htable
-+
-+ Fields protected by per-super block spin lock
-+
-+ ->block_count
-+ ->blocks_used
-+ ->blocks_free
-+ ->blocks_free_committed
-+ ->blocks_grabbed
-+ ->blocks_fake_allocated_unformatted
-+ ->blocks_fake_allocated
-+ ->blocks_flush_reserved
-+ ->eflushed
-+ ->blocknr_hint_default
-+
-+ After journal replaying during mount,
-+
-+ ->last_committed_tx
-+
-+ is protected by ->tmgr.commit_mutex
-+
-+ Invariants involving this data-type:
-+
-+ [sb-block-counts]
-+ [sb-grabbed]
-+ [sb-fake-allocated]
-+*/
-+struct reiser4_super_info_data {
-+ /*
-+ * guard spinlock which protects reiser4 super block fields (currently
-+ * blocks_free, blocks_free_committed)
-+ */
-+ spinlock_t guard;
-+
-+ /* next oid that will be returned by oid_allocate() */
-+ oid_t next_to_use;
-+ /* total number of used oids */
-+ oid_t oids_in_use;
-+
-+ /* space manager plugin */
-+ reiser4_space_allocator space_allocator;
-+
-+ /* reiser4 internal tree */
-+ reiser4_tree tree;
-+
-+ /*
-+ * default user id used for light-weight files without their own
-+ * stat-data.
-+ */
-+ uid_t default_uid;
-+
-+ /*
-+ * default group id used for light-weight files without their own
-+ * stat-data.
-+ */
-+ gid_t default_gid;
-+
-+ /* mkfs identifier generated at mkfs time. */
-+ __u32 mkfs_id;
-+ /* amount of blocks in a file system */
-+ __u64 block_count;
-+
-+ /* inviolable reserve */
-+ __u64 blocks_reserved;
-+
-+ /* amount of blocks used by file system data and meta-data. */
-+ __u64 blocks_used;
-+
-+ /*
-+ * amount of free blocks. This is "working" free blocks counter. It is
-+ * like "working" bitmap, please see block_alloc.c for description.
-+ */
-+ __u64 blocks_free;
-+
-+ /*
-+ * free block count for fs committed state. This is "commit" version of
-+ * free block counter.
-+ */
-+ __u64 blocks_free_committed;
-+
-+ /*
-+ * number of blocks reserved for further allocation, for all
-+ * threads.
-+ */
-+ __u64 blocks_grabbed;
-+
-+ /* number of fake allocated unformatted blocks in tree. */
-+ __u64 blocks_fake_allocated_unformatted;
-+
-+ /* number of fake allocated formatted blocks in tree. */
-+ __u64 blocks_fake_allocated;
-+
-+ /* number of blocks reserved for flush operations. */
-+ __u64 blocks_flush_reserved;
-+
-+ /* number of blocks reserved for cluster operations. */
-+ __u64 blocks_clustered;
-+
-+ /* unique file-system identifier */
-+ __u32 fsuid;
-+
-+ /* On-disk format version. If it does not equal the disk_format
-+ plugin version, some format updates (e.g. enlarging plugin
-+ set, etc) may take place on mount. */
-+ int version;
-+
-+ /* file-system wide flags. See reiser4_fs_flag enum */
-+ unsigned long fs_flags;
-+
-+ /* transaction manager */
-+ txn_mgr tmgr;
-+
-+ /* ent thread */
-+ entd_context entd;
-+
-+ /* fake inode used to bind formatted nodes */
-+ struct inode *fake;
-+ /* inode used to bind bitmaps (and journal heads) */
-+ struct inode *bitmap;
-+ /* inode used to bind copied on capture nodes */
-+ struct inode *cc;
-+
-+ /* disk layout plugin */
-+ disk_format_plugin *df_plug;
-+
-+ /* disk layout specific part of reiser4 super info data */
-+ union {
-+ format40_super_info format40;
-+ } u;
-+
-+ /* value we return in st_blksize on stat(2) */
-+ unsigned long optimal_io_size;
-+
-+ /* parameters for the flush algorithm */
-+ struct flush_params flush;
-+
-+ /* pointers to jnodes for journal header and footer */
-+ jnode *journal_header;
-+ jnode *journal_footer;
-+
-+ journal_location jloc;
-+
-+ /* head block number of last committed transaction */
-+ __u64 last_committed_tx;
-+
-+ /*
-+ * we remember last written location for using as a hint for new block
-+ * allocation
-+ */
-+ __u64 blocknr_hint_default;
-+
-+ /* committed number of files (oid allocator state variable ) */
-+ __u64 nr_files_committed;
-+
-+ struct formatted_ra_params ra_params;
-+
-+ /*
-+ * A mutex for serializing cut tree operations when out of free space:
-+ * only one cut_tree thread at a time is allowed to grab space from the
-+ * reserved area (it is 5% of disk space)
-+ */
-+ struct mutex delete_mutex;
-+ /* task owning ->delete_mutex */
-+ struct task_struct *delete_mutex_owner;
-+
-+ /* Diskmap's blocknumber */
-+ __u64 diskmap_block;
-+
-+ /* What to do in case of error */
-+ int onerror;
-+
-+ /* operations for objects on this file system */
-+ struct object_ops ops;
-+
-+ /*
-+ * structure to maintain d_cursors. See plugin/file_ops_readdir.c for
-+ * more details
-+ */
-+ struct d_cursor_info d_info;
-+
-+#ifdef CONFIG_REISER4_BADBLOCKS
-+ /* Alternative master superblock offset (in bytes) */
-+ unsigned long altsuper;
-+#endif
-+ struct repacker *repacker;
-+ struct page *status_page;
-+ struct bio *status_bio;
-+
-+#if REISER4_DEBUG
-+ /*
-+ * minimum used blocks value (includes super blocks, bitmap blocks and
-+ * other fs reserved areas), depends on fs format and fs size.
-+ */
-+ __u64 min_blocks_used;
-+
-+ /*
-+ * when debugging is on, all jnodes (including znodes, bitmaps, etc.)
-+ * are kept on a list anchored at sbinfo->all_jnodes. This list is
-+ * protected by sbinfo->all_guard spin lock. This lock should be taken
-+ * with _irq modifier, because it is also modified from interrupt
-+ * contexts (by RCU).
-+ */
-+ spinlock_t all_guard;
-+ /* list of all jnodes */
-+ struct list_head all_jnodes;
-+#endif
-+ struct dentry *debugfs_root;
-+};
-+
-+extern reiser4_super_info_data *get_super_private_nocheck(const struct
-+ super_block *super);
-+
-+/* Return reiser4-specific part of super block */
-+static inline reiser4_super_info_data *get_super_private(const struct
-+ super_block *super)
-+{
-+ assert("nikita-447", super != NULL);
-+
-+ return (reiser4_super_info_data *) super->s_fs_info;
-+}
-+
-+/* get ent context for the @super */
-+static inline entd_context *get_entd_context(struct super_block *super)
-+{
-+ return &get_super_private(super)->entd;
-+}
-+
-+/* "Current" super-block: main super block used during current system
-+ call. Reference to this super block is stored in reiser4_context. */
-+static inline struct super_block *reiser4_get_current_sb(void)
-+{
-+ return get_current_context()->super;
-+}
-+
-+/* Reiser4-specific part of "current" super-block: main super block used
-+ during current system call. Reference to this super block is stored in
-+ reiser4_context. */
-+static inline reiser4_super_info_data *get_current_super_private(void)
-+{
-+ return get_super_private(reiser4_get_current_sb());
-+}
-+
-+static inline struct formatted_ra_params *get_current_super_ra_params(void)
-+{
-+ return &(get_current_super_private()->ra_params);
-+}
-+
-+/*
-+ * true, if file system on @super is read-only
-+ */
-+static inline int rofs_super(struct super_block *super)
-+{
-+ return super->s_flags & MS_RDONLY;
-+}
-+
-+/*
-+ * true, if @tree represents read-only file system
-+ */
-+static inline int rofs_tree(reiser4_tree * tree)
-+{
-+ return rofs_super(tree->super);
-+}
-+
-+/*
-+ * true, if file system where @inode lives on, is read-only
-+ */
-+static inline int rofs_inode(struct inode *inode)
-+{
-+ return rofs_super(inode->i_sb);
-+}
-+
-+/*
-+ * true, if file system where @node lives on, is read-only
-+ */
-+static inline int rofs_jnode(jnode * node)
-+{
-+ return rofs_tree(jnode_get_tree(node));
-+}
-+
-+extern __u64 reiser4_current_block_count(void);
-+
-+extern void build_object_ops(struct super_block *super, struct object_ops * ops);
-+
-+#define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */
-+
-+static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo)
-+{
-+ spin_lock(&(sbinfo->guard));
-+}
-+
-+static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo)
-+{
-+ assert_spin_locked(&(sbinfo->guard));
-+ spin_unlock(&(sbinfo->guard));
-+}
-+
-+extern __u64 reiser4_flush_reserved(const struct super_block *);
-+extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f);
-+extern long reiser4_statfs_type(const struct super_block *super);
-+extern __u64 reiser4_block_count(const struct super_block *super);
-+extern void reiser4_set_block_count(const struct super_block *super, __u64 nr);
-+extern __u64 reiser4_data_blocks(const struct super_block *super);
-+extern void reiser4_set_data_blocks(const struct super_block *super, __u64 nr);
-+extern __u64 reiser4_free_blocks(const struct super_block *super);
-+extern void reiser4_set_free_blocks(const struct super_block *super, __u64 nr);
-+extern __u32 reiser4_mkfs_id(const struct super_block *super);
-+
-+extern __u64 reiser4_free_committed_blocks(const struct super_block *super);
-+
-+extern __u64 reiser4_grabbed_blocks(const struct super_block *);
-+extern __u64 reiser4_fake_allocated(const struct super_block *);
-+extern __u64 reiser4_fake_allocated_unformatted(const struct super_block *);
-+extern __u64 reiser4_clustered_blocks(const struct super_block *);
-+
-+extern long reiser4_reserved_blocks(const struct super_block *super, uid_t uid,
-+ gid_t gid);
-+
-+extern reiser4_space_allocator *
-+reiser4_get_space_allocator(const struct super_block *super);
-+extern reiser4_oid_allocator *
-+reiser4_get_oid_allocator(const struct super_block *super);
-+extern struct inode *reiser4_get_super_fake(const struct super_block *super);
-+extern struct inode *reiser4_get_cc_fake(const struct super_block *super);
-+extern struct inode *reiser4_get_bitmap_fake(const struct super_block *super);
-+extern reiser4_tree *reiser4_get_tree(const struct super_block *super);
-+extern int is_reiser4_super(const struct super_block *super);
-+
-+extern int reiser4_blocknr_is_sane(const reiser4_block_nr * blk);
-+extern int reiser4_blocknr_is_sane_for(const struct super_block *super,
-+ const reiser4_block_nr * blk);
-+extern int reiser4_fill_super(struct super_block *s, void *data, int silent);
-+extern int reiser4_done_super(struct super_block *s);
-+
-+/* step of fill super */
-+extern int reiser4_init_fs_info(struct super_block *);
-+extern void reiser4_done_fs_info(struct super_block *);
-+extern int reiser4_init_super_data(struct super_block *, char *opt_string);
-+extern int reiser4_init_read_super(struct super_block *, int silent);
-+extern int reiser4_init_root_inode(struct super_block *);
-+extern reiser4_plugin *get_default_plugin(pset_member memb);
-+
-+/* Maximal possible object id. */
-+#define ABSOLUTE_MAX_OID ((oid_t)~0)
-+
-+#define OIDS_RESERVED ( 1 << 16 )
-+int oid_init_allocator(struct super_block *, oid_t nr_files, oid_t next);
-+oid_t oid_allocate(struct super_block *);
-+int oid_release(struct super_block *, oid_t);
-+oid_t oid_next(const struct super_block *);
-+void oid_count_allocated(void);
-+void oid_count_released(void);
-+long oids_used(const struct super_block *);
-+
-+#if REISER4_DEBUG
-+void print_fs_info(const char *prefix, const struct super_block *);
-+#endif
-+
-+extern void destroy_reiser4_cache(struct kmem_cache **);
-+
-+extern struct super_operations reiser4_super_operations;
-+extern struct export_operations reiser4_export_operations;
-+extern struct dentry_operations reiser4_dentry_operations;
-+
-+/* __REISER4_SUPER_H__ */
-+#endif
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 120
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/super_ops.c linux-2.6.24/fs/reiser4/super_ops.c
---- linux-2.6.24.orig/fs/reiser4/super_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/super_ops.c 2008-01-25 12:23:33.922660872 +0300
-@@ -0,0 +1,724 @@
-+/* Copyright 2005 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "flush.h"
-+#include "safe_link.h"
-+
-+#include <linux/vfs.h>
-+#include <linux/writeback.h>
-+#include <linux/mount.h>
-+#include <linux/seq_file.h>
-+#include <linux/debugfs.h>
-+
-+/* slab cache for inodes */
-+static struct kmem_cache *inode_cache;
-+
-+static struct dentry *reiser4_debugfs_root = NULL;
-+
-+/**
-+ * init_once - constructor for reiser4 inodes
-+ * @cache: cache @obj belongs to
-+ * @obj: inode to be initialized
-+ *
-+ * Initialization function to be called when new page is allocated by reiser4
-+ * inode cache. It is set on inode cache creation.
-+ */
-+static void init_once(struct kmem_cache *cache, void *obj)
-+{
-+ struct reiser4_inode_object *info;
-+
-+ info = obj;
-+
-+ /* initialize vfs inode */
-+ inode_init_once(&info->vfs_inode);
-+
-+ /*
-+ * initialize reiser4 specific part of inode.
-+ * NOTE-NIKITA add here initializations for locks, list heads,
-+ * etc. that will be added to our private inode part.
-+ */
-+ INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
-+ init_rwsem(&info->p.conv_sem);
-+ /* init semaphore which is used during inode loading */
-+ loading_init_once(&info->p);
-+ INIT_RADIX_TREE(jnode_tree_by_reiser4_inode(&info->p),
-+ GFP_ATOMIC);
-+#if REISER4_DEBUG
-+ info->p.nr_jnodes = 0;
-+#endif
-+}
-+
-+/**
-+ * init_inodes - create inode cache
-+ *
-+ * Initializes slab cache of inodes. It is part of reiser4 module initialization.
-+ */
-+static int init_inodes(void)
-+{
-+ inode_cache = kmem_cache_create("reiser4_inode",
-+ sizeof(struct reiser4_inode_object),
-+ 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, init_once);
-+ if (inode_cache == NULL)
-+ return RETERR(-ENOMEM);
-+ return 0;
-+}
-+
-+/**
-+ * done_inodes - delete inode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+static void done_inodes(void)
-+{
-+ destroy_reiser4_cache(&inode_cache);
-+}
-+
-+/**
-+ * reiser4_alloc_inode - alloc_inode of super operations
-+ * @super: super block new inode is allocated for
-+ *
-+ * Allocates new inode, initializes reiser4 specific part of it.
-+ */
-+static struct inode *reiser4_alloc_inode(struct super_block *super)
-+{
-+ struct reiser4_inode_object *obj;
-+
-+ assert("nikita-1696", super != NULL);
-+ obj = kmem_cache_alloc(inode_cache, reiser4_ctx_gfp_mask_get());
-+ if (obj != NULL) {
-+ reiser4_inode *info;
-+
-+ info = &obj->p;
-+
-+ info->pset = plugin_set_get_empty();
-+ info->hset = plugin_set_get_empty();
-+ info->extmask = 0;
-+ info->locality_id = 0ull;
-+ info->plugin_mask = 0;
-+ info->heir_mask = 0;
-+#if !REISER4_INO_IS_OID
-+ info->oid_hi = 0;
-+#endif
-+ reiser4_seal_init(&info->sd_seal, NULL, NULL);
-+ coord_init_invalid(&info->sd_coord, NULL);
-+ info->flags = 0;
-+ spin_lock_init(&info->guard);
-+ /* this deals with info's loading semaphore */
-+ loading_alloc(info);
-+ info->vroot = UBER_TREE_ADDR;
-+ return &obj->vfs_inode;
-+ } else
-+ return NULL;
-+}
-+
-+/**
-+ * reiser4_destroy_inode - destroy_inode of super operations
-+ * @inode: inode being destroyed
-+ *
-+ * Puts reiser4 specific portion of inode, frees memory occupied by inode.
-+ */
-+static void reiser4_destroy_inode(struct inode *inode)
-+{
-+ reiser4_inode *info;
-+
-+ info = reiser4_inode_data(inode);
-+
-+ assert("vs-1220", inode_has_no_jnodes(info));
-+
-+ if (!is_bad_inode(inode) && is_inode_loaded(inode)) {
-+ file_plugin *fplug = inode_file_plugin(inode);
-+ if (fplug->destroy_inode != NULL)
-+ fplug->destroy_inode(inode);
-+ }
-+ reiser4_dispose_cursors(inode);
-+ if (info->pset)
-+ plugin_set_put(info->pset);
-+ if (info->hset)
-+ plugin_set_put(info->hset);
-+
-+ /*
-+ * cannot add similar assertion about ->i_list as prune_icache return
-+ * inode into slab with dangling ->list.{next,prev}. This is safe,
-+ * because they are re-initialized in the new_inode().
-+ */
-+ assert("nikita-2895", list_empty(&inode->i_dentry));
-+ assert("nikita-2896", hlist_unhashed(&inode->i_hash));
-+ assert("nikita-2898", list_empty_careful(get_readdir_list(inode)));
-+
-+ /* this deals with info's loading semaphore */
-+ loading_destroy(info);
-+
-+ kmem_cache_free(inode_cache,
-+ container_of(info, struct reiser4_inode_object, p));
-+}
-+
-+/**
-+ * reiser4_dirty_inode - dirty_inode of super operations
-+ * @inode: inode being dirtied
-+ *
-+ * Updates stat data.
-+ */
-+static void reiser4_dirty_inode(struct inode *inode)
-+{
-+ int result;
-+
-+ if (!is_in_reiser4_context())
-+ return;
-+ assert("", !IS_RDONLY(inode));
-+ assert("", (inode_file_plugin(inode)->estimate.update(inode) <=
-+ get_current_context()->grabbed_blocks));
-+
-+ result = reiser4_update_sd(inode);
-+ if (result)
-+ warning("", "failed to dirty inode for %llu: %d",
-+ get_inode_oid(inode), result);
-+}
-+
-+/**
-+ * reiser4_delete_inode - delete_inode of super operations
-+ * @inode: inode to delete
-+ *
-+ * Calls file plugin's delete_object method to delete object items from
-+ * filesystem tree and calls clear_inode.
-+ */
-+static void reiser4_delete_inode(struct inode *inode)
-+{
-+ reiser4_context *ctx;
-+ file_plugin *fplug;
-+
-+ ctx = reiser4_init_context(inode->i_sb);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-15", "failed to init context");
-+ return;
-+ }
-+
-+ if (is_inode_loaded(inode)) {
-+ fplug = inode_file_plugin(inode);
-+ if (fplug != NULL && fplug->delete_object != NULL)
-+ fplug->delete_object(inode);
-+ }
-+
-+ truncate_inode_pages(&inode->i_data, 0);
-+ inode->i_blocks = 0;
-+ clear_inode(inode);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_put_super - put_super of super operations
-+ * @super: super block to free
-+ *
-+ * Stops daemons, release resources, umounts in short.
-+ */
-+static void reiser4_put_super(struct super_block *super)
-+{
-+ reiser4_super_info_data *sbinfo;
-+ reiser4_context *ctx;
-+
-+ sbinfo = get_super_private(super);
-+ assert("vs-1699", sbinfo);
-+
-+ debugfs_remove(sbinfo->tmgr.debugfs_atom_count);
-+ debugfs_remove(sbinfo->tmgr.debugfs_id_count);
-+ debugfs_remove(sbinfo->debugfs_root);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-17", "failed to init context");
-+ return;
-+ }
-+
-+ /* have disk format plugin to free its resources */
-+ if (get_super_private(super)->df_plug->release)
-+ get_super_private(super)->df_plug->release(super);
-+
-+ reiser4_done_formatted_fake(super);
-+
-+ /* stop daemons: ktxnmgr and entd */
-+ reiser4_done_entd(super);
-+ reiser4_done_ktxnmgrd(super);
-+ reiser4_done_txnmgr(&sbinfo->tmgr);
-+
-+ reiser4_done_fs_info(super);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_write_super - write_super of super operations
-+ * @super: super block to write
-+ *
-+ * Captures znode associated with super block, commits all transactions.
-+ */
-+static void reiser4_write_super(struct super_block *super)
-+{
-+ int ret;
-+ reiser4_context *ctx;
-+
-+ assert("vs-1700", !rofs_super(super));
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-16", "failed to init context");
-+ return;
-+ }
-+
-+ ret = reiser4_capture_super_block(super);
-+ if (ret != 0)
-+ warning("vs-1701",
-+ "reiser4_capture_super_block failed in write_super: %d",
-+ ret);
-+ ret = txnmgr_force_commit_all(super, 0);
-+ if (ret != 0)
-+ warning("jmacd-77113",
-+ "txn_force failed in write_super: %d", ret);
-+
-+ super->s_dirt = 0;
-+
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_statfs - statfs of super operations
-+ * @dentry: dentry whose super block (->d_sb) identifies the queried file system
-+ * @statfs: buffer to fill with statistics
-+ *
-+ * Returns information about filesystem.
-+ */
-+static int reiser4_statfs(struct dentry *dentry, struct kstatfs *statfs)
-+{
-+ sector_t total;
-+ sector_t reserved;
-+ sector_t free;
-+ sector_t forroot;
-+ sector_t deleted;
-+ reiser4_context *ctx;
-+ struct super_block *super = dentry->d_sb;
-+
-+ assert("nikita-408", super != NULL);
-+ assert("nikita-409", statfs != NULL);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx))
-+ return PTR_ERR(ctx);
-+
-+ statfs->f_type = reiser4_statfs_type(super);
-+ statfs->f_bsize = super->s_blocksize;
-+
-+ /*
-+ * 5% of total block space is reserved. This is needed for flush and
-+ * for truncates (so that we are able to perform truncate/unlink even
-+ * on the otherwise completely full file system). If this reservation
-+ * is hidden from statfs(2), users will mistakenly guess that they
-+ * have enough free space to complete some operation, which is
-+ * frustrating.
-+ *
-+ * Another possible solution is to subtract ->blocks_reserved from
-+ * ->f_bfree, but changing available space seems less intrusive than
-+ * letting user to see 5% of disk space to be used directly after
-+ * mkfs.
-+ */
-+ total = reiser4_block_count(super);
-+ reserved = get_super_private(super)->blocks_reserved;
-+ deleted = txnmgr_count_deleted_blocks();
-+ free = reiser4_free_blocks(super) + deleted;
-+ forroot = reiser4_reserved_blocks(super, 0, 0);
-+
-+ /*
-+ * These counters may be in inconsistent state because we take the
-+ * values without keeping any global spinlock. Here we do a sanity
-+ * check that free block counter does not exceed the number of all
-+ * blocks.
-+ */
-+ if (free > total)
-+ free = total;
-+ statfs->f_blocks = total - reserved;
-+ /* make sure statfs->f_bfree is never larger than statfs->f_blocks */
-+ if (free > reserved)
-+ free -= reserved;
-+ else
-+ free = 0;
-+ statfs->f_bfree = free;
-+
-+ if (free > forroot)
-+ free -= forroot;
-+ else
-+ free = 0;
-+ statfs->f_bavail = free;
-+
-+ statfs->f_files = 0;
-+ statfs->f_ffree = 0;
-+
-+ /* maximal acceptable name length depends on directory plugin. */
-+ assert("nikita-3351", super->s_root->d_inode != NULL);
-+ statfs->f_namelen = reiser4_max_filename_len(super->s_root->d_inode);
-+ reiser4_exit_context(ctx);
-+ return 0;
-+}
-+
-+/**
-+ * reiser4_clear_inode - clear_inode of super operation
-+ * @inode: inode about to be destroyed
-+ *
-+ * Sanity check (REISER4_DEBUG only): inode being destroyed should have no jnodes.
-+ */
-+static void reiser4_clear_inode(struct inode *inode)
-+{
-+#if REISER4_DEBUG
-+ reiser4_inode *r4_inode;
-+
-+ r4_inode = reiser4_inode_data(inode);
-+ if (!inode_has_no_jnodes(r4_inode))
-+ warning("vs-1732", "reiser4 inode has %ld jnodes\n",
-+ r4_inode->nr_jnodes);
-+#endif
-+}
-+
-+/**
-+ * reiser4_sync_inodes - sync_inodes of super operations
-+ * @super: super block whose dirty inodes are to be synced
-+ * @wbc: writeback control describing the request
-+ *
-+ * This method is called by background and non-background writeback. Reiser4's
-+ * implementation uses generic_sync_sb_inodes to call reiser4_writepages for
-+ * each of dirty inodes. Reiser4_writepages handles pages dirtied via shared
-+ * mapping - dirty pages get into atoms. Writeout is called to flush some
-+ * atoms.
-+ */
-+static void reiser4_sync_inodes(struct super_block *super,
-+ struct writeback_control *wbc)
-+{
-+ reiser4_context *ctx;
-+ long to_write;
-+
-+ if (wbc->for_kupdate)
-+ /* reiser4 has its own means of periodical write-out */
-+ return;
-+
-+ to_write = wbc->nr_to_write;
-+ assert("vs-49", wbc->older_than_this == NULL);
-+
-+ ctx = reiser4_init_context(super);
-+ if (IS_ERR(ctx)) {
-+ warning("vs-13", "failed to init context");
-+ return;
-+ }
-+
-+ /*
-+ * call reiser4_writepages for each of dirty inodes to turn dirty pages
-+ * into transactions if they were not yet.
-+ */
-+ generic_sync_sb_inodes(super, wbc);
-+
-+ /* flush goes here */
-+ wbc->nr_to_write = to_write;
-+ reiser4_writeout(super, wbc);
-+
-+ /* avoid recursive calls to ->sync_inodes */
-+ context_set_commit_async(ctx);
-+ reiser4_exit_context(ctx);
-+}
-+
-+/**
-+ * reiser4_show_options - show_options of super operations
-+ * @m: file where to write information
-+ * @mnt: mount structure
-+ *
-+ * Makes reiser4 mount options visible in /proc/mounts.
-+ */
-+static int reiser4_show_options(struct seq_file *m, struct vfsmount *mnt)
-+{
-+ struct super_block *super;
-+ reiser4_super_info_data *sbinfo;
-+
-+ super = mnt->mnt_sb;
-+ sbinfo = get_super_private(super);
-+
-+ seq_printf(m, ",atom_max_size=0x%x", sbinfo->tmgr.atom_max_size);
-+ seq_printf(m, ",atom_max_age=0x%x", sbinfo->tmgr.atom_max_age);
-+ seq_printf(m, ",atom_min_size=0x%x", sbinfo->tmgr.atom_min_size);
-+ seq_printf(m, ",atom_max_flushers=0x%x",
-+ sbinfo->tmgr.atom_max_flushers);
-+ seq_printf(m, ",cbk_cache_slots=0x%x",
-+ sbinfo->tree.cbk_cache.nr_slots);
-+
-+ return 0;
-+}
-+
-+struct super_operations reiser4_super_operations = {
-+ .alloc_inode = reiser4_alloc_inode,
-+ .destroy_inode = reiser4_destroy_inode,
-+ .dirty_inode = reiser4_dirty_inode,
-+ .delete_inode = reiser4_delete_inode,
-+ .put_super = reiser4_put_super,
-+ .write_super = reiser4_write_super,
-+ .statfs = reiser4_statfs,
-+ .clear_inode = reiser4_clear_inode,
-+ .sync_inodes = reiser4_sync_inodes,
-+ .show_options = reiser4_show_options
-+};
-+
-+/**
-+ * fill_super - initialize super block on mount
-+ * @super: super block to fill
-+ * @data: reiser4 specific mount option
-+ * @silent: passed through to reiser4_init_read_super()
-+ *
-+ * This is to be called by reiser4_get_sb. Mounts filesystem.
-+ */
-+static int fill_super(struct super_block *super, void *data, int silent)
-+{
-+ reiser4_context ctx;
-+ int result;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("zam-989", super != NULL);
-+
-+ super->s_op = NULL;
-+ init_stack_context(&ctx, super);
-+
-+ /* allocate reiser4 specific super block */
-+ if ((result = reiser4_init_fs_info(super)) != 0)
-+ goto failed_init_sinfo;
-+
-+ sbinfo = get_super_private(super);
-+ /* initialize various reiser4 parameters, parse mount options */
-+ if ((result = reiser4_init_super_data(super, data)) != 0)
-+ goto failed_init_super_data;
-+
-+ /* read reiser4 master super block, initialize disk format plugin */
-+ if ((result = reiser4_init_read_super(super, silent)) != 0)
-+ goto failed_init_read_super;
-+
-+ /* initialize transaction manager */
-+ reiser4_init_txnmgr(&sbinfo->tmgr);
-+
-+ /* initialize ktxnmgrd context and start kernel thread ktxnmgrd */
-+ if ((result = reiser4_init_ktxnmgrd(super)) != 0)
-+ goto failed_init_ktxnmgrd;
-+
-+ /* initialize entd context and start kernel thread entd */
-+ if ((result = reiser4_init_entd(super)) != 0)
-+ goto failed_init_entd;
-+
-+ /* initialize address spaces for formatted nodes and bitmaps */
-+ if ((result = reiser4_init_formatted_fake(super)) != 0)
-+ goto failed_init_formatted_fake;
-+
-+ /* initialize disk format plugin */
-+ if ((result = get_super_private(super)->df_plug->init_format(super, data)) != 0 )
-+ goto failed_init_disk_format;
-+
-+ /*
-+ * There are some 'committed' versions of reiser4 super block counters,
-+ * which correspond to reiser4 on-disk state. These counters are
-+ * initialized here
-+ */
-+ sbinfo->blocks_free_committed = sbinfo->blocks_free;
-+ sbinfo->nr_files_committed = oids_used(super);
-+
-+ /* get inode of root directory */
-+ if ((result = reiser4_init_root_inode(super)) != 0)
-+ goto failed_init_root_inode;
-+
-+ if ((result = get_super_private(super)->df_plug->version_update(super)) != 0 )
-+ goto failed_update_format_version;
-+
-+ process_safelinks(super);
-+ reiser4_exit_context(&ctx);
-+
-+ sbinfo->debugfs_root = debugfs_create_dir(super->s_id,
-+ reiser4_debugfs_root);
-+ if (sbinfo->debugfs_root) {
-+ sbinfo->tmgr.debugfs_atom_count =
-+ debugfs_create_u32("atom_count", S_IFREG|S_IRUSR,
-+ sbinfo->debugfs_root,
-+ &sbinfo->tmgr.atom_count);
-+ sbinfo->tmgr.debugfs_id_count =
-+ debugfs_create_u32("id_count", S_IFREG|S_IRUSR,
-+ sbinfo->debugfs_root,
-+ &sbinfo->tmgr.id_count);
-+ }
-+ return 0;
-+
-+ failed_update_format_version:
-+ failed_init_root_inode:
-+ if (sbinfo->df_plug->release)
-+ sbinfo->df_plug->release(super);
-+ failed_init_disk_format:
-+ reiser4_done_formatted_fake(super);
-+ failed_init_formatted_fake:
-+ reiser4_done_entd(super);
-+ failed_init_entd:
-+ reiser4_done_ktxnmgrd(super);
-+ failed_init_ktxnmgrd:
-+ reiser4_done_txnmgr(&sbinfo->tmgr);
-+ failed_init_read_super:
-+ failed_init_super_data:
-+ reiser4_done_fs_info(super);
-+ failed_init_sinfo:
-+ reiser4_exit_context(&ctx);
-+ return result;
-+}
-+
-+/**
-+ * reiser4_get_sb - get_sb of file_system_type operations
-+ * @fs_type:
-+ * @flags: mount flags MS_RDONLY, MS_VERBOSE, etc
-+ * @dev_name: block device file name
-+ * @data: specific mount options
-+ *
-+ * Reiser4 mount entry.
-+ */
-+static int reiser4_get_sb(struct file_system_type *fs_type, int flags,
-+ const char *dev_name, void *data, struct vfsmount *mnt)
-+{
-+ return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
-+}
-+
-+/* structure describing the reiser4 filesystem implementation */
-+static struct file_system_type reiser4_fs_type = {
-+ .owner = THIS_MODULE,
-+ .name = "reiser4",
-+ .fs_flags = FS_REQUIRES_DEV,
-+ .get_sb = reiser4_get_sb,
-+ .kill_sb = kill_block_super,
-+ .next = NULL
-+};
-+
-+void destroy_reiser4_cache(struct kmem_cache **cachep)
-+{
-+ BUG_ON(*cachep == NULL);
-+ kmem_cache_destroy(*cachep);
-+ *cachep = NULL;
-+}
-+
-+/**
-+ * init_reiser4 - reiser4 initialization entry point
-+ *
-+ * Initializes reiser4 slabs, registers reiser4 filesystem type. It is called
-+ * on kernel initialization or during reiser4 module load.
-+ */
-+static int __init init_reiser4(void)
-+{
-+ int result;
-+
-+ printk(KERN_INFO
-+ "Loading Reiser4. "
-+ "See www.namesys.com for a description of Reiser4.\n");
-+
-+ /* initialize slab cache of inodes */
-+ if ((result = init_inodes()) != 0)
-+ goto failed_inode_cache;
-+
-+ /* initialize cache of znodes */
-+ if ((result = init_znodes()) != 0)
-+ goto failed_init_znodes;
-+
-+ /* initialize all plugins */
-+ if ((result = init_plugins()) != 0)
-+ goto failed_init_plugins;
-+
-+ /* initialize cache of plugin_set-s and plugin_set's hash table */
-+ if ((result = init_plugin_set()) != 0)
-+ goto failed_init_plugin_set;
-+
-+ /* initialize caches of txn_atom-s and txn_handle-s */
-+ if ((result = init_txnmgr_static()) != 0)
-+ goto failed_init_txnmgr_static;
-+
-+ /* initialize cache of jnodes */
-+ if ((result = init_jnodes()) != 0)
-+ goto failed_init_jnodes;
-+
-+ /* initialize cache of flush queues */
-+ if ((result = reiser4_init_fqs()) != 0)
-+ goto failed_init_fqs;
-+
-+ /* initialize cache of structures attached to dentry->d_fsdata */
-+ if ((result = reiser4_init_dentry_fsdata()) != 0)
-+ goto failed_init_dentry_fsdata;
-+
-+ /* initialize cache of structures attached to file->private_data */
-+ if ((result = reiser4_init_file_fsdata()) != 0)
-+ goto failed_init_file_fsdata;
-+
-+ /*
-+ * initialize cache of d_cursors. See plugin/file_ops_readdir.c for
-+ * more details
-+ */
-+ if ((result = reiser4_init_d_cursor()) != 0)
-+ goto failed_init_d_cursor;
-+
-+ if ((result = register_filesystem(&reiser4_fs_type)) == 0) {
-+ reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL);
-+ return 0;
-+ }
-+
-+ reiser4_done_d_cursor();
-+ failed_init_d_cursor:
-+ reiser4_done_file_fsdata();
-+ failed_init_file_fsdata:
-+ reiser4_done_dentry_fsdata();
-+ failed_init_dentry_fsdata:
-+ reiser4_done_fqs();
-+ failed_init_fqs:
-+ done_jnodes();
-+ failed_init_jnodes:
-+ done_txnmgr_static();
-+ failed_init_txnmgr_static:
-+ done_plugin_set();
-+ failed_init_plugin_set:
-+ failed_init_plugins:
-+ done_znodes();
-+ failed_init_znodes:
-+ done_inodes();
-+ failed_inode_cache:
-+ return result;
-+}
-+
-+/**
-+ * done_reiser4 - reiser4 exit entry point
-+ *
-+ * Unregister reiser4 filesystem type, deletes caches. It is called on shutdown
-+ * or at module unload.
-+ */
-+static void __exit done_reiser4(void)
-+{
-+ int result;
-+
-+ debugfs_remove(reiser4_debugfs_root);
-+ result = unregister_filesystem(&reiser4_fs_type);
-+ BUG_ON(result != 0);
-+ reiser4_done_d_cursor();
-+ reiser4_done_file_fsdata();
-+ reiser4_done_dentry_fsdata();
-+ reiser4_done_fqs();
-+ done_jnodes();
-+ done_txnmgr_static();
-+ done_plugin_set();
-+ done_znodes();
-+ destroy_reiser4_cache(&inode_cache);
-+}
-+
-+module_init(init_reiser4);
-+module_exit(done_reiser4);
-+
-+MODULE_DESCRIPTION("Reiser4 filesystem");
-+MODULE_AUTHOR("Hans Reiser <Reiser@Namesys.COM>");
-+
-+MODULE_LICENSE("GPL");
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/tap.c linux-2.6.24/fs/reiser4/tap.c
---- linux-2.6.24.orig/fs/reiser4/tap.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tap.c 2008-01-25 11:39:07.092247874 +0300
-@@ -0,0 +1,377 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ Tree Access Pointer (tap).
-+
-+ tap is data structure combining coord and lock handle (mostly). It is
-+ useful when one has to scan tree nodes (for example, in readdir, or flush),
-+ for tap functions allow to move tap in either direction transparently
-+ crossing unit/item/node borders.
-+
-+ Tap doesn't provide automatic synchronization of its fields as it is
-+ supposed to be per-thread object.
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "coord.h"
-+#include "tree.h"
-+#include "context.h"
-+#include "tap.h"
-+#include "znode.h"
-+#include "tree_walk.h"
-+
-+#if REISER4_DEBUG
-+static int tap_invariant(const tap_t * tap);
-+static void tap_check(const tap_t * tap);
-+#else
-+#define tap_check(tap) noop
-+#endif
-+
-+/** load node tap is pointing to, if not loaded already */
-+int reiser4_tap_load(tap_t * tap)
-+{
-+ tap_check(tap);
-+ if (tap->loaded == 0) {
-+ int result;
-+
-+ result = zload_ra(tap->coord->node, &tap->ra_info);
-+ if (result != 0)
-+ return result;
-+ coord_clear_iplug(tap->coord);
-+ }
-+ ++tap->loaded;
-+ tap_check(tap);
-+ return 0;
-+}
-+
-+/** release node tap is pointing to. Dual to tap_load() */
-+void reiser4_tap_relse(tap_t * tap)
-+{
-+ tap_check(tap);
-+ if (tap->loaded > 0) {
-+ --tap->loaded;
-+ if (tap->loaded == 0) {
-+ zrelse(tap->coord->node);
-+ }
-+ }
-+ tap_check(tap);
-+}
-+
-+/**
-+ * init tap to consist of @coord and @lh. Locks on nodes will be acquired with
-+ * @mode
-+ */
-+void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh,
-+ znode_lock_mode mode)
-+{
-+ tap->coord = coord;
-+ tap->lh = lh;
-+ tap->mode = mode;
-+ tap->loaded = 0;
-+ INIT_LIST_HEAD(&tap->linkage);
-+ reiser4_init_ra_info(&tap->ra_info);
-+}
-+
-+/** add @tap to the per-thread list of all taps */
-+void reiser4_tap_monitor(tap_t * tap)
-+{
-+ assert("nikita-2623", tap != NULL);
-+ tap_check(tap);
-+ list_add(&tap->linkage, reiser4_taps_list());
-+ tap_check(tap);
-+}
-+
-+/* duplicate @src into @dst. Copy lock handle. @dst is not initially
-+ * loaded. */
-+void reiser4_tap_copy(tap_t * dst, tap_t * src)
-+{
-+ assert("nikita-3193", src != NULL);
-+ assert("nikita-3194", dst != NULL);
-+
-+ *dst->coord = *src->coord;
-+ if (src->lh->node)
-+ copy_lh(dst->lh, src->lh);
-+ dst->mode = src->mode;
-+ dst->loaded = 0;
-+ INIT_LIST_HEAD(&dst->linkage);
-+ dst->ra_info = src->ra_info;
-+}
-+
-+/** finish with @tap */
-+void reiser4_tap_done(tap_t * tap)
-+{
-+ assert("nikita-2565", tap != NULL);
-+ tap_check(tap);
-+ if (tap->loaded > 0)
-+ zrelse(tap->coord->node);
-+ done_lh(tap->lh);
-+ tap->loaded = 0;
-+ list_del_init(&tap->linkage);
-+ tap->coord->node = NULL;
-+}
-+
-+/**
-+ * move @tap to the new node, locked with @target. Load @target, if @tap was
-+ * already loaded.
-+ */
-+int reiser4_tap_move(tap_t * tap, lock_handle * target)
-+{
-+ int result = 0;
-+
-+ assert("nikita-2567", tap != NULL);
-+ assert("nikita-2568", target != NULL);
-+ assert("nikita-2570", target->node != NULL);
-+ assert("nikita-2569", tap->coord->node == tap->lh->node);
-+
-+ tap_check(tap);
-+ if (tap->loaded > 0)
-+ result = zload_ra(target->node, &tap->ra_info);
-+
-+ if (result == 0) {
-+ if (tap->loaded > 0)
-+ zrelse(tap->coord->node);
-+ done_lh(tap->lh);
-+ copy_lh(tap->lh, target);
-+ tap->coord->node = target->node;
-+ coord_clear_iplug(tap->coord);
-+ }
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to @target. Acquire lock on @target, if @tap was already
-+ * loaded.
-+ */
-+static int tap_to(tap_t * tap, znode * target)
-+{
-+ int result;
-+
-+ assert("nikita-2624", tap != NULL);
-+ assert("nikita-2625", target != NULL);
-+
-+ tap_check(tap);
-+ result = 0;
-+ if (tap->coord->node != target) {
-+ lock_handle here;
-+
-+ init_lh(&here);
-+ result = longterm_lock_znode(&here, target,
-+ tap->mode, ZNODE_LOCK_HIPRI);
-+ if (result == 0) {
-+ result = reiser4_tap_move(tap, &here);
-+ done_lh(&here);
-+ }
-+ }
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to given @target, loading and locking @target->node if
-+ * necessary
-+ */
-+int tap_to_coord(tap_t * tap, coord_t * target)
-+{
-+ int result;
-+
-+ tap_check(tap);
-+ result = tap_to(tap, target->node);
-+ if (result == 0)
-+ coord_dup(tap->coord, target);
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/** return list of all taps */
-+struct list_head *reiser4_taps_list(void)
-+{
-+ return &get_current_context()->taps;
-+}
-+
-+/** helper function for go_{next,prev}_{item,unit,node}() */
-+int go_dir_el(tap_t * tap, sideof dir, int units_p)
-+{
-+ coord_t dup;
-+ coord_t *coord;
-+ int result;
-+
-+ int (*coord_dir) (coord_t *);
-+ int (*get_dir_neighbor) (lock_handle *, znode *, int, int);
-+ void (*coord_init) (coord_t *, const znode *);
-+ ON_DEBUG(int (*coord_check) (const coord_t *));
-+
-+ assert("nikita-2556", tap != NULL);
-+ assert("nikita-2557", tap->coord != NULL);
-+ assert("nikita-2558", tap->lh != NULL);
-+ assert("nikita-2559", tap->coord->node != NULL);
-+
-+ tap_check(tap);
-+ if (dir == LEFT_SIDE) {
-+ coord_dir = units_p ? coord_prev_unit : coord_prev_item;
-+ get_dir_neighbor = reiser4_get_left_neighbor;
-+ coord_init = coord_init_last_unit;
-+ } else {
-+ coord_dir = units_p ? coord_next_unit : coord_next_item;
-+ get_dir_neighbor = reiser4_get_right_neighbor;
-+ coord_init = coord_init_first_unit;
-+ }
-+ ON_DEBUG(coord_check =
-+ units_p ? coord_is_existing_unit : coord_is_existing_item);
-+ assert("nikita-2560", coord_check(tap->coord));
-+
-+ coord = tap->coord;
-+ coord_dup(&dup, coord);
-+ if (coord_dir(&dup) != 0) {
-+ do {
-+ /* move to the left neighboring node */
-+ lock_handle dup;
-+
-+ init_lh(&dup);
-+ result =
-+ get_dir_neighbor(&dup, coord->node, (int)tap->mode,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result == 0) {
-+ result = reiser4_tap_move(tap, &dup);
-+ if (result == 0)
-+ coord_init(tap->coord, dup.node);
-+ done_lh(&dup);
-+ }
-+ /* skip empty nodes */
-+ } while ((result == 0) && node_is_empty(coord->node));
-+ } else {
-+ result = 0;
-+ coord_dup(coord, &dup);
-+ }
-+ assert("nikita-2564", ergo(!result, coord_check(tap->coord)));
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/**
-+ * move @tap to the next unit, transparently crossing item and node
-+ * boundaries
-+ */
-+int go_next_unit(tap_t * tap)
-+{
-+ return go_dir_el(tap, RIGHT_SIDE, 1);
-+}
-+
-+/**
-+ * move @tap to the previous unit, transparently crossing item and node
-+ * boundaries
-+ */
-+int go_prev_unit(tap_t * tap)
-+{
-+ return go_dir_el(tap, LEFT_SIDE, 1);
-+}
-+
-+/**
-+ * @shift times apply @actor to the @tap. This is used to move @tap by
-+ * @shift units (or items, or nodes) in either direction.
-+ */
-+static int rewind_to(tap_t * tap, go_actor_t actor, int shift)
-+{
-+ int result;
-+
-+ assert("nikita-2555", shift >= 0);
-+ assert("nikita-2562", tap->coord->node == tap->lh->node);
-+
-+ tap_check(tap);
-+ result = reiser4_tap_load(tap);
-+ if (result != 0)
-+ return result;
-+
-+ for (; shift > 0; --shift) {
-+ result = actor(tap);
-+ assert("nikita-2563", tap->coord->node == tap->lh->node);
-+ if (result != 0)
-+ break;
-+ }
-+ reiser4_tap_relse(tap);
-+ tap_check(tap);
-+ return result;
-+}
-+
-+/** move @tap @shift units rightward */
-+int rewind_right(tap_t * tap, int shift)
-+{
-+ return rewind_to(tap, go_next_unit, shift);
-+}
-+
-+/** move @tap @shift units leftward */
-+int rewind_left(tap_t * tap, int shift)
-+{
-+ return rewind_to(tap, go_prev_unit, shift);
-+}
-+
-+#if REISER4_DEBUG
-+/** debugging function: print @tap content in human readable form */
-+static void print_tap(const char *prefix, const tap_t * tap)
-+{
-+ if (tap == NULL) {
-+ printk("%s: null tap\n", prefix);
-+ return;
-+ }
-+ printk("%s: loaded: %i, in-list: %i, node: %p, mode: %s\n", prefix,
-+ tap->loaded, (&tap->linkage == tap->linkage.next &&
-+ &tap->linkage == tap->linkage.prev),
-+ tap->lh->node,
-+ lock_mode_name(tap->mode));
-+ print_coord("\tcoord", tap->coord, 0);
-+}
-+
-+/** check [tap-sane] invariant */
-+static int tap_invariant(const tap_t * tap)
-+{
-+ /* [tap-sane] invariant */
-+
-+ if (tap == NULL)
-+ return 1;
-+ /* tap->mode is one of
-+ *
-+ * {ZNODE_NO_LOCK, ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}, and
-+ */
-+ if (tap->mode != ZNODE_NO_LOCK &&
-+ tap->mode != ZNODE_READ_LOCK && tap->mode != ZNODE_WRITE_LOCK)
-+ return 2;
-+ /* tap->coord != NULL, and */
-+ if (tap->coord == NULL)
-+ return 3;
-+ /* tap->lh != NULL, and */
-+ if (tap->lh == NULL)
-+ return 4;
-+ /* tap->loaded > 0 => znode_is_loaded(tap->coord->node), and */
-+ if (!ergo(tap->loaded, znode_is_loaded(tap->coord->node)))
-+ return 5;
-+ /* tap->coord->node == tap->lh->node if tap->lh->node is not 0 */
-+ if (tap->lh->node != NULL && tap->coord->node != tap->lh->node)
-+ return 6;
-+ return 0;
-+}
-+
-+/** debugging function: check internal @tap consistency */
-+static void tap_check(const tap_t * tap)
-+{
-+ int result;
-+
-+ result = tap_invariant(tap);
-+ if (result != 0) {
-+ print_tap("broken", tap);
-+ reiser4_panic("nikita-2831", "tap broken: %i\n", result);
-+ }
-+}
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tap.h linux-2.6.24/fs/reiser4/tap.h
---- linux-2.6.24.orig/fs/reiser4/tap.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tap.h 2008-01-25 11:39:07.092247874 +0300
-@@ -0,0 +1,70 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* Tree Access Pointers. See tap.c for more details. */
-+
-+#if !defined( __REISER4_TAP_H__ )
-+#define __REISER4_TAP_H__
-+
-+#include "forward.h"
-+#include "readahead.h"
-+
-+/**
-+ tree_access_pointer aka tap. Data structure combining coord_t and lock
-+ handle.
-+ Invariants involving this data-type, see doc/lock-ordering for details:
-+
-+ [tap-sane]
-+ */
-+struct tree_access_pointer {
-+ /* coord tap is at */
-+ coord_t *coord;
-+ /* lock handle on ->coord->node */
-+ lock_handle *lh;
-+ /* mode of lock acquired by this tap */
-+ znode_lock_mode mode;
-+ /* incremented by reiser4_tap_load().
-+ Decremented by reiser4_tap_relse(). */
-+ int loaded;
-+ /* list of taps */
-+ struct list_head linkage;
-+ /* read-ahead hint */
-+ ra_info_t ra_info;
-+};
-+
-+typedef int (*go_actor_t) (tap_t * tap);
-+
-+extern int reiser4_tap_load(tap_t * tap);
-+extern void reiser4_tap_relse(tap_t * tap);
-+extern void reiser4_tap_init(tap_t * tap, coord_t * coord, lock_handle * lh,
-+ znode_lock_mode mode);
-+extern void reiser4_tap_monitor(tap_t * tap);
-+extern void reiser4_tap_copy(tap_t * dst, tap_t * src);
-+extern void reiser4_tap_done(tap_t * tap);
-+extern int reiser4_tap_move(tap_t * tap, lock_handle * target);
-+extern int tap_to_coord(tap_t * tap, coord_t * target);
-+
-+extern int go_dir_el(tap_t * tap, sideof dir, int units_p);
-+extern int go_next_unit(tap_t * tap);
-+extern int go_prev_unit(tap_t * tap);
-+extern int rewind_right(tap_t * tap, int shift);
-+extern int rewind_left(tap_t * tap, int shift);
-+
-+extern struct list_head *reiser4_taps_list(void);
-+
-+#define for_all_taps(tap) \
-+ for (tap = list_entry(reiser4_taps_list()->next, tap_t, linkage); \
-+ reiser4_taps_list() != &tap->linkage; \
-+ tap = list_entry(tap->linkage.next, tap_t, linkage))
-+
-+/* __REISER4_TAP_H__ */
-+#endif
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree.c linux-2.6.24/fs/reiser4/tree.c
---- linux-2.6.24.orig/fs/reiser4/tree.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree.c 2008-01-25 11:39:07.096248905 +0300
-@@ -0,0 +1,1876 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * KEYS IN A TREE.
-+ *
-+ * The tree consists of nodes located on the disk. Node in the tree is either
-+ * formatted or unformatted. Formatted node is one that has structure
-+ * understood by the tree balancing and traversal code. Formatted nodes are
-+ * further classified into leaf and internal nodes. Latter distinctions is
-+ * (almost) of only historical importance: general structure of leaves and
-+ * internal nodes is the same in Reiser4. Unformatted nodes contain raw data
-+ * that are part of bodies of ordinary files and attributes.
-+ *
-+ * Each node in the tree spawns some interval in the key space. Key ranges for
-+ * all nodes in the tree are disjoint. Actually, this only holds in some weak
-+ * sense, because of the non-unique keys: intersection of key ranges for
-+ * different nodes is either empty, or consists of exactly one key.
-+ *
-+ * Formatted node consists of a sequence of items. Each item spawns some
-+ * interval in key space. Key ranges for all items in a tree are disjoint,
-+ * modulo non-unique keys again. Items within nodes are ordered in the key
-+ * order of the smallest key in a item.
-+ *
-+ * Particular type of item can be further split into units. Unit is piece of
-+ * item that can be cut from item and moved into another item of the same
-+ * time. Units are used by balancing code to repack data during balancing.
-+ *
-+ * Unit can be further split into smaller entities (for example, extent unit
-+ * represents several pages, and it is natural for extent code to operate on
-+ * particular pages and even bytes within one unit), but this is of no
-+ * relevance to the generic balancing and lookup code.
-+ *
-+ * Although item is said to "spawn" range or interval of keys, it is not
-+ * necessary that item contains piece of data addressable by each and every
-+ * key in this range. For example, compound directory item, consisting of
-+ * units corresponding to directory entries and keyed by hashes of file names,
-+ * looks more as having "discrete spectrum": only some disjoint keys inside
-+ * range occupied by this item really address data.
-+ *
-+ * No than less, each item always has well-defined least (minimal) key, that
-+ * is recorded in item header, stored in the node this item is in. Also, item
-+ * plugin can optionally define method ->max_key_inside() returning maximal
-+ * key that can _possibly_ be located within this item. This method is used
-+ * (mainly) to determine when given piece of data should be merged into
-+ * existing item, in stead of creating new one. Because of this, even though
-+ * ->max_key_inside() can be larger that any key actually located in the item,
-+ * intervals
-+ *
-+ * [ reiser4_min_key( item ), ->max_key_inside( item ) ]
-+ *
-+ * are still disjoint for all items within the _same_ node.
-+ *
-+ * In memory node is represented by znode. It plays several roles:
-+ *
-+ * . something locks are taken on
-+ *
-+ * . something tracked by transaction manager (this is going to change)
-+ *
-+ * . something used to access node data
-+ *
-+ * . something used to maintain tree structure in memory: sibling and
-+ * parental linkage.
-+ *
-+ * . something used to organize nodes into "slums"
-+ *
-+ * More on znodes see in znode.[ch]
-+ *
-+ * DELIMITING KEYS
-+ *
-+ * To simplify balancing, allow some flexibility in locking and speed up
-+ * important coord cache optimization, we keep delimiting keys of nodes in
-+ * memory. Depending on disk format (implemented by appropriate node plugin)
-+ * node on disk can record both left and right delimiting key, only one of
-+ * them, or none. Still, our balancing and tree traversal code keep both
-+ * delimiting keys for a node that is in memory stored in the znode. When
-+ * node is first brought into memory during tree traversal, its left
-+ * delimiting key is taken from its parent, and its right delimiting key is
-+ * either next key in its parent, or is right delimiting key of parent if
-+ * node is the rightmost child of parent.
-+ *
-+ * Physical consistency of delimiting key is protected by special dk
-+ * read-write lock. That is, delimiting keys can only be inspected or
-+ * modified under this lock. But dk lock is only sufficient for fast
-+ * "pessimistic" check, because to simplify code and to decrease lock
-+ * contention, balancing (carry) only updates delimiting keys right before
-+ * unlocking all locked nodes on the given tree level. For example,
-+ * coord-by-key cache scans LRU list of recently accessed znodes. For each
-+ * node it first does fast check under dk spin lock. If key looked for is
-+ * not between delimiting keys for this node, next node is inspected and so
-+ * on. If key is inside of the key range, long term lock is taken on node
-+ * and key range is rechecked.
-+ *
-+ * COORDINATES
-+ *
-+ * To find something in the tree, you supply a key, and the key is resolved
-+ * by coord_by_key() into a coord (coordinate) that is valid as long as the
-+ * node the coord points to remains locked. As mentioned above trees
-+ * consist of nodes that consist of items that consist of units. A unit is
-+ * the smallest and indivisible piece of tree as far as balancing and tree
-+ * search are concerned. Each node, item, and unit can be addressed by
-+ * giving its level in the tree and the key occupied by this entity. A node
-+ * knows what the key ranges are of the items within it, and how to find its
-+ * items and invoke their item handlers, but it does not know how to access
-+ * individual units within its items except through the item handlers.
-+ * coord is a structure containing a pointer to the node, the ordinal number
-+ * of the item within this node (a sort of item offset), and the ordinal
-+ * number of the unit within this item.
-+ *
-+ * TREE LOOKUP
-+ *
-+ * There are two types of access to the tree: lookup and modification.
-+ *
-+ * Lookup is a search for the key in the tree. Search can look for either
-+ * exactly the key given to it, or for the largest key that is not greater
-+ * than the key given to it. This distinction is determined by "bias"
-+ * parameter of search routine (coord_by_key()). coord_by_key() either
-+ * returns error (key is not in the tree, or some kind of external error
-+ * occurred), or successfully resolves key into coord.
-+ *
-+ * This resolution is done by traversing tree top-to-bottom from root level
-+ * to the desired level. On levels above twig level (level one above the
-+ * leaf level) nodes consist exclusively of internal items. Internal item is
-+ * nothing more than pointer to the tree node on the child level. On twig
-+ * level nodes consist of internal items intermixed with extent
-+ * items. Internal items form normal search tree structure used by traversal
-+ * to descent through the tree.
-+ *
-+ * TREE LOOKUP OPTIMIZATIONS
-+ *
-+ * Tree lookup described above is expensive even if all nodes traversed are
-+ * already in the memory: for each node binary search within it has to be
-+ * performed and binary searches are CPU consuming and tend to destroy CPU
-+ * caches.
-+ *
-+ * Several optimizations are used to work around this:
-+ *
-+ * . cbk_cache (look-aside cache for tree traversals, see search.c for
-+ * details)
-+ *
-+ * . seals (see seal.[ch])
-+ *
-+ * . vroot (see search.c)
-+ *
-+ * General search-by-key is layered thusly:
-+ *
-+ * [check seal, if any] --ok--> done
-+ * |
-+ * failed
-+ * |
-+ * V
-+ * [vroot defined] --no--> node = tree_root
-+ * | |
-+ * yes |
-+ * | |
-+ * V |
-+ * node = vroot |
-+ * | |
-+ * | |
-+ * | |
-+ * V V
-+ * [check cbk_cache for key] --ok--> done
-+ * |
-+ * failed
-+ * |
-+ * V
-+ * [start tree traversal from node]
-+ *
-+ */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/item/static_stat.h"
-+#include "plugin/item/item.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "carry.h"
-+#include "carry_ops.h"
-+#include "tap.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "page_cache.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "inode.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+
-+/* Disk address (block number) never ever used for any real tree node. This is
-+ used as block number of "uber" znode.
-+
-+ Invalid block addresses are 0 by tradition.
-+
-+*/
-+const reiser4_block_nr UBER_TREE_ADDR = 0ull;
-+
-+#define CUT_TREE_MIN_ITERATIONS 64
-+
-+static int find_child_by_addr(znode * parent, znode * child, coord_t * result);
-+
-+/* return node plugin of coord->node */
-+node_plugin *node_plugin_by_coord(const coord_t * coord)
-+{
-+ assert("vs-1", coord != NULL);
-+ assert("vs-2", coord->node != NULL);
-+
-+ return coord->node->nplug;
-+}
-+
-+/* insert item into tree. Fields of @coord are updated so that they can be
-+ * used by consequent insert operation. */
-+insert_result insert_by_key(reiser4_tree * tree /* tree to insert new item
-+ * into */ ,
-+ const reiser4_key * key /* key of new item */ ,
-+ reiser4_item_data * data /* parameters for item
-+ * creation */ ,
-+ coord_t * coord /* resulting insertion coord */ ,
-+ lock_handle * lh /* resulting lock
-+ * handle */ ,
-+ tree_level stop_level /** level where to insert */ ,
-+ __u32 flags /* insertion flags */ )
-+{
-+ int result;
-+
-+ assert("nikita-358", tree != NULL);
-+ assert("nikita-360", coord != NULL);
-+
-+ result = coord_by_key(tree, key, coord, lh, ZNODE_WRITE_LOCK,
-+ FIND_EXACT, stop_level, stop_level,
-+ flags | CBK_FOR_INSERT, NULL /*ra_info */ );
-+ switch (result) {
-+ default:
-+ break;
-+ case CBK_COORD_FOUND:
-+ result = IBK_ALREADY_EXISTS;
-+ break;
-+ case CBK_COORD_NOTFOUND:
-+ assert("nikita-2017", coord->node != NULL);
-+ result = insert_by_coord(coord, data, key, lh, 0 /*flags */ );
-+ break;
-+ }
-+ return result;
-+}
-+
-+/* insert item by calling carry. Helper function called if short-cut
-+ insertion failed */
-+static insert_result insert_with_carry_by_coord(coord_t * coord, /* coord where to insert */
-+ lock_handle * lh, /* lock handle of insertion
-+ * node */
-+ reiser4_item_data * data, /* parameters of new
-+ * item */
-+ const reiser4_key * key, /* key of new item */
-+ carry_opcode cop, /* carry operation to perform */
-+ cop_insert_flag flags
-+ /* carry flags */ )
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_insert_data *cdata;
-+ carry_op *op;
-+
-+ assert("umka-314", coord != NULL);
-+
-+ /* allocate carry_pool and 3 carry_level-s */
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*cdata));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, cop, coord->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+ cdata = (carry_insert_data *) (lowest_level + 3);
-+ cdata->coord = coord;
-+ cdata->data = data;
-+ cdata->key = key;
-+ op->u.insert.d = cdata;
-+ if (flags == 0)
-+ flags = znode_get_tree(coord->node)->carry.insert_flags;
-+ op->u.insert.flags = flags;
-+ op->u.insert.type = COPT_ITEM_DATA;
-+ op->u.insert.child = NULL;
-+ if (lh != NULL) {
-+ assert("nikita-3245", lh->node == coord->node);
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+ }
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* form carry queue to perform paste of @data with @key at @coord, and launch
-+ its execution by calling carry().
-+
-+ Instruct carry to update @lh it after balancing insertion coord moves into
-+ different block.
-+
-+*/
-+static int paste_with_carry(coord_t * coord, /* coord of paste */
-+ lock_handle * lh, /* lock handle of node
-+ * where item is
-+ * pasted */
-+ reiser4_item_data * data, /* parameters of new
-+ * item */
-+ const reiser4_key * key, /* key of new item */
-+ unsigned flags /* paste flags */ )
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_insert_data *cdata;
-+ carry_op *op;
-+
-+ assert("umka-315", coord != NULL);
-+ assert("umka-316", key != NULL);
-+
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*cdata));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, COP_PASTE, coord->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+ cdata = (carry_insert_data *) (lowest_level + 3);
-+ cdata->coord = coord;
-+ cdata->data = data;
-+ cdata->key = key;
-+ op->u.paste.d = cdata;
-+ if (flags == 0)
-+ flags = znode_get_tree(coord->node)->carry.paste_flags;
-+ op->u.paste.flags = flags;
-+ op->u.paste.type = COPT_ITEM_DATA;
-+ if (lh != NULL) {
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+ }
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* insert item at the given coord.
-+
-+ First try to skip carry by directly calling ->create_item() method of node
-+ plugin. If this is impossible (there is not enough free space in the node,
-+ or leftmost item in the node is created), call insert_with_carry_by_coord()
-+ that will do full carry().
-+
-+*/
-+insert_result insert_by_coord(coord_t * coord /* insertion point; the caller
-+						 * must hold the write lock on
-+						 * coord->node */ ,
-+			      reiser4_item_data * data /* item to insert */ ,
-+			      const reiser4_key * key /* key of the new item */ ,
-+			      lock_handle * lh /* handle of the write lock on
-+						* the node */ ,
-+			      __u32 flags /* COPI_* insertion flags */ )
-+{
-+	znode *target;
-+	unsigned needed;
-+	int use_fast_path;
-+	int rc;
-+
-+	assert("vs-247", coord != NULL);
-+	assert("vs-248", data != NULL);
-+	assert("vs-249", data->length >= 0);
-+	assert("nikita-1191", znode_is_write_locked(coord->node));
-+
-+	target = coord->node;
-+	coord_clear_iplug(coord);
-+
-+	/* every path below this point leaves through "out" so that the
-+	   zload() is always paired with a zrelse() */
-+	rc = zload(target);
-+	if (rc != 0)
-+		return rc;
-+
-+	needed = space_needed(target, NULL, data, 1);
-+
-+	if (needed > znode_free_space(target) &&
-+	    (flags & COPI_DONT_SHIFT_LEFT) &&
-+	    (flags & COPI_DONT_SHIFT_RIGHT) && (flags & COPI_DONT_ALLOCATE)) {
-+		/* The caller forbade shifting in both directions as well as
-+		   allocation, so only the free space of coord->node may be
-+		   used, and the new item does not fit into it.
-+
-+		   Currently this happens only while units of an extent item
-+		   are allocated and copied from a node to its left neighbor
-+		   during "squalloc"-ing. If @target (the left neighbor) lacks
-+		   free space, no shifting or allocation is attempted, because
-+		   we are squeezing and everything to the left of @target is
-+		   already tightly packed. */
-+		rc = -E_NODE_FULL;
-+		goto out;
-+	}
-+
-+	/* Insertion without carry() overhead is only possible if:
-+
-+	   - there is enough free space,
-+
-+	   - insertion is not into the leftmost position in the node
-+	   (otherwise the delimiting key in the parent would have to be
-+	   updated),
-+
-+	   - the node plugin provides ->fast_insert() and it agrees. */
-+	use_fast_path = 0;
-+	if (needed <= znode_free_space(target) &&
-+	    !coord_is_before_leftmost(coord)) {
-+		node_plugin *nplug = node_plugin_by_node(target);
-+
-+		use_fast_path = nplug->fast_insert != NULL &&
-+		    nplug->fast_insert(coord);
-+	}
-+
-+	if (use_fast_path) {
-+		rc = node_plugin_by_node(target)->create_item(coord, key,
-+							      data, NULL);
-+		znode_make_dirty(target);
-+	} else {
-+		/* otherwise do full-fledged carry(). */
-+		rc = insert_with_carry_by_coord(coord, lh, data, key,
-+						COP_INSERT, flags);
-+	}
-+out:
-+	zrelse(target);
-+	return rc;
-+}
-+
-+/* Insert an extent item through carry: @coord addresses a position on the
-+   leaf level, while the item described by @data goes to the twig level. */
-+insert_result
-+insert_extent_by_coord(coord_t * coord /* where to insert; coord->node has to
-+					* be write locked by the caller */ ,
-+		       reiser4_item_data * data /* data to be inserted */ ,
-+		       const reiser4_key * key /* key of the new item */ ,
-+		       lock_handle * lh /* handle of the write lock on the
-+					 * node */ )
-+{
-+	insert_result outcome;
-+
-+	assert("vs-405", coord != NULL);
-+	assert("vs-406", data != NULL);
-+	assert("vs-407", data->length > 0);
-+	assert("vs-408", znode_is_write_locked(coord->node));
-+	assert("vs-409", znode_get_level(coord->node) == LEAF_LEVEL);
-+
-+	/* no insertion flags are passed for extents */
-+	outcome = insert_with_carry_by_coord(coord, lh, data, key,
-+					     COP_EXTENT, 0);
-+	return outcome;
-+}
-+
-+/* Paste new data into the item at @coord.
-+
-+   The cheap route calls the ->paste() method of the item plugin directly and
-+   avoids carry. When that is not possible (not enough free space in the
-+   node, pasting into the leftmost position, or one of the plugins declines),
-+   paste_with_carry() is called to do a full carry().
-+
-+   Returns -E_NODE_FULL when the data does not fit and @flags forbid shifting
-+   left, shifting right and allocating; otherwise the result of ->paste() or
-+   of paste_with_carry().
-+*/
-+/* paste_into_item */
-+int insert_into_item(coord_t * coord /* coord of pasting */ ,
-+		     lock_handle * lh /* lock handle on node involved */ ,
-+		     const reiser4_key * key /* key of unit being pasted */ ,
-+		     reiser4_item_data * data /* parameters for new unit */ ,
-+		     unsigned flags /* insert/paste flags */ )
-+{
-+	item_plugin *iplug;
-+	node_plugin *nplug;
-+	int delta;
-+	int not_leftmost;
-+	int shortcut_ok;
-+	int rc;
-+
-+	assert("umka-317", coord != NULL);
-+	assert("umka-318", key != NULL);
-+
-+	iplug = item_plugin_by_coord(coord);
-+	nplug = node_plugin_by_coord(coord);
-+
-+	assert("nikita-1480", iplug == data->iplug);
-+
-+	delta = space_needed(coord->node, coord, data, 0);
-+
-+	/* only the free space of coord->node may be used, and the new data
-+	   does not fit into it */
-+	if (delta > (int)znode_free_space(coord->node) &&
-+	    (flags & COPI_DONT_SHIFT_LEFT) &&
-+	    (flags & COPI_DONT_SHIFT_RIGHT) && (flags & COPI_DONT_ALLOCATE))
-+		return -E_NODE_FULL;
-+
-+	/* pasting into the leftmost unit of a node would require updating
-+	   the delimiting key in the parent */
-+	not_leftmost = coord->item_pos != 0 ||
-+	    coord->unit_pos != 0 || coord->between == AFTER_UNIT;
-+
-+	/* The shortcut is taken only if there is enough free space, the
-+	   paste is not into the leftmost unit, ->unit_pos is not 0, and both
-+	   the node plugin and the item plugin provide ->fast_paste() and
-+	   agree. */
-+	shortcut_ok =
-+	    delta <= (int)znode_free_space(coord->node) &&
-+	    not_leftmost && coord->unit_pos != 0 &&
-+	    nplug->fast_paste != NULL && nplug->fast_paste(coord) &&
-+	    iplug->b.fast_paste != NULL && iplug->b.fast_paste(coord);
-+
-+	if (!shortcut_ok)
-+		/* otherwise do full-fledged carry(). */
-+		return paste_with_carry(coord, lh, data, key, flags);
-+
-+	/* the item is grown before the paste and shrunk after it */
-+	if (delta > 0)
-+		nplug->change_item_size(coord, delta);
-+	/* NOTE-NIKITA: huh? where @key is used? */
-+	rc = iplug->b.paste(coord, data, NULL);
-+	if (delta < 0)
-+		nplug->change_item_size(coord, delta);
-+	znode_make_dirty(coord->node);
-+	return rc;
-+}
-+
-+/* Resize the item at @coord: a negative @data->length shrinks the item by
-+   that many bytes through the node plugin, a positive one pastes new data
-+   via insert_into_item(). A length of 0 is not allowed. */
-+int reiser4_resize_item(coord_t * coord /* coord of item being resized */ ,
-+			reiser4_item_data * data /* parameters of resize */ ,
-+			reiser4_key * key /* key of new unit */ ,
-+			lock_handle * lh /* lock handle of node being
-+					  * modified */ ,
-+			cop_insert_flag flags /* carry flags */ )
-+{
-+	znode *target;
-+	int rc;
-+
-+	assert("nikita-362", coord != NULL);
-+	assert("nikita-363", data != NULL);
-+	assert("vs-245", data->length != 0);
-+
-+	target = coord->node;
-+	coord_clear_iplug(coord);
-+
-+	rc = zload(target);
-+	if (rc != 0)
-+		return rc;
-+
-+	if (data->length >= 0)
-+		rc = insert_into_item(coord, lh, key, data, flags);
-+	else
-+		rc = node_plugin_by_coord(coord)->shrink_item(coord,
-+							      -data->length);
-+
-+	zrelse(target);
-+	return rc;
-+}
-+
-+/* insert flow @f
-+ *
-+ * Posts one COP_INSERT_FLOW carry operation against @coord->node and runs
-+ * carry on it. @coord is recorded as the operation's insert point and @lh is
-+ * registered as the lock handle tracked by the carry level
-+ * (CARRY_TRACK_CHANGE).
-+ *
-+ * Returns the result of reiser4_carry(); PTR_ERR() of the pool or of the
-+ * operation if either could not be set up; -EIO if reiser4_post_carry()
-+ * returned NULL.
-+ */
-+int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f)
-+{
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ reiser4_item_data *data;
-+ carry_op *op;
-+
-+ /* the requested size covers the pool header, three carry levels and
-+ one reiser4_item_data; the pointers below are carved out of that
-+ area in the same order */
-+ pool =
-+ init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(*data));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
-+ 0 /* operate directly on coord -> node */ );
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+
-+ /* these are permanent during insert_flow */
-+ data = (reiser4_item_data *) (lowest_level + 3);
-+ data->user = 1;
-+ data->iplug = item_plugin_by_id(FORMATTING_ID);
-+ data->arg = NULL;
-+ /* data.length and data.data will be set before calling paste or
-+ insert */
-+ data->length = 0;
-+ data->data = NULL;
-+
-+ op->u.insert_flow.flags = 0;
-+ op->u.insert_flow.insert_point = coord;
-+ op->u.insert_flow.flow = f;
-+ op->u.insert_flow.data = data;
-+ op->u.insert_flow.new_nodes = 0;
-+
-+ lowest_level->track_type = CARRY_TRACK_CHANGE;
-+ lowest_level->tracked = lh;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+ /* @data and @op live inside the pool, so nothing else needs freeing */
-+ done_carry_pool(pool);
-+
-+ return result;
-+}
-+
-+/* Translate a coord in a parent node into the znode of the child that the
-+   internal item at that coord points to.
-+
-+   Returns the child znode, ERR_PTR(-EIO) if @parent is not above the leaf
-+   level or the item at @parent_coord is not an internal one, or whatever
-+   zlook()/zget() returned (the result is tested against NULL and IS_ERR()
-+   before delimiting keys are set). */
-+znode *child_znode(const coord_t * parent_coord /* coord of pointer to
-+						  * child */ ,
-+		   znode * parent /* parent of child */ ,
-+		   int incore_p /* if !0 only return child if already in
-+				 * memory */ ,
-+		   int setup_dkeys_p /* if !0 update delimiting keys of
-+				      * child */ )
-+{
-+	reiser4_block_nr child_addr;
-+	reiser4_tree *tree;
-+	item_plugin *iplug;
-+	znode *found;
-+
-+	assert("nikita-1374", parent_coord != NULL);
-+	assert("nikita-1482", parent != NULL);
-+#if REISER4_DEBUG
-+	if (setup_dkeys_p)
-+		assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock));
-+#endif
-+	assert("nikita-2947", znode_is_any_locked(parent));
-+
-+	/* a leaf node has no children */
-+	if (znode_get_level(parent) <= LEAF_LEVEL) {
-+		warning("nikita-1217", "Child of maize?");
-+		return ERR_PTR(RETERR(-EIO));
-+	}
-+
-+	/* only internal items carry a down link */
-+	if (!item_is_internal(parent_coord)) {
-+		warning("nikita-1483", "Internal item expected");
-+		return ERR_PTR(RETERR(-EIO));
-+	}
-+
-+	iplug = item_plugin_by_coord(parent_coord);
-+	assert("vs-512", iplug->s.internal.down_link);
-+	iplug->s.internal.down_link(parent_coord, NULL, &child_addr);
-+
-+	tree = znode_get_tree(parent);
-+	if (!incore_p)
-+		found = zget(tree, &child_addr, parent,
-+			     znode_get_level(parent) - 1,
-+			     reiser4_ctx_gfp_mask_get());
-+	else
-+		found = zlook(tree, &child_addr);
-+
-+	if (setup_dkeys_p && (found != NULL) && !IS_ERR(found))
-+		set_child_delimiting_keys(parent, parent_coord, found);
-+
-+	return found;
-+}
-+
-+/* remove znode from transaction
-+ *
-+ * @node must already be marked JNODE_HEARD_BANSHEE (asserted). The work is
-+ * done in two steps: first the block of the znode is given back (a real
-+ * block goes to reiser4_dealloc_block() with BA_DEFER, a fake one is
-+ * returned through fake_allocated2free()); then the page of the znode or, if
-+ * it has no page, the jnode itself is uncaptured.
-+ */
-+static void uncapture_znode(znode * node)
-+{
-+ struct page *page;
-+
-+ assert("zam-1001", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ if (!reiser4_blocknr_is_fake(znode_get_block(node))) {
-+ int ret;
-+
-+ /* An already allocated block goes right to the atom's delete set. */
-+ ret =
-+ reiser4_dealloc_block(znode_get_block(node), 0,
-+ BA_DEFER | BA_FORMATTED);
-+ /* a failure here is only reported, processing continues */
-+ if (ret)
-+ warning("zam-942",
-+ "can\'t add a block (%llu) number to atom's delete set\n",
-+ (unsigned long long)(*znode_get_block(node)));
-+
-+ spin_lock_znode(node);
-+ /* Here we return flush reserved block which was reserved at the
-+ * moment when this allocated node was marked dirty and still
-+ * not used by flush in node relocation procedure. */
-+ if (ZF_ISSET(node, JNODE_FLUSH_RESERVED)) {
-+ txn_atom *atom;
-+
-+ /* NOTE(review): jnode_get_atom() apparently returns the
-+ * atom locked, since it is unlocked below - confirm */
-+ atom = jnode_get_atom(ZJNODE(node));
-+ assert("zam-939", atom != NULL);
-+ spin_unlock_znode(node);
-+ flush_reserved2grabbed(atom, (__u64) 1);
-+ spin_unlock_atom(atom);
-+ } else
-+ spin_unlock_znode(node);
-+ } else {
-+ /* znode has assigned block which is counted as "fake
-+ allocated". Return it back to "free blocks") */
-+ fake_allocated2free((__u64) 1, BA_FORMATTED);
-+ }
-+
-+ /*
-+ * uncapture page from transaction. There is a possibility of a race
-+ * with ->releasepage(): reiser4_releasepage() detaches page from this
-+ * jnode and we have nothing to uncapture. To avoid this, get
-+ * reference of node->pg under jnode spin lock. reiser4_uncapture_page()
-+ * will deal with released page itself.
-+ */
-+ spin_lock_znode(node);
-+ page = znode_page(node);
-+ if (likely(page != NULL)) {
-+ /*
-+ * reiser4_uncapture_page() can only be called when we are sure
-+ * that znode is pinned in memory, which we are, because
-+ * forget_znode() is only called from longterm_unlock_znode().
-+ */
-+ page_cache_get(page);
-+ spin_unlock_znode(node);
-+ lock_page(page);
-+ reiser4_uncapture_page(page);
-+ unlock_page(page);
-+ page_cache_release(page);
-+ } else {
-+ txn_atom *atom;
-+
-+ /* handle "flush queued" znodes */
-+ /* loop until the node is no longer JNODE_FLUSH_QUEUED or the
-+ * atom has no running queues; the znode spin lock is dropped
-+ * around each wait and re-taken before the next check */
-+ while (1) {
-+ atom = jnode_get_atom(ZJNODE(node));
-+ assert("zam-943", atom != NULL);
-+
-+ if (!ZF_ISSET(node, JNODE_FLUSH_QUEUED)
-+ || !atom->nr_running_queues)
-+ break;
-+
-+ spin_unlock_znode(node);
-+ reiser4_atom_wait_event(atom);
-+ spin_lock_znode(node);
-+ }
-+
-+ /* NOTE(review): the znode spin lock is not released explicitly
-+ * on this path; presumably reiser4_uncapture_block() drops it -
-+ * confirm */
-+ reiser4_uncapture_block(ZJNODE(node));
-+ spin_unlock_atom(atom);
-+ zput(node);
-+ }
-+}
-+
-+/* This is called from longterm_unlock_znode() when last lock is released from
-+ the node that has been removed from the tree. The node is expected to be
-+ write locked and marked JNODE_HEARD_BANSHEE (both asserted). It is taken
-+ off the sibling list here, its lock is invalidated and, finally, it is
-+ removed from the transaction by uncapture_znode(). */
-+void forget_znode(lock_handle * handle)
-+{
-+ znode *node;
-+ reiser4_tree *tree;
-+
-+ assert("umka-319", handle != NULL);
-+
-+ node = handle->node;
-+ tree = znode_get_tree(node);
-+
-+ assert("vs-164", znode_is_write_locked(node));
-+ assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+ assert_rw_locked(&(node->lock.guard));
-+
-+ /* We assume that this node was detached from its parent before
-+ * unlocking, it gives no way to reach this node from parent through a
-+ * down link. The node should have no children and, thereby, can't be
-+ * reached from them by their parent pointers. The only way to obtain a
-+ * reference to the node is to use sibling pointers from its left and
-+ * right neighbors. In the next several lines we remove the node from
-+ * the sibling list. */
-+
-+ write_lock_tree(tree);
-+ sibling_list_remove(node);
-+ znode_remove(node, tree);
-+ write_unlock_tree(tree);
-+
-+ /* Here we set JNODE_DYING and cancel all pending lock requests. It
-+ * forces all lock requestor threads to repeat iterations of getting
-+ * lock on a child, neighbor or parent node. But, those threads can't
-+ * come to this node again, because this node is no longer a child,
-+ * neighbor or parent of any other node. This order of znode
-+ * invalidation does not allow other threads to waste cpu time in a busy
-+ * loop, trying to lock dying object. The exception is in the flush
-+ * code when we take node directly from atom's capture list.
-+ *
-+ * NOTE(review): JNODE_DYING is not set in this function itself;
-+ * presumably reiser4_invalidate_lock() does it - confirm. */
-+ reiser4_invalidate_lock(handle);
-+ uncapture_znode(node);
-+}
-+
-+/* Verify that the internal item at @pointer really points to @child.
-+
-+   Returns NS_FOUND if @pointer is an existing unit of an internal item whose
-+   down link equals the block number of @child, NS_NOT_FOUND otherwise. */
-+int check_tree_pointer(const coord_t * pointer /* would-be pointer to
-+						 * @child */ ,
-+		       const znode * child /* child znode */ )
-+{
-+	item_plugin *iplug;
-+	reiser4_block_nr link;
-+
-+	assert("nikita-1016", pointer != NULL);
-+	assert("nikita-1017", child != NULL);
-+	assert("nikita-1018", pointer->node != NULL);
-+
-+	assert("nikita-1325", znode_is_any_locked(pointer->node));
-+
-+	assert("nikita-2985",
-+	       znode_get_level(pointer->node) == znode_get_level(child) + 1);
-+
-+	/* the cached item plugin is dropped even though @pointer is const */
-+	coord_clear_iplug((coord_t *) pointer);
-+
-+	/* warning ("jmacd-1002", "tree pointer incorrect"); is deliberately
-+	   not issued on the failure paths below */
-+	if (!coord_is_existing_unit(pointer))
-+		return NS_NOT_FOUND;
-+	if (!item_is_internal(pointer))
-+		return NS_NOT_FOUND;
-+
-+	iplug = item_plugin_by_coord(pointer);
-+	assert("vs-513", iplug->s.internal.down_link);
-+	iplug->s.internal.down_link(pointer, NULL, &link);
-+
-+	/* check that cached value is correct */
-+	if (!disk_addr_eq(&link, znode_get_block(child)))
-+		return NS_NOT_FOUND;
-+	return NS_FOUND;
-+}
-+
-+/* Find the place in @parent where the pointer to a new @child will go.
-+
-+   The position is derived from @left, the left brother of the new node: the
-+   pointer to @left is looked up in @parent and @result is set to the
-+   position right after it.
-+
-+   Returns NS_NOT_FOUND (passed through RETERR()) when the position was
-+   determined, because the pointer to @child itself does not exist yet, and
-+   -EIO when the pointer to @left cannot be found.
-+*/
-+int find_new_child_ptr(znode * parent /* parent znode, passed locked */ ,
-+		       znode * child UNUSED_ARG /* child znode, passed
-+						 * locked */ ,
-+		       znode * left /* left brother of new node */ ,
-+		       coord_t * result /* where result is stored in */ )
-+{
-+	int lookup;
-+
-+	assert("nikita-1486", parent != NULL);
-+	assert("nikita-1487", child != NULL);
-+	assert("nikita-1488", result != NULL);
-+
-+	lookup = find_child_ptr(parent, left, result);
-+	if (lookup == NS_FOUND) {
-+		/* the new pointer goes right after the pointer to @left */
-+		result->between = AFTER_UNIT;
-+		return RETERR(NS_NOT_FOUND);
-+	}
-+
-+	warning("nikita-1489", "Cannot find brother position: %i", lookup);
-+	return RETERR(-EIO);
-+}
-+
-+/* find coord of pointer to @child in @parent.
-+
-+ Find the &coord_t in the @parent where pointer to a given @child is in.
-+
-+ Three strategies are tried in turn: the position cached in
-+ @child->in_parent, a lookup in @parent by the left delimiting key of
-+ @child, and finally a scan of all units of @parent by
-+ find_child_by_addr(). Each candidate is verified with
-+ check_tree_pointer().
-+
-+ @parent has to be loaded (asserted). Returns NS_FOUND on success;
-+ otherwise the result of the key lookup or of find_child_by_addr().
-+
-+*/
-+int find_child_ptr(znode * parent /* parent znode, passed locked */ ,
-+ znode * child /* child znode, passed locked */ ,
-+ coord_t * result /* where result is stored in */ )
-+{
-+ int lookup_res;
-+ node_plugin *nplug;
-+ /* left delimiting key of a child */
-+ reiser4_key ld;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-934", parent != NULL);
-+ assert("nikita-935", child != NULL);
-+ assert("nikita-936", result != NULL);
-+ assert("zam-356", znode_is_loaded(parent));
-+
-+ coord_init_zero(result);
-+ result->node = parent;
-+
-+ nplug = parent->nplug;
-+ assert("nikita-939", nplug != NULL);
-+
-+ tree = znode_get_tree(parent);
-+ /* NOTE-NIKITA taking read-lock on tree here assumes that @result is
-+ * not aliased to ->in_parent of some znode. Otherwise,
-+ * parent_coord_to_coord() below would modify data protected by tree
-+ * lock. */
-+ read_lock_tree(tree);
-+ /* fast path. Try to use cached value. Lock tree to keep
-+ node->pos_in_parent and pos->*_blocknr consistent. */
-+ /* item_pos + 1 == 0 means the cached position has been invalidated
-+ (see the assignment of ~0 below) */
-+ if (child->in_parent.item_pos + 1 != 0) {
-+ parent_coord_to_coord(&child->in_parent, result);
-+ if (check_tree_pointer(result, child) == NS_FOUND) {
-+ read_unlock_tree(tree);
-+ return NS_FOUND;
-+ }
-+
-+ /* the cached position is stale: invalidate it.
-+ NOTE(review): this store is done while holding only the
-+ read lock on the tree - confirm this is intended */
-+ child->in_parent.item_pos = (unsigned short)~0;
-+ }
-+ read_unlock_tree(tree);
-+
-+ /* if the above failed, find some key from @child. We are looking for
-+ the least key in a child. */
-+ read_lock_dk(tree);
-+ ld = *znode_get_ld_key(child);
-+ read_unlock_dk(tree);
-+ /*
-+ * now, lookup parent with key just found. Note, that left delimiting
-+ * key doesn't identify node uniquely, because (in extremely rare
-+ * case) two nodes can have equal left delimiting keys, if one of them
-+ * is completely filled with directory entries that all happened to be
-+ * hash collision. But, we check block number in check_tree_pointer()
-+ * and, so, are safe.
-+ */
-+ lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result);
-+ /* update cached pos_in_node */
-+ if (lookup_res == NS_FOUND) {
-+ write_lock_tree(tree);
-+ coord_to_parent_coord(result, &child->in_parent);
-+ write_unlock_tree(tree);
-+ lookup_res = check_tree_pointer(result, child);
-+ }
-+ /* last resort: scan all units of @parent comparing block numbers */
-+ if (lookup_res == NS_NOT_FOUND)
-+ lookup_res = find_child_by_addr(parent, child, result);
-+ return lookup_res;
-+}
-+
-+/* Locate the pointer to @child in @parent by brute force.
-+
-+   Every unit of @parent is visited and tested with check_tree_pointer(),
-+   which compares the block number stored in an internal item with that of
-+   @child. On a match the position is also cached in @child->in_parent under
-+   the tree write lock.
-+
-+   Returns NS_FOUND with @result set to the matching unit, or NS_NOT_FOUND.
-+*/
-+static int find_child_by_addr(znode * parent /* parent znode, passed locked */ ,
-+			      znode * child /* child znode, passed locked */ ,
-+			      coord_t * result /* where result is stored in */ )
-+{
-+	int outcome = NS_NOT_FOUND;
-+
-+	assert("nikita-1320", parent != NULL);
-+	assert("nikita-1321", child != NULL);
-+	assert("nikita-1322", result != NULL);
-+
-+	for_all_units(result, parent) {
-+		if (check_tree_pointer(result, child) == NS_FOUND) {
-+			reiser4_tree *tree = znode_get_tree(parent);
-+
-+			write_lock_tree(tree);
-+			coord_to_parent_coord(result, &child->in_parent);
-+			write_unlock_tree(tree);
-+			outcome = NS_FOUND;
-+			break;
-+		}
-+	}
-+	return outcome;
-+}
-+
-+/* Returns non-zero if @addr is an "unallocated block number", i.e. if its
-+   status bits (those selected by REISER4_BLOCKNR_STATUS_BIT_MASK) are equal
-+   to REISER4_UNALLOCATED_STATUS_VALUE. */
-+int is_disk_addr_unallocated(const reiser4_block_nr * addr /* address to
-+							     * check */ )
-+{
-+	reiser4_block_nr status_bits;
-+
-+	assert("nikita-1766", addr != NULL);
-+	/* the status bit arithmetic relies on 64-bit block numbers */
-+	cassert(sizeof(reiser4_block_nr) == 8);
-+
-+	status_bits = *addr & REISER4_BLOCKNR_STATUS_BIT_MASK;
-+	return status_bits == REISER4_UNALLOCATED_STATUS_VALUE;
-+}
-+
-+/* Returns 1 if removing the key range [@from_key, @to_key] removes the
-+   whole extent item @from, and 0 if either the first or the last byte of the
-+   item survives. */
-+static int
-+item_removed_completely(coord_t * from, const reiser4_key * from_key,
-+			const reiser4_key * to_key)
-+{
-+	reiser4_key first_key;
-+	reiser4_key last_key;
-+	item_plugin *iplug;
-+
-+	assert("umka-325", from != NULL);
-+	assert("", item_is_extent(from));
-+
-+	/* the range must not start after the first key of the item */
-+	item_key_by_coord(from, &first_key);
-+	if (keygt(from_key, &first_key))
-+		return 0;
-+
-+	iplug = item_plugin_by_coord(from);
-+	assert("vs-611", iplug && iplug->s.file.append_key);
-+
-+	/* ->append_key() is stepped back by one offset to obtain the key of
-+	   the last byte of the item */
-+	iplug->s.file.append_key(from, &last_key);
-+	set_key_offset(&last_key, get_key_offset(&last_key) - 1);
-+
-+	/* the item is gone only if its last byte is covered too */
-+	return !keylt(to_key, &last_key);
-+}
-+
-+/* Helper for prepare_twig_kill(): @left and @right are the formatted
-+ * neighbors of an extent item that is being removed; either may be NULL.
-+ * Each neighbor that is present is loaded and read locked, and the lock
-+ * handles are stored in @kdata->left and @kdata->right for later use by
-+ * kill_hook_extent().
-+ *
-+ * Returns 0 on success. On failure everything acquired here is released
-+ * again and the error code is returned. */
-+static int
-+prepare_children(znode * left, znode * right, carry_kill_data * kdata)
-+{
-+	int left_pinned = 0;
-+	int right_pinned = 0;
-+	int err;
-+
-+	if (left != NULL) {
-+		err = zload(left);
-+		if (err != 0)
-+			goto fail;
-+		left_pinned = 1;
-+
-+		err = longterm_lock_znode(kdata->left, left,
-+					  ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI);
-+		if (err != 0)
-+			goto fail;
-+	}
-+
-+	if (right != NULL) {
-+		err = zload(right);
-+		if (err != 0)
-+			goto fail;
-+		right_pinned = 1;
-+
-+		/* the right neighbor is requested without blocking */
-+		err = longterm_lock_znode(kdata->right, right,
-+					  ZNODE_READ_LOCK,
-+					  ZNODE_LOCK_HIPRI |
-+					  ZNODE_LOCK_NONBLOCK);
-+		if (err != 0)
-+			goto fail;
-+	}
-+	return 0;
-+
-+fail:
-+	done_lh(kdata->left);
-+	done_lh(kdata->right);
-+	if (left_pinned != 0)
-+		zrelse(left);
-+	if (right_pinned != 0)
-+		zrelse(right);
-+	return err;
-+}
-+
-+/* Counterpart of prepare_children(): for each of @kdata->left and
-+   @kdata->right that refers to a node, release the node and then the lock
-+   handle. Handles that are NULL or hold no node are skipped. */
-+static void done_children(carry_kill_data * kdata)
-+{
-+	lock_handle *lh;
-+
-+	lh = kdata->left;
-+	if (lh != NULL && lh->node != NULL) {
-+		zrelse(lh->node);
-+		done_lh(lh);
-+	}
-+
-+	lh = kdata->right;
-+	if (lh != NULL && lh->node != NULL) {
-+		zrelse(lh->node);
-+		done_lh(lh);
-+	}
-+}
-+
-+/* Part of cut_node. It is called when cut_node is called to remove or cut
-+   part of an extent item.
-+
-+   When the head of that item is removed, the right delimiting key of the
-+   left neighbor of the extent has to be updated. When the item is removed
-+   completely, a sibling link between the left and the right neighbor of the
-+   removed extent has to be set as well. This function finds those formatted
-+   children and passes them to prepare_children(), which loads and locks them
-+   and stores the lock handles in @kdata.
-+
-+   @kdata: kill parameters; ->params.from and ->params.to must address the
-+   same extent item.
-+   @locked_left_neighbor: left neighbor of @from->node when the caller already
-+   holds it write locked, NULL otherwise.
-+
-+   Returns 0 on success, including the cases where there is nothing to
-+   prepare, or an error code. This may return -E_DEADLOCK because of trying to
-+   get the left neighbor locked, so the caller should repeat the attempt.
-+*/
-+/* Audited by: umka (2002.06.16) */
-+static int
-+prepare_twig_kill(carry_kill_data * kdata, znode * locked_left_neighbor)
-+{
-+	int result;
-+	reiser4_key key;
-+	lock_handle left_lh;
-+	lock_handle right_lh;
-+	coord_t left_coord;
-+	coord_t *from;
-+	znode *left_child;
-+	znode *right_child;
-+	reiser4_tree *tree;
-+	int left_zloaded_here, right_zloaded_here;
-+
-+	from = kdata->params.from;
-+	assert("umka-326", from != NULL);
-+	assert("umka-327", kdata->params.to != NULL);
-+
-+	/* for one extent item only yet */
-+	assert("vs-591", item_is_extent(from));
-+	assert("vs-592", from->item_pos == kdata->params.to->item_pos);
-+
-+	if ((kdata->params.from_key
-+	     && keygt(kdata->params.from_key, item_key_by_coord(from, &key)))
-+	    || from->unit_pos != 0) {
-+		/* head of item @from is not removed, there is nothing to
-+		   worry about */
-+		return 0;
-+	}
-+
-+	result = 0;
-+	left_zloaded_here = 0;
-+	right_zloaded_here = 0;
-+
-+	left_child = right_child = NULL;
-+
-+	coord_dup(&left_coord, from);
-+	init_lh(&left_lh);
-+	init_lh(&right_lh);
-+	if (coord_prev_unit(&left_coord)) {
-+		/* @from is leftmost item in its node */
-+		if (!locked_left_neighbor) {
-+			result =
-+			    reiser4_get_left_neighbor(&left_lh, from->node,
-+						      ZNODE_READ_LOCK,
-+						      GN_CAN_USE_UPPER_LEVELS);
-+			switch (result) {
-+			case 0:
-+				break;
-+			case -E_NO_NEIGHBOR:
-+				/* there is no formatted node to the left of
-+				   from->node */
-+				warning("vs-605",
-+					"extent item has smallest key in "
-+					"the tree and it is about to be removed");
-+				return 0;
-+			case -E_DEADLOCK:
-+				/* need to restart */
-+			default:
-+				return result;
-+			}
-+
-+			/* we have acquired left neighbor of from->node */
-+			result = zload(left_lh.node);
-+			if (result)
-+				goto done;
-+
-+			locked_left_neighbor = left_lh.node;
-+		} else {
-+			/* squalloc_right_twig_cut should have supplied locked
-+			 * left neighbor */
-+			assert("vs-834",
-+			       znode_is_write_locked(locked_left_neighbor));
-+			result = zload(locked_left_neighbor);
-+			if (result)
-+				return result;
-+		}
-+
-+		left_zloaded_here = 1;
-+		coord_init_last_unit(&left_coord, locked_left_neighbor);
-+	}
-+
-+	if (!item_is_internal(&left_coord)) {
-+		/* what else but extent can be on twig level */
-+		assert("vs-606", item_is_extent(&left_coord));
-+
-+		/* there is no left formatted child */
-+		if (left_zloaded_here)
-+			zrelse(locked_left_neighbor);
-+		done_lh(&left_lh);
-+		return 0;
-+	}
-+
-+	tree = znode_get_tree(left_coord.node);
-+	/* only a child that is already in memory is of interest */
-+	left_child = child_znode(&left_coord, left_coord.node, 1, 0);
-+
-+	if (IS_ERR(left_child)) {
-+		result = PTR_ERR(left_child);
-+		/* the cleanup code must not zput() an error pointer */
-+		left_child = NULL;
-+		goto done;
-+	}
-+
-+	/* left child is acquired, calculate new right delimiting key for it
-+	   and get right child if it is necessary */
-+	if (item_removed_completely
-+	    (from, kdata->params.from_key, kdata->params.to_key)) {
-+		/* try to get right child of removed item */
-+		coord_t right_coord;
-+
-+		assert("vs-607",
-+		       kdata->params.to->unit_pos ==
-+		       coord_last_unit_pos(kdata->params.to));
-+		coord_dup(&right_coord, kdata->params.to);
-+		if (coord_next_unit(&right_coord)) {
-+			/* @to is rightmost unit in the node */
-+			result =
-+			    reiser4_get_right_neighbor(&right_lh, from->node,
-+						       ZNODE_READ_LOCK,
-+						       GN_CAN_USE_UPPER_LEVELS);
-+			switch (result) {
-+			case 0:
-+				result = zload(right_lh.node);
-+				if (result)
-+					goto done;
-+
-+				right_zloaded_here = 1;
-+				coord_init_first_unit(&right_coord,
-+						      right_lh.node);
-+				item_key_by_coord(&right_coord, &key);
-+				break;
-+
-+			case -E_NO_NEIGHBOR:
-+				/* there is no formatted node to the right of
-+				   from->node */
-+				read_lock_dk(tree);
-+				key = *znode_get_rd_key(from->node);
-+				read_unlock_dk(tree);
-+				right_coord.node = NULL;
-+				result = 0;
-+				break;
-+			default:
-+				/* real error */
-+				goto done;
-+			}
-+		} else {
-+			/* there is an item to the right of @from - take its key */
-+			item_key_by_coord(&right_coord, &key);
-+		}
-+
-+		/* try to get right child of @from */
-+		if (right_coord.node &&	/* there is right neighbor of @from */
-+		    item_is_internal(&right_coord)) {	/* it is internal item */
-+			right_child = child_znode(&right_coord,
-+						  right_coord.node, 1, 0);
-+
-+			if (IS_ERR(right_child)) {
-+				result = PTR_ERR(right_child);
-+				/* the cleanup code must not zput() an error
-+				   pointer */
-+				right_child = NULL;
-+				goto done;
-+			}
-+
-+		}
-+		/* whole extent is removed between znodes left_child and
-+		   right_child. Prepare them for linking and update of right
-+		   delimiting key of left_child */
-+		result = prepare_children(left_child, right_child, kdata);
-+	} else {
-+		/* head of item @to is removed. left_child has to get right
-+		   delimiting key update. Prepare it for that */
-+		result = prepare_children(left_child, NULL, kdata);
-+	}
-+
-+      done:
-+	/* release what was taken in this function; the child pointers are
-+	   either NULL or valid znodes at this point */
-+	if (right_child)
-+		zput(right_child);
-+	if (right_zloaded_here)
-+		zrelse(right_lh.node);
-+	done_lh(&right_lh);
-+
-+	if (left_child)
-+		zput(left_child);
-+	if (left_zloaded_here)
-+		zrelse(locked_left_neighbor);
-+	done_lh(&left_lh);
-+	return result;
-+}
-+
-+/* This is used to remove part of node content between coordinates @from and
-+   @to. Units to which @from and @to are set are to be cut completely.
-+
-+   A single COP_CUT carry operation (with ->is_cut set) is posted against
-+   @from->node and carry is run on it.
-+
-+   Returns the result of reiser4_carry(); PTR_ERR() of the pool or of the
-+   operation if either could not be set up; -EIO if reiser4_post_carry()
-+   returned NULL. */
-+/* for try_to_merge_with_left, delete_copied, reiser4_delete_node */
-+int cut_node_content(coord_t * from, coord_t * to,
-+		     const reiser4_key * from_key /* first key to be
-+						   * removed */ ,
-+		     const reiser4_key * to_key /* last key to be removed */ ,
-+		     reiser4_key * smallest_removed /* smallest key actually
-+						     * removed */ )
-+{
-+	int result;
-+	carry_pool *pool;
-+	carry_level *lowest_level;
-+	carry_cut_data *cut_data;
-+	carry_op *op;
-+
-+	assert("vs-1715", coord_compare(from, to) != COORD_CMP_ON_RIGHT);
-+
-+	/* the requested size covers the pool header, three carry levels and
-+	   the cut parameters; the pointers below are carved out of that area
-+	   in the same order */
-+	pool =
-+	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+			    sizeof(*cut_data));
-+	if (IS_ERR(pool))
-+		return PTR_ERR(pool);
-+	lowest_level = (carry_level *) (pool + 1);
-+	init_carry_level(lowest_level, pool);
-+
-+	op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
-+	assert("vs-1509", op != 0);
-+	/* The assertion above is the only NULL check the original had. Handle
-+	   NULL explicitly, the same way reiser4_insert_flow() and
-+	   kill_node_content() do, so that @op is never dereferenced when it
-+	   is NULL. */
-+	if (IS_ERR(op) || (op == NULL)) {
-+		done_carry_pool(pool);
-+		return op ? PTR_ERR(op) : RETERR(-EIO);
-+	}
-+
-+	cut_data = (carry_cut_data *) (lowest_level + 3);
-+	cut_data->params.from = from;
-+	cut_data->params.to = to;
-+	cut_data->params.from_key = from_key;
-+	cut_data->params.to_key = to_key;
-+	cut_data->params.smallest_removed = smallest_removed;
-+
-+	op->u.cut_or_kill.is_cut = 1;
-+	op->u.cut_or_kill.u.cut = cut_data;
-+
-+	result = reiser4_carry(lowest_level, NULL);
-+	done_carry_pool(pool);
-+
-+	return result;
-+}
-+
-+/* cut part of the node
-+
-+ Cut part or whole content of node.
-+
-+ cut data between @from and @to of @from->node and call carry() to make
-+ corresponding changes in the tree. @from->node may become empty. If so -
-+ pointer to it will be removed. Neighboring nodes are not changed. Smallest
-+ removed key is stored in @smallest_removed
-+
-+ The operation is posted as COP_CUT with ->is_cut cleared, which
-+ distinguishes it from cut_node_content(). For an extent item on the twig
-+ level prepare_twig_kill() is run first; whatever it locked is released by
-+ done_children() on every exit path.
-+
-+ Returns the result of reiser4_carry(), or an error from pool allocation,
-+ prepare_twig_kill() or reiser4_post_carry() (-EIO if the latter returned
-+ NULL).
-+
-+*/
-+int kill_node_content(coord_t * from, /* coord of the first unit/item that will be eliminated */
-+ coord_t * to, /* coord of the last unit/item that will be eliminated */
-+ const reiser4_key * from_key, /* first key to be removed */
-+ const reiser4_key * to_key, /* last key to be removed */
-+ reiser4_key * smallest_removed, /* smallest key actually removed */
-+ znode * locked_left_neighbor, /* this is set when kill_node_content is called with left neighbor
-+ * locked (in squalloc_right_twig_cut, namely) */
-+ struct inode *inode, /* inode of file whose item (or its part) is to be killed. This is necessary to
-+ invalidate pages together with item pointing to them */
-+ int truncate)
-+{ /* @truncate: this call is made for file truncate */
-+ int result;
-+ carry_pool *pool;
-+ carry_level *lowest_level;
-+ carry_kill_data *kdata;
-+ lock_handle *left_child;
-+ lock_handle *right_child;
-+ carry_op *op;
-+
-+ assert("umka-328", from != NULL);
-+ assert("vs-316", !node_is_empty(from->node));
-+ assert("nikita-1812", coord_is_existing_unit(from)
-+ && coord_is_existing_unit(to));
-+
-+ /* allocate carry_pool, 3 carry_level-s, carry_kill_data and structures for kill_hook_extent */
-+ pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
-+ sizeof(carry_kill_data) +
-+ 2 * sizeof(lock_handle) +
-+ 5 * sizeof(reiser4_key) + 2 * sizeof(coord_t));
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+
-+ /* the pointers below are carved out of the pool area in the same
-+ order in which the sizes were added up above */
-+ lowest_level = (carry_level *) (pool + 1);
-+ init_carry_level(lowest_level, pool);
-+
-+ kdata = (carry_kill_data *) (lowest_level + 3);
-+ left_child = (lock_handle *) (kdata + 1);
-+ right_child = left_child + 1;
-+
-+ init_lh(left_child);
-+ init_lh(right_child);
-+
-+ kdata->params.from = from;
-+ kdata->params.to = to;
-+ kdata->params.from_key = from_key;
-+ kdata->params.to_key = to_key;
-+ kdata->params.smallest_removed = smallest_removed;
-+ kdata->params.truncate = truncate;
-+ kdata->flags = 0;
-+ kdata->inode = inode;
-+ kdata->left = left_child;
-+ kdata->right = right_child;
-+ /* memory for 5 reiser4_key and 2 coord_t will be used in kill_hook_extent */
-+ kdata->buf = (char *)(right_child + 1);
-+
-+ if (znode_get_level(from->node) == TWIG_LEVEL && item_is_extent(from)) {
-+ /* left child of extent item may have to get updated right
-+ delimiting key and to get linked with right child of extent
-+ @from if it will be removed completely */
-+ result = prepare_twig_kill(kdata, locked_left_neighbor);
-+ if (result) {
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return result;
-+ }
-+ }
-+
-+ op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
-+ if (IS_ERR(op) || (op == NULL)) {
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return RETERR(op ? PTR_ERR(op) : -EIO);
-+ }
-+
-+ /* is_cut == 0 selects the "kill" member of the union */
-+ op->u.cut_or_kill.is_cut = 0;
-+ op->u.cut_or_kill.u.kill = kdata;
-+
-+ result = reiser4_carry(lowest_level, NULL);
-+
-+ /* the lock handles live inside the pool, so the children have to be
-+ released before the pool is */
-+ done_children(kdata);
-+ done_carry_pool(pool);
-+ return result;
-+}
-+
-+/* Bookkeeping for tail bytes of @inode that are removed from the tree: if
-+   the inode is flagged REISER4_HAS_MMAP, the page touched by the removed
-+   range is invalidated, and in every case (@end - @start) bytes are
-+   subtracted from the inode's byte count.
-+
-+   NOTE(review): the second reiser4_invalidate_pages() call passes a literal
-+   1 where the first one passes @truncate - confirm that this is intended. */
-+void
-+fake_kill_hook_tail(struct inode *inode, loff_t start, loff_t end, int truncate)
-+{
-+	pgoff_t first_pg;
-+	pgoff_t last_pg;
-+
-+	if (reiser4_inode_get_flag(inode, REISER4_HAS_MMAP)) {
-+		first_pg = start >> PAGE_CACHE_SHIFT;
-+		last_pg = (end - 1) >> PAGE_CACHE_SHIFT;
-+
-+		if ((start & (PAGE_CACHE_SIZE - 1)) != 0) {
-+			if (first_pg != last_pg) {
-+				/*
-+				 * page boundary is within killed portion of
-+				 * node.
-+				 */
-+				assert("vs-654321", last_pg - first_pg == 1);
-+				reiser4_invalidate_pages(inode->i_mapping,
-+							 last_pg,
-+							 last_pg - first_pg,
-+							 1);
-+			}
-+		} else {
-+			/*
-+			 * @start is page aligned: kill up to the page
-+			 * boundary.
-+			 */
-+			assert("vs-123456", first_pg == last_pg);
-+			reiser4_invalidate_pages(inode->i_mapping, first_pg,
-+						 1, truncate);
-+		}
-+	}
-+	inode_sub_bytes(inode, end - start);
-+}
-+
-+/**
-+ * Delete whole @node from the reiser4 tree without loading it.
-+ *
-+ * @node: node to be deleted, has to be write locked by the caller,
-+ * @smallest_removed: leftmost key of deleted node, must not be NULL,
-+ * @object: inode pointer, if we truncate a file body.
-+ * @truncate: true if called for file truncate.
-+ *
-+ * The parent of @node is write locked and loaded, the pointer to @node is
-+ * cut out of it with cut_node_content(), and the right delimiting key of the
-+ * left neighbor of @node (if any) is set to the right delimiting key of
-+ * @node. On return *@smallest_removed holds the left delimiting key of
-+ * @node.
-+ *
-+ * @return: 0 if success, error code otherwise.
-+ *
-+ * NOTE: if @object!=NULL we assume that @smallest_removed != NULL and it
-+ * contains the right value of the smallest removed key from the previous
-+ * cut_worker() iteration. This is needed for proper accounting of
-+ * "i_blocks" and "i_bytes" fields of the @object.
-+ */
-+int reiser4_delete_node(znode * node, reiser4_key * smallest_removed,
-+ struct inode *object, int truncate)
-+{
-+ lock_handle parent_lock;
-+ coord_t cut_from;
-+ coord_t cut_to;
-+ reiser4_tree *tree;
-+ int ret;
-+
-+ assert("zam-937", node != NULL);
-+ assert("zam-933", znode_is_write_locked(node));
-+ assert("zam-999", smallest_removed != NULL);
-+
-+ init_lh(&parent_lock);
-+
-+ ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
-+ if (ret)
-+ return ret;
-+
-+ assert("zam-934", !znode_above_root(parent_lock.node));
-+
-+ ret = zload(parent_lock.node);
-+ if (ret)
-+ goto failed_nozrelse;
-+
-+ /* any non-zero result, including "not found", is treated as failure */
-+ ret = find_child_ptr(parent_lock.node, node, &cut_from);
-+ if (ret)
-+ goto failed;
-+
-+ /* decrement child counter and set parent pointer to NULL before
-+ deleting the list from parent node because of checks in
-+ internal_kill_item_hook (we can delete the last item from the parent
-+ node, the parent node is going to be deleted and its c_count should
-+ be zero). */
-+
-+ tree = znode_get_tree(node);
-+ write_lock_tree(tree);
-+ init_parent_coord(&node->in_parent, NULL);
-+ --parent_lock.node->c_count;
-+ write_unlock_tree(tree);
-+
-+ assert("zam-989", item_is_internal(&cut_from));
-+
-+ /* @node should be deleted after unlocking. */
-+ ZF_SET(node, JNODE_HEARD_BANSHEE);
-+
-+ /* remove a pointer from the parent node to the node being deleted. */
-+ coord_dup(&cut_to, &cut_from);
-+ /* FIXME: shouldn't this be kill_node_content */
-+ ret = cut_node_content(&cut_from, &cut_to, NULL, NULL, NULL);
-+ if (ret)
-+ /* FIXME(Zam): Should we re-connect the node to its parent if
-+ * cut_node fails? */
-+ goto failed;
-+
-+ {
-+ /* note: this declaration shadows the function-level @tree */
-+ reiser4_tree *tree = current_tree;
-+ __u64 start_offset = 0, end_offset = 0;
-+
-+ read_lock_tree(tree);
-+ write_lock_dk(tree);
-+ if (object) {
-+ /* We use @smallest_removed and the left delimiting of
-+ * the current node for @object->i_blocks, i_bytes
-+ * calculation. We assume that the items after the
-+ * *@smallest_removed key have been deleted from the
-+ * file body. */
-+ start_offset = get_key_offset(znode_get_ld_key(node));
-+ end_offset = get_key_offset(smallest_removed);
-+ }
-+
-+ assert("zam-1021", znode_is_connected(node));
-+ /* the left neighbor now extends up to the right delimiting key
-+ of the deleted node */
-+ if (node->left)
-+ znode_set_rd_key(node->left, znode_get_rd_key(node));
-+
-+ *smallest_removed = *znode_get_ld_key(node);
-+
-+ write_unlock_dk(tree);
-+ read_unlock_tree(tree);
-+
-+ if (object) {
-+ /* we used to perform actions which are to be performed on items on their removal from tree in
-+ special item method - kill_hook. Here for optimization reasons we avoid reading node
-+ containing item we remove and can not call item's kill hook. Instead we call function which
-+ does exactly the same things as tail kill hook in assumption that node we avoid reading
-+ contains only one item and that item is a tail one. */
-+ fake_kill_hook_tail(object, start_offset, end_offset,
-+ truncate);
-+ }
-+ }
-+ /* the success path falls through the labels below as well, with
-+ ret == 0 */
-+ failed:
-+ zrelse(parent_lock.node);
-+ failed_nozrelse:
-+ done_lh(&parent_lock);
-+
-+ return ret;
-+}
-+
-+static int can_delete(const reiser4_key *key, znode *node)
-+{
-+ int result;
-+
-+ read_lock_dk(current_tree);
-+ result = keyle(key, znode_get_ld_key(node));
-+ read_unlock_dk(current_tree);
-+ return result;
-+}
-+
-+/**
-+ * This subroutine is not optimal but implementation seems to
-+ * be easier).
-+ *
-+ * @tap: the point deletion process begins from,
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ * @truncate: true if called for file truncate.
-+ * @progress: return true if a progress in file items deletions was made,
-+ * @smallest_removed value is actual in that case.
-+ *
-+ * @return: 0 if success, error code otherwise, -E_REPEAT means that long
-+ * reiser4_cut_tree operation was interrupted for allowing atom commit.
-+ */
-+int
-+cut_tree_worker_common(tap_t * tap, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed, struct inode *object,
-+ int truncate, int *progress)
-+{
-+ lock_handle next_node_lock;
-+ coord_t left_coord;
-+ int result;
-+
-+ assert("zam-931", tap->coord->node != NULL);
-+ assert("zam-932", znode_is_write_locked(tap->coord->node));
-+
-+ *progress = 0;
-+ init_lh(&next_node_lock);
-+
-+ while (1) {
-+ znode *node; /* node from which items are cut */
-+ node_plugin *nplug; /* node plugin for @node */
-+
-+ node = tap->coord->node;
-+
-+ /* Move next_node_lock to the next node on the left. */
-+ result =
-+ reiser4_get_left_neighbor(&next_node_lock, node,
-+ ZNODE_WRITE_LOCK,
-+ GN_CAN_USE_UPPER_LEVELS);
-+ if (result != 0 && result != -E_NO_NEIGHBOR)
-+ break;
-+ /* Check can we delete the node as a whole. */
-+ if (*progress && znode_get_level(node) == LEAF_LEVEL &&
-+ can_delete(from_key, node)) {
-+ result = reiser4_delete_node(node, smallest_removed,
-+ object, truncate);
-+ } else {
-+ result = reiser4_tap_load(tap);
-+ if (result)
-+ return result;
-+
-+ /* Prepare the second (right) point for cut_node() */
-+ if (*progress)
-+ coord_init_last_unit(tap->coord, node);
-+
-+ else if (item_plugin_by_coord(tap->coord)->b.lookup ==
-+ NULL)
-+ /* set rightmost unit for the items without lookup method */
-+ tap->coord->unit_pos =
-+ coord_last_unit_pos(tap->coord);
-+
-+ nplug = node->nplug;
-+
-+ assert("vs-686", nplug);
-+ assert("vs-687", nplug->lookup);
-+
-+ /* left_coord is leftmost unit cut from @node */
-+ result = nplug->lookup(node, from_key,
-+ FIND_MAX_NOT_MORE_THAN,
-+ &left_coord);
-+
-+ if (IS_CBKERR(result))
-+ break;
-+
-+ /* adjust coordinates so that they are set to existing units */
-+ if (coord_set_to_right(&left_coord)
-+ || coord_set_to_left(tap->coord)) {
-+ result = 0;
-+ break;
-+ }
-+
-+ if (coord_compare(&left_coord, tap->coord) ==
-+ COORD_CMP_ON_RIGHT) {
-+ /* keys from @from_key to @to_key are not in the tree */
-+ result = 0;
-+ break;
-+ }
-+
-+ if (left_coord.item_pos != tap->coord->item_pos) {
-+ /* do not allow to cut more than one item. It is added to solve problem of truncating
-+ partially converted files. If file is partially converted there may exist a twig node
-+ containing both internal item or items pointing to leaf nodes with formatting items
-+ and extent item. We do not want to kill internal items being at twig node here
-+ because cut_tree_worker assumes killing them from level level */
-+ coord_dup(&left_coord, tap->coord);
-+ assert("vs-1652",
-+ coord_is_existing_unit(&left_coord));
-+ left_coord.unit_pos = 0;
-+ }
-+
-+ /* cut data from one node */
-+ // *smallest_removed = *reiser4_min_key();
-+ result =
-+ kill_node_content(&left_coord, tap->coord, from_key,
-+ to_key, smallest_removed,
-+ next_node_lock.node, object,
-+ truncate);
-+ reiser4_tap_relse(tap);
-+ }
-+ if (result)
-+ break;
-+
-+ ++(*progress);
-+
-+ /* Check whether all items with keys >= from_key were removed
-+ * from the tree. */
-+ if (keyle(smallest_removed, from_key))
-+ /* result = 0; */
-+ break;
-+
-+ if (next_node_lock.node == NULL)
-+ break;
-+
-+ result = reiser4_tap_move(tap, &next_node_lock);
-+ done_lh(&next_node_lock);
-+ if (result)
-+ break;
-+
-+ /* Break long reiser4_cut_tree operation (deletion of a large
-+ file) if atom requires commit. */
-+ if (*progress > CUT_TREE_MIN_ITERATIONS
-+ && current_atom_should_commit()) {
-+ result = -E_REPEAT;
-+ break;
-+ }
-+ }
-+ done_lh(&next_node_lock);
-+ // assert("vs-301", !keyeq(&smallest_removed, reiser4_min_key()));
-+ return result;
-+}
-+
-+/* there is a fundamental problem with optimizing deletes: VFS does it
-+ one file at a time. Another problem is that if an item can be
-+ anything, then deleting items must be done one at a time. It just
-+ seems clean to writes this to specify a from and a to key, and cut
-+ everything between them though. */
-+
-+/* use this function with care if deleting more than what is part of a single file. */
-+/* do not use this when cutting a single item, it is suboptimal for that */
-+
-+/* You are encouraged to write plugin specific versions of this. It
-+ cannot be optimal for all plugins because it works item at a time,
-+ and some plugins could sometimes work node at a time. Regular files
-+ however are not optimizable to work node at a time because of
-+ extents needing to free the blocks they point to.
-+
-+ Optimizations compared to v3 code:
-+
-+ It does not balance (that task is left to memory pressure code).
-+
-+ Nodes are deleted only if empty.
-+
-+ Uses extents.
-+
-+ Performs read-ahead of formatted nodes whose contents are part of
-+ the deletion.
-+*/
-+
-+/**
-+ * Delete everything from the reiser4 tree between two keys: @from_key and
-+ * @to_key.
-+ *
-+ * @from_key: the beginning of the deleted key range,
-+ * @to_key: the end of the deleted key range,
-+ * @smallest_removed: the smallest removed key,
-+ * @object: owner of cutting items.
-+ * @truncate: true if called for file truncate.
-+ * @progress: return true if a progress in file items deletions was made,
-+ * @smallest_removed value is actual in that case.
-+ *
-+ * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree
-+ * operation was interrupted for allowing atom commit .
-+ */
-+
-+int reiser4_cut_tree_object(reiser4_tree * tree, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed_p,
-+ struct inode *object, int truncate, int *progress)
-+{
-+ lock_handle lock;
-+ int result;
-+ tap_t tap;
-+ coord_t right_coord;
-+ reiser4_key smallest_removed;
-+ int (*cut_tree_worker) (tap_t *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+ STORE_COUNTERS;
-+
-+ assert("umka-329", tree != NULL);
-+ assert("umka-330", from_key != NULL);
-+ assert("umka-331", to_key != NULL);
-+ assert("zam-936", keyle(from_key, to_key));
-+
-+ if (smallest_removed_p == NULL)
-+ smallest_removed_p = &smallest_removed;
-+
-+ init_lh(&lock);
-+
-+ do {
-+ /* Find rightmost item to cut away from the tree. */
-+ result = reiser4_object_lookup(object, to_key, &right_coord,
-+ &lock, ZNODE_WRITE_LOCK,
-+ FIND_MAX_NOT_MORE_THAN,
-+ TWIG_LEVEL, LEAF_LEVEL,
-+ CBK_UNIQUE, NULL /*ra_info */);
-+ if (result != CBK_COORD_FOUND)
-+ break;
-+ if (object == NULL
-+ || inode_file_plugin(object)->cut_tree_worker == NULL)
-+ cut_tree_worker = cut_tree_worker_common;
-+ else
-+ cut_tree_worker =
-+ inode_file_plugin(object)->cut_tree_worker;
-+ reiser4_tap_init(&tap, &right_coord, &lock, ZNODE_WRITE_LOCK);
-+ result =
-+ cut_tree_worker(&tap, from_key, to_key, smallest_removed_p,
-+ object, truncate, progress);
-+ reiser4_tap_done(&tap);
-+
-+ reiser4_preempt_point();
-+
-+ } while (0);
-+
-+ done_lh(&lock);
-+
-+ if (result) {
-+ switch (result) {
-+ case -E_NO_NEIGHBOR:
-+ result = 0;
-+ break;
-+ case -E_DEADLOCK:
-+ result = -E_REPEAT;
-+ case -E_REPEAT:
-+ case -ENOMEM:
-+ case -ENOENT:
-+ break;
-+ default:
-+ warning("nikita-2861", "failure: %i", result);
-+ }
-+ }
-+
-+ CHECK_COUNTERS;
-+ return result;
-+}
-+
-+/* repeat reiser4_cut_tree_object until everything is deleted.
-+ * unlike cut_file_items, it does not end current transaction if -E_REPEAT
-+ * is returned by cut_tree_object. */
-+int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
-+ const reiser4_key * to, struct inode *inode, int truncate)
-+{
-+ int result;
-+ int progress;
-+
-+ do {
-+ result = reiser4_cut_tree_object(tree, from, to, NULL,
-+ inode, truncate, &progress);
-+ } while (result == -E_REPEAT);
-+
-+ return result;
-+}
-+
-+/* finishing reiser4 initialization */
-+int reiser4_init_tree(reiser4_tree * tree /* pointer to structure being
-+ * initialized */ ,
-+ const reiser4_block_nr * root_block /* address of a root block
-+ * on a disk */ ,
-+ tree_level height /* height of a tree */ ,
-+ node_plugin * nplug /* default node plugin */ )
-+{
-+ int result;
-+
-+ assert("nikita-306", tree != NULL);
-+ assert("nikita-307", root_block != NULL);
-+ assert("nikita-308", height > 0);
-+ assert("nikita-309", nplug != NULL);
-+ assert("zam-587", tree->super != NULL);
-+
-+ tree->root_block = *root_block;
-+ tree->height = height;
-+ tree->estimate_one_insert = calc_estimate_one_insert(height);
-+ tree->nplug = nplug;
-+
-+ tree->znode_epoch = 1ull;
-+
-+ cbk_cache_init(&tree->cbk_cache);
-+
-+ result = znodes_tree_init(tree);
-+ if (result == 0)
-+ result = jnodes_tree_init(tree);
-+ if (result == 0) {
-+ tree->uber = zget(tree, &UBER_TREE_ADDR, NULL, 0,
-+ reiser4_ctx_gfp_mask_get());
-+ if (IS_ERR(tree->uber)) {
-+ result = PTR_ERR(tree->uber);
-+ tree->uber = NULL;
-+ }
-+ }
-+ return result;
-+}
-+
-+/* release resources associated with @tree */
-+void reiser4_done_tree(reiser4_tree * tree /* tree to release */ )
-+{
-+ if (tree == NULL)
-+ return;
-+
-+ if (tree->uber != NULL) {
-+ zput(tree->uber);
-+ tree->uber = NULL;
-+ }
-+ znodes_tree_done(tree);
-+ jnodes_tree_done(tree);
-+ cbk_cache_done(&tree->cbk_cache);
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree.h linux-2.6.24/fs/reiser4/tree.h
---- linux-2.6.24.orig/fs/reiser4/tree.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree.h 2008-01-25 11:39:07.096248905 +0300
-@@ -0,0 +1,577 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Tree operations. See fs/reiser4/tree.c for comments */
-+
-+#if !defined( __REISER4_TREE_H__ )
-+#define __REISER4_TREE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "znode.h"
-+#include "tap.h"
-+
-+#include <linux/types.h> /* for __u?? */
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/spinlock.h>
-+#include <linux/sched.h> /* for struct task_struct */
-+
-+/* fictive block number never actually used */
-+extern const reiser4_block_nr UBER_TREE_ADDR;
-+
-+/* &cbk_cache_slot - entry in a coord cache.
-+
-+ This is entry in a coord_by_key (cbk) cache, represented by
-+ &cbk_cache.
-+
-+*/
-+typedef struct cbk_cache_slot {
-+ /* cached node */
-+ znode *node;
-+ /* linkage to the next cbk cache slot in a LRU order */
-+ struct list_head lru;
-+} cbk_cache_slot;
-+
-+/* &cbk_cache - coord cache. This is part of reiser4_tree.
-+
-+ cbk_cache is supposed to speed up tree lookups by caching results of recent
-+ successful lookups (we don't cache negative results as dentry cache
-+ does). Cache consists of relatively small number of entries kept in a LRU
-+ order. Each entry (&cbk_cache_slot) contains a pointer to znode, from
-+ which we can obtain a range of keys that covered by this znode. Before
-+ embarking into real tree traversal we scan cbk_cache slot by slot and for
-+ each slot check whether key we are looking for is between minimal and
-+ maximal keys for node pointed to by this slot. If no match is found, real
-+ tree traversal is performed and if result is successful, appropriate entry
-+ is inserted into cache, possibly pulling least recently used entry out of
-+ it.
-+
-+ Tree spin lock is used to protect coord cache. If contention for this
-+ lock proves to be too high, more finer grained locking can be added.
-+
-+ Invariants involving parts of this data-type:
-+
-+ [cbk-cache-invariant]
-+*/
-+typedef struct cbk_cache {
-+ /* serializator */
-+ rwlock_t guard;
-+ int nr_slots;
-+ /* head of LRU list of cache slots */
-+ struct list_head lru;
-+ /* actual array of slots */
-+ cbk_cache_slot *slot;
-+} cbk_cache;
-+
-+/* level_lookup_result - possible outcome of looking up key at some level.
-+ This is used by coord_by_key when traversing tree downward. */
-+typedef enum {
-+ /* continue to the next level */
-+ LOOKUP_CONT,
-+ /* done. Either required item was found, or we can prove it
-+ doesn't exist, or some error occurred. */
-+ LOOKUP_DONE,
-+ /* restart traversal from the root. Infamous "repetition". */
-+ LOOKUP_REST
-+} level_lookup_result;
-+
-+/* This is representation of internal reiser4 tree where all file-system
-+ data and meta-data are stored. This structure is passed to all tree
-+ manipulation functions. It's different from the super block because:
-+ we don't want to limit ourselves to strictly one to one mapping
-+ between super blocks and trees, and, because they are logically
-+ different: there are things in a super block that have no relation to
-+ the tree (bitmaps, journalling area, mount options, etc.) and there
-+ are things in a tree that bear no relation to the super block, like
-+ tree of znodes.
-+
-+ At this time, there is only one tree
-+ per filesystem, and this struct is part of the super block. We only
-+ call the super block the super block for historical reasons (most
-+ other filesystems call the per filesystem metadata the super block).
-+*/
-+
-+struct reiser4_tree {
-+ /* block_nr == 0 is fake znode. Write lock it, while changing
-+ tree height. */
-+ /* disk address of root node of a tree */
-+ reiser4_block_nr root_block;
-+
-+ /* level of the root node. If this is 1, tree consists of root
-+ node only */
-+ tree_level height;
-+
-+ /*
-+ * this is cached here avoid calling plugins through function
-+ * dereference all the time.
-+ */
-+ __u64 estimate_one_insert;
-+
-+ /* cache of recent tree lookup results */
-+ cbk_cache cbk_cache;
-+
-+ /* hash table to look up znodes by block number. */
-+ z_hash_table zhash_table;
-+ z_hash_table zfake_table;
-+ /* hash table to look up jnodes by inode and offset. */
-+ j_hash_table jhash_table;
-+
-+ /* lock protecting:
-+ - parent pointers,
-+ - sibling pointers,
-+ - znode hash table
-+ - coord cache
-+ */
-+ /* NOTE: The "giant" tree lock can be replaced by more spin locks,
-+ hoping they will be less contented. We can use one spin lock per one
-+ znode hash bucket. With adding of some code complexity, sibling
-+ pointers can be protected by both znode spin locks. However it looks
-+ more SMP scalable we should test this locking change on n-ways (n >
-+ 4) SMP machines. Current 4-ways machine test does not show that tree
-+ lock is contented and it is a bottleneck (2003.07.25). */
-+
-+ rwlock_t tree_lock;
-+
-+ /* lock protecting delimiting keys */
-+ rwlock_t dk_lock;
-+
-+ /* spin lock protecting znode_epoch */
-+ spinlock_t epoch_lock;
-+ /* version stamp used to mark znode updates. See seal.[ch] for more
-+ * information. */
-+ __u64 znode_epoch;
-+
-+ znode *uber;
-+ node_plugin *nplug;
-+ struct super_block *super;
-+ struct {
-+ /* carry flags used for insertion of new nodes */
-+ __u32 new_node_flags;
-+ /* carry flags used for insertion of new extents */
-+ __u32 new_extent_flags;
-+ /* carry flags used for paste operations */
-+ __u32 paste_flags;
-+ /* carry flags used for insert operations */
-+ __u32 insert_flags;
-+ } carry;
-+};
-+
-+extern int reiser4_init_tree(reiser4_tree * tree,
-+ const reiser4_block_nr * root_block,
-+ tree_level height, node_plugin * default_plugin);
-+extern void reiser4_done_tree(reiser4_tree * tree);
-+
-+/* cbk flags: options for coord_by_key() */
-+typedef enum {
-+ /* coord_by_key() is called for insertion. This is necessary because
-+ of extents being located at the twig level. For explanation, see
-+ comment just above is_next_item_internal().
-+ */
-+ CBK_FOR_INSERT = (1 << 0),
-+ /* coord_by_key() is called with key that is known to be unique */
-+ CBK_UNIQUE = (1 << 1),
-+ /* coord_by_key() can trust delimiting keys. This options is not user
-+ accessible. coord_by_key() will set it automatically. It will be
-+ only cleared by special-case in extents-on-the-twig-level handling
-+ where it is necessary to insert item with a key smaller than
-+ leftmost key in a node. This is necessary because of extents being
-+ located at the twig level. For explanation, see comment just above
-+ is_next_item_internal().
-+ */
-+ CBK_TRUST_DK = (1 << 2),
-+ CBK_READA = (1 << 3), /* original: readahead leaves which contain items of certain file */
-+ CBK_READDIR_RA = (1 << 4), /* readdir: readahead whole directory and all its stat datas */
-+ CBK_DKSET = (1 << 5),
-+ CBK_EXTENDED_COORD = (1 << 6), /* coord_t is actually */
-+ CBK_IN_CACHE = (1 << 7), /* node is already in cache */
-+ CBK_USE_CRABLOCK = (1 << 8) /* use crab_lock in stead of long term
-+ * lock */
-+} cbk_flags;
-+
-+/* insertion outcome. IBK = insert by key */
-+typedef enum {
-+ IBK_INSERT_OK = 0,
-+ IBK_ALREADY_EXISTS = -EEXIST,
-+ IBK_IO_ERROR = -EIO,
-+ IBK_NO_SPACE = -E_NODE_FULL,
-+ IBK_OOM = -ENOMEM
-+} insert_result;
-+
-+#define IS_CBKERR(err) ((err) != CBK_COORD_FOUND && (err) != CBK_COORD_NOTFOUND)
-+
-+typedef int (*tree_iterate_actor_t) (reiser4_tree * tree, coord_t * coord,
-+ lock_handle * lh, void *arg);
-+extern int reiser4_iterate_tree(reiser4_tree * tree, coord_t * coord,
-+ lock_handle * lh,
-+ tree_iterate_actor_t actor, void *arg,
-+ znode_lock_mode mode, int through_units_p);
-+extern int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
-+ znode_lock_request pri, lock_handle * lh);
-+
-+/* return node plugin of @node */
-+static inline node_plugin *node_plugin_by_node(const znode *
-+ node /* node to query */ )
-+{
-+ assert("vs-213", node != NULL);
-+ assert("vs-214", znode_is_loaded(node));
-+
-+ return node->nplug;
-+}
-+
-+/* number of items in @node */
-+static inline pos_in_node_t node_num_items(const znode * node)
-+{
-+ assert("nikita-2754", znode_is_loaded(node));
-+ assert("nikita-2468",
-+ node_plugin_by_node(node)->num_of_items(node) == node->nr_items);
-+
-+ return node->nr_items;
-+}
-+
-+/* Return the number of items at the present node. Asserts coord->node !=
-+ NULL. */
-+static inline unsigned coord_num_items(const coord_t * coord)
-+{
-+ assert("jmacd-9805", coord->node != NULL);
-+
-+ return node_num_items(coord->node);
-+}
-+
-+/* true if @node is empty */
-+static inline int node_is_empty(const znode * node)
-+{
-+ return node_num_items(node) == 0;
-+}
-+
-+typedef enum {
-+ SHIFTED_SOMETHING = 0,
-+ SHIFT_NO_SPACE = -E_NODE_FULL,
-+ SHIFT_IO_ERROR = -EIO,
-+ SHIFT_OOM = -ENOMEM,
-+} shift_result;
-+
-+extern node_plugin *node_plugin_by_coord(const coord_t * coord);
-+extern int is_coord_in_node(const coord_t * coord);
-+extern int key_in_node(const reiser4_key *, const coord_t *);
-+extern void coord_item_move_to(coord_t * coord, int items);
-+extern void coord_unit_move_to(coord_t * coord, int units);
-+
-+/* there are two types of repetitive accesses (ra): intra-syscall
-+ (local) and inter-syscall (global). Local ra is used when
-+ during single syscall we add/delete several items and units in the
-+ same place in a tree. Note that plan-A fragments local ra by
-+ separating stat-data and file body in key-space. Global ra is
-+ used when user does repetitive modifications in the same place in a
-+ tree.
-+
-+ Our ra implementation serves following purposes:
-+ 1 it affects balancing decisions so that next operation in a row
-+ can be performed faster;
-+ 2 it affects lower-level read-ahead in page-cache;
-+ 3 it allows to avoid unnecessary lookups by maintaining some state
-+ across several operations (this is only for local ra);
-+ 4 it leaves room for lazy-micro-balancing: when we start a sequence of
-+ operations they are performed without actually doing any intra-node
-+ shifts, until we finish sequence or scope of sequence leaves
-+ current node, only then we really pack node (local ra only).
-+*/
-+
-+/* another thing that can be useful is to keep per-tree and/or
-+ per-process cache of recent lookups. This cache can be organised as a
-+ list of block numbers of formatted nodes sorted by starting key in
-+ this node. Balancings should invalidate appropriate parts of this
-+ cache.
-+*/
-+
-+lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key,
-+ coord_t * coord, lock_handle * handle,
-+ znode_lock_mode lock, lookup_bias bias,
-+ tree_level lock_level, tree_level stop_level,
-+ __u32 flags, ra_info_t *);
-+
-+lookup_result reiser4_object_lookup(struct inode *object,
-+ const reiser4_key * key,
-+ coord_t * coord,
-+ lock_handle * lh,
-+ znode_lock_mode lock_mode,
-+ lookup_bias bias,
-+ tree_level lock_level,
-+ tree_level stop_level,
-+ __u32 flags, ra_info_t * info);
-+
-+insert_result insert_by_key(reiser4_tree * tree, const reiser4_key * key,
-+ reiser4_item_data * data, coord_t * coord,
-+ lock_handle * lh,
-+ tree_level stop_level, __u32 flags);
-+insert_result insert_by_coord(coord_t * coord,
-+ reiser4_item_data * data, const reiser4_key * key,
-+ lock_handle * lh, __u32);
-+insert_result insert_extent_by_coord(coord_t * coord,
-+ reiser4_item_data * data,
-+ const reiser4_key * key, lock_handle * lh);
-+int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key,
-+ const reiser4_key * to_key,
-+ reiser4_key * smallest_removed);
-+int kill_node_content(coord_t * from, coord_t * to,
-+ const reiser4_key * from_key, const reiser4_key * to_key,
-+ reiser4_key * smallest_removed,
-+ znode * locked_left_neighbor, struct inode *inode,
-+ int truncate);
-+
-+int reiser4_resize_item(coord_t * coord, reiser4_item_data * data,
-+ reiser4_key * key, lock_handle * lh, cop_insert_flag);
-+int insert_into_item(coord_t * coord, lock_handle * lh, const reiser4_key * key,
-+ reiser4_item_data * data, unsigned);
-+int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f);
-+int find_new_child_ptr(znode * parent, znode * child, znode * left,
-+ coord_t * result);
-+
-+int shift_right_of_but_excluding_insert_coord(coord_t * insert_coord);
-+int shift_left_of_and_including_insert_coord(coord_t * insert_coord);
-+
-+void fake_kill_hook_tail(struct inode *, loff_t start, loff_t end, int);
-+
-+extern int cut_tree_worker_common(tap_t *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+extern int reiser4_cut_tree_object(reiser4_tree *, const reiser4_key *,
-+ const reiser4_key *, reiser4_key *,
-+ struct inode *, int, int *);
-+extern int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
-+ const reiser4_key * to, struct inode *, int);
-+
-+extern int reiser4_delete_node(znode *, reiser4_key *, struct inode *, int);
-+extern int check_tree_pointer(const coord_t * pointer, const znode * child);
-+extern int find_new_child_ptr(znode * parent, znode * child UNUSED_ARG,
-+ znode * left, coord_t * result);
-+extern int find_child_ptr(znode * parent, znode * child, coord_t * result);
-+extern int set_child_delimiting_keys(znode * parent, const coord_t * in_parent,
-+ znode * child);
-+extern znode *child_znode(const coord_t * in_parent, znode * parent,
-+ int incore_p, int setup_dkeys_p);
-+
-+extern int cbk_cache_init(cbk_cache * cache);
-+extern void cbk_cache_done(cbk_cache * cache);
-+extern void cbk_cache_invalidate(const znode * node, reiser4_tree * tree);
-+
-+extern char *sprint_address(const reiser4_block_nr * block);
-+
-+#if REISER4_DEBUG
-+extern void print_coord_content(const char *prefix, coord_t * p);
-+extern void reiser4_print_address(const char *prefix,
-+ const reiser4_block_nr * block);
-+extern void print_tree_rec(const char *prefix, reiser4_tree * tree,
-+ __u32 flags);
-+extern void check_dkeys(znode *node);
-+#else
-+#define print_coord_content(p, c) noop
-+#define reiser4_print_address(p, b) noop
-+#endif
-+
-+extern void forget_znode(lock_handle * handle);
-+extern int deallocate_znode(znode * node);
-+
-+extern int is_disk_addr_unallocated(const reiser4_block_nr * addr);
-+
-+/* struct used internally to pack all numerous arguments of tree lookup.
-+ Used to avoid passing a lot of arguments to helper functions. */
-+typedef struct cbk_handle {
-+ /* tree we are in */
-+ reiser4_tree *tree;
-+ /* key we are going after */
-+ const reiser4_key *key;
-+ /* coord we will store result in */
-+ coord_t *coord;
-+ /* type of lock to take on target node */
-+ znode_lock_mode lock_mode;
-+ /* lookup bias. See comments at the declaration of lookup_bias */
-+ lookup_bias bias;
-+ /* lock level: level starting from which tree traversal starts taking
-+ * write locks. */
-+ tree_level lock_level;
-+ /* level where search will stop. Either item will be found between
-+ lock_level and stop_level, or CBK_COORD_NOTFOUND will be
-+ returned.
-+ */
-+ tree_level stop_level;
-+ /* level we are currently at */
-+ tree_level level;
-+ /* block number of @active node. Tree traversal operates on two
-+ nodes: active and parent. */
-+ reiser4_block_nr block;
-+ /* put here error message to be printed by caller */
-+ const char *error;
-+ /* result passed back to caller */
-+ lookup_result result;
-+ /* lock handles for active and parent */
-+ lock_handle *parent_lh;
-+ lock_handle *active_lh;
-+ reiser4_key ld_key;
-+ reiser4_key rd_key;
-+ /* flags, passed to the cbk routine. Bits of this bitmask are defined
-+ in tree.h:cbk_flags enum. */
-+ __u32 flags;
-+ ra_info_t *ra_info;
-+ struct inode *object;
-+} cbk_handle;
-+
-+extern znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h);
-+
-+/* eottl.c */
-+extern int handle_eottl(cbk_handle *h, int *outcome);
-+
-+int lookup_multikey(cbk_handle * handle, int nr_keys);
-+int lookup_couple(reiser4_tree * tree,
-+ const reiser4_key * key1, const reiser4_key * key2,
-+ coord_t * coord1, coord_t * coord2,
-+ lock_handle * lh1, lock_handle * lh2,
-+ znode_lock_mode lock_mode, lookup_bias bias,
-+ tree_level lock_level, tree_level stop_level, __u32 flags,
-+ int *result1, int *result2);
-+
-+static inline void read_lock_tree(reiser4_tree *tree)
-+{
-+ /* check that tree is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(read_locked_tree) &&
-+ LOCK_CNT_NIL(write_locked_tree)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_stack)));
-+
-+ read_lock(&(tree->tree_lock));
-+
-+ LOCK_CNT_INC(read_locked_tree);
-+ LOCK_CNT_INC(rw_locked_tree);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void read_unlock_tree(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(read_locked_tree);
-+ LOCK_CNT_DEC(rw_locked_tree);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ read_unlock(&(tree->tree_lock));
-+}
-+
-+static inline void write_lock_tree(reiser4_tree *tree)
-+{
-+ /* check that tree is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
-+ LOCK_CNT_NIL(read_locked_tree) &&
-+ LOCK_CNT_NIL(write_locked_tree)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_stack)));
-+
-+ write_lock(&(tree->tree_lock));
-+
-+ LOCK_CNT_INC(write_locked_tree);
-+ LOCK_CNT_INC(rw_locked_tree);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void write_unlock_tree(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(write_locked_tree);
-+ LOCK_CNT_DEC(rw_locked_tree);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ write_unlock(&(tree->tree_lock));
-+}
-+
-+static inline void read_lock_dk(reiser4_tree *tree)
-+{
-+ /* check that dk is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(read_locked_dk) &&
-+ LOCK_CNT_NIL(write_locked_dk)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ read_lock(&((tree)->dk_lock));
-+
-+ LOCK_CNT_INC(read_locked_dk);
-+ LOCK_CNT_INC(rw_locked_dk);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void read_unlock_dk(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(read_locked_dk);
-+ LOCK_CNT_DEC(rw_locked_dk);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ read_unlock(&(tree->dk_lock));
-+}
-+
-+static inline void write_lock_dk(reiser4_tree *tree)
-+{
-+ /* check that dk is not locked */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(read_locked_dk) &&
-+ LOCK_CNT_NIL(write_locked_dk)));
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", LOCK_CNT_NIL(spin_locked_stack));
-+
-+ write_lock(&((tree)->dk_lock));
-+
-+ LOCK_CNT_INC(write_locked_dk);
-+ LOCK_CNT_INC(rw_locked_dk);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void write_unlock_dk(reiser4_tree *tree)
-+{
-+ assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(write_locked_dk);
-+ LOCK_CNT_DEC(rw_locked_dk);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ write_unlock(&(tree->dk_lock));
-+}
-+
-+/* estimate api. Implementation is in estimate.c */
-+reiser4_block_nr estimate_one_insert_item(reiser4_tree *);
-+reiser4_block_nr estimate_one_insert_into_item(reiser4_tree *);
-+reiser4_block_nr estimate_insert_flow(tree_level);
-+reiser4_block_nr estimate_one_item_removal(reiser4_tree *);
-+reiser4_block_nr calc_estimate_one_insert(tree_level);
-+reiser4_block_nr estimate_dirty_cluster(struct inode *);
-+reiser4_block_nr estimate_insert_cluster(struct inode *);
-+reiser4_block_nr estimate_update_cluster(struct inode *);
-+
-+/* __REISER4_TREE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree_mod.c linux-2.6.24/fs/reiser4/tree_mod.c
---- linux-2.6.24.orig/fs/reiser4/tree_mod.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree_mod.c 2008-01-25 11:39:07.100249935 +0300
-@@ -0,0 +1,386 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/*
-+ * Functions to add/delete new nodes to/from the tree.
-+ *
-+ * Functions from this file are used by carry (see carry*) to handle:
-+ *
-+ * . insertion of new formatted node into tree
-+ *
-+ * . addition of new tree root, increasing tree height
-+ *
-+ * . removing tree root, decreasing tree height
-+ *
-+ */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/plugin.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_mod.h"
-+#include "block_alloc.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+#include <linux/err.h>
-+
-+static int add_child_ptr(znode * parent, znode * child);
-+/* warning only issued if error is not -E_REPEAT */
-+#define ewarning( error, ... ) \
-+ if( ( error ) != -E_REPEAT ) \
-+ warning( __VA_ARGS__ )
-+
-+/* allocate new node on the @level and immediately on the right of @brother. */
-+znode * reiser4_new_node(znode * brother /* existing left neighbor
-+ * of new node */,
-+ tree_level level /* tree level at which new node is to
-+ * be allocated */)
-+{
-+ znode *result;
-+ int retcode;
-+ reiser4_block_nr blocknr;
-+
-+ assert("nikita-930", brother != NULL);
-+ assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT);
-+
-+ retcode = assign_fake_blocknr_formatted(&blocknr);
-+ if (retcode == 0) {
-+ result =
-+ zget(znode_get_tree(brother), &blocknr, NULL, level,
-+ reiser4_ctx_gfp_mask_get());
-+ if (IS_ERR(result)) {
-+ ewarning(PTR_ERR(result), "nikita-929",
-+ "Cannot allocate znode for carry: %li",
-+ PTR_ERR(result));
-+ return result;
-+ }
-+ /* cheap test, can be executed even when debugging is off */
-+ if (!znode_just_created(result)) {
-+ warning("nikita-2213",
-+ "Allocated already existing block: %llu",
-+ (unsigned long long)blocknr);
-+ zput(result);
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ assert("nikita-931", result != NULL);
-+ result->nplug = znode_get_tree(brother)->nplug;
-+ assert("nikita-933", result->nplug != NULL);
-+
-+ retcode = zinit_new(result, reiser4_ctx_gfp_mask_get());
-+ if (retcode == 0) {
-+ ZF_SET(result, JNODE_CREATED);
-+ zrelse(result);
-+ } else {
-+ zput(result);
-+ result = ERR_PTR(retcode);
-+ }
-+ } else {
-+ /* failure to allocate new node during balancing.
-+ This should never happen. Ever. Returning -E_REPEAT
-+ is not viable solution, because "out of disk space"
-+ is not transient error that will go away by itself.
-+ */
-+ ewarning(retcode, "nikita-928",
-+ "Cannot allocate block for carry: %i", retcode);
-+ result = ERR_PTR(retcode);
-+ }
-+ assert("nikita-1071", result != NULL);
-+ return result;
-+}
-+
-+/* allocate new root and add it to the tree
-+
-+ This helper function is called by add_new_root().
-+
-+*/
-+znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ ,
-+ znode * fake /* "fake" znode */ )
-+{
-+ reiser4_tree *tree = znode_get_tree(old_root);
-+ znode *new_root = NULL; /* to shut gcc up */
-+ int result;
-+
-+ assert("nikita-1069", old_root != NULL);
-+ assert("umka-262", fake != NULL);
-+ assert("umka-263", tree != NULL);
-+
-+ /* "fake" znode---one always hanging just above current root. This
-+ node is locked when new root is created or existing root is
-+ deleted. Downward tree traversal takes lock on it before taking
-+ lock on a root node. This avoids race conditions with root
-+ manipulations.
-+
-+ */
-+ assert("nikita-1348", znode_above_root(fake));
-+ assert("nikita-1211", znode_is_root(old_root));
-+
-+ result = 0;
-+ if (tree->height >= REAL_MAX_ZTREE_HEIGHT) {
-+ warning("nikita-1344", "Tree is too tall: %i", tree->height);
-+ /* ext2 returns -ENOSPC when it runs out of free inodes with a
-+ following comment (fs/ext2/ialloc.c:441): Is it really
-+ ENOSPC?
-+
-+ -EXFULL? -EINVAL?
-+ */
-+ result = RETERR(-ENOSPC);
-+ } else {
-+ /* Allocate block for new root. It's not that
-+ important where it will be allocated, as root is
-+ almost always in memory. Moreover, allocate on
-+ flush can be going here.
-+ */
-+ assert("nikita-1448", znode_is_root(old_root));
-+ new_root = reiser4_new_node(fake, tree->height + 1);
-+ if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) {
-+ lock_handle rlh;
-+
-+ init_lh(&rlh);
-+ result =
-+ longterm_lock_znode(&rlh, new_root,
-+ ZNODE_WRITE_LOCK,
-+ ZNODE_LOCK_LOPRI);
-+ if (result == 0) {
-+ parent_coord_t *in_parent;
-+
-+ znode_make_dirty(fake);
-+
-+ /* new root is a child of "fake" node */
-+ write_lock_tree(tree);
-+
-+ ++tree->height;
-+
-+ /* recalculate max balance overhead */
-+ tree->estimate_one_insert =
-+ estimate_one_insert_item(tree);
-+
-+ tree->root_block = *znode_get_block(new_root);
-+ in_parent = &new_root->in_parent;
-+ init_parent_coord(in_parent, fake);
-+ /* manually insert new root into sibling
-+ * list. With this all nodes involved into
-+ * balancing are connected after balancing is
-+ * done---useful invariant to check. */
-+ sibling_list_insert_nolock(new_root, NULL);
-+ write_unlock_tree(tree);
-+
-+ /* insert into new root pointer to the
-+ @old_root. */
-+ assert("nikita-1110",
-+ WITH_DATA(new_root,
-+ node_is_empty(new_root)));
-+ write_lock_dk(tree);
-+ znode_set_ld_key(new_root, reiser4_min_key());
-+ znode_set_rd_key(new_root, reiser4_max_key());
-+ write_unlock_dk(tree);
-+ if (REISER4_DEBUG) {
-+ ZF_CLR(old_root, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(old_root, JNODE_RIGHT_CONNECTED);
-+ ZF_SET(old_root, JNODE_ORPHAN);
-+ }
-+ result = add_child_ptr(new_root, old_root);
-+ done_lh(&rlh);
-+ }
-+ zrelse(new_root);
-+ }
-+ }
-+ if (result != 0)
-+ new_root = ERR_PTR(result);
-+ return new_root;
-+}
-+
-+/* build &reiser4_item_data for inserting child pointer
-+
-+ Build &reiser4_item_data that can be later used to insert pointer to @child
-+ in its parent.
-+
-+*/
-+void build_child_ptr_data(znode * child /* node pointer to which will be
-+ * inserted */ ,
-+ reiser4_item_data * data /* where to store result */ )
-+{
-+ assert("nikita-1116", child != NULL);
-+ assert("nikita-1117", data != NULL);
-+
-+ /*
-+ * NOTE: use address of child's blocknr as address of data to be
-+ * inserted. As result of this data gets into on-disk structure in cpu
-+ * byte order. internal's create_hook converts it to little endian byte
-+ * order.
-+ */
-+ data->data = (char *)znode_get_block(child);
-+ /* data -> data is kernel space */
-+ data->user = 0;
-+ data->length = sizeof(reiser4_block_nr);
-+ /* FIXME-VS: hardcoded internal item? */
-+
-+ /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */
-+ data->iplug = item_plugin_by_id(NODE_POINTER_ID);
-+}
-+
-+/* add pointer to @child into empty @parent.
-+
-+ This is used when pointer to old root is inserted into new root which is
-+ empty.
-+*/
-+static int add_child_ptr(znode * parent, znode * child)
-+{
-+ coord_t coord;
-+ reiser4_item_data data;
-+ int result;
-+ reiser4_key key;
-+
-+ assert("nikita-1111", parent != NULL);
-+ assert("nikita-1112", child != NULL);
-+ assert("nikita-1115",
-+ znode_get_level(parent) == znode_get_level(child) + 1);
-+
-+ result = zload(parent);
-+ if (result != 0)
-+ return result;
-+ assert("nikita-1113", node_is_empty(parent));
-+ coord_init_first_unit(&coord, parent);
-+
-+ build_child_ptr_data(child, &data);
-+ data.arg = NULL;
-+
-+ read_lock_dk(znode_get_tree(parent));
-+ key = *znode_get_ld_key(child);
-+ read_unlock_dk(znode_get_tree(parent));
-+
-+ result = node_plugin_by_node(parent)->create_item(&coord, &key, &data,
-+ NULL);
-+ znode_make_dirty(parent);
-+ zrelse(parent);
-+ return result;
-+}
-+
-+/* actually remove tree root */
-+static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is
-+ * being removed */,
-+ znode * old_root /* root node that is being
-+ * removed */ ,
-+ znode * new_root /* new root---sole child of
-+ * @old_root */,
-+ const reiser4_block_nr * new_root_blk /* disk address of
-+ * @new_root */)
-+{
-+ znode *uber;
-+ int result;
-+ lock_handle handle_for_uber;
-+
-+ assert("umka-265", tree != NULL);
-+ assert("nikita-1198", new_root != NULL);
-+ assert("nikita-1199",
-+ znode_get_level(new_root) + 1 == znode_get_level(old_root));
-+
-+ assert("nikita-1201", znode_is_write_locked(old_root));
-+
-+ assert("nikita-1203",
-+ disk_addr_eq(new_root_blk, znode_get_block(new_root)));
-+
-+ init_lh(&handle_for_uber);
-+ /* obtain and lock "fake" znode protecting changes in tree height. */
-+ result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
-+ &handle_for_uber);
-+ if (result == 0) {
-+ uber = handle_for_uber.node;
-+
-+ znode_make_dirty(uber);
-+
-+ /* don't take long term lock a @new_root. Take spinlock. */
-+
-+ write_lock_tree(tree);
-+
-+ tree->root_block = *new_root_blk;
-+ --tree->height;
-+
-+ /* recalculate max balance overhead */
-+ tree->estimate_one_insert = estimate_one_insert_item(tree);
-+
-+ assert("nikita-1202",
-+ tree->height == znode_get_level(new_root));
-+
-+ /* new root is child on "fake" node */
-+ init_parent_coord(&new_root->in_parent, uber);
-+ ++uber->c_count;
-+
-+ /* sibling_list_insert_nolock(new_root, NULL); */
-+ write_unlock_tree(tree);
-+
-+ /* reinitialise old root. */
-+ result = node_plugin_by_node(old_root)->init(old_root);
-+ znode_make_dirty(old_root);
-+ if (result == 0) {
-+ assert("nikita-1279", node_is_empty(old_root));
-+ ZF_SET(old_root, JNODE_HEARD_BANSHEE);
-+ old_root->c_count = 0;
-+ }
-+ }
-+ done_lh(&handle_for_uber);
-+
-+ return result;
-+}
-+
-+/* remove tree root
-+
-+ This function removes tree root, decreasing tree height by one. Tree root
-+ and its only child (that is going to become new tree root) are write locked
-+ at the entry.
-+
-+ To remove tree root we need to take lock on special "fake" znode that
-+ protects changes of tree height. See comments in reiser4_add_tree_root() for
-+ more on this.
-+
-+ Also parent pointers have to be updated in
-+ old and new root. To simplify code, function is split into two parts: outer
-+ reiser4_kill_tree_root() collects all necessary arguments and calls
-+ reiser4_kill_root() to do the actual job.
-+
-+*/
-+int reiser4_kill_tree_root(znode * old_root /* tree root that we are
-+ removing*/)
-+{
-+ int result;
-+ coord_t down_link;
-+ znode *new_root;
-+ reiser4_tree *tree;
-+
-+ assert("umka-266", current_tree != NULL);
-+ assert("nikita-1194", old_root != NULL);
-+ assert("nikita-1196", znode_is_root(old_root));
-+ assert("nikita-1200", node_num_items(old_root) == 1);
-+ assert("nikita-1401", znode_is_write_locked(old_root));
-+
-+ coord_init_first_unit(&down_link, old_root);
-+
-+ tree = znode_get_tree(old_root);
-+ new_root = child_znode(&down_link, old_root, 0, 1);
-+ if (!IS_ERR(new_root)) {
-+ result =
-+ reiser4_kill_root(tree, old_root, new_root,
-+ znode_get_block(new_root));
-+ zput(new_root);
-+ } else
-+ result = PTR_ERR(new_root);
-+
-+ return result;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree_mod.h linux-2.6.24/fs/reiser4/tree_mod.h
---- linux-2.6.24.orig/fs/reiser4/tree_mod.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree_mod.h 2008-01-25 11:39:07.100249935 +0300
-@@ -0,0 +1,29 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Functions to add/delete new nodes to/from the tree. See tree_mod.c for
-+ * comments. */
-+
-+#if !defined( __REISER4_TREE_MOD_H__ )
-+#define __REISER4_TREE_MOD_H__
-+
-+#include "forward.h"
-+
-+znode *reiser4_new_node(znode * brother, tree_level level);
-+znode *reiser4_add_tree_root(znode * old_root, znode * fake);
-+int reiser4_kill_tree_root(znode * old_root);
-+void build_child_ptr_data(znode * child, reiser4_item_data * data);
-+
-+/* __REISER4_TREE_MOD_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree_walk.c linux-2.6.24/fs/reiser4/tree_walk.c
---- linux-2.6.24.orig/fs/reiser4/tree_walk.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree_walk.c 2008-01-25 11:39:07.100249935 +0300
-@@ -0,0 +1,927 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Routines and macros to:
-+
-+ get_left_neighbor()
-+
-+ get_right_neighbor()
-+
-+ get_parent()
-+
-+ get_first_child()
-+
-+ get_last_child()
-+
-+ various routines to walk the whole tree and do things to it like
-+ repack it, or move it to tertiary storage. Please make them as
-+ generic as is reasonable.
-+
-+*/
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "tree_walk.h"
-+#include "tree.h"
-+#include "super.h"
-+
-+/* These macros are used internally in tree_walk.c in attempt to make
-+ lock_neighbor() code usable to build lock_parent(), lock_right_neighbor,
-+ lock_left_neighbor */
-+#define GET_NODE_BY_PTR_OFFSET(node, off) (*(znode**)(((unsigned long)(node)) + (off)))
-+#define FIELD_OFFSET(name) offsetof(znode, name)
-+#define PARENT_PTR_OFFSET FIELD_OFFSET(in_parent.node)
-+#define LEFT_PTR_OFFSET FIELD_OFFSET(left)
-+#define RIGHT_PTR_OFFSET FIELD_OFFSET(right)
-+
-+/* This is the generic procedure to get and lock `generic' neighbor (left or
-+ right neighbor or parent). It implements common algorithm for all cases of
-+ getting lock on neighbor node, only znode structure field is different in
-+ each case. This is parameterized by ptr_offset argument, which is byte
-+ offset for the pointer to the desired neighbor within the current node's
-+ znode structure. This function should be called with the tree lock held */
-+static int lock_neighbor(
-+ /* resulting lock handle */
-+ lock_handle * result,
-+ /* znode to lock */
-+ znode * node,
-+ /* pointer to neighbor (or parent) znode field offset, in bytes from
-+ the base address of znode structure */
-+ int ptr_offset,
-+ /* lock mode for longterm_lock_znode call */
-+ znode_lock_mode mode,
-+ /* lock request for longterm_lock_znode call */
-+ znode_lock_request req,
-+ /* GN_* flags */
-+ int flags, int rlocked)
-+{
-+ reiser4_tree *tree = znode_get_tree(node);
-+ znode *neighbor;
-+ int ret;
-+
-+ assert("umka-236", node != NULL);
-+ assert("umka-237", tree != NULL);
-+ assert_rw_locked(&(tree->tree_lock));
-+
-+ if (flags & GN_TRY_LOCK)
-+ req |= ZNODE_LOCK_NONBLOCK;
-+ if (flags & GN_SAME_ATOM)
-+ req |= ZNODE_LOCK_DONT_FUSE;
-+
-+ /* get neighbor's address by using of sibling link, quit while loop
-+ (and return) if link is not available. */
-+ while (1) {
-+ neighbor = GET_NODE_BY_PTR_OFFSET(node, ptr_offset);
-+
-+ /* return -E_NO_NEIGHBOR if parent or side pointer is NULL or if
-+ * node pointed by it is not connected.
-+ *
-+ * However, GN_ALLOW_NOT_CONNECTED option masks "connected"
-+ * check and allows passing reference to not connected znode to
-+ * subsequent longterm_lock_znode() call. This kills possible
-+ * busy loop if we are trying to get longterm lock on locked but
-+ * not yet connected parent node. */
-+ if (neighbor == NULL || !((flags & GN_ALLOW_NOT_CONNECTED)
-+ || znode_is_connected(neighbor))) {
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+
-+ /* protect it from deletion. */
-+ zref(neighbor);
-+
-+ rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
-+
-+ ret = longterm_lock_znode(result, neighbor, mode, req);
-+
-+ /* The lock handle obtains its own reference, release the one from above. */
-+ zput(neighbor);
-+
-+ rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
-+
-+ /* restart if node we got reference to is being
-+ invalidated. we should not get reference to this node
-+ again. */
-+ if (ret == -EINVAL)
-+ continue;
-+ if (ret)
-+ return ret;
-+
-+ /* check if neighbor link still points to just locked znode;
-+ the link could have been changed while the process slept. */
-+ if (neighbor == GET_NODE_BY_PTR_OFFSET(node, ptr_offset))
-+ return 0;
-+
-+ /* znode was locked by mistake; unlock it and restart locking
-+ process from beginning. */
-+ rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
-+ longterm_unlock_znode(result);
-+ rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
-+ }
-+}
-+
-+/* get parent node with longterm lock, accepts GN* flags. */
-+int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ ,
-+ znode * node /* child node */ ,
-+ znode_lock_mode mode
-+ /* type of lock: read or write */ ,
-+ int flags /* GN_* flags */ )
-+{
-+ int result;
-+
-+ read_lock_tree(znode_get_tree(node));
-+ result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode,
-+ ZNODE_LOCK_HIPRI, flags, 1);
-+ read_unlock_tree(znode_get_tree(node));
-+ return result;
-+}
-+
-+/* wrapper function to lock right or left neighbor depending on GN_GO_LEFT
-+ bit in @flags parameter */
-+/* Audited by: umka (2002.06.14) */
-+static inline int
-+lock_side_neighbor(lock_handle * result,
-+ znode * node, znode_lock_mode mode, int flags, int rlocked)
-+{
-+ int ret;
-+ int ptr_offset;
-+ znode_lock_request req;
-+
-+ if (flags & GN_GO_LEFT) {
-+ ptr_offset = LEFT_PTR_OFFSET;
-+ req = ZNODE_LOCK_LOPRI;
-+ } else {
-+ ptr_offset = RIGHT_PTR_OFFSET;
-+ req = ZNODE_LOCK_HIPRI;
-+ }
-+
-+ ret =
-+ lock_neighbor(result, node, ptr_offset, mode, req, flags, rlocked);
-+
-+ if (ret == -E_NO_NEIGHBOR) /* if we walk left or right -E_NO_NEIGHBOR does not
-+ * guarantee that neighbor is absent in the
-+ * tree; in this case we return -ENOENT --
-+ * means neighbor at least not found in
-+ * cache */
-+ return RETERR(-ENOENT);
-+
-+ return ret;
-+}
-+
-+#if REISER4_DEBUG
-+
-+int check_sibling_list(znode * node)
-+{
-+ znode *scan;
-+ znode *next;
-+
-+ assert("nikita-3283", LOCK_CNT_GTZ(write_locked_tree));
-+
-+ if (node == NULL)
-+ return 1;
-+
-+ if (ZF_ISSET(node, JNODE_RIP))
-+ return 1;
-+
-+ assert("nikita-3270", node != NULL);
-+ assert_rw_write_locked(&(znode_get_tree(node)->tree_lock));
-+
-+ for (scan = node; znode_is_left_connected(scan); scan = next) {
-+ next = scan->left;
-+ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
-+ assert("nikita-3271", znode_is_right_connected(next));
-+ assert("nikita-3272", next->right == scan);
-+ } else
-+ break;
-+ }
-+ for (scan = node; znode_is_right_connected(scan); scan = next) {
-+ next = scan->right;
-+ if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
-+ assert("nikita-3273", znode_is_left_connected(next));
-+ assert("nikita-3274", next->left == scan);
-+ } else
-+ break;
-+ }
-+ return 1;
-+}
-+
-+#endif
-+
-+/* Znode sibling pointers maintenence. */
-+
-+/* Znode sibling pointers are established between any neighbored nodes which are
-+ in cache. There are two znode state bits (JNODE_LEFT_CONNECTED,
-+ JNODE_RIGHT_CONNECTED), if left or right sibling pointer contains actual
-+ value (even NULL), corresponded JNODE_*_CONNECTED bit is set.
-+
-+ Reiser4 tree operations which may allocate new znodes (CBK, tree balancing)
-+ take care about searching (hash table lookup may be required) of znode
-+ neighbors, establishing sibling pointers between them and setting
-+ JNODE_*_CONNECTED state bits. */
-+
-+/* adjusting of sibling pointers and `connected' states for two
-+ neighbors; works if one neighbor is NULL (was not found). */
-+
-+/* FIXME-VS: this is unstatic-ed to use in tree.c in prepare_twig_cut */
-+void link_left_and_right(znode * left, znode * right)
-+{
-+ assert("nikita-3275", check_sibling_list(left));
-+ assert("nikita-3275", check_sibling_list(right));
-+
-+ if (left != NULL) {
-+ if (left->right == NULL) {
-+ left->right = right;
-+ ZF_SET(left, JNODE_RIGHT_CONNECTED);
-+
-+ ON_DEBUG(left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+
-+ } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE)
-+ && left->right != right) {
-+
-+ ON_DEBUG(left->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ left->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ left->right->left = NULL;
-+ left->right = right;
-+ ZF_SET(left, JNODE_RIGHT_CONNECTED);
-+ } else
-+ /*
-+ * there is a race condition in renew_sibling_link()
-+ * and assertions below check that it is only one
-+ * there. Thread T1 calls renew_sibling_link() without
-+ * GN_NO_ALLOC flag. zlook() doesn't find neighbor
-+ * node, but before T1 gets to the
-+ * link_left_and_right(), another thread T2 creates
-+ * neighbor node and connects it. check for
-+ * left->right == NULL above protects T1 from
-+ * overwriting correct left->right pointer installed
-+ * by T2.
-+ */
-+ assert("nikita-3302",
-+ right == NULL || left->right == right);
-+ }
-+ if (right != NULL) {
-+ if (right->left == NULL) {
-+ right->left = left;
-+ ZF_SET(right, JNODE_LEFT_CONNECTED);
-+
-+ ON_DEBUG(right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+
-+ } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE)
-+ && right->left != left) {
-+
-+ ON_DEBUG(right->left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ right->left_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ right->left->right = NULL;
-+ right->left = left;
-+ ZF_SET(right, JNODE_LEFT_CONNECTED);
-+
-+ } else
-+ assert("nikita-3303",
-+ left == NULL || right->left == left);
-+ }
-+ assert("nikita-3275", check_sibling_list(left));
-+ assert("nikita-3275", check_sibling_list(right));
-+}
-+
-+/* Audited by: umka (2002.06.14) */
-+static void link_znodes(znode * first, znode * second, int to_left)
-+{
-+ if (to_left)
-+ link_left_and_right(second, first);
-+ else
-+ link_left_and_right(first, second);
-+}
-+
-+/* getting of next (to left or to right, depend on gn_to_left bit in flags)
-+ coord's unit position in horizontal direction, even across node
-+ boundary. Should be called under tree lock, it protects nonexistence of
-+ sibling link on parent level, if lock_side_neighbor() fails with
-+ -ENOENT. */
-+static int far_next_coord(coord_t * coord, lock_handle * handle, int flags)
-+{
-+ int ret;
-+ znode *node;
-+ reiser4_tree *tree;
-+
-+ assert("umka-243", coord != NULL);
-+ assert("umka-244", handle != NULL);
-+ assert("zam-1069", handle->node == NULL);
-+
-+ ret =
-+ (flags & GN_GO_LEFT) ? coord_prev_unit(coord) :
-+ coord_next_unit(coord);
-+ if (!ret)
-+ return 0;
-+
-+ ret =
-+ lock_side_neighbor(handle, coord->node, ZNODE_READ_LOCK, flags, 0);
-+ if (ret)
-+ return ret;
-+
-+ node = handle->node;
-+ tree = znode_get_tree(node);
-+ write_unlock_tree(tree);
-+
-+ coord_init_zero(coord);
-+
-+ /* We avoid synchronous read here if it is specified by flag. */
-+ if ((flags & GN_ASYNC) && znode_page(handle->node) == NULL) {
-+ ret = jstartio(ZJNODE(handle->node));
-+ if (!ret)
-+ ret = -E_REPEAT;
-+ goto error_locked;
-+ }
-+
-+ /* corresponded zrelse() should be called by the clients of
-+ far_next_coord(), in place when this node gets unlocked. */
-+ ret = zload(handle->node);
-+ if (ret)
-+ goto error_locked;
-+
-+ if (flags & GN_GO_LEFT)
-+ coord_init_last_unit(coord, node);
-+ else
-+ coord_init_first_unit(coord, node);
-+
-+ if (0) {
-+ error_locked:
-+ longterm_unlock_znode(handle);
-+ }
-+ write_lock_tree(tree);
-+ return ret;
-+}
-+
-+/* Very significant function which performs a step in horizontal direction
-+ when sibling pointer is not available. Actually, it is only function which
-+ does it.
-+ Note: this function does not restore locking status at exit,
-+ caller should does care about proper unlocking and zrelsing */
-+static int
-+renew_sibling_link(coord_t * coord, lock_handle * handle, znode * child,
-+ tree_level level, int flags, int *nr_locked)
-+{
-+ int ret;
-+ int to_left = flags & GN_GO_LEFT;
-+ reiser4_block_nr da;
-+ /* parent of the neighbor node; we set it to parent until not sharing
-+ of one parent between child and neighbor node is detected */
-+ znode *side_parent = coord->node;
-+ reiser4_tree *tree = znode_get_tree(child);
-+ znode *neighbor = NULL;
-+
-+ assert("umka-245", coord != NULL);
-+ assert("umka-246", handle != NULL);
-+ assert("umka-247", child != NULL);
-+ assert("umka-303", tree != NULL);
-+
-+ init_lh(handle);
-+ write_lock_tree(tree);
-+ ret = far_next_coord(coord, handle, flags);
-+
-+ if (ret) {
-+ if (ret != -ENOENT) {
-+ write_unlock_tree(tree);
-+ return ret;
-+ }
-+ } else {
-+ item_plugin *iplug;
-+
-+ if (handle->node != NULL) {
-+ (*nr_locked)++;
-+ side_parent = handle->node;
-+ }
-+
-+ /* does coord object points to internal item? We do not
-+ support sibling pointers between znode for formatted and
-+ unformatted nodes and return -E_NO_NEIGHBOR in that case. */
-+ iplug = item_plugin_by_coord(coord);
-+ if (!item_is_internal(coord)) {
-+ link_znodes(child, NULL, to_left);
-+ write_unlock_tree(tree);
-+ /* we know there can't be formatted neighbor */
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+ write_unlock_tree(tree);
-+
-+ iplug->s.internal.down_link(coord, NULL, &da);
-+
-+ if (flags & GN_NO_ALLOC) {
-+ neighbor = zlook(tree, &da);
-+ } else {
-+ neighbor =
-+ zget(tree, &da, side_parent, level,
-+ reiser4_ctx_gfp_mask_get());
-+ }
-+
-+ if (IS_ERR(neighbor)) {
-+ ret = PTR_ERR(neighbor);
-+ return ret;
-+ }
-+
-+ if (neighbor)
-+ /* update delimiting keys */
-+ set_child_delimiting_keys(coord->node, coord, neighbor);
-+
-+ write_lock_tree(tree);
-+ }
-+
-+ if (likely(neighbor == NULL ||
-+ (znode_get_level(child) == znode_get_level(neighbor)
-+ && child != neighbor)))
-+ link_znodes(child, neighbor, to_left);
-+ else {
-+ warning("nikita-3532",
-+ "Sibling nodes on the different levels: %i != %i\n",
-+ znode_get_level(child), znode_get_level(neighbor));
-+ ret = RETERR(-EIO);
-+ }
-+
-+ write_unlock_tree(tree);
-+
-+ /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */
-+ if (neighbor != NULL && (flags & GN_NO_ALLOC))
-+ /* atomic_dec(&ZJNODE(neighbor)->x_count); */
-+ zput(neighbor);
-+
-+ return ret;
-+}
-+
-+/* This function is for establishing of one side relation. */
-+/* Audited by: umka (2002.06.14) */
-+static int connect_one_side(coord_t * coord, znode * node, int flags)
-+{
-+ coord_t local;
-+ lock_handle handle;
-+ int nr_locked;
-+ int ret;
-+
-+ assert("umka-248", coord != NULL);
-+ assert("umka-249", node != NULL);
-+
-+ coord_dup_nocheck(&local, coord);
-+
-+ init_lh(&handle);
-+
-+ ret =
-+ renew_sibling_link(&local, &handle, node, znode_get_level(node),
-+ flags | GN_NO_ALLOC, &nr_locked);
-+
-+ if (handle.node != NULL) {
-+ /* complementary operations for zload() and lock() in far_next_coord() */
-+ zrelse(handle.node);
-+ longterm_unlock_znode(&handle);
-+ }
-+
-+ /* we catch error codes which are not interesting for us because we
-+ run renew_sibling_link() only for znode connection. */
-+ if (ret == -ENOENT || ret == -E_NO_NEIGHBOR)
-+ return 0;
-+
-+ return ret;
-+}
-+
-+/* if @child is not in `connected' state, performs hash searches for left and
-+ right neighbor nodes and establishes horizontal sibling links */
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+int connect_znode(coord_t * parent_coord, znode * child)
-+{
-+ reiser4_tree *tree = znode_get_tree(child);
-+ int ret = 0;
-+
-+ assert("zam-330", parent_coord != NULL);
-+ assert("zam-331", child != NULL);
-+ assert("zam-332", parent_coord->node != NULL);
-+ assert("umka-305", tree != NULL);
-+
-+ /* it is trivial to `connect' root znode because it can't have
-+ neighbors */
-+ if (znode_above_root(parent_coord->node)) {
-+ child->left = NULL;
-+ child->right = NULL;
-+ ZF_SET(child, JNODE_LEFT_CONNECTED);
-+ ZF_SET(child, JNODE_RIGHT_CONNECTED);
-+
-+ ON_DEBUG(child->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ child->right_version =
-+ atomic_inc_return(&delim_key_version););
-+
-+ return 0;
-+ }
-+
-+ /* load parent node */
-+ coord_clear_iplug(parent_coord);
-+ ret = zload(parent_coord->node);
-+
-+ if (ret != 0)
-+ return ret;
-+
-+ /* protect `connected' state check by tree_lock */
-+ read_lock_tree(tree);
-+
-+ if (!znode_is_right_connected(child)) {
-+ read_unlock_tree(tree);
-+ /* connect right (default is right) */
-+ ret = connect_one_side(parent_coord, child, GN_NO_ALLOC);
-+ if (ret)
-+ goto zrelse_and_ret;
-+
-+ read_lock_tree(tree);
-+ }
-+
-+ ret = znode_is_left_connected(child);
-+
-+ read_unlock_tree(tree);
-+
-+ if (!ret) {
-+ ret =
-+ connect_one_side(parent_coord, child,
-+ GN_NO_ALLOC | GN_GO_LEFT);
-+ } else
-+ ret = 0;
-+
-+ zrelse_and_ret:
-+ zrelse(parent_coord->node);
-+
-+ return ret;
-+}
-+
-+/* this function is like renew_sibling_link() but allocates neighbor node if
-+ it doesn't exist and `connects' it. It may require making two steps in
-+ horizontal direction, first one for neighbor node finding/allocation,
-+ second one is for finding neighbor of neighbor to connect freshly allocated
-+ znode. */
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+static int
-+renew_neighbor(coord_t * coord, znode * node, tree_level level, int flags)
-+{
-+ coord_t local;
-+ lock_handle empty[2];
-+ reiser4_tree *tree = znode_get_tree(node);
-+ znode *neighbor = NULL;
-+ int nr_locked = 0;
-+ int ret;
-+
-+ assert("umka-250", coord != NULL);
-+ assert("umka-251", node != NULL);
-+ assert("umka-307", tree != NULL);
-+ assert("umka-308", level <= tree->height);
-+
-+ /* umka (2002.06.14)
-+ Here probably should be a check for given "level" validness.
-+ Something like assert("xxx-yyy", level < REAL_MAX_ZTREE_HEIGHT);
-+ */
-+
-+ coord_dup(&local, coord);
-+
-+ ret =
-+ renew_sibling_link(&local, &empty[0], node, level,
-+ flags & ~GN_NO_ALLOC, &nr_locked);
-+ if (ret)
-+ goto out;
-+
-+ /* tree lock is not needed here because we keep parent node(s) locked
-+ and reference to neighbor znode incremented */
-+ neighbor = (flags & GN_GO_LEFT) ? node->left : node->right;
-+
-+ read_lock_tree(tree);
-+ ret = znode_is_connected(neighbor);
-+ read_unlock_tree(tree);
-+ if (ret) {
-+ ret = 0;
-+ goto out;
-+ }
-+
-+ ret =
-+ renew_sibling_link(&local, &empty[nr_locked], neighbor, level,
-+ flags | GN_NO_ALLOC, &nr_locked);
-+ /* second renew_sibling_link() call is used for znode connection only,
-+ so we can live with these errors */
-+ if (-ENOENT == ret || -E_NO_NEIGHBOR == ret)
-+ ret = 0;
-+
-+ out:
-+
-+ for (--nr_locked; nr_locked >= 0; --nr_locked) {
-+ zrelse(empty[nr_locked].node);
-+ longterm_unlock_znode(&empty[nr_locked]);
-+ }
-+
-+ if (neighbor != NULL)
-+ /* decrement znode reference counter without actually
-+ releasing it. */
-+ atomic_dec(&ZJNODE(neighbor)->x_count);
-+
-+ return ret;
-+}
-+
-+/*
-+ reiser4_get_neighbor() -- lock node's neighbor.
-+
-+ reiser4_get_neighbor() locks node's neighbor (left or right one, depends on
-+ given parameter) using sibling link to it. If sibling link is not available
-+ (i.e. neighbor znode is not in cache) and flags allow read blocks, we go one
-+ level up for information about neighbor's disk address. We lock node's
-+ parent, if it is common parent for both 'node' and its neighbor, neighbor's
-+ disk address is in next (to left or to right) down link from link that points
-+ to original node. If not, we need to lock parent's neighbor, read its content
-+ and take first(last) downlink with neighbor's disk address. That locking
-+ could be done by using sibling link and lock_neighbor() function, if sibling
-+ link exists. In another case we have to go level up again until we find
-+ common parent or valid sibling link. Then go down
-+ allocating/connecting/locking/reading nodes until neighbor of first one is
-+ locked.
-+
-+ @neighbor: result lock handle,
-+ @node: a node which we lock neighbor of,
-+ @lock_mode: lock mode {LM_READ, LM_WRITE},
-+ @flags: logical OR of {GN_*} (see description above) subset.
-+
-+ @return: 0 if success, negative value if lock was impossible due to an error
-+ or lack of neighbor node.
-+*/
-+
-+/* Audited by: umka (2002.06.14), umka (2002.06.15) */
-+int
-+reiser4_get_neighbor(lock_handle * neighbor, znode * node,
-+ znode_lock_mode lock_mode, int flags)
-+{
-+ reiser4_tree *tree = znode_get_tree(node);
-+ lock_handle path[REAL_MAX_ZTREE_HEIGHT];
-+
-+ coord_t coord;
-+
-+ tree_level base_level;
-+ tree_level h = 0;
-+ int ret;
-+
-+ assert("umka-252", tree != NULL);
-+ assert("umka-253", neighbor != NULL);
-+ assert("umka-254", node != NULL);
-+
-+ base_level = znode_get_level(node);
-+
-+ assert("umka-310", base_level <= tree->height);
-+
-+ coord_init_zero(&coord);
-+
-+ again:
-+ /* first, we try to use simple lock_neighbor() which requires sibling
-+ link existence */
-+ read_lock_tree(tree);
-+ ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1);
-+ read_unlock_tree(tree);
-+ if (!ret) {
-+ /* load znode content if it was specified */
-+ if (flags & GN_LOAD_NEIGHBOR) {
-+ ret = zload(node);
-+ if (ret)
-+ longterm_unlock_znode(neighbor);
-+ }
-+ return ret;
-+ }
-+
-+ /* only -ENOENT means we may look upward and try to connect
-+ @node with its neighbor (if @flags allow us to do it) */
-+ if (ret != -ENOENT || !(flags & GN_CAN_USE_UPPER_LEVELS))
-+ return ret;
-+
-+ /* before establishing of sibling link we lock parent node; it is
-+ required by renew_neighbor() to work. */
-+ init_lh(&path[0]);
-+ ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK);
-+ if (ret)
-+ return ret;
-+ if (znode_above_root(path[0].node)) {
-+ longterm_unlock_znode(&path[0]);
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+
-+ while (1) {
-+ znode *child = (h == 0) ? node : path[h - 1].node;
-+ znode *parent = path[h].node;
-+
-+ ret = zload(parent);
-+ if (ret)
-+ break;
-+
-+ ret = find_child_ptr(parent, child, &coord);
-+
-+ if (ret) {
-+ zrelse(parent);
-+ break;
-+ }
-+
-+ /* try to establish missing sibling link */
-+ ret = renew_neighbor(&coord, child, h + base_level, flags);
-+
-+ zrelse(parent);
-+
-+ switch (ret) {
-+ case 0:
-+ /* unlocking of parent znode prevents simple
-+ deadlock situation */
-+ done_lh(&path[h]);
-+
-+ /* depend on tree level we stay on we repeat first
-+ locking attempt ... */
-+ if (h == 0)
-+ goto again;
-+
-+ /* ... or repeat establishing of sibling link at
-+ one level below. */
-+ --h;
-+ break;
-+
-+ case -ENOENT:
-+ /* sibling link is not available -- we go
-+ upward. */
-+ init_lh(&path[h + 1]);
-+ ret =
-+ reiser4_get_parent(&path[h + 1], parent,
-+ ZNODE_READ_LOCK);
-+ if (ret)
-+ goto fail;
-+ ++h;
-+ if (znode_above_root(path[h].node)) {
-+ ret = RETERR(-E_NO_NEIGHBOR);
-+ goto fail;
-+ }
-+ break;
-+
-+ case -E_DEADLOCK:
-+ /* there was lock request from hi-pri locker. if
-+ it is possible we unlock last parent node and
-+ re-lock it again. */
-+ for (; reiser4_check_deadlock(); h--) {
-+ done_lh(&path[h]);
-+ if (h == 0)
-+ goto fail;
-+ }
-+
-+ break;
-+
-+ default: /* other errors. */
-+ goto fail;
-+ }
-+ }
-+ fail:
-+ ON_DEBUG(check_lock_node_data(node));
-+ ON_DEBUG(check_lock_data());
-+
-+ /* unlock path */
-+ do {
-+ /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto
-+ fail; path[0] is already done_lh-ed, therefore
-+ longterm_unlock_znode(&path[h]); is not applicable */
-+ done_lh(&path[h]);
-+ --h;
-+ } while (h + 1 != 0);
-+
-+ return ret;
-+}
-+
-+/* remove node from sibling list */
-+/* Audited by: umka (2002.06.14) */
-+void sibling_list_remove(znode * node)
-+{
-+ reiser4_tree *tree;
-+
-+ tree = znode_get_tree(node);
-+ assert("umka-255", node != NULL);
-+ assert_rw_write_locked(&(tree->tree_lock));
-+ assert("nikita-3275", check_sibling_list(node));
-+
-+ write_lock_dk(tree);
-+ if (znode_is_right_connected(node) && node->right != NULL &&
-+ znode_is_left_connected(node) && node->left != NULL) {
-+ assert("zam-32245",
-+ keyeq(znode_get_rd_key(node),
-+ znode_get_ld_key(node->right)));
-+ znode_set_rd_key(node->left, znode_get_ld_key(node->right));
-+ }
-+ write_unlock_dk(tree);
-+
-+ if (znode_is_right_connected(node) && node->right != NULL) {
-+ assert("zam-322", znode_is_left_connected(node->right));
-+ node->right->left = node->left;
-+ ON_DEBUG(node->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ if (znode_is_left_connected(node) && node->left != NULL) {
-+ assert("zam-323", znode_is_right_connected(node->left));
-+ node->left->right = node->right;
-+ ON_DEBUG(node->left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+
-+ ZF_CLR(node, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+ ON_DEBUG(node->left = node->right = NULL;
-+ node->left_version = atomic_inc_return(&delim_key_version);
-+ node->right_version = atomic_inc_return(&delim_key_version););
-+ assert("nikita-3276", check_sibling_list(node));
-+}
-+
-+/* disconnect node from sibling list */
-+void sibling_list_drop(znode * node)
-+{
-+ znode *right;
-+ znode *left;
-+
-+ assert("nikita-2464", node != NULL);
-+ assert("nikita-3277", check_sibling_list(node));
-+
-+ right = node->right;
-+ if (right != NULL) {
-+ assert("nikita-2465", znode_is_left_connected(right));
-+ right->left = NULL;
-+ ON_DEBUG(right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ left = node->left;
-+ if (left != NULL) {
-+ assert("zam-323", znode_is_right_connected(left));
-+ left->right = NULL;
-+ ON_DEBUG(left->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ ZF_CLR(node, JNODE_LEFT_CONNECTED);
-+ ZF_CLR(node, JNODE_RIGHT_CONNECTED);
-+ ON_DEBUG(node->left = node->right = NULL;
-+ node->left_version = atomic_inc_return(&delim_key_version);
-+ node->right_version = atomic_inc_return(&delim_key_version););
-+}
-+
-+/* Insert new node into sibling list. Regular balancing inserts new node
-+ after (at right side) existing and locked node (@before), except one case
-+ of adding new tree root node. @before should be NULL in that case. */
-+void sibling_list_insert_nolock(znode * new, znode * before)
-+{
-+ assert("zam-334", new != NULL);
-+ assert("nikita-3298", !znode_is_left_connected(new));
-+ assert("nikita-3299", !znode_is_right_connected(new));
-+ assert("nikita-3300", new->left == NULL);
-+ assert("nikita-3301", new->right == NULL);
-+ assert("nikita-3278", check_sibling_list(new));
-+ assert("nikita-3279", check_sibling_list(before));
-+
-+ if (before != NULL) {
-+ assert("zam-333", znode_is_connected(before));
-+ new->right = before->right;
-+ new->left = before;
-+ ON_DEBUG(new->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ new->left_version =
-+ atomic_inc_return(&delim_key_version););
-+ if (before->right != NULL) {
-+ before->right->left = new;
-+ ON_DEBUG(before->right->left_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ }
-+ before->right = new;
-+ ON_DEBUG(before->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ );
-+ } else {
-+ new->right = NULL;
-+ new->left = NULL;
-+ ON_DEBUG(new->right_version =
-+ atomic_inc_return(&delim_key_version);
-+ new->left_version =
-+ atomic_inc_return(&delim_key_version););
-+ }
-+ ZF_SET(new, JNODE_LEFT_CONNECTED);
-+ ZF_SET(new, JNODE_RIGHT_CONNECTED);
-+ assert("nikita-3280", check_sibling_list(new));
-+ assert("nikita-3281", check_sibling_list(before));
-+}
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/tree_walk.h linux-2.6.24/fs/reiser4/tree_walk.h
---- linux-2.6.24.orig/fs/reiser4/tree_walk.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/tree_walk.h 2008-01-25 11:39:07.100249935 +0300
-@@ -0,0 +1,125 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+/* definitions of reiser4 tree walk functions */
-+
-+#ifndef __FS_REISER4_TREE_WALK_H__
-+#define __FS_REISER4_TREE_WALK_H__
-+
-+#include "debug.h"
-+#include "forward.h"
-+
-+/* establishes horizontal links between cached znodes */
-+int connect_znode(coord_t * coord, znode * node);
-+
-+/* tree traversal functions (reiser4_get_parent(), reiser4_get_neighbor())
-+ have the following common arguments:
-+
-+ return codes:
-+
-+ @return : 0 - OK,
-+
-+ZAM-FIXME-HANS: wrong return code name. Change them all.
-+ -ENOENT - neighbor is not in cache, what is detected by sibling
-+ link absence.
-+
-+ -E_NO_NEIGHBOR - we are sure that neighbor (or parent) node cannot be
-+ found (because we are left-/right- most node of the
-+ tree, for example). Also, this return code is for
-+ reiser4_get_parent() when we see no parent link -- it
-+ means that our node is root node.
-+
-+ -E_DEADLOCK - deadlock detected (request from high-priority process
-+ received), other error codes are conformed to
-+ /usr/include/asm/errno.h .
-+*/
-+
-+int
-+reiser4_get_parent_flags(lock_handle * result, znode * node,
-+ znode_lock_mode mode, int flags);
-+
-+/* bits definition for reiser4_get_neighbor function `flags' arg. */
-+typedef enum {
-+ /* If sibling pointer is NULL, this flag allows get_neighbor() to try to
-+ * find not allocated not connected neigbor by going though upper
-+ * levels */
-+ GN_CAN_USE_UPPER_LEVELS = 0x1,
-+ /* locking left neighbor instead of right one */
-+ GN_GO_LEFT = 0x2,
-+ /* automatically load neighbor node content */
-+ GN_LOAD_NEIGHBOR = 0x4,
-+ /* return -E_REPEAT if can't lock */
-+ GN_TRY_LOCK = 0x8,
-+ /* used internally in tree_walk.c, causes renew_sibling to not
-+ allocate neighbor znode, but only search for it in znode cache */
-+ GN_NO_ALLOC = 0x10,
-+ /* do not go across atom boundaries */
-+ GN_SAME_ATOM = 0x20,
-+ /* allow to lock not connected nodes */
-+ GN_ALLOW_NOT_CONNECTED = 0x40,
-+ /* Avoid synchronous jload, instead, call jstartio() and return -E_REPEAT. */
-+ GN_ASYNC = 0x80
-+} znode_get_neigbor_flags;
-+
-+/* A commonly used wrapper for reiser4_get_parent_flags(). */
-+static inline int reiser4_get_parent(lock_handle * result, znode * node,
-+ znode_lock_mode mode)
-+{
-+ return reiser4_get_parent_flags(result, node, mode,
-+ GN_ALLOW_NOT_CONNECTED);
-+}
-+
-+int reiser4_get_neighbor(lock_handle * neighbor, znode * node,
-+ znode_lock_mode lock_mode, int flags);
-+
-+/* there are wrappers for most common usages of reiser4_get_neighbor() */
-+static inline int
-+reiser4_get_left_neighbor(lock_handle * result, znode * node, int lock_mode,
-+ int flags)
-+{
-+ return reiser4_get_neighbor(result, node, lock_mode,
-+ flags | GN_GO_LEFT);
-+}
-+
-+static inline int
-+reiser4_get_right_neighbor(lock_handle * result, znode * node, int lock_mode,
-+ int flags)
-+{
-+ ON_DEBUG(check_lock_node_data(node));
-+ ON_DEBUG(check_lock_data());
-+ return reiser4_get_neighbor(result, node, lock_mode,
-+ flags & (~GN_GO_LEFT));
-+}
-+
-+extern void sibling_list_remove(znode * node);
-+extern void sibling_list_drop(znode * node);
-+extern void sibling_list_insert_nolock(znode * new, znode * before);
-+extern void link_left_and_right(znode * left, znode * right);
-+
-+/* Functions called by tree_walk() when tree_walk() ... */
-+struct tree_walk_actor {
-+ /* ... meets a formatted node, */
-+ int (*process_znode) (tap_t *, void *);
-+ /* ... meets an extent, */
-+ int (*process_extent) (tap_t *, void *);
-+ /* ... begins tree traversal or repeats it after -E_REPEAT was returned by
-+ * node or extent processing functions. */
-+ int (*before) (void *);
-+};
-+
-+#if REISER4_DEBUG
-+int check_sibling_list(znode * node);
-+#else
-+#define check_sibling_list(n) (1)
-+#endif
-+
-+#endif /* __FS_REISER4_TREE_WALK_H__ */
-+
-+/*
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/txnmgr.c linux-2.6.24/fs/reiser4/txnmgr.c
---- linux-2.6.24.orig/fs/reiser4/txnmgr.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/txnmgr.c 2008-01-25 11:39:07.108251996 +0300
-@@ -0,0 +1,3164 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Joshua MacDonald wrote the first draft of this code. */
-+
-+/* ZAM-LONGTERM-FIXME-HANS: The locking in this file is badly designed, and a
-+filesystem scales only as well as its worst locking design. You need to
-+substantially restructure this code. Josh was not as experienced a programmer
-+as you. Particularly review how the locking style differs from what you did
-+for znodes usingt hi-lo priority locking, and present to me an opinion on
-+whether the differences are well founded. */
-+
-+/* I cannot help but to disagree with the sentiment above. Locking of
-+ * transaction manager is _not_ badly designed, and, at the very least, is not
-+ * the scaling bottleneck. Scaling bottleneck is _exactly_ hi-lo priority
-+ * locking on znodes, especially on the root node of the tree. --nikita,
-+ * 2003.10.13 */
-+
-+/* The txnmgr is a set of interfaces that keep track of atoms and transcrash handles. The
-+ txnmgr processes capture_block requests and manages the relationship between jnodes and
-+ atoms through the various stages of a transcrash, and it also oversees the fusion and
-+ capture-on-copy processes. The main difficulty with this task is maintaining a
-+ deadlock-free lock ordering between atoms and jnodes/handles. The reason for the
-+ difficulty is that jnodes, handles, and atoms contain pointer circles, and the cycle
-+ must be broken. The main requirement is that atom-fusion be deadlock free, so once you
-+ hold the atom_lock you may then wait to acquire any jnode or handle lock. This implies
-+ that any time you check the atom-pointer of a jnode or handle and then try to lock that
-+ atom, you must use trylock() and possibly reverse the order.
-+
-+ This code implements the design documented at:
-+
-+ http://namesys.com/txn-doc.html
-+
-+ZAM-FIXME-HANS: update v4.html to contain all of the information present in the above (but updated), and then remove the
-+above document and reference the new. Be sure to provide some credit to Josh. I already have some writings on this
-+topic in v4.html, but they are lacking in details present in the above. Cure that. Remember to write for the bright 12
-+year old --- define all technical terms used.
-+
-+*/
-+
-+/* Thoughts on the external transaction interface:
-+
-+ In the current code, a TRANSCRASH handle is created implicitly by reiser4_init_context() (which
-+ creates state that lasts for the duration of a system call and is called at the start
-+ of ReiserFS methods implementing VFS operations), and closed by reiser4_exit_context(),
-+ occupying the scope of a single system call. We wish to give certain applications an
-+ interface to begin and close (commit) transactions. Since our implementation of
-+ transactions does not yet support isolation, allowing an application to open a
-+ transaction implies trusting it to later close the transaction. Part of the
-+ transaction interface will be aimed at enabling that trust, but the interface for
-+ actually using transactions is fairly narrow.
-+
-+ BEGIN_TRANSCRASH: Returns a transcrash identifier. It should be possible to translate
-+ this identifier into a string that a shell-script could use, allowing you to start a
-+ transaction by issuing a command. Once open, the transcrash should be set in the task
-+ structure, and there should be options (I suppose) to allow it to be carried across
-+ fork/exec. A transcrash has several options:
-+
-+ - READ_FUSING or WRITE_FUSING: The default policy is for txn-capture to capture only
-+ on writes (WRITE_FUSING) and allow "dirty reads". If the application wishes to
-+ capture on reads as well, it should set READ_FUSING.
-+
-+ - TIMEOUT: Since a non-isolated transcrash cannot be undone, every transcrash must
-+ eventually close (or else the machine must crash). If the application dies an
-+ unexpected death with an open transcrash, for example, or if it hangs for a long
-+ duration, one solution (to avoid crashing the machine) is to simply close it anyway.
-+ This is a dangerous option, but it is one way to solve the problem until isolated
-+ transcrashes are available for untrusted applications.
-+
-+ It seems to be what databases do, though it is unclear how one avoids a DoS attack
-+ creating a vulnerability based on resource starvation. Guaranteeing that some
-+ minimum amount of computational resources are made available would seem more correct
-+ than guaranteeing some amount of time. When we again have someone to code the work,
-+ this issue should be considered carefully. -Hans
-+
-+ RESERVE_BLOCKS: A running transcrash should indicate to the transaction manager how
-+ many dirty blocks it expects. The reserve_blocks interface should be called at a point
-+ where it is safe for the application to fail, because the system may not be able to
-+ grant the allocation and the application must be able to back-out. For this reason,
-+ the number of reserve-blocks can also be passed as an argument to BEGIN_TRANSCRASH, but
-+ the application may also wish to extend the allocation after beginning its transcrash.
-+
-+ CLOSE_TRANSCRASH: The application closes the transcrash when it is finished making
-+ modifications that require transaction protection. When isolated transactions are
-+ supported the CLOSE operation is replaced by either COMMIT or ABORT. For example, if a
-+ RESERVE_BLOCKS call fails for the application, it should "abort" by calling
-+ CLOSE_TRANSCRASH, even though it really commits any changes that were made (which is
-+ why, for safety, the application should call RESERVE_BLOCKS before making any changes).
-+
-+ For actually implementing these out-of-system-call-scopped transcrashes, the
-+ reiser4_context has a "txn_handle *trans" pointer that may be set to an open
-+ transcrash. Currently there are no dynamically-allocated transcrashes, but there is a
-+ "struct kmem_cache *_txnh_slab" created for that purpose in this file.
-+*/
-+
-+/* Extending the other system call interfaces for future transaction features:
-+
-+ Specialized applications may benefit from passing flags to the ordinary system call
-+ interface such as read(), write(), or stat(). For example, the application specifies
-+ WRITE_FUSING by default but wishes to add that a certain read() command should be
-+ treated as READ_FUSING. But which read? Is it the directory-entry read, the stat-data
-+ read, or the file-data read? These issues are straight-forward, but there are a lot of
-+ them and adding the necessary flags-passing code will be tedious.
-+
-+ When supporting isolated transactions, there is a corresponding READ_MODIFY_WRITE (RMW)
-+ flag, which specifies that although it is a read operation being requested, a
-+ write-lock should be taken. The reason is that read-locks are shared while write-locks
-+ are exclusive, so taking a read-lock when a later-write is known in advance will often
-+ leads to deadlock. If a reader knows it will write later, it should issue read
-+ requests with the RMW flag set.
-+*/
-+
-+/*
-+ The znode/atom deadlock avoidance.
-+
-+ FIXME(Zam): writing of this comment is in progress.
-+
-+ The atom's special stage ASTAGE_CAPTURE_WAIT introduces a kind of atom's
-+ long-term locking, which makes reiser4 locking scheme more complex. It had
-+ deadlocks until we implement deadlock avoidance algorithms. That deadlocks
-+ looked as the following: one stopped thread waits for a long-term lock on
-+ znode, the thread who owns that lock waits when fusion with another atom will
-+ be allowed.
-+
-+ The source of the deadlocks is an optimization of not capturing index nodes
-+ for read. Let's prove it. Suppose we have dumb node capturing scheme which
-+ unconditionally captures each block before locking it.
-+
-+ That scheme has no deadlocks. Let's begin with the thread which stage is
-+ ASTAGE_CAPTURE_WAIT and it waits for a znode lock. The thread can't wait for
-+ a capture because it's stage allows fusion with any atom except which are
-+ being committed currently. A process of atom commit can't deadlock because
-+ atom commit procedure does not acquire locks and does not fuse with other
-+ atoms. Reiser4 does capturing right before going to sleep inside the
-+ longtertm_lock_znode() function, it means the znode which we want to lock is
-+ already captured and its atom is in ASTAGE_CAPTURE_WAIT stage. If we
-+ continue the analysis we understand that no one process in the sequence may
-+ waits atom fusion. Thereby there are no deadlocks of described kind.
-+
-+ The capturing optimization makes the deadlocks possible. A thread can wait a
-+ lock which owner did not captured that node. The lock owner's current atom
-+ is not fused with the first atom and it does not get a ASTAGE_CAPTURE_WAIT
-+ state. A deadlock is possible when that atom meets another one which is in
-+ ASTAGE_CAPTURE_WAIT already.
-+
-+ The deadlock avoidance scheme includes two algorithms:
-+
-+ First algorithm is used when a thread captures a node which is locked but not
-+ captured by another thread. Those nodes are marked MISSED_IN_CAPTURE at the
-+ moment we skip their capturing. If such a node (marked MISSED_IN_CAPTURE) is
-+ being captured by a thread with current atom is in ASTAGE_CAPTURE_WAIT, the
-+ routine which forces all lock owners to join with current atom is executed.
-+
-+ Second algorithm does not allow to skip capturing of already captured nodes.
-+
-+ Both algorithms together prevent waiting a longterm lock without atom fusion
-+ with atoms of all lock owners, which is a key thing for getting atom/znode
-+ locking deadlocks.
-+*/
-+
-+/*
-+ * Transactions and mmap(2).
-+ *
-+ * 1. Transactions are not supported for accesses through mmap(2), because
-+ * this would effectively amount to user-level transactions whose duration
-+ * is beyond control of the kernel.
-+ *
-+ * 2. That said, we still want to preserve some decency with regard to
-+ * mmap(2). During normal write(2) call, following sequence of events
-+ * happens:
-+ *
-+ * 1. page is created;
-+ *
-+ * 2. jnode is created, dirtied and captured into current atom.
-+ *
-+ * 3. extent is inserted and modified.
-+ *
-+ * Steps (2) and (3) take place under long term lock on the twig node.
-+ *
-+ * When file is accessed through mmap(2) page is always created during
-+ * page fault.
-+ * After this (in reiser4_readpage()->reiser4_readpage_extent()):
-+ *
-+ * 1. if access is made to non-hole page new jnode is created, (if
-+ * necessary)
-+ *
-+ * 2. if access is made to the hole page, jnode is not created (XXX
-+ * not clear why).
-+ *
-+ * Also, even if page is created by write page fault it is not marked
-+ * dirty immediately by handle_mm_fault(). Probably this is to avoid races
-+ * with page write-out.
-+ *
-+ * Dirty bit installed by hardware is only transferred to the struct page
-+ * later, when page is unmapped (in zap_pte_range(), or
-+ * try_to_unmap_one()).
-+ *
-+ * So, with mmap(2) we have to handle following irksome situations:
-+ *
-+ * 1. there exists modified page (clean or dirty) without jnode
-+ *
-+ * 2. there exists modified page (clean or dirty) with clean jnode
-+ *
-+ * 3. clean page which is a part of atom can be transparently modified
-+ * at any moment through mapping without becoming dirty.
-+ *
-+ * (1) and (2) can lead to the out-of-memory situation: ->writepage()
-+ * doesn't know what to do with such pages and ->sync_sb()/->writepages()
-+ * don't see them, because these methods operate on atoms.
-+ *
-+ * (3) can lead to the loss of data: suppose we have dirty page with dirty
-+ * captured jnode captured by some atom. As part of early flush (for
-+ * example) page was written out. Dirty bit was cleared on both page and
-+ * jnode. After this page is modified through mapping, but kernel doesn't
-+ * notice and just discards page and jnode as part of commit. (XXX
-+ * actually it doesn't, because to reclaim page ->releasepage() has to be
-+ * called and before this dirty bit will be transferred to the struct
-+ * page).
-+ *
-+ */
-+
-+#include "debug.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "wander.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "page_cache.h"
-+#include "reiser4.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "flush.h"
-+
-+#include <asm/atomic.h>
-+#include <linux/types.h>
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/slab.h>
-+#include <linux/pagemap.h>
-+#include <linux/writeback.h>
-+#include <linux/swap.h> /* for totalram_pages */
-+
-+static void atom_free(txn_atom * atom);
-+
-+static int commit_txnh(txn_handle * txnh);
-+
-+static void wakeup_atom_waitfor_list(txn_atom * atom);
-+static void wakeup_atom_waiting_list(txn_atom * atom);
-+
-+static void capture_assign_txnh_nolock(txn_atom * atom, txn_handle * txnh);
-+
-+static void capture_assign_block_nolock(txn_atom * atom, jnode * node);
-+
-+static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node);
-+
-+static int capture_init_fusion(jnode * node, txn_handle * txnh,
-+ txn_capture mode);
-+
-+static int capture_fuse_wait(txn_handle *, txn_atom *, txn_atom *, txn_capture);
-+
-+static void capture_fuse_into(txn_atom * small, txn_atom * large);
-+
-+void reiser4_invalidate_list(struct list_head *);
-+
-+/* GENERIC STRUCTURES */
-+
-+typedef struct _txn_wait_links txn_wait_links;
-+
-+struct _txn_wait_links {
-+ lock_stack *_lock_stack;
-+ struct list_head _fwaitfor_link;
-+ struct list_head _fwaiting_link;
-+ int (*waitfor_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
-+ int (*waiting_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
-+};
-+
-+/* FIXME: In theory, we should be using the slab cache init & destructor
-+ methods instead of, e.g., jnode_init, etc. */
-+static struct kmem_cache *_atom_slab = NULL;
-+/* this is for user-visible, cross system-call transactions. */
-+static struct kmem_cache *_txnh_slab = NULL;
-+
-+/**
-+ * init_txnmgr_static - create transaction manager slab caches
-+ *
-+ * Initializes caches of txn-atoms and txn_handle. It is part of reiser4 module
-+ * initialization.
-+ */
-+int init_txnmgr_static(void)
-+{
-+ assert("jmacd-600", _atom_slab == NULL);
-+ assert("jmacd-601", _txnh_slab == NULL);
-+
-+ ON_DEBUG(atomic_set(&flush_cnt, 0));
-+
-+ _atom_slab = kmem_cache_create("txn_atom", sizeof(txn_atom), 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL);
-+ if (_atom_slab == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ _txnh_slab = kmem_cache_create("txn_handle", sizeof(txn_handle), 0,
-+ SLAB_HWCACHE_ALIGN, NULL);
-+ if (_txnh_slab == NULL) {
-+ kmem_cache_destroy(_atom_slab);
-+ _atom_slab = NULL;
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * done_txnmgr_static - delete txn_atom and txn_handle caches
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_txnmgr_static(void)
-+{
-+ destroy_reiser4_cache(&_atom_slab);
-+ destroy_reiser4_cache(&_txnh_slab);
-+}
-+
-+/**
-+ * init_txnmgr - initialize a new transaction manager
-+ * @mgr: pointer to transaction manager embedded in reiser4 super block
-+ *
-+ * This is called on mount. Makes necessary initializations.
-+ */
-+void reiser4_init_txnmgr(txn_mgr *mgr)
-+{
-+ assert("umka-169", mgr != NULL);
-+
-+ mgr->atom_count = 0;
-+ mgr->id_count = 1;
-+ INIT_LIST_HEAD(&mgr->atoms_list);
-+ spin_lock_init(&mgr->tmgr_lock);
-+ mutex_init(&mgr->commit_mutex);
-+}
-+
-+/**
-+ * reiser4_done_txnmgr - stop transaction manager
-+ * @mgr: pointer to transaction manager embedded in reiser4 super block
-+ *
-+ * This is called on umount. Does sanity checks.
-+ */
-+void reiser4_done_txnmgr(txn_mgr *mgr)
-+{
-+ assert("umka-170", mgr != NULL);
-+ assert("umka-1701", list_empty_careful(&mgr->atoms_list));
-+ assert("umka-1702", mgr->atom_count == 0);
-+}
-+
-+/* Initialize a transaction handle. */
-+/* Audited by: umka (2002.06.13) */
-+static void txnh_init(txn_handle * txnh, txn_mode mode)
-+{
-+ assert("umka-171", txnh != NULL);
-+
-+ txnh->mode = mode;
-+ txnh->atom = NULL;
-+ reiser4_ctx_gfp_mask_set();
-+ txnh->flags = 0;
-+ spin_lock_init(&txnh->hlock);
-+ INIT_LIST_HEAD(&txnh->txnh_link);
-+}
-+
-+#if REISER4_DEBUG
-+/* Check if a transaction handle is clean. */
-+static int txnh_isclean(txn_handle * txnh)
-+{
-+ assert("umka-172", txnh != NULL);
-+ return txnh->atom == NULL &&
-+ LOCK_CNT_NIL(spin_locked_txnh);
-+}
-+#endif
-+
-+/* Initialize an atom. */
-+static void atom_init(txn_atom * atom)
-+{
-+ int level;
-+
-+ assert("umka-173", atom != NULL);
-+
-+ memset(atom, 0, sizeof(txn_atom));
-+
-+ atom->stage = ASTAGE_FREE;
-+ atom->start_time = jiffies;
-+
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1)
-+ INIT_LIST_HEAD(ATOM_DIRTY_LIST(atom, level));
-+
-+ INIT_LIST_HEAD(ATOM_CLEAN_LIST(atom));
-+ INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom));
-+ INIT_LIST_HEAD(ATOM_WB_LIST(atom));
-+ INIT_LIST_HEAD(&atom->inodes);
-+ spin_lock_init(&(atom->alock));
-+ /* list of transaction handles */
-+ INIT_LIST_HEAD(&atom->txnh_list);
-+ /* link to transaction manager's list of atoms */
-+ INIT_LIST_HEAD(&atom->atom_link);
-+ INIT_LIST_HEAD(&atom->fwaitfor_list);
-+ INIT_LIST_HEAD(&atom->fwaiting_list);
-+ blocknr_set_init(&atom->delete_set);
-+ blocknr_set_init(&atom->wandered_map);
-+
-+ init_atom_fq_parts(atom);
-+}
-+
-+#if REISER4_DEBUG
-+/* Check if an atom is clean. */
-+static int atom_isclean(txn_atom * atom)
-+{
-+ int level;
-+
-+ assert("umka-174", atom != NULL);
-+
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+ if (!list_empty_careful(ATOM_DIRTY_LIST(atom, level))) {
-+ return 0;
-+ }
-+ }
-+
-+ return atom->stage == ASTAGE_FREE &&
-+ atom->txnh_count == 0 &&
-+ atom->capture_count == 0 &&
-+ atomic_read(&atom->refcount) == 0 &&
-+ (&atom->atom_link == atom->atom_link.next &&
-+ &atom->atom_link == atom->atom_link.prev) &&
-+ list_empty_careful(&atom->txnh_list) &&
-+ list_empty_careful(ATOM_CLEAN_LIST(atom)) &&
-+ list_empty_careful(ATOM_OVRWR_LIST(atom)) &&
-+ list_empty_careful(ATOM_WB_LIST(atom)) &&
-+ list_empty_careful(&atom->fwaitfor_list) &&
-+ list_empty_careful(&atom->fwaiting_list) &&
-+ atom_fq_parts_are_clean(atom);
-+}
-+#endif
-+
-+/* Begin a transaction in this context. Currently this uses the reiser4_context's
-+ trans_in_ctx, which means that transaction handles are stack-allocated. Eventually
-+ this will be extended to allow transaction handles to span several contexts. */
-+/* Audited by: umka (2002.06.13) */
-+void reiser4_txn_begin(reiser4_context * context)
-+{
-+ assert("jmacd-544", context->trans == NULL);
-+
-+ context->trans = &context->trans_in_ctx;
-+
-+ /* FIXME_LATER_JMACD Currently there's no way to begin a TXN_READ_FUSING
-+ transcrash. Default should be TXN_WRITE_FUSING. Also, the _trans variable is
-+ stack allocated right now, but we would like to allow for dynamically allocated
-+ transcrashes that span multiple system calls.
-+ */
-+ txnh_init(context->trans, TXN_WRITE_FUSING);
-+}
-+
-+/* Finish a transaction handle context. If the context has an open handle that
-+ is attached to an atom, the handle is committed with commit_txnh() and that
-+ result is returned; otherwise 0 is returned. context->trans is reset to NULL.
-+ Must be called for the current context with a clean lock stack (asserted). */
-+int reiser4_txn_end(reiser4_context * context)
-+{
-+ long ret = 0; /* NOTE(review): long here, but the function returns int */
-+ txn_handle *txnh;
-+
-+ assert("umka-283", context != NULL);
-+ assert("nikita-3012", reiser4_schedulable());
-+ assert("vs-24", context == get_current_context());
-+ assert("nikita-2967", lock_stack_isclean(get_current_lock_stack()));
-+
-+ txnh = context->trans;
-+ if (txnh != NULL) {
-+ if (txnh->atom != NULL)
-+ ret = commit_txnh(txnh);
-+ assert("jmacd-633", txnh_isclean(txnh));
-+ context->trans = NULL;
-+ }
-+ return ret;
-+}
-+
-+void reiser4_txn_restart(reiser4_context * context)
-+{
-+ reiser4_txn_end(context); /* close the open handle; its return value is ignored */
-+ reiser4_preempt_point();
-+ reiser4_txn_begin(context); /* open a fresh handle in the same context */
-+}
-+
-+void reiser4_txn_restart_current(void)
-+{
-+ reiser4_txn_restart(get_current_context()); /* same as above, for the current context */
-+}
-+
-+/* TXN_ATOM */
-+
-+/* Get the atom belonging to a txnh, which is not locked. Return txnh locked. Locks atom, if atom
-+ is not NULL. This performs the necessary spin_trylock to break the lock-ordering cycle. May
-+ return NULL.
-+
-+ If the trylock on the atom fails, the txnh lock is dropped and both locks are retaken with
-+ the atom lock first; a temporary reference is held on the atom meanwhile. If txnh->atom
-+ changed in between, the stale atom is released and the loop starts over. */
-+static txn_atom *txnh_get_atom(txn_handle * txnh)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-180", txnh != NULL);
-+ assert_spin_not_locked(&(txnh->hlock));
-+
-+ while (1) {
-+ spin_lock_txnh(txnh);
-+ atom = txnh->atom;
-+
-+ if (atom == NULL)
-+ break; /* no atom: return NULL with txnh locked */
-+
-+ if (spin_trylock_atom(atom))
-+ break; /* fast path: both locks held */
-+
-+ atomic_inc(&atom->refcount); /* temporary reference while txnh is unlocked */
-+
-+ spin_unlock_txnh(txnh);
-+ spin_lock_atom(atom);
-+ spin_lock_txnh(txnh);
-+
-+ if (txnh->atom == atom) {
-+ atomic_dec(&atom->refcount); /* still the same atom: drop the temporary reference */
-+ break;
-+ }
-+
-+ spin_unlock_txnh(txnh);
-+ atom_dec_and_unlock(atom); /* atom changed: release the stale one and retry */
-+ }
-+
-+ return atom;
-+}
-+
-+/* Get the atom of the current context's transaction handle and spinlock it if
-+ it is present. The handle itself is left unlocked. May return NULL. */
-+txn_atom *get_current_atom_locked_nocheck(void)
-+{
-+ reiser4_context *cx;
-+ txn_atom *atom;
-+ txn_handle *txnh;
-+
-+ cx = get_current_context();
-+ assert("zam-437", cx != NULL);
-+
-+ txnh = cx->trans;
-+ assert("zam-435", txnh != NULL);
-+
-+ atom = txnh_get_atom(txnh);
-+
-+ spin_unlock_txnh(txnh); /* txnh_get_atom() returned with txnh locked; keep only the atom lock */
-+ return atom;
-+}
-+
-+/* Get the atom belonging to a jnode, which is initially locked. Return with
-+ both jnode and atom locked. This performs the necessary spin_trylock to
-+ break the lock-ordering cycle. Assumes the jnode is already locked, and
-+ returns NULL if atom is not set (in which case only the jnode is locked). */
-+txn_atom *jnode_get_atom(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-181", node != NULL);
-+
-+ while (1) {
-+ assert_spin_locked(&(node->guard));
-+
-+ atom = node->atom;
-+ /* node is not in any atom */
-+ if (atom == NULL)
-+ break;
-+
-+ /* If atom is not locked, grab the lock and return */
-+ if (spin_trylock_atom(atom))
-+ break;
-+
-+ /* At least one jnode belongs to this atom, which guarantees
-+ * that atom->refcount > 0, so we can safely increment refcount. */
-+ atomic_inc(&atom->refcount);
-+ spin_unlock_jnode(node);
-+
-+ /* re-acquire spin locks in the right order */
-+ spin_lock_atom(atom);
-+ spin_lock_jnode(node);
-+
-+ /* check if node still points to the same atom. */
-+ if (node->atom == atom) {
-+ atomic_dec(&atom->refcount);
-+ break;
-+ }
-+
-+ /* releasing of atom lock and reference requires not holding
-+ * locks on jnodes. */
-+ spin_unlock_jnode(node);
-+
-+ /* We are not sure that this atom has references other than
-+ * ours, so we must call the proper function, which may free the
-+ * atom if the last reference is released. */
-+ atom_dec_and_unlock(atom);
-+
-+ /* lock jnode again for getting valid node->atom pointer
-+ * value. */
-+ spin_lock_jnode(node);
-+ }
-+
-+ return atom;
-+}
-+
-+/* Returns true if @check is dirty and belongs to the same atom as @node. Used
-+ by flush code to indicate whether the next node (in some direction) is suitable for
-+ flushing. If @check is a znode it must also be connected. If @alloc_check is
-+ non-zero, jnode_is_flushprepped(@check) must additionally equal @alloc_value. */
-+int
-+same_slum_check(jnode * node, jnode * check, int alloc_check, int alloc_value)
-+{
-+ int compat;
-+ txn_atom *atom;
-+
-+ assert("umka-182", node != NULL);
-+ assert("umka-183", check != NULL);
-+
-+ /* Not sure what this function is supposed to do if supplied with @check that is
-+ neither formatted nor unformatted (bitmap or so). */
-+ assert("nikita-2373", jnode_is_znode(check)
-+ || jnode_is_unformatted(check));
-+
-+ /* Need a lock on CHECK to get its atom and to check various state bits.
-+ Don't need a lock on NODE once we get the atom lock. */
-+ /* It is not enough to lock two nodes and check (node->atom ==
-+ check->atom) because atom could be locked and being fused at that
-+ moment, jnodes of the atom of that state (being fused) can point to
-+ different objects, but the atom is the same. */
-+ spin_lock_jnode(check);
-+
-+ atom = jnode_get_atom(check);
-+
-+ if (atom == NULL) {
-+ compat = 0; /* @check is in no atom, so it cannot share one with @node */
-+ } else {
-+ compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY));
-+
-+ if (compat && jnode_is_znode(check)) {
-+ compat &= znode_is_connected(JZNODE(check));
-+ }
-+
-+ if (compat && alloc_check) {
-+ compat &= (alloc_value == jnode_is_flushprepped(check));
-+ }
-+
-+ spin_unlock_atom(atom); /* jnode_get_atom() returned with the atom locked */
-+ }
-+
-+ spin_unlock_jnode(check);
-+
-+ return compat;
-+}
-+
-+/* Decrement the atom's reference count and if it falls to zero, free it.
-+ Called with the atom spin lock held and a positive reference count (both
-+ asserted); the atom lock is released on every path. Freeing requires the
-+ transaction manager lock, which is taken before the atom lock, so a
-+ trylock is attempted first and the locks are re-acquired in order if it fails. */
-+void atom_dec_and_unlock(txn_atom * atom)
-+{
-+ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+
-+ assert("umka-186", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("zam-1039", atomic_read(&atom->refcount) > 0);
-+
-+ if (atomic_dec_and_test(&atom->refcount)) {
-+ /* take txnmgr lock and atom lock in proper order. */
-+ if (!spin_trylock_txnmgr(mgr)) {
-+ /* This atom should exist after we re-acquire its
-+ * spinlock, so we increment its reference counter. */
-+ atomic_inc(&atom->refcount);
-+ spin_unlock_atom(atom);
-+ spin_lock_txnmgr(mgr);
-+ spin_lock_atom(atom);
-+
-+ if (!atomic_dec_and_test(&atom->refcount)) {
-+ /* a new reference appeared while the atom was unlocked: do not free */
-+ spin_unlock_atom(atom);
-+ spin_unlock_txnmgr(mgr);
-+ return;
-+ }
-+ }
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ atom_free(atom); /* atom_free() releases the atom lock itself */
-+ spin_unlock_txnmgr(mgr);
-+ } else
-+ spin_unlock_atom(atom);
-+}
-+
-+/* Create new atom and connect it to given transaction handle. This adds the
-+ atom to the transaction manager's list and takes one reference on it, an
-+ artificial reference which is kept until it commits. We play strange games
-+ to avoid allocation under jnode & txnh spinlocks.
-+
-+ @atom_alloc holds a pre-allocated atom across retries: if *atom_alloc is
-+ NULL one is allocated here, and *atom_alloc is set back to NULL once the
-+ atom has been put to use.
-+
-+ Returns -ENOMEM if the allocation fails. Otherwise always returns -E_REPEAT,
-+ both when @txnh already had an atom (the allocation is left in *atom_alloc)
-+ and after a new atom was assigned to @txnh. */
-+
-+static int atom_begin_and_assign_to_txnh(txn_atom ** atom_alloc, txn_handle * txnh)
-+{
-+ txn_atom *atom;
-+ txn_mgr *mgr;
-+
-+ if (REISER4_DEBUG && rofs_tree(current_tree)) {
-+ warning("nikita-3366", "Creating atom on rofs");
-+ dump_stack();
-+ }
-+
-+ if (*atom_alloc == NULL) {
-+ (*atom_alloc) = kmem_cache_alloc(_atom_slab,
-+ reiser4_ctx_gfp_mask_get());
-+
-+ if (*atom_alloc == NULL)
-+ return RETERR(-ENOMEM);
-+ }
-+
-+ /* and, also, txnmgr spin lock should be taken before jnode and txnh
-+ locks. */
-+ mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+ spin_lock_txnmgr(mgr);
-+ spin_lock_txnh(txnh);
-+
-+ /* Check whether new atom still needed */
-+ if (txnh->atom != NULL) {
-+ /* NOTE-NIKITA probably it is rather better to free
-+ * atom_alloc here than thread it up to reiser4_try_capture() */
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_txnmgr(mgr);
-+
-+ return -E_REPEAT;
-+ }
-+
-+ atom = *atom_alloc;
-+ *atom_alloc = NULL; /* ownership of the allocation moves to the atom list */
-+
-+ atom_init(atom);
-+
-+ assert("jmacd-17", atom_isclean(atom));
-+
-+ /*
-+ * lock ordering is broken here. It is ok, as long as @atom is new
-+ * and inaccessible for others. We can't use spin_lock_atom or
-+ * spin_lock(&atom->alock) because they care about locking
-+ * dependencies. The trylock used below doesn't.
-+ */
-+ check_me("", spin_trylock_atom(atom));
-+
-+ /* add atom to the end of transaction manager's list of atoms */
-+ list_add_tail(&atom->atom_link, &mgr->atoms_list);
-+ atom->atom_id = mgr->id_count++;
-+ mgr->atom_count += 1;
-+
-+ /* Release txnmgr lock */
-+ spin_unlock_txnmgr(mgr);
-+
-+ /* One reference until it commits. */
-+ atomic_inc(&atom->refcount);
-+ atom->stage = ASTAGE_CAPTURE_FUSE;
-+ atom->super = reiser4_get_current_sb();
-+ capture_assign_txnh_nolock(atom, txnh);
-+
-+ spin_unlock_atom(atom);
-+ spin_unlock_txnh(txnh);
-+
-+ return -E_REPEAT; /* returned even though the atom was assigned */
-+}
-+
-+/* Return true if an atom is currently "open", i.e. its stage is greater than
-+ zero and still below ASTAGE_PRE_COMMIT. */
-+static int atom_isopen(const txn_atom * atom)
-+{
-+ assert("umka-185", atom != NULL);
-+
-+ return atom->stage > 0 && atom->stage < ASTAGE_PRE_COMMIT;
-+}
-+
-+/* Return the number of pointers to this atom that must be updated during fusion. This
-+ approximates the amount of work to be done. Fusion chooses the atom with fewer
-+ pointers to fuse into the atom with more pointers. The count is the sum of
-+ the atom's transaction handle count and its capture count. */
-+static int atom_pointer_count(const txn_atom * atom)
-+{
-+ assert("umka-187", atom != NULL);
-+
-+ /* This is a measure of the amount of work needed to fuse this atom
-+ * into another. */
-+ return atom->txnh_count + atom->capture_count;
-+}
-+
-+/* Called holding the atom lock and the transaction manager lock (both are
-+ asserted), this removes the atom from the transaction manager list and
-+ frees it. The atom must be in ASTAGE_INVALID or ASTAGE_DONE. The atom lock
-+ is released here before the memory is returned to the slab; the
-+ transaction manager lock is left held for the caller to release. */
-+static void atom_free(txn_atom * atom)
-+{
-+ txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+
-+ assert("umka-188", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* Remove from the txn_mgr's atom list */
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ mgr->atom_count -= 1;
-+ list_del_init(&atom->atom_link);
-+
-+ /* Clean the atom */
-+ assert("jmacd-16",
-+ (atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE));
-+ atom->stage = ASTAGE_FREE;
-+
-+ blocknr_set_destroy(&atom->delete_set);
-+ blocknr_set_destroy(&atom->wandered_map);
-+
-+ assert("jmacd-16", atom_isclean(atom)); /* NOTE(review): same assertion label as the stage check above */
-+
-+ spin_unlock_atom(atom);
-+
-+ kmem_cache_free(_atom_slab, atom);
-+}
-+
-+static int atom_is_dotard(const txn_atom * atom)
-+{
-+ return time_after(jiffies, atom->start_time +
-+ get_current_super_private()->tmgr.atom_max_age); /* true once the atom is older than tmgr.atom_max_age */
-+}
-+
-+static int atom_can_be_committed(txn_atom * atom)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ assert("zam-885", atom->txnh_count > atom->nr_waiters);
-+ return atom->txnh_count == atom->nr_waiters + 1; /* true when exactly one open handle is not a waiter */
-+}
-+
-+/* Return true if an atom should commit now. This is determined by atom flags
-+ (ATOM_FORCE_COMMIT), atom size (pointer count above tmgr.atom_max_size) or
-+ aging (atom_is_dotard()). */
-+static int atom_should_commit(const txn_atom * atom)
-+{
-+ assert("umka-189", atom != NULL);
-+ return
-+ (atom->flags & ATOM_FORCE_COMMIT) ||
-+ ((unsigned)atom_pointer_count(atom) >
-+ get_current_super_private()->tmgr.atom_max_size)
-+ || atom_is_dotard(atom);
-+}
-+
-+/* Return non-zero if the current atom exists and requires commit, 0 otherwise.
-+ The atom is locked only for the duration of the check. */
-+int current_atom_should_commit(void)
-+{
-+ txn_atom *atom;
-+ int result = 0;
-+
-+ atom = get_current_atom_locked_nocheck();
-+ if (atom) {
-+ result = atom_should_commit(atom);
-+ spin_unlock_atom(atom);
-+ }
-+ return result;
-+}
-+
-+static int atom_should_commit_asap(const txn_atom * atom)
-+{
-+ unsigned int captured;
-+ unsigned int pinnedpages;
-+
-+ assert("nikita-3309", atom != NULL);
-+
-+ captured = (unsigned)atom->capture_count;
-+ pinnedpages = (captured >> PAGE_CACHE_SHIFT) * sizeof(znode); /* NOTE(review): presumably an estimate of pages pinned by captured nodes; the shift-before-multiply order looks odd - confirm */
-+
-+ return (pinnedpages > (totalram_pages >> 3)) || (atom->flushed > 100); /* estimate above 1/8 of totalram_pages, or atom->flushed above 100 */
-+}
-+
-+static jnode *find_first_dirty_in_list(struct list_head *head, int flags)
-+{
-+ jnode *first_dirty;
-+
-+ list_for_each_entry(first_dirty, head, capture_link) {
-+ if (!(flags & JNODE_FLUSH_COMMIT)) {
-+ /*
-+ * not flushing for commit: skip jnodes which "heard
-+ * banshee" or have active I/O (JNODE_WRITEBACK set).
-+ * With JNODE_FLUSH_COMMIT no node is skipped.
-+ */
-+ if (JF_ISSET(first_dirty, JNODE_HEARD_BANSHEE) ||
-+ JF_ISSET(first_dirty, JNODE_WRITEBACK))
-+ continue;
-+ }
-+ return first_dirty; /* first acceptable node on the list */
-+ }
-+ return NULL; /* list is empty or every node was skipped */
-+}
-+
-+/* Get first dirty node from the atom's dirty_nodes[n] lists; return NULL if atom has no
-+ suitable dirty nodes on its lists. Levels 1 and up are searched in ascending
-+ order and list #0 is searched last. @flags is passed to
-+ find_first_dirty_in_list(), which decides which nodes are skipped. The atom
-+ must be locked (asserted). */
-+jnode *find_first_dirty_jnode(txn_atom * atom, int flags)
-+{
-+ jnode *first_dirty;
-+ tree_level level;
-+
-+ assert_spin_locked(&(atom->alock));
-+
-+ /* The flush starts from LEAF_LEVEL (=1). */
-+ for (level = 1; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+ if (list_empty_careful(ATOM_DIRTY_LIST(atom, level)))
-+ continue;
-+
-+ first_dirty =
-+ find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, level),
-+ flags);
-+ if (first_dirty)
-+ return first_dirty;
-+ }
-+
-+ /* znode-above-root is on the list #0. */
-+ return find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, 0), flags);
-+}
-+
-+static void dispatch_wb_list(txn_atom * atom, flush_queue_t * fq)
-+{
-+ jnode *cur;
-+
-+ assert("zam-905", atom_is_protected(atom));
-+
-+ cur = list_entry(ATOM_WB_LIST(atom)->next, jnode, capture_link);
-+ while (ATOM_WB_LIST(atom) != &cur->capture_link) { /* walk the atom's writeback list until back at its head */
-+ jnode *next = list_entry(cur->capture_link.next, jnode, capture_link); /* fetched first: cur may leave this list below */
-+
-+ spin_lock_jnode(cur);
-+ if (!JF_ISSET(cur, JNODE_WRITEBACK)) { /* nodes with JNODE_WRITEBACK still set are left where they are */
-+ if (JF_ISSET(cur, JNODE_DIRTY)) {
-+ queue_jnode(fq, cur); /* dirty again: hand it to the flush queue @fq */
-+ } else {
-+ /* move jnode to atom's clean list */
-+ list_move_tail(&cur->capture_link,
-+ ATOM_CLEAN_LIST(atom));
-+ }
-+ }
-+ spin_unlock_jnode(cur);
-+
-+ cur = next;
-+ }
-+}
-+
-+/* Scan current atom->writeback_nodes list, re-submit dirty and !writeback
-+ * jnodes to disk. Returns the error from get_fq_for_current_atom() if no
-+ * flush queue could be obtained, otherwise the result of reiser4_write_fq(). */
-+static int submit_wb_list(void)
-+{
-+ int ret;
-+ flush_queue_t *fq;
-+
-+ fq = get_fq_for_current_atom();
-+ if (IS_ERR(fq))
-+ return PTR_ERR(fq);
-+
-+ dispatch_wb_list(fq->atom, fq);
-+ spin_unlock_atom(fq->atom); /* NOTE(review): presumably get_fq_for_current_atom() returns with the atom locked - confirm */
-+
-+ ret = reiser4_write_fq(fq, NULL, 1);
-+ reiser4_fq_put(fq);
-+
-+ return ret;
-+}
-+
-+/* Wait for completion of all writes, re-submitting the atom writeback list
-+ as needed: submit, wait, submit again, wait again. Returns the first
-+ error encountered, otherwise the result of the final wait. */
-+static int current_atom_complete_writes(void)
-+{
-+ int ret;
-+
-+ /* Each jnode from that list was modified and dirtied when it had i/o
-+ * request running already. After i/o completion we have to resubmit
-+ * them to disk again.*/
-+ ret = submit_wb_list();
-+ if (ret < 0)
-+ return ret;
-+
-+ /* Wait all i/o completion */
-+ ret = current_atom_finish_all_fq();
-+ if (ret)
-+ return ret;
-+
-+ /* Scan wb list again; all i/o should be completed, we re-submit dirty
-+ * nodes to disk */
-+ ret = submit_wb_list();
-+ if (ret < 0)
-+ return ret;
-+
-+ /* Wait all nodes we just submitted */
-+ return current_atom_finish_all_fq();
-+}
-+
-+#if REISER4_DEBUG
-+
-+static void reiser4_info_atom(const char *prefix, const txn_atom * atom)
-+{
-+ if (atom == NULL) { /* nothing to dump */
-+ printk("%s: no atom\n", prefix);
-+ return;
-+ }
-+
-+ printk("%s: refcount: %i id: %i flags: %x txnh_count: %i" /* one-line dump of the atom's counters and state */
-+ " capture_count: %i stage: %x start: %lu, flushed: %i\n", prefix,
-+ atomic_read(&atom->refcount), atom->atom_id, atom->flags,
-+ atom->txnh_count, atom->capture_count, atom->stage,
-+ atom->start_time, atom->flushed);
-+}
-+
-+#else /* REISER4_DEBUG */
-+
-+static inline void reiser4_info_atom(const char *prefix, const txn_atom * atom) {} /* no-op when REISER4_DEBUG is off */
-+
-+#endif /* REISER4_DEBUG */
-+
-+#define TOOMANYFLUSHES (1 << 13) /* flush iterations after which commit_current_atom() starts warning */
-+
-+/* Called with the atom locked and no open "active" transaction handlers except
-+ ours, this function calls flush_current_atom() until all dirty nodes are
-+ processed. Then it initiates commit processing.
-+
-+ Called by the single remaining open "active" txnh, which is closing. Other
-+ open txnhs belong to processes which wait atom commit in commit_txnh()
-+ routine. They are counted as "waiters" in atom->nr_waiters. Therefore as
-+ long as we hold the atom lock none of the jnodes can be captured and/or
-+ locked.
-+
-+ Return value is an error code if commit fails. -E_REPEAT is returned, with
-+ the atom unlocked, if the atom cannot be committed yet. On success the
-+ function returns with *atom locked and in ASTAGE_DONE, after dropping the
-+ "until commit" reference. *atom is re-read from the current context each
-+ time the flush has to be repeated.
-+*/
-+static int commit_current_atom(long *nr_submitted, txn_atom ** atom)
-+{
-+ reiser4_super_info_data *sbinfo = get_current_super_private();
-+ long ret = 0; /* NOTE(review): long here, but the function returns int */
-+ /* how many times jnode_flush() was called as a part of attempt to
-+ * commit this atom. */
-+ int flushiters;
-+
-+ assert("zam-888", atom != NULL && *atom != NULL);
-+ assert_spin_locked(&((*atom)->alock));
-+ assert("zam-887", get_current_context()->trans->atom == *atom);
-+ assert("jmacd-151", atom_isopen(*atom));
-+
-+ assert("nikita-3184",
-+ get_current_super_private()->delete_mutex_owner != current);
-+
-+ for (flushiters = 0;; ++flushiters) {
-+ ret =
-+ flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS |
-+ JNODE_FLUSH_COMMIT,
-+ LONG_MAX /* nr_to_write */ ,
-+ nr_submitted, atom, NULL);
-+ if (ret != -E_REPEAT)
-+ break;
-+
-+ /* if atom's dirty list contains one znode which is
-+ HEARD_BANSHEE and is locked we have to allow lock owner to
-+ continue and uncapture that znode */
-+ reiser4_preempt_point();
-+
-+ *atom = get_current_atom_locked();
-+ if (flushiters > TOOMANYFLUSHES && IS_POW(flushiters)) {
-+ warning("nikita-3176",
-+ "Flushing like mad: %i", flushiters);
-+ reiser4_info_atom("atom", *atom);
-+ DEBUGON(flushiters > (1 << 20));
-+ }
-+ }
-+
-+ if (ret)
-+ return ret;
-+
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ if (!atom_can_be_committed(*atom)) {
-+ spin_unlock_atom(*atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ if ((*atom)->capture_count == 0)
-+ goto done; /* nothing captured: skip the write-out, atom is still locked */
-+
-+ /* Up to this point we have been flushing and after flush is called we
-+ return -E_REPEAT. Now we can commit. We cannot return -E_REPEAT
-+ at this point, commit should be successful. */
-+ reiser4_atom_set_stage(*atom, ASTAGE_PRE_COMMIT);
-+ ON_DEBUG(((*atom)->committer = current));
-+ spin_unlock_atom(*atom);
-+
-+ ret = current_atom_complete_writes();
-+ if (ret)
-+ return ret; /* error: atom is left unlocked in ASTAGE_PRE_COMMIT */
-+
-+ assert("zam-906", list_empty(ATOM_WB_LIST(*atom)));
-+
-+ /* isolate critical code path which should be executed by only one
-+ * thread using tmgr mutex */
-+ mutex_lock(&sbinfo->tmgr.commit_mutex);
-+
-+ ret = reiser4_write_logs(nr_submitted);
-+ if (ret < 0)
-+ reiser4_panic("zam-597", "write log failed (%ld)\n", ret);
-+
-+ /* The atom->ovrwr_nodes list is processed under commit mutex held
-+ because of bitmap nodes which are captured by special way in
-+ reiser4_pre_commit_hook_bitmap(), that way does not include
-+ capture_fuse_wait() as a capturing of other nodes does -- the commit
-+ mutex is used for transaction isolation instead. */
-+ reiser4_invalidate_list(ATOM_OVRWR_LIST(*atom));
-+ mutex_unlock(&sbinfo->tmgr.commit_mutex);
-+
-+ reiser4_invalidate_list(ATOM_CLEAN_LIST(*atom));
-+ reiser4_invalidate_list(ATOM_WB_LIST(*atom));
-+ assert("zam-927", list_empty(&(*atom)->inodes));
-+
-+ spin_lock_atom(*atom);
-+ done:
-+ reiser4_atom_set_stage(*atom, ASTAGE_DONE);
-+ ON_DEBUG((*atom)->committer = NULL);
-+
-+ /* Atom's state changes, so wake up everybody waiting for this
-+ event. */
-+ wakeup_atom_waiting_list(*atom);
-+
-+ /* Decrement the "until commit" reference, at least one txnh (the caller) is
-+ still open. */
-+ atomic_dec(&(*atom)->refcount);
-+
-+ assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0);
-+ assert("jmacd-1062", (*atom)->capture_count == 0);
-+ BUG_ON((*atom)->capture_count != 0);
-+ assert_spin_locked(&((*atom)->alock));
-+
-+ return ret;
-+}
-+
-+/* TXN_TXNH */
-+
-+/**
-+ * force_commit_atom - commit current atom and wait commit completion
-+ * @txnh: transaction handle whose atom is to be committed; must be attached
-+ * to an atom
-+ *
-+ * Commits current atom and wait commit completion; current atom and @txnh have
-+ * to be spinlocked before call, this function unlocks them on exit. The
-+ * commit itself is triggered by restarting the current transaction.
-+ * Always returns 0.
-+ */
-+int force_commit_atom(txn_handle *txnh)
-+{
-+ txn_atom *atom;
-+
-+ assert("zam-837", txnh != NULL);
-+ assert_spin_locked(&(txnh->hlock));
-+ assert("nikita-2966", lock_stack_isclean(get_current_lock_stack()));
-+
-+ atom = txnh->atom;
-+
-+ assert("zam-834", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+
-+ /*
-+ * Set flags for atom and txnh: forcing atom commit and waiting for
-+ * commit completion
-+ */
-+ txnh->flags |= TXNH_WAIT_COMMIT;
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atom);
-+
-+ /* commit is here */
-+ reiser4_txn_restart_current();
-+ return 0;
-+}
-+
-+/* Called to force commit of any outstanding atoms. @commit_all_atoms controls
-+ * whether we commit all atoms, including new ones which are created after
-+ * this function is called. The atom list is rescanned from the start after
-+ * every commit or wait, because the transaction manager lock is dropped in
-+ * between. Returns 0 once no matching atom is left. */
-+int txnmgr_force_commit_all(struct super_block *super, int commit_all_atoms)
-+{
-+ int ret;
-+ txn_atom *atom;
-+ txn_mgr *mgr;
-+ txn_handle *txnh;
-+ unsigned long start_time = jiffies;
-+ reiser4_context *ctx = get_current_context();
-+
-+ assert("nikita-2965", lock_stack_isclean(get_current_lock_stack()));
-+ assert("nikita-3058", reiser4_commit_check_locks());
-+
-+ reiser4_txn_restart_current();
-+
-+ mgr = &get_super_private(super)->tmgr;
-+
-+ txnh = ctx->trans;
-+
-+ again:
-+
-+ spin_lock_txnmgr(mgr);
-+
-+ list_for_each_entry(atom, &mgr->atoms_list, atom_link) {
-+ spin_lock_atom(atom);
-+
-+ /* Commit any atom which can be committed. If @commit_all_atoms
-+ * is not set we commit only atoms which were created before
-+ * this call is started. */
-+ if (commit_all_atoms
-+ || time_before_eq(atom->start_time, start_time)) {
-+ if (atom->stage <= ASTAGE_POST_COMMIT) {
-+ spin_unlock_txnmgr(mgr);
-+
-+ if (atom->stage < ASTAGE_PRE_COMMIT) {
-+ spin_lock_txnh(txnh);
-+ /* Add force-context txnh */
-+ capture_assign_txnh_nolock(atom, txnh);
-+ ret = force_commit_atom(txnh); /* unlocks both txnh and atom */
-+ if (ret)
-+ return ret;
-+ } else
-+ /* wait atom commit */
-+ reiser4_atom_wait_event(atom);
-+
-+ goto again; /* all locks were dropped: rescan from the start */
-+ }
-+ }
-+
-+ spin_unlock_atom(atom);
-+ }
-+
-+#if REISER4_DEBUG
-+ if (commit_all_atoms) {
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ spin_lock_reiser4_super(sbinfo);
-+ assert("zam-813",
-+ sbinfo->blocks_fake_allocated_unformatted == 0);
-+ assert("zam-812", sbinfo->blocks_fake_allocated == 0);
-+ spin_unlock_reiser4_super(sbinfo);
-+ }
-+#endif
-+
-+ spin_unlock_txnmgr(mgr);
-+
-+ return 0;
-+}
-+
-+/* Check whether commit_some_atoms() can commit @atom: it has not reached
-+ * ASTAGE_PRE_COMMIT, every open handle on it is a waiter, and
-+ * atom_should_commit() holds. Locking is up to the caller. */
-+static int atom_is_committable(txn_atom * atom)
-+{
-+ return
-+ atom->stage < ASTAGE_PRE_COMMIT &&
-+ atom->txnh_count == atom->nr_waiters && atom_should_commit(atom);
-+}
-+
-+/* Called periodically from ktxnmgrd to commit old atoms. Picks the first atom
-+ * on the manager's list for which atom_is_committable() holds, attaches the
-+ * current context's handle to it with ATOM_FORCE_COMMIT set, and restarts the
-+ * current transaction. Releases the ktxnmgrd spin lock (mgr->daemon->guard)
-+ * at exit. Always returns 0. */
-+int commit_some_atoms(txn_mgr * mgr)
-+{
-+ int ret = 0;
-+ txn_atom *atom;
-+ txn_handle *txnh;
-+ reiser4_context *ctx;
-+ struct list_head *pos, *tmp;
-+
-+ ctx = get_current_context();
-+ assert("nikita-2444", ctx != NULL);
-+
-+ txnh = ctx->trans;
-+ spin_lock_txnmgr(mgr);
-+
-+ /*
-+ * this is to avoid gcc complain that atom might be used
-+ * uninitialized
-+ */
-+ atom = NULL;
-+
-+ /* look for atom to commit */
-+ list_for_each_safe(pos, tmp, &mgr->atoms_list) {
-+ atom = list_entry(pos, txn_atom, atom_link);
-+ /*
-+ * first test without taking atom spin lock, whether it is
-+ * eligible for committing at all
-+ */
-+ if (atom_is_committable(atom)) {
-+ /* now, take spin lock and re-check */
-+ spin_lock_atom(atom);
-+ if (atom_is_committable(atom))
-+ break; /* leave the loop with the atom locked */
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+
-+ ret = (&mgr->atoms_list == pos); /* true if the loop ran to the end without a break */
-+ spin_unlock_txnmgr(mgr);
-+
-+ if (ret) {
-+ /* nothing found */
-+ spin_unlock(&mgr->daemon->guard);
-+ return 0;
-+ }
-+
-+ spin_lock_txnh(txnh);
-+
-+ BUG_ON(atom == NULL);
-+ /* Set the atom to force committing */
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ /* Add force-context txnh */
-+ capture_assign_txnh_nolock(atom, txnh);
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atom);
-+
-+ /* we are about to release daemon spin lock, notify daemon it
-+ has to rescan atoms */
-+ mgr->daemon->rescan = 1;
-+ spin_unlock(&mgr->daemon->guard);
-+ reiser4_txn_restart_current();
-+ return 0;
-+}
-+
-+static int txn_try_to_fuse_small_atom(txn_mgr * tmgr, txn_atom * atom)
-+{
-+ int atom_stage;
-+ txn_atom *atom_2;
-+ int repeat; /* set when the atom lock had to be dropped to take the txnmgr lock */
-+
-+ assert("zam-1051", atom->stage < ASTAGE_PRE_COMMIT);
-+
-+ atom_stage = atom->stage;
-+ repeat = 0;
-+
-+ if (!spin_trylock_txnmgr(tmgr)) {
-+ atomic_inc(&atom->refcount); /* temporary reference while the atom is unlocked */
-+ spin_unlock_atom(atom);
-+ spin_lock_txnmgr(tmgr);
-+ spin_lock_atom(atom);
-+ repeat = 1;
-+ if (atom->stage != atom_stage) { /* stage changed while unlocked: give up and let the caller retry */
-+ spin_unlock_txnmgr(tmgr);
-+ atom_dec_and_unlock(atom);
-+ return -E_REPEAT;
-+ }
-+ atomic_dec(&atom->refcount);
-+ }
-+
-+ list_for_each_entry(atom_2, &tmgr->atoms_list, atom_link) {
-+ if (atom == atom_2)
-+ continue;
-+ /*
-+ * if trylock does not succeed we just do not fuse with that
-+ * atom.
-+ */
-+ if (spin_trylock_atom(atom_2)) {
-+ if (atom_2->stage < ASTAGE_PRE_COMMIT) {
-+ spin_unlock_txnmgr(tmgr);
-+ capture_fuse_into(atom_2, atom);
-+ /* all locks are lost we can only repeat here */
-+ return -E_REPEAT;
-+ }
-+ spin_unlock_atom(atom_2);
-+ }
-+ }
-+ atom->flags |= ATOM_CANCEL_FUSION; /* no fusion partner found: flag it so flush_some_atom() does not try again */
-+ spin_unlock_txnmgr(tmgr);
-+ if (repeat) {
-+ spin_unlock_atom(atom);
-+ return -E_REPEAT;
-+ }
-+ return 0; /* atom lock was held throughout and is still held */
-+}
-+
-+/* Calls jnode_flush for current atom if it exists; if not, just take another
-+ atom and call jnode_flush() for it. If current transaction handle has
-+ already assigned atom (current atom) we have to close current transaction
-+ prior to switch to another atom or do something with current atom. This
-+ code tries to flush current atom.
-+
-+ flush_some_atom() is called as part of memory clearing process. It is
-+ invoked from balance_dirty_pages(), pdflushd, and entd.
-+
-+ If we can flush no nodes, atom is committed, because this frees memory.
-+
-+ If atom is too large or too old it is committed also.
-+
-+ @start is used as the flush starting point only if it belongs to the chosen
-+ atom. *@nr_submitted must be 0 and @wbc->nr_to_write non-zero on entry
-+ (both enforced with BUG_ON). The current transaction is restarted before
-+ returning, except when no atom could be picked at all, in which case 0 is
-+ returned immediately.
-+*/
-+int
-+flush_some_atom(jnode * start, long *nr_submitted, const struct writeback_control *wbc,
-+ int flags)
-+{
-+ reiser4_context *ctx = get_current_context();
-+ txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr;
-+ txn_handle *txnh = ctx->trans;
-+ txn_atom *atom;
-+ int ret;
-+
-+ BUG_ON(wbc->nr_to_write == 0);
-+ BUG_ON(*nr_submitted != 0);
-+ assert("zam-1042", txnh != NULL);
-+ repeat:
-+ if (txnh->atom == NULL) {
-+ /* current atom is not available, take first from txnmgr */
-+ spin_lock_txnmgr(tmgr);
-+
-+ /* traverse the list of all atoms */
-+ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-+ /* lock atom before checking its state */
-+ spin_lock_atom(atom);
-+
-+ /*
-+ * we need an atom which is not being committed and
-+ * which has no flushers (jnode_flush() add one flusher
-+ * at the beginning and subtract one at the end).
-+ */
-+ if (atom->stage < ASTAGE_PRE_COMMIT &&
-+ atom->nr_flushers == 0) {
-+ spin_lock_txnh(txnh);
-+ capture_assign_txnh_nolock(atom, txnh);
-+ spin_unlock_txnh(txnh);
-+
-+ goto found;
-+ }
-+
-+ spin_unlock_atom(atom);
-+ }
-+
-+ /*
-+ * Write throttling: no atom can be flushed or committed
-+ * right now, so a caller that may block waits for
-+ * progress on some atom and then retries.
-+ */
-+ if (!current_is_pdflush() && !wbc->nonblocking) {
-+ list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-+ spin_lock_atom(atom);
-+ /* Repeat the check from the above. */
-+ if (atom->stage < ASTAGE_PRE_COMMIT
-+ && atom->nr_flushers == 0) {
-+ spin_lock_txnh(txnh);
-+ capture_assign_txnh_nolock(atom, txnh);
-+ spin_unlock_txnh(txnh);
-+
-+ goto found;
-+ }
-+ if (atom->stage <= ASTAGE_POST_COMMIT) {
-+ spin_unlock_txnmgr(tmgr);
-+ /*
-+ * we just wait until atom's flusher
-+ * makes a progress in flushing or
-+ * committing the atom
-+ */
-+ reiser4_atom_wait_event(atom);
-+ goto repeat;
-+ }
-+ spin_unlock_atom(atom);
-+ }
-+ }
-+ spin_unlock_txnmgr(tmgr);
-+ return 0;
-+ found:
-+ spin_unlock_txnmgr(tmgr); /* the chosen atom stays locked */
-+ } else
-+ atom = get_current_atom_locked();
-+
-+ BUG_ON(atom->super != ctx->super);
-+ assert("vs-35", atom->super == ctx->super);
-+ if (start) {
-+ spin_lock_jnode(start);
-+ ret = (atom == start->atom) ? 1 : 0;
-+ spin_unlock_jnode(start);
-+ if (ret == 0)
-+ start = NULL; /* @start is in a different atom: ignore the hint */
-+ }
-+ ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start);
-+ if (ret == 0) {
-+ /* flush_current_atom returns 0 only if it submitted for write
-+ nothing */
-+ BUG_ON(*nr_submitted != 0);
-+ if (*nr_submitted == 0 || atom_should_commit_asap(atom)) { /* NOTE(review): always true after the BUG_ON above */
-+ if (atom->capture_count < tmgr->atom_min_size &&
-+ !(atom->flags & ATOM_CANCEL_FUSION)) {
-+ ret = txn_try_to_fuse_small_atom(tmgr, atom);
-+ if (ret == -E_REPEAT) {
-+ reiser4_preempt_point();
-+ goto repeat;
-+ }
-+ }
-+ /* if early flushing could not make more nodes clean,
-+ * or atom is too old/large,
-+ * we force current atom to commit */
-+ /* wait for commit completion but only if this
-+ * wouldn't stall pdflushd and ent thread. */
-+ if (!wbc->nonblocking && !ctx->entd)
-+ txnh->flags |= TXNH_WAIT_COMMIT;
-+ atom->flags |= ATOM_FORCE_COMMIT;
-+ }
-+ spin_unlock_atom(atom);
-+ } else if (ret == -E_REPEAT) {
-+ if (*nr_submitted == 0) {
-+ /* let others who hampers flushing (hold longterm locks,
-+ for instance) to free the way for flush */
-+ reiser4_preempt_point();
-+ goto repeat;
-+ }
-+ ret = 0; /* something was submitted: report success */
-+ }
-+/*
-+ if (*nr_submitted > wbc->nr_to_write)
-+ warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted);
-+*/
-+ reiser4_txn_restart(ctx);
-+
-+ return ret;
-+}
-+
-+/* Remove processed nodes from the given atom list (thereby removing them from
-+ the transaction). Each node at the head of @head is uncaptured and its
-+ reference dropped, until the list is empty. */
-+void reiser4_invalidate_list(struct list_head *head)
-+{
-+ while (!list_empty(head)) {
-+ jnode *node;
-+
-+ node = list_entry(head->next, jnode, capture_link);
-+ spin_lock_jnode(node);
-+ reiser4_uncapture_block(node); /* NOTE(review): presumably unlinks node from @head and releases the jnode lock - confirm */
-+ jput(node);
-+ }
-+}
-+
-+static void init_wlinks(txn_wait_links * wlinks)
-+{
-+ wlinks->_lock_stack = get_current_lock_stack(); /* the waiter is identified by the current lock stack */
-+ INIT_LIST_HEAD(&wlinks->_fwaitfor_link); /* both links start out detached */
-+ INIT_LIST_HEAD(&wlinks->_fwaiting_link);
-+ wlinks->waitfor_cb = NULL; /* no callbacks by default */
-+ wlinks->waiting_cb = NULL;
-+}
-+
-+/* Add the caller to the atom's waitfor list and wait for somebody to wake us
-+ up. Called with the atom locked; returns with the atom unlocked. A
-+ reference is held on the atom while sleeping and is dropped on wake-up,
-+ which may free the atom. */
-+void reiser4_atom_wait_event(txn_atom * atom)
-+{
-+ txn_wait_links _wlinks;
-+
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-3156",
-+ lock_stack_isclean(get_current_lock_stack()) ||
-+ atom->nr_running_queues > 0);
-+
-+ init_wlinks(&_wlinks);
-+ list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list);
-+ atomic_inc(&atom->refcount); /* held across the sleep, while the atom lock is released */
-+ spin_unlock_atom(atom);
-+
-+ reiser4_prepare_to_sleep(_wlinks._lock_stack);
-+ reiser4_go_to_sleep(_wlinks._lock_stack);
-+
-+ spin_lock_atom(atom);
-+ list_del(&_wlinks._fwaitfor_link); /* _wlinks is on this stack, so unlink before returning */
-+ atom_dec_and_unlock(atom);
-+}
-+
-+void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage)
-+{
-+ assert("nikita-3535", atom != NULL);
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-3536", stage <= ASTAGE_INVALID);
-+ /* Excelsior! The stage may only move forward, never back. */
-+ assert("nikita-3537", stage >= atom->stage);
-+ if (atom->stage != stage) { /* waiters are notified only on an actual change */
-+ atom->stage = stage;
-+ reiser4_atom_send_event(atom);
-+ }
-+}
-+
-+/* Wake all threads which wait for an event on this atom, i.e. those on its
-+ waitfor list. The atom must be locked (asserted). */
-+void reiser4_atom_send_event(txn_atom * atom)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ wakeup_atom_waitfor_list(atom);
-+}
-+
-+/* Informs txn manager code that owner of this txn_handle should wait atom commit completion (for
-+ example, because it does fsync(2)). Returns non-zero if TXNH_WAIT_COMMIT is set in the
-+ handle's flags. */
-+static int should_wait_commit(txn_handle * h)
-+{
-+ return h->flags & TXNH_WAIT_COMMIT;
-+}
-+
-+typedef struct commit_data {
-+ txn_atom *atom; /* atom being committed; re-read on every try_commit_txnh() call */
-+ txn_handle *txnh; /* handle that is being closed */
-+ long nr_written; /* passed to flush_current_atom() as its submitted-blocks counter */
-+ /* as an optimization we start committing atom by first trying to
-+ * flush it few times without switching into ASTAGE_CAPTURE_WAIT. This
-+ * allows to reduce stalls due to other threads waiting for atom in
-+ * ASTAGE_CAPTURE_WAIT stage. ->preflush is counter of these
-+ * preliminary flushes. */
-+ int preflush;
-+ /* have we waited on atom. */
-+ int wait;
-+ int failed; /* when set, try_commit_txnh() returns 0 without attempting a commit */
-+ int wake_ktxnmgrd_up; /* set when ours is the only open handle and there are no waiters */
-+} commit_data;
-+
-+/*
-+ * Called from commit_txnh() repeatedly, until either error happens, or atom
-+ * commits successfully.
-+ *
-+ * Returns 0 when commit_txnh() may stop retrying and detach the handle from
-+ * the atom (debug builds assert that cd->atom is still spin-locked in that
-+ * case). Any non-zero result makes commit_txnh() call this function again.
-+ */
-+static int try_commit_txnh(commit_data * cd)
-+{
-+ int result;
-+
-+ assert("nikita-2968", lock_stack_isclean(get_current_lock_stack()));
-+
-+ /* Get the atom and txnh locked. */
-+ cd->atom = txnh_get_atom(cd->txnh);
-+ assert("jmacd-309", cd->atom != NULL);
-+ /* only the atom lock is kept from here on */
-+ spin_unlock_txnh(cd->txnh);
-+
-+ /* we registered ourselves as a waiter on the previous pass: undo
-+ * that now that we are running again */
-+ if (cd->wait) {
-+ cd->atom->nr_waiters--;
-+ cd->wait = 0;
-+ }
-+
-+ /* atom is already done: nothing left to commit */
-+ if (cd->atom->stage == ASTAGE_DONE)
-+ return 0;
-+
-+ /* a previous commit attempt failed with a hard error: stop retrying */
-+ if (cd->failed)
-+ return 0;
-+
-+ if (atom_should_commit(cd->atom)) {
-+ /* if atom is _very_ large schedule it for commit as soon as
-+ * possible. */
-+ if (atom_should_commit_asap(cd->atom)) {
-+ /*
-+ * When atom is in PRE_COMMIT or later stage following
-+ * invariant (encoded in atom_can_be_committed())
-+ * holds: there is exactly one non-waiter transaction
-+ * handle opened on this atom. When thread wants to
-+ * wait until atom commits (for example sync()) it
-+ * waits on atom event after increasing
-+ * atom->nr_waiters (see below in this function). It
-+ * cannot be guaranteed that atom is already committed
-+ * after receiving event, so loop has to be
-+ * re-started. But if atom switched into PRE_COMMIT
-+ * stage and became too large, we cannot change its
-+ * state back to CAPTURE_WAIT (atom stage can only
-+ * increase monotonically), hence this check.
-+ */
-+ if (cd->atom->stage < ASTAGE_CAPTURE_WAIT)
-+ reiser4_atom_set_stage(cd->atom,
-+ ASTAGE_CAPTURE_WAIT);
-+ cd->atom->flags |= ATOM_FORCE_COMMIT;
-+ }
-+ if (cd->txnh->flags & TXNH_DONT_COMMIT) {
-+ /*
-+ * this thread (transaction handle that is) doesn't
-+ * want to commit atom. Notify waiters that handle is
-+ * closed. This can happen, for example, when we are
-+ * under VFS directory lock and don't want to commit
-+ * atom right now to avoid stalling other threads
-+ * working in the same directory.
-+ */
-+
-+ /* Wake the ktxnmgrd up if the ktxnmgrd is needed to
-+ * commit this atom: no atom waiters and only one
-+ * (our) open transaction handle. */
-+ cd->wake_ktxnmgrd_up =
-+ cd->atom->txnh_count == 1 &&
-+ cd->atom->nr_waiters == 0;
-+ reiser4_atom_send_event(cd->atom);
-+ result = 0;
-+ } else if (!atom_can_be_committed(cd->atom)) {
-+ if (should_wait_commit(cd->txnh)) {
-+ /* sync(): wait for commit */
-+ cd->atom->nr_waiters++;
-+ cd->wait = 1;
-+ reiser4_atom_wait_event(cd->atom);
-+ result = RETERR(-E_REPEAT);
-+ } else {
-+ result = 0;
-+ }
-+ } else if (cd->preflush > 0 && !is_current_ktxnmgrd()) {
-+ /*
-+ * optimization: flush atom without switching it into
-+ * ASTAGE_CAPTURE_WAIT.
-+ *
-+ * But don't do this for ktxnmgrd, because ktxnmgrd
-+ * should never block on atom fusion.
-+ */
-+ result = flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS,
-+ LONG_MAX, &cd->nr_written,
-+ &cd->atom, NULL);
-+ if (result == 0) {
-+ /* flush finished: no more preliminary flushes,
-+ * ask the caller to run one more pass */
-+ spin_unlock_atom(cd->atom);
-+ cd->preflush = 0;
-+ result = RETERR(-E_REPEAT);
-+ } else /* Atoms wasn't flushed
-+ * completely. Rinse. Repeat. */
-+ --cd->preflush;
-+ } else {
-+ /* We change atom state to ASTAGE_CAPTURE_WAIT to
-+ prevent atom fusion and count ourself as an active
-+ flusher */
-+ reiser4_atom_set_stage(cd->atom, ASTAGE_CAPTURE_WAIT);
-+ cd->atom->flags |= ATOM_FORCE_COMMIT;
-+
-+ result =
-+ commit_current_atom(&cd->nr_written, &cd->atom);
-+ /* remember a hard failure so that the next pass
-+ * returns 0 instead of trying again */
-+ if (result != 0 && result != -E_REPEAT)
-+ cd->failed = 1;
-+ }
-+ } else
-+ result = 0;
-+
-+#if REISER4_DEBUG
-+ if (result == 0)
-+ assert_spin_locked(&(cd->atom->alock));
-+#endif
-+
-+ /* perfectly valid assertion, except that when atom/txnh is not locked
-+ * fusion can take place, and cd->atom points nowhere. */
-+ /*
-+ assert("jmacd-1028", ergo(result != 0, spin_atom_is_not_locked(cd->atom)));
-+ */
-+ return result;
-+}
-+
-+/* Called to commit a transaction handle. Runs try_commit_txnh() until it
-+   reports 0 (the atom committed, the commit was handed over to somebody else,
-+   or a hard error was recorded), then decrements the atom's number of open
-+   handles, detaches @txnh from the atom and drops the atom reference.
-+   ktxnmgrd is kicked afterwards when try_commit_txnh() asked for it.
-+
-+   Always returns 0. */
-+static int commit_txnh(txn_handle * txnh)
-+{
-+	commit_data cd;
-+
-+	assert("umka-192", txnh != NULL);
-+
-+	memset(&cd, 0, sizeof cd);
-+	cd.txnh = txnh;
-+	cd.preflush = 10;
-+
-+	/* keep calling try_commit_txnh() until it stops asking for a retry */
-+	for (;;) {
-+		if (try_commit_txnh(&cd) == 0)
-+			break;
-+		reiser4_preempt_point();
-+	}
-+
-+	spin_lock_txnh(txnh);
-+
-+	cd.atom->txnh_count--;
-+	txnh->atom = NULL;
-+	/* unlink the handle from the atom's list of transaction handles */
-+	list_del_init(&txnh->txnh_link);
-+
-+	spin_unlock_txnh(txnh);
-+	atom_dec_and_unlock(cd.atom);
-+
-+	/* if this thread did not want to do the commit itself
-+	 * (TXNH_DONT_COMMIT is set, probably because it takes time), the
-+	 * work is done asynchronously by the ktxnmgrd daemon. */
-+	if (cd.wake_ktxnmgrd_up)
-+		ktxnmgrd_kick(&get_current_super_private()->tmgr);
-+
-+	return 0;
-+}
-+
-+/* TRY_CAPTURE */
-+
-+/* This routine attempts a single block-capture request. It may return -E_REPEAT if some
-+ condition indicates that the request should be retried, and it may block if the
-+ txn_capture mode does not include the TXN_CAPTURE_NONBLOCKING request flag.
-+
-+ This routine encodes the basic logic of block capturing described by:
-+
-+ http://namesys.com/v4/v4.html
-+
-+ Our goal here is to ensure that any two blocks that contain dependent modifications
-+ should commit at the same time. This function enforces this discipline by initiating
-+ fusion whenever a transaction handle belonging to one atom requests to read or write a
-+ block belonging to another atom (TXN_CAPTURE_WRITE or TXN_CAPTURE_READ_ATOMIC).
-+
-+ In addition, this routine handles the initial assignment of atoms to blocks and
-+ transaction handles. These are possible outcomes of this function:
-+
-+ 1. The block and handle are already part of the same atom: return immediate success
-+
-+ 2. The block is assigned but the handle is not: call capture_assign_txnh to assign
-+ the handle to the block's atom.
-+
-+ 3. The handle is assigned but the block is not: call capture_assign_block to assign
-+ the block to the handle's atom.
-+
-+ 4. Both handle and block are assigned, but to different atoms: call capture_init_fusion
-+ to fuse atoms.
-+
-+ 5. Neither block nor handle are assigned: create a new atom and assign them both.
-+
-+ 6. A read request for a non-captured block: return immediate success.
-+
-+ This function acquires and releases the handle's spinlock. This function is called
-+ under the jnode lock and if the return value is 0, it returns with the jnode lock still
-+ held. If the return is -E_REPEAT or some other error condition, the jnode lock is
-+ released. The external interface (reiser4_try_capture) manages re-acquiring the jnode
-+ lock in the failure case.
-+
-+ @atom_alloc is only passed through to atom_begin_and_assign_to_txnh() (case 5).
-+*/
-+static int try_capture_block(
-+ txn_handle * txnh, jnode * node, txn_capture mode,
-+ txn_atom ** atom_alloc)
-+{
-+ txn_atom *block_atom;
-+ txn_atom *txnh_atom;
-+
-+ /* Should not call capture for READ_NONCOM requests, handled in reiser4_try_capture. */
-+ assert("jmacd-567", CAPTURE_TYPE(mode) != TXN_CAPTURE_READ_NONCOM);
-+
-+ /* FIXME-ZAM-HANS: FIXME_LATER_JMACD Should assert that atom->tree ==
-+ * node->tree somewhere. */
-+ assert("umka-194", txnh != NULL);
-+ assert("umka-195", node != NULL);
-+
-+ /* The jnode is already locked! Being called from reiser4_try_capture(). */
-+ assert_spin_locked(&(node->guard));
-+ block_atom = node->atom;
-+
-+ /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't
-+ let us touch the atoms themselves. */
-+ spin_lock_txnh(txnh);
-+ txnh_atom = txnh->atom;
-+ /* Process of capturing continues into one of four branches depends on
-+ which atoms from (block atom (node->atom), current atom (txnh->atom))
-+ exist. */
-+ if (txnh_atom == NULL) {
-+ if (block_atom == NULL) {
-+ /* neither handle nor block has an atom (case 5) */
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ /* assign empty atom to the txnh and repeat */
-+ return atom_begin_and_assign_to_txnh(atom_alloc, txnh);
-+ } else {
-+ /* block has an atom, handle has none (case 2). Pin
-+ * the atom with a reference before the jnode lock is
-+ * dropped. */
-+ atomic_inc(&block_atom->refcount);
-+ /* node spin-lock isn't needed anymore */
-+ spin_unlock_jnode(node);
-+ /* trylock failed: drop the txnh lock and take the
-+ * locks in atom, txnh order instead */
-+ if (!spin_trylock_atom(block_atom)) {
-+ spin_unlock_txnh(txnh);
-+ spin_lock_atom(block_atom);
-+ spin_lock_txnh(txnh);
-+ }
-+ /* re-check state after getting txnh and the node
-+ * atom spin-locked */
-+ if (node->atom != block_atom || txnh->atom != NULL) {
-+ spin_unlock_txnh(txnh);
-+ atom_dec_and_unlock(block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ /* state is unchanged: drop the temporary reference */
-+ atomic_dec(&block_atom->refcount);
-+ /* atom is past CAPTURE_WAIT, or is in CAPTURE_WAIT
-+ * with open handles: wait instead of joining it */
-+ if (block_atom->stage > ASTAGE_CAPTURE_WAIT ||
-+ (block_atom->stage == ASTAGE_CAPTURE_WAIT &&
-+ block_atom->txnh_count != 0))
-+ return capture_fuse_wait(txnh, block_atom, NULL, mode);
-+ capture_assign_txnh_nolock(block_atom, txnh);
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(block_atom);
-+ /* the jnode lock was released above, so the caller
-+ * has to retry */
-+ return RETERR(-E_REPEAT);
-+ }
-+ } else {
-+ /* It is time to perform deadlock prevention check over the
-+ node we want to capture. It is possible this node was locked
-+ for read without capturing it. The optimization which allows
-+ to do it helps us in keeping atoms independent as long as
-+ possible but it may cause lock/fuse deadlock problems.
-+
-+ A number of similar deadlock situations with locked but not
-+ captured nodes were found. In each situation there are two
-+ or more threads: one of them does flushing while another one
-+ does routine balancing or tree lookup. The flushing thread
-+ (F) sleeps in long term locking request for node (N), another
-+ thread (A) sleeps in trying to capture some node already
-+ belonging the atom F, F has a state which prevents
-+ immediately fusion .
-+
-+ Deadlocks of this kind cannot happen if node N was properly
-+ captured by thread A. The F thread fuse atoms before locking
-+ therefore current atom of thread F and current atom of thread
-+ A became the same atom and thread A may proceed. This does
-+ not work if node N was not captured because the fusion of
-+ atom does not happens.
-+
-+ The following scheme solves the deadlock: If
-+ longterm_lock_znode locks and does not capture a znode, that
-+ znode is marked as MISSED_IN_CAPTURE. A node marked this way
-+ is processed by the code below which restores the missed
-+ capture and fuses current atoms of all the node lock owners
-+ by calling the fuse_not_fused_lock_owners() function. */
-+ if (JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) {
-+ JF_CLR(node, JNODE_MISSED_IN_CAPTURE);
-+ if (jnode_is_znode(node) && znode_is_locked(JZNODE(node))) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ fuse_not_fused_lock_owners(txnh, JZNODE(node));
-+ return RETERR(-E_REPEAT);
-+ }
-+ }
-+ if (block_atom == NULL) {
-+ /* handle has an atom, block has none (case 3). Pin
-+ * the atom before the txnh lock is dropped. */
-+ atomic_inc(&txnh_atom->refcount);
-+ spin_unlock_txnh(txnh);
-+ /* trylock failed: drop the jnode lock and take the
-+ * locks in atom, jnode order instead */
-+ if (!spin_trylock_atom(txnh_atom)) {
-+ spin_unlock_jnode(node);
-+ spin_lock_atom(txnh_atom);
-+ spin_lock_jnode(node);
-+ }
-+ /* re-check: the handle must still belong to this
-+ * atom, the node must still be uncaptured and must
-+ * not be dying */
-+ if (txnh->atom != txnh_atom || node->atom != NULL
-+ || JF_ISSET(node, JNODE_IS_DYING)) {
-+ spin_unlock_jnode(node);
-+ atom_dec_and_unlock(txnh_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ atomic_dec(&txnh_atom->refcount);
-+ capture_assign_block_nolock(txnh_atom, node);
-+ spin_unlock_atom(txnh_atom);
-+ } else {
-+ if (txnh_atom != block_atom) {
-+ /* handle and block belong to different
-+ * atoms (case 4) */
-+ if (mode & TXN_CAPTURE_DONT_FUSE) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+ /* we are in a "no-fusion" mode and @node is
-+ * already part of transaction. */
-+ return RETERR(-E_NO_NEIGHBOR);
-+ }
-+ return capture_init_fusion(node, txnh, mode);
-+ }
-+ /* same atom already (case 1) */
-+ spin_unlock_txnh(txnh);
-+ }
-+ }
-+ return 0;
-+}
-+
-+/* Compute the txn_capture mode for capturing @node, which must be spin-locked.
-+   A write lock request, or a node that already belongs to an atom, yields
-+   TXN_CAPTURE_WRITE combined with the TXN_CAPTURE_NONBLOCKING and
-+   TXN_CAPTURE_DONT_FUSE bits taken from @flags. Any other request yields 0,
-+   meaning that there is no reason to capture. */
-+static txn_capture
-+build_capture_mode(jnode * node, znode_lock_mode lock_mode, txn_capture flags)
-+{
-+	txn_capture cap_mode;
-+	txn_capture passthrough;
-+
-+	assert_spin_locked(&(node->guard));
-+
-+	/* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */
-+
-+	if (lock_mode == ZNODE_WRITE_LOCK || node->atom != NULL) {
-+		cap_mode = TXN_CAPTURE_WRITE;
-+	} else if (0 &&	/* txnh->mode == TXN_READ_FUSING && */
-+		   jnode_get_level(node) == LEAF_LEVEL) {
-+		/* NOTE-NIKITA TXN_READ_FUSING is not currently used, so this
-+		   branch is compiled out by the constant 0 above. */
-+		/* A READ_FUSING capture is needed at the leaf level only:
-+		   the internal levels of the tree (twigs included) are
-+		   redundant from the point of view of a user that asked for
-+		   a read-fusing transcrash. The user only wants to read-fuse
-+		   atoms because of reading uncommitted data written by
-+		   another user. It is the file system that reads/writes the
-+		   internal tree levels, the user only reads/writes leaves. */
-+		cap_mode = TXN_CAPTURE_READ_ATOMIC;
-+	} else {
-+		/* Read lock on a node without an atom: there is no reason
-+		 * to capture. */
-+		/* cap_mode = TXN_CAPTURE_READ_NONCOM; */
-+		return 0;
-+	}
-+
-+	passthrough = flags & (TXN_CAPTURE_NONBLOCKING | TXN_CAPTURE_DONT_FUSE);
-+	cap_mode |= passthrough;
-+	assert("nikita-3186", cap_mode != 0);
-+	return cap_mode;
-+}
-+
-+/* This is an external interface to try_capture_block(), it calls
-+   try_capture_block() repeatedly as long as -E_REPEAT is returned.
-+
-+   @node:      node to capture, its spin lock is held by the caller,
-+   @lock_mode: read or write lock is used in capture mode calculation,
-+   @flags:     see txn_capture flags enumeration.
-+
-+   @return: 0 - node was successfully captured, or no capture was needed
-+	    (the node already belongs to the atom of the current transaction
-+	    handle, or build_capture_mode() found nothing to capture),
-+	    -EINVAL - node is dying,
-+	    -E_REPEAT - capture request cannot be processed immediately as it
-+	    was requested in flags (try_capture_block() returned -E_BLOCK),
-+	    < 0 - other errors.
-+
-+   The jnode spin lock is held on return, whatever the result is.
-+*/
-+int reiser4_try_capture(jnode *node, znode_lock_mode lock_mode,
-+			txn_capture flags)
-+{
-+	txn_atom *atom_alloc = NULL;
-+	txn_capture cap_mode;
-+	txn_handle *txnh = get_current_context()->trans;
-+	int ret;
-+
-+	assert_spin_locked(&(node->guard));
-+
-+ repeat:
-+	/* Every exit below goes through "out": @atom_alloc may have been
-+	   left over by a previous -E_REPEAT pass and must not be leaked. */
-+	if (JF_ISSET(node, JNODE_IS_DYING)) {
-+		ret = RETERR(-EINVAL);
-+		goto out;
-+	}
-+	if (node->atom != NULL && txnh->atom == node->atom) {
-+		ret = 0;
-+		goto out;
-+	}
-+	cap_mode = build_capture_mode(node, lock_mode, flags);
-+	if (cap_mode == 0 ||
-+	    (!(cap_mode & TXN_CAPTURE_WTYPES) && node->atom == NULL)) {
-+		/* Mark this node as "MISSED". It helps in further deadlock
-+		 * analysis */
-+		if (jnode_is_znode(node))
-+			JF_SET(node, JNODE_MISSED_IN_CAPTURE);
-+		ret = 0;
-+		goto out;
-+	}
-+	/* Repeat try_capture as long as -E_REPEAT is returned. */
-+	ret = try_capture_block(txnh, node, cap_mode, &atom_alloc);
-+	/* Regardless of non_blocking:
-+
-+	   If ret == 0 then jnode is still locked.
-+	   If ret != 0 then jnode is unlocked.
-+	 */
-+#if REISER4_DEBUG
-+	if (ret == 0)
-+		assert_spin_locked(&(node->guard));
-+	else
-+		assert_spin_not_locked(&(node->guard));
-+#endif
-+	/* the spin lock of a transaction handle is ->hlock */
-+	assert_spin_not_locked(&(txnh->hlock));
-+
-+	if (ret == -E_REPEAT) {
-+		/* E_REPEAT implies all locks were released, therefore we need
-+		   to take the jnode's lock again. */
-+		spin_lock_jnode(node);
-+
-+		/* Although this may appear to be a busy loop, it is not.
-+		   There are several conditions that cause E_REPEAT to be
-+		   returned by the call to try_capture_block, all cases
-+		   indicating some kind of state change that means you should
-+		   retry the request and will get a different result. In some
-+		   cases this could be avoided with some extra code, but
-+		   generally it is done because the necessary locks were
-+		   released as a result of the operation and repeating is the
-+		   simplest thing to do (less bug potential). The cases are:
-+		   atom fusion returns E_REPEAT after it completes (jnode and
-+		   txnh were unlocked); race conditions in assign_block,
-+		   assign_txnh, and init_fusion return E_REPEAT (trylock
-+		   failure); after going to sleep in capture_fuse_wait
-+		   (request was blocked but may now succeed). I'm not quite
-+		   sure how capture_copy works yet, but it may also return
-+		   E_REPEAT. When the request is legitimately blocked, the
-+		   requestor goes to sleep in fuse_wait, so this is not a busy
-+		   loop. */
-+		/* NOTE-NIKITA: still don't understand:
-+
-+		   try_capture_block->capture_assign_txnh->spin_trylock_atom->E_REPEAT
-+
-+		   looks like busy loop?
-+		 */
-+		goto repeat;
-+	}
-+
-+	if (ret != 0) {
-+		if (ret == -E_BLOCK) {
-+			assert("nikita-3360",
-+			       cap_mode & TXN_CAPTURE_NONBLOCKING);
-+			ret = -E_REPEAT;
-+		}
-+
-+		/* Failure means jnode is not locked. FIXME_LATER_JMACD May
-+		   want to fix the above code to avoid releasing the lock and
-+		   re-acquiring it, but there are cases were failure occurs
-+		   when the lock is not held, and those cases would need to be
-+		   modified to re-take the lock. */
-+		spin_lock_jnode(node);
-+	}
-+
-+ out:
-+	/* free extra atom object that was possibly allocated by
-+	   try_capture_block() and was not consumed. This is done on every
-+	   exit path, including the early exits taken after a retry. */
-+	if (atom_alloc != NULL)
-+		kmem_cache_free(_atom_slab, atom_alloc);
-+
-+	/* Jnode is locked on every path that gets here. */
-+	assert_spin_locked(&(node->guard));
-+	return ret;
-+}
-+
-+/* Drop one reference on each of two atoms and unlock them. The calls are
-+   made one atom at a time: @one is unlocked first, @two is released with
-+   atom_dec_and_unlock(), then @one is locked again and released the same
-+   way. The only caller in this file, fuse_not_fused_lock_owners(), holds
-+   both atom spin locks and an extra reference on each atom. */
-+static void release_two_atoms(txn_atom *one, txn_atom *two)
-+{
-+ spin_unlock_atom(one);
-+ atom_dec_and_unlock(two);
-+ /* re-take the lock: atom_dec_and_unlock() is called on a locked atom */
-+ spin_lock_atom(one);
-+ atom_dec_and_unlock(one);
-+}
-+
-+/* NOTE: this paragraph describes reiser4_try_capture(), defined above, and not the
-+ function that follows it. reiser4_try_capture() sets up a call to try_capture_block
-+ and repeats as long as -E_REPEAT is returned by that routine. The txn_capture request
-+ mode is computed there from the lock request and the flags passed by the caller. It is
-+ called from the depths of the lock manager with the jnode lock held and it always
-+ returns with the jnode lock held.
-+*/
-+
-+/* fuse all 'active' atoms of lock owners of given node.
-+
-+ Walks the list of owners of @node's long term lock. As soon as an owner is
-+ found whose transaction handle is not in the atom of @txnh, that owner is
-+ woken up and either gets the atom of @txnh assigned (when its handle has no
-+ atom yet) or has its atom fused with the atom of @txnh. The scan is then
-+ restarted from scratch, until every owner other than the current context
-+ is in the atom of @txnh.
-+
-+ Must be called with neither the jnode spin lock of @node nor the spin lock
-+ of @txnh held; all locks taken here are released before returning. */
-+static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node)
-+{
-+ lock_handle *lh;
-+ int repeat;
-+ txn_atom *atomh, *atomf;
-+ reiser4_context *me = get_current_context();
-+ reiser4_context *ctx = NULL;
-+
-+ assert_spin_not_locked(&(ZJNODE(node)->guard));
-+ assert_spin_not_locked(&(txnh->hlock));
-+
-+ repeat:
-+ repeat = 0;
-+ /* atomh is the atom of @txnh; it stays spin-locked below */
-+ atomh = txnh_get_atom(txnh);
-+ spin_unlock_txnh(txnh);
-+ assert("zam-692", atomh != NULL);
-+
-+ spin_lock_zlock(&node->lock);
-+ /* inspect list of lock owners */
-+ list_for_each_entry(lh, &node->lock.owners, owners_link) {
-+ ctx = get_context_by_lock_stack(lh->owner);
-+ if (ctx == me)
-+ continue;
-+ /* below we use two assumptions to avoid addition spin-locks
-+ for checking the condition :
-+
-+ 1) if the lock stack has lock, the transaction should be
-+ opened, i.e. ctx->trans != NULL;
-+
-+ 2) reading of well-aligned ctx->trans->atom is atomic, if it
-+ equals to the address of spin-locked atomh, we take that
-+ the atoms are the same, nothing has to be captured. */
-+ if (atomh != ctx->trans->atom) {
-+ reiser4_wake_up(lh->owner);
-+ repeat = 1;
-+ break;
-+ }
-+ }
-+ if (repeat) {
-+ /* ctx is the owner found above. If its handle cannot be
-+ * locked right away, drop everything and start over. */
-+ if (!spin_trylock_txnh(ctx->trans)) {
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+ goto repeat;
-+ }
-+ atomf = ctx->trans->atom;
-+ if (atomf == NULL) {
-+ capture_assign_txnh_nolock(atomh, ctx->trans);
-+ /* release zlock lock _after_ assigning the atom to the
-+ * transaction handle, otherwise the lock owner thread
-+ * may unlock all znodes, exit kernel context and here
-+ * we would access an invalid transaction handle. */
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+ spin_unlock_txnh(ctx->trans);
-+ goto repeat;
-+ }
-+ assert("zam-1059", atomf != atomh);
-+ spin_unlock_zlock(&node->lock);
-+ /* pin both atoms before the owner's handle is unlocked */
-+ atomic_inc(&atomh->refcount);
-+ atomic_inc(&atomf->refcount);
-+ spin_unlock_txnh(ctx->trans);
-+ /* get both atoms locked; the order of locking depends on the
-+ * atom addresses: when atomf is the lower one, atomh is
-+ * released first and re-taken after atomf */
-+ if (atomf > atomh) {
-+ spin_lock_atom_nested(atomf);
-+ } else {
-+ spin_unlock_atom(atomh);
-+ spin_lock_atom(atomf);
-+ spin_lock_atom_nested(atomh);
-+ }
-+ /* re-check: the atoms must still be distinct and open */
-+ if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) {
-+ release_two_atoms(atomf, atomh);
-+ goto repeat;
-+ }
-+ atomic_dec(&atomh->refcount);
-+ atomic_dec(&atomf->refcount);
-+ capture_fuse_into(atomf, atomh);
-+ goto repeat;
-+ }
-+ /* no owner in a foreign atom was found: done */
-+ spin_unlock_zlock(&node->lock);
-+ spin_unlock_atom(atomh);
-+}
-+
-+/* Interface for capturing an unformatted node through its struct page
-+   reference. Currently it is only used in reiser4_invalidatepage.
-+
-+   @pg must be locked. The page lock is dropped while the capture is attempted
-+   and is taken again before returning. Returns the result of
-+   reiser4_try_capture(), or the error encoded in the pointer returned by
-+   jnode_of_page(). */
-+int try_capture_page_to_invalidate(struct page *pg)
-+{
-+	jnode *node;
-+	int ret;
-+
-+	assert("umka-292", pg != NULL);
-+	assert("nikita-2597", PageLocked(pg));
-+
-+	node = jnode_of_page(pg);
-+	if (IS_ERR(node))
-+		return PTR_ERR(node);
-+
-+	spin_lock_jnode(node);
-+	unlock_page(pg);
-+
-+	/* capture as for a write lock, with no extra flags */
-+	ret = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
-+	spin_unlock_jnode(node);
-+
-+	jput(node);
-+	lock_page(pg);
-+	return ret;
-+}
-+
-+/* This informs the transaction manager when a node is deleted. Add the block to the
-+ atom's delete set and uncapture the block.
-+
-+ @pg must be locked and must have a jnode attached (BUG otherwise). The page
-+ lock may be dropped and re-taken inside this function while waiting for
-+ running flush queues of the atom.
-+
-+VS-FIXME-HANS: this E_REPEAT paradigm clutters the code and creates a need for
-+explanations. find all the functions that use it, and unless there is some very
-+good reason to use it (I have not noticed one so far and I doubt it exists, but maybe somewhere somehow....),
-+move the loop to inside the function.
-+
-+VS-FIXME-HANS: can this code be at all streamlined? In particular, can you lock and unlock the jnode fewer times?
-+ */
-+void reiser4_uncapture_page(struct page *pg)
-+{
-+ jnode *node;
-+ txn_atom *atom;
-+
-+ assert("umka-199", pg != NULL);
-+ assert("nikita-3155", PageLocked(pg));
-+
-+ clear_page_dirty_for_io(pg);
-+
-+ reiser4_wait_page_writeback(pg);
-+
-+ node = jprivate(pg);
-+ BUG_ON(node == NULL);
-+
-+ spin_lock_jnode(node);
-+
-+ atom = jnode_get_atom(node);
-+ if (atom == NULL) {
-+ /* node is not captured: nothing to uncapture */
-+ assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+
-+ /* We can remove jnode from transaction even if it is on flush queue
-+ * prepped list, we only need to be sure that flush queue is not being
-+ * written by reiser4_write_fq(). reiser4_write_fq() does not use atom
-+ * spin lock for protection of the prepped nodes list, instead
-+ * write_fq() increments atom's nr_running_queues counters for the time
-+ * when prepped list is not protected by spin lock. Here we check this
-+ * counter if we want to remove jnode from flush queue and, if the
-+ * counter is not zero, wait all reiser4_write_fq() for this atom to
-+ * complete. This is not significant overhead. */
-+ while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) {
-+ spin_unlock_jnode(node);
-+ /*
-+ * at this moment we want to wait for "atom event", viz. wait
-+ * until @node can be removed from flush queue. But
-+ * reiser4_atom_wait_event() cannot be called with page locked,
-+ * because it deadlocks with jnode_extent_write(). Unlock page,
-+ * after making sure (through page_cache_get()) that it cannot
-+ * be released from memory.
-+ */
-+ page_cache_get(pg);
-+ unlock_page(pg);
-+ reiser4_atom_wait_event(atom);
-+ lock_page(pg);
-+ /*
-+ * page may has been detached by ->writepage()->releasepage().
-+ */
-+ reiser4_wait_page_writeback(pg);
-+ spin_lock_jnode(node);
-+ page_cache_release(pg);
-+ /* the atom has to be looked up again after the wait */
-+ atom = jnode_get_atom(node);
-+/* VS-FIXME-HANS: improve the commenting in this function */
-+ if (atom == NULL) {
-+ spin_unlock_jnode(node);
-+ return;
-+ }
-+ }
-+ /* NOTE(review): the jnode spin lock is not released explicitly on
-+ * this path; presumably reiser4_uncapture_block() drops it - confirm
-+ * against its definition. */
-+ reiser4_uncapture_block(node);
-+ spin_unlock_atom(atom);
-+ jput(node);
-+}
-+
-+/* Used in extent's kill hook to uncapture and unhash jnodes attached to the
-+ * inode's tree of jnodes. @node must be spin-locked and must have no page
-+ * attached. A node that is in no atom is simply unlocked. */
-+void reiser4_uncapture_jnode(jnode * node)
-+{
-+	txn_atom *atom;
-+
-+	assert_spin_locked(&(node->guard));
-+	assert("", node->pg == 0);
-+
-+	atom = jnode_get_atom(node);
-+	if (atom != NULL) {
-+		/* captured node: take it out of its atom */
-+		reiser4_uncapture_block(node);
-+		spin_unlock_atom(atom);
-+		jput(node);
-+		return;
-+	}
-+
-+	/* not captured: nothing to do except dropping the jnode lock */
-+	assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
-+	spin_unlock_jnode(node);
-+}
-+
-+/* No-locking version of assign_txnh. Sets the transaction handle's atom pointer,
-+ increases atom refcount and txnh_count, adds to txnh_list.
-+
-+ The caller must hold both the spin lock of @txnh and the spin lock of @atom;
-+ @txnh must not belong to an atom yet and @atom must be open. */
-+static void capture_assign_txnh_nolock(txn_atom *atom, txn_handle *txnh)
-+{
-+ assert("umka-200", atom != NULL);
-+ assert("umka-201", txnh != NULL);
-+
-+ assert_spin_locked(&(txnh->hlock));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-824", txnh->atom == NULL);
-+ assert("nikita-3540", atom_isopen(atom));
-+ /* same condition as "jmacd-824", but checked in non-debug builds too */
-+ BUG_ON(txnh->atom != NULL);
-+
-+ /* the pointer from the handle to the atom is counted in ->refcount */
-+ atomic_inc(&atom->refcount);
-+ txnh->atom = atom;
-+ reiser4_ctx_gfp_mask_set();
-+ list_add_tail(&txnh->txnh_link, &atom->txnh_list);
-+ atom->txnh_count += 1;
-+}
-+
-+/* No-locking version of assign_block. Sets the block's atom pointer, references the
-+ block, adds it to the atom's clean list (the node must not be dirty at this point),
-+ increments capture_count.
-+
-+ The caller must hold both the spin lock of @node and the spin lock of @atom;
-+ @node must not belong to an atom and must not be linked into a capture list. */
-+static void capture_assign_block_nolock(txn_atom *atom, jnode *node)
-+{
-+ assert("umka-202", atom != NULL);
-+ assert("umka-203", node != NULL);
-+ assert_spin_locked(&(node->guard));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-323", node->atom == NULL);
-+ BUG_ON(!list_empty_careful(&node->capture_link));
-+ assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY));
-+
-+ /* Pointer from jnode to atom is not counted in atom->refcount. */
-+ node->atom = atom;
-+
-+ list_add_tail(&node->capture_link, ATOM_CLEAN_LIST(atom));
-+ atom->capture_count += 1;
-+ /* reference to jnode is acquired by atom. */
-+ jref(node);
-+
-+ /* debug-only accounting: node moves from "not captured" to the clean
-+ * list */
-+ ON_DEBUG(count_jnode(atom, node, NOT_CAPTURED, CLEAN_LIST, 1));
-+
-+ LOCK_CNT_INC(t_refs);
-+}
-+
-+/* common code for dirtying both unformatted jnodes and formatted znodes.
-+
-+ The caller must hold the spin locks of @node and @atom, and @node must not be
-+ dirty yet. Sets JNODE_DIRTY, accounts the node in the current context, moves
-+ one block from the grabbed to the flush reserved counter when needed, and
-+ puts the node on the atom's dirty list of its tree level unless the node is
-+ on a flush queue. */
-+static void do_jnode_make_dirty(jnode * node, txn_atom * atom)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert_spin_locked(&(atom->alock));
-+ assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY));
-+
-+ JF_SET(node, JNODE_DIRTY);
-+
-+ get_current_context()->nr_marked_dirty++;
-+
-+ /* We grab2flush_reserve one additional block only if node was
-+ not CREATED and jnode_flush did not sort it into neither
-+ relocate set nor overwrite one. If node is in overwrite or
-+ relocate set we assume that atom's flush reserved counter was
-+ already adjusted. */
-+ if (!JF_ISSET(node, JNODE_CREATED) && !JF_ISSET(node, JNODE_RELOC)
-+ && !JF_ISSET(node, JNODE_OVRWR) && jnode_is_leaf(node)
-+ && !jnode_is_cluster_page(node)) {
-+ assert("vs-1093", !reiser4_blocknr_is_fake(&node->blocknr));
-+ assert("vs-1506", *jnode_get_block(node) != 0);
-+ grabbed2flush_reserved_nolock(atom, (__u64) 1);
-+ /* remember that a block was reserved for this node */
-+ JF_SET(node, JNODE_FLUSH_RESERVED);
-+ }
-+
-+ if (!JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
-+ /* If the atom is not set yet, it will be added to the appropriate list in
-+ capture_assign_block_nolock. */
-+ /* Sometimes a node is set dirty before being captured -- the case for new
-+ jnodes. In that case the jnode will be added to the appropriate list
-+ in capture_assign_block_nolock. Another reason not to re-link jnode is
-+ that jnode is on a flush queue (see flush.c for details) */
-+
-+ int level = jnode_get_level(node);
-+
-+ assert("nikita-3152", !JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-654", atom->stage < ASTAGE_PRE_COMMIT);
-+ assert("nikita-2607", 0 <= level);
-+ assert("nikita-2606", level <= REAL_MAX_ZTREE_HEIGHT);
-+
-+ /* move node to atom's dirty list */
-+ list_move_tail(&node->capture_link, ATOM_DIRTY_LIST(atom, level));
-+ ON_DEBUG(count_jnode
-+ (atom, node, NODE_LIST(node), DIRTY_LIST, 1));
-+ }
-+}
-+
-+/* Set the dirty status for this (spin locked) jnode. Does nothing when the
-+   node is dirty already. In debug builds a warning and a stack dump are
-+   emitted when the node belongs to a read-only file system. */
-+void jnode_make_dirty_locked(jnode * node)
-+{
-+	txn_atom *atom;
-+
-+	assert("umka-204", node != NULL);
-+	assert_spin_locked(&(node->guard));
-+
-+	if (REISER4_DEBUG && rofs_jnode(node)) {
-+		warning("nikita-3365", "Dirtying jnode on rofs");
-+		dump_stack();
-+	}
-+
-+	/* Fast path: the node is dirty already */
-+	if (JF_ISSET(node, JNODE_DIRTY))
-+		return;
-+
-+	atom = jnode_get_atom(node);
-+	assert("vs-1094", atom);
-+	/* The node spin lock might have been released inside
-+	 * jnode_get_atom(), so the dirty bit has to be checked once more. */
-+	if (likely(!JF_ISSET(node, JNODE_DIRTY)))
-+		do_jnode_make_dirty(node, atom);
-+	spin_unlock_atom(atom);
-+}
-+
-+/* Set the dirty status for this znode.
-+
-+ @z must be write locked, and must be loaded unless it is the znode above the
-+ root. Besides dirtying the underlying jnode, the page of the znode (if any)
-+ is marked dirty and the znode version is bumped. */
-+void znode_make_dirty(znode * z)
-+{
-+ jnode *node;
-+ struct page *page;
-+
-+ assert("umka-204", z != NULL);
-+ assert("nikita-3290", znode_above_root(z) || znode_is_loaded(z));
-+ assert("nikita-3560", znode_is_write_locked(z));
-+
-+ node = ZJNODE(z);
-+ /* znode is longterm locked, we can check dirty bit without spinlock */
-+ if (JF_ISSET(node, JNODE_DIRTY)) {
-+ /* znode is dirty already. All we have to do is to change znode version */
-+ z->version = znode_build_version(jnode_get_tree(node));
-+ return;
-+ }
-+
-+ spin_lock_jnode(node);
-+ jnode_make_dirty_locked(node);
-+ page = jnode_page(node);
-+ if (page != NULL) {
-+ /* this is useful assertion (allows one to check that no
-+ * modifications are lost due to update of in-flight page),
-+ * but it requires locking on page to check PG_writeback
-+ * bit. */
-+ /* assert("nikita-3292",
-+ !PageWriteback(page) || ZF_ISSET(z, JNODE_WRITEBACK)); */
-+ /* hold the page while it is used without the jnode lock */
-+ page_cache_get(page);
-+
-+ /* jnode lock is not needed for the rest of
-+ * znode_make_dirty(). */
-+ spin_unlock_jnode(node);
-+ /* reiser4 file write code calls set_page_dirty for
-+ * unformatted nodes, for formatted nodes we do it here. */
-+ reiser4_set_page_dirty_internal(page);
-+ page_cache_release(page);
-+ /* bump version counter in znode */
-+ z->version = znode_build_version(jnode_get_tree(node));
-+ } else {
-+ /* only the znode above the root may have no page */
-+ assert("zam-596", znode_above_root(JZNODE(node)));
-+ spin_unlock_jnode(node);
-+ }
-+
-+ assert("nikita-1900", znode_is_write_locked(z));
-+ assert("jmacd-9777", node->atom != NULL);
-+}
-+
-+/* Make one step towards getting @atom committed on behalf of the current
-+   context's transaction handle:
-+
-+   - NULL atom: nothing to do, returns 0;
-+   - atom before ASTAGE_PRE_COMMIT: the current handle is assigned to the atom
-+     and the result of force_commit_atom() is returned;
-+   - atom before ASTAGE_POST_COMMIT: waits for an atom event and returns
-+     -E_REPEAT so that the caller tries once more;
-+   - otherwise: the atom is unlocked and 0 is returned. */
-+int reiser4_sync_atom(txn_atom * atom)
-+{
-+	txn_handle *txnh;
-+
-+	txnh = get_current_context()->trans;
-+
-+	if (atom == NULL)
-+		return 0;
-+
-+	if (atom->stage < ASTAGE_PRE_COMMIT) {
-+		spin_lock_txnh(txnh);
-+		capture_assign_txnh_nolock(atom, txnh);
-+		return force_commit_atom(txnh);
-+	}
-+
-+	if (atom->stage < ASTAGE_POST_COMMIT) {
-+		/* wait atom commit */
-+		reiser4_atom_wait_event(atom);
-+		/* try once more */
-+		return RETERR(-E_REPEAT);
-+	}
-+
-+	spin_unlock_atom(atom);
-+	return 0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* move jnode from one list to another
-+ call this after atom->capture_count is updated
-+
-+ Debug-only bookkeeping: decrements the per-list counter of @atom that
-+ corresponds to the list @node is recorded on (which must be @old_list),
-+ increments the counter of @new_list, records @new_list in @node and then
-+ checks that the counters add up to atom->capture_count. @check_lists is
-+ ignored at the moment, because the list walking code below is disabled. */
-+void
-+count_jnode(txn_atom * atom, jnode * node, atom_list old_list,
-+ atom_list new_list, int check_lists)
-+{
-+ struct list_head *pos;
-+
-+ assert("zam-1018", atom_is_protected(atom));
-+ assert_spin_locked(&(node->guard));
-+ assert("", NODE_LIST(node) == old_list);
-+
-+ /* take the node out of the counter of the list it is leaving */
-+ switch (NODE_LIST(node)) {
-+ case NOT_CAPTURED:
-+ break;
-+ case DIRTY_LIST:
-+ assert("", atom->dirty > 0);
-+ atom->dirty--;
-+ break;
-+ case CLEAN_LIST:
-+ assert("", atom->clean > 0);
-+ atom->clean--;
-+ break;
-+ case FQ_LIST:
-+ assert("", atom->fq > 0);
-+ atom->fq--;
-+ break;
-+ case WB_LIST:
-+ assert("", atom->wb > 0);
-+ atom->wb--;
-+ break;
-+ case OVRWR_LIST:
-+ assert("", atom->ovrwr > 0);
-+ atom->ovrwr--;
-+ break;
-+ default:
-+ impossible("", "");
-+ }
-+
-+ /* account the node in the counter of the list it is entering */
-+ switch (new_list) {
-+ case NOT_CAPTURED:
-+ break;
-+ case DIRTY_LIST:
-+ atom->dirty++;
-+ break;
-+ case CLEAN_LIST:
-+ atom->clean++;
-+ break;
-+ case FQ_LIST:
-+ atom->fq++;
-+ break;
-+ case WB_LIST:
-+ atom->wb++;
-+ break;
-+ case OVRWR_LIST:
-+ atom->ovrwr++;
-+ break;
-+ default:
-+ impossible("", "");
-+ }
-+ ASSIGN_NODE_LIST(node, new_list);
-+ /* the full walk over all the lists is disabled by the constant 0 */
-+ if (0 && check_lists) {
-+ int count;
-+ tree_level level;
-+
-+ count = 0;
-+
-+ /* flush queue list */
-+ /* reiser4_check_fq(atom); */
-+
-+ /* dirty list */
-+ count = 0;
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+ list_for_each(pos, ATOM_DIRTY_LIST(atom, level))
-+ count++;
-+ }
-+ if (count != atom->dirty)
-+ warning("", "dirty counter %d, real %d\n", atom->dirty,
-+ count);
-+
-+ /* clean list */
-+ count = 0;
-+ list_for_each(pos, ATOM_CLEAN_LIST(atom))
-+ count++;
-+ if (count != atom->clean)
-+ warning("", "clean counter %d, real %d\n", atom->clean,
-+ count);
-+
-+ /* wb list */
-+ count = 0;
-+ list_for_each(pos, ATOM_WB_LIST(atom))
-+ count++;
-+ if (count != atom->wb)
-+ warning("", "wb counter %d, real %d\n", atom->wb,
-+ count);
-+
-+ /* overwrite list */
-+ count = 0;
-+ list_for_each(pos, ATOM_OVRWR_LIST(atom))
-+ count++;
-+
-+ if (count != atom->ovrwr)
-+ warning("", "ovrwr counter %d, real %d\n", atom->ovrwr,
-+ count);
-+ }
-+ assert("vs-1624", atom->num_queued == atom->fq);
-+ /* print all the counters before the assertion fires, so that the
-+ * mismatch can be analyzed */
-+ if (atom->capture_count !=
-+ atom->dirty + atom->clean + atom->ovrwr + atom->wb + atom->fq) {
-+ printk
-+ ("count %d, dirty %d clean %d ovrwr %d wb %d fq %d\n",
-+ atom->capture_count, atom->dirty, atom->clean, atom->ovrwr,
-+ atom->wb, atom->fq);
-+ assert("vs-1622",
-+ atom->capture_count ==
-+ atom->dirty + atom->clean + atom->ovrwr + atom->wb +
-+ atom->fq);
-+ }
-+}
-+
-+#endif
-+
-+/* Make node OVRWR and put it on atom->overwrite_nodes list, atom lock and jnode
-+ * lock should be taken before calling this function.
-+ *
-+ * The node must be dirty, must have a real (not fake) block number and must
-+ * be neither RELOC nor on a flush queue. */
-+void jnode_make_wander_nolock(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("nikita-2431", node != NULL);
-+ assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC));
-+ assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
-+
-+ atom = node->atom;
-+
-+ assert("zam-895", atom != NULL);
-+ assert("zam-894", atom_is_protected(atom));
-+
-+ JF_SET(node, JNODE_OVRWR);
-+ /* move node to atom's overwrite list */
-+ list_move_tail(&node->capture_link, ATOM_OVRWR_LIST(atom));
-+ /* debug-only accounting: dirty list -> overwrite list */
-+ ON_DEBUG(count_jnode(atom, node, DIRTY_LIST, OVRWR_LIST, 1));
-+}
-+
-+/* Same as jnode_make_wander_nolock, but all necessary locks are taken inside
-+ * this function: the jnode lock is taken first, then the node's atom is
-+ * obtained through jnode_get_atom(). Both locks are dropped before return. */
-+void jnode_make_wander(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ spin_lock_jnode(node);
-+ /* NOTE(review): jnode_get_atom() presumably returns the atom spin-locked,
-+ * since it is unlocked below without an explicit lock here -- confirm. */
-+ atom = jnode_get_atom(node);
-+ assert("zam-913", atom != NULL);
-+ assert("zam-914", !JF_ISSET(node, JNODE_RELOC));
-+
-+ jnode_make_wander_nolock(node);
-+ /* release in reverse order: atom first, then the jnode */
-+ spin_unlock_atom(atom);
-+ spin_unlock_jnode(node);
-+}
-+
-+/* this just sets RELOC bit (via jnode_set_reloc()) after checking that the
-+ * node is in a state where that is legal: jnode spinlock held, node dirty, not
-+ * already RELOC, not OVRWR, not flush-queued and with a non-fake block number.
-+ * @fq is not used by the body; queueing is done separately by the callers. */
-+static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node)
-+{
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-916", JF_ISSET(node, JNODE_DIRTY));
-+ assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
-+ assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
-+ assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
-+ jnode_set_reloc(node);
-+}
-+
-+/* Make znode RELOC and put it on flush queue @fq. Takes the jnode lock of the
-+ * znode's jnode, obtains the atom via jnode_get_atom(), marks the node RELOC,
-+ * queues it, then drops the atom lock and the jnode lock. */
-+void znode_make_reloc(znode * z, flush_queue_t * fq)
-+{
-+ jnode *node;
-+ txn_atom *atom;
-+
-+ node = ZJNODE(z);
-+ spin_lock_jnode(node);
-+
-+ /* NOTE(review): atom is unlocked below without being locked here, so
-+ * jnode_get_atom() presumably returns it locked -- confirm. */
-+ atom = jnode_get_atom(node);
-+ assert("zam-919", atom != NULL);
-+
-+ jnode_make_reloc_nolock(fq, node);
-+ queue_jnode(fq, node);
-+
-+ spin_unlock_atom(atom);
-+ spin_unlock_jnode(node);
-+
-+}
-+
-+/* Make unformatted node RELOC and put it on flush queue @fq. Unlike
-+ * znode_make_reloc() this takes no locks itself: jnode_make_reloc_nolock()
-+ * asserts that node->guard is already held, so the caller must hold it. */
-+void unformatted_make_reloc(jnode *node, flush_queue_t *fq)
-+{
-+ assert("vs-1479", jnode_is_unformatted(node));
-+
-+ jnode_make_reloc_nolock(fq, node);
-+ queue_jnode(fq, node);
-+}
-+
-+/* Get the uber znode of @s's tree under a low-priority write lock, grab one
-+ * reserved block for the super block and mark the uber znode dirty.
-+ *
-+ * Returns 0 on success, otherwise the error returned by get_uber_znode() or
-+ * reiser4_grab_space_force(). The lock handle obtained from get_uber_znode()
-+ * is released on every path that follows a successful get_uber_znode(). */
-+int reiser4_capture_super_block(struct super_block *s)
-+{
-+	int result;
-+	lock_handle lh;
-+
-+	init_lh(&lh);
-+	result = get_uber_znode(reiser4_get_tree(s),
-+				ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI, &lh);
-+	if (result)
-+		return result;
-+
-+	/* Grabbing one block for superblock */
-+	result = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
-+	if (result == 0)
-+		znode_make_dirty(lh.node);
-+
-+	/* Drop the long-term lock on the uber znode whether or not the space
-+	 * grab succeeded; returning early here used to leave it held. */
-+	done_lh(&lh);
-+	return result;
-+}
-+
-+/* Walk the atom's WAITFOR list and wake the handles queued on it. */
-+static void wakeup_atom_waitfor_list(txn_atom * atom)
-+{
-+	txn_wait_links *waiter;
-+
-+	assert("umka-210", atom != NULL);
-+
-+	/* atom is locked */
-+	list_for_each_entry(waiter, &atom->fwaitfor_list, _fwaitfor_link) {
-+		/* A waiter that installed a callback is woken only when the
-+		 * callback returns non-zero; one without a callback is always
-+		 * woken. */
-+		if (waiter->waitfor_cb != NULL &&
-+		    !waiter->waitfor_cb(atom, waiter))
-+			continue;
-+
-+		reiser4_wake_up(waiter->_lock_stack);
-+	}
-+}
-+
-+/* Walk the atom's WAITING list and wake the handles queued on it. */
-+static void wakeup_atom_waiting_list(txn_atom * atom)
-+{
-+	txn_wait_links *waiter;
-+
-+	assert("umka-211", atom != NULL);
-+
-+	/* atom is locked */
-+	list_for_each_entry(waiter, &atom->fwaiting_list, _fwaiting_link) {
-+		/* A waiter that installed a callback is woken only when the
-+		 * callback returns non-zero; one without a callback is always
-+		 * woken. */
-+		if (waiter->waiting_cb != NULL &&
-+		    !waiter->waiting_cb(atom, waiter))
-+			continue;
-+
-+		reiser4_wake_up(waiter->_lock_stack);
-+	}
-+}
-+
-+/* Wake-up predicate installed by capture_fuse_wait() as waitfor_cb so that
-+ * waiters are not woken spuriously. Returns non-zero when the waiter should
-+ * be woken. @wlinks is not examined. */
-+static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks)
-+{
-+	assert("nikita-3330", atom != NULL);
-+	assert_spin_locked(&(atom->alock));
-+
-+	/* the atom has left the CAPTURE_WAIT stage: always wake */
-+	if (atom->stage != ASTAGE_CAPTURE_WAIT)
-+		return 1;
-+
-+	/* still in CAPTURE_WAIT: wake only if we are releasing the last
-+	 * transaction handle */
-+	return atom->txnh_count == 1;
-+}
-+
-+/* The general purpose of this function is to wait on the first of two possible events.
-+ The situation is that a handle (and its atom atomh) is blocked trying to capture a
-+ block (i.e., node) but the node's atom (atomf) is in the CAPTURE_WAIT state. The
-+ handle's atom (atomh) is not in the CAPTURE_WAIT state. However, atomh could fuse with
-+ another atom or, due to age, enter the CAPTURE_WAIT state itself, at which point it
-+ needs to unblock the handle to avoid deadlock. When the txnh is unblocked it will
-+ proceed and fuse the two atoms in the CAPTURE_WAIT state.
-+
-+ In other words, if either atomh or atomf change state, the handle will be awakened,
-+ thus there are two lists per atom: WAITING and WAITFOR.
-+
-+ This is also called by capture_assign_txnh with (atomh == NULL) to wait for atomf to
-+ close but it is not assigned to an atom of its own.
-+
-+ Lock ordering in this method: all four locks are held: JNODE_LOCK, TXNH_LOCK,
-+ BOTH_ATOM_LOCKS. Result: all four locks are released.
-+
-+ NOTE(review): the body below only releases the txnh lock and the atom lock(s); it
-+ never touches a jnode lock, and capture_init_fusion() drops the jnode lock before
-+ calling here. The "four locks" statement above looks stale -- confirm.
-+
-+ Returns -E_BLOCK immediately if @mode has TXN_CAPTURE_NONBLOCKING set, -E_REPEAT
-+ after having slept, or the non-zero result of reiser4_prepare_to_sleep().
-+*/
-+static int capture_fuse_wait(txn_handle * txnh, txn_atom * atomf,
-+ txn_atom * atomh, txn_capture mode)
-+{
-+ int ret;
-+ txn_wait_links wlinks;
-+
-+ assert("umka-213", txnh != NULL);
-+ assert("umka-214", atomf != NULL);
-+
-+ /* non-blocking request: drop every lock we were given and bail out */
-+ if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) {
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_atom(atomf);
-+
-+ if (atomh) {
-+ spin_unlock_atom(atomh);
-+ }
-+
-+ return RETERR(-E_BLOCK);
-+ }
-+
-+ /* Initialize the waiting list links. */
-+ init_wlinks(&wlinks);
-+
-+ /* Add txnh to atomf's waitfor list, unlock atomf. */
-+ list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list);
-+ wlinks.waitfor_cb = wait_for_fusion;
-+ /* hold a reference so atomf can still be locked after we sleep */
-+ atomic_inc(&atomf->refcount);
-+ spin_unlock_atom(atomf);
-+
-+ if (atomh) {
-+ /* Add txnh to atomh's waiting list, unlock atomh. */
-+ list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list);
-+ atomic_inc(&atomh->refcount);
-+ spin_unlock_atom(atomh);
-+ }
-+
-+ /* Go to sleep. */
-+ spin_unlock_txnh(txnh);
-+
-+ ret = reiser4_prepare_to_sleep(wlinks._lock_stack);
-+ if (ret == 0) {
-+ reiser4_go_to_sleep(wlinks._lock_stack);
-+ /* after a wake-up the caller has to retry the capture */
-+ ret = RETERR(-E_REPEAT);
-+ }
-+
-+ /* Remove from the waitfor list. */
-+ spin_lock_atom(atomf);
-+
-+ list_del(&wlinks._fwaitfor_link);
-+ /* drops the reference taken above and unlocks atomf */
-+ atom_dec_and_unlock(atomf);
-+
-+ if (atomh) {
-+ /* Remove from the waiting list. */
-+ spin_lock_atom(atomh);
-+ list_del(&wlinks._fwaiting_link);
-+ atom_dec_and_unlock(atomh);
-+ }
-+ return ret;
-+}
-+
-+/* Spin-lock two distinct atoms. The locks are always taken in address order
-+ * (lower address first); the second one is taken with the _nested variant. */
-+static void lock_two_atoms(txn_atom * one, txn_atom * two)
-+{
-+	txn_atom *first;
-+	txn_atom *second;
-+
-+	assert("zam-1067", one != two);
-+
-+	/* lock the atom with lesser address first */
-+	first = (one < two) ? one : two;
-+	second = (one < two) ? two : one;
-+
-+	spin_lock_atom(first);
-+	spin_lock_atom_nested(second);
-+}
-+
-+/* Perform the necessary work to prepare for fusing two atoms, which involves
-+ * acquiring two atom locks in the proper order. If one of the node's atom is
-+ * blocking fusion (i.e., it is in the CAPTURE_WAIT stage) and the handle's
-+ * atom is not then the handle's request is put to sleep. If the node's atom
-+ * is committing, then the node can be copy-on-captured. Otherwise, pick the
-+ * atom with fewer pointers to be fused into the atom with more pointer and
-+ * call capture_fuse_into.
-+ *
-+ * NOTE(review): the body below contains no copy-on-capture path and does not
-+ * compare atom sizes; it always passes the handle's atom as "small" and the
-+ * node's atom as "large" to capture_fuse_into(). The paragraph above may
-+ * describe an older implementation -- confirm.
-+ *
-+ * Called with the txnh lock and the jnode lock held; both are dropped here.
-+ * Returns -E_REPEAT after a fusion or when either ->atom pointer changed
-+ * while the locks were dropped, otherwise the result of capture_fuse_wait().
-+ */
-+static int capture_init_fusion(jnode *node, txn_handle *txnh, txn_capture mode)
-+{
-+ txn_atom * txnh_atom = txnh->atom;
-+ txn_atom * block_atom = node->atom;
-+
-+ /* pin both atoms so they are still there once the object locks are dropped */
-+ atomic_inc(&txnh_atom->refcount);
-+ atomic_inc(&block_atom->refcount);
-+
-+ spin_unlock_txnh(txnh);
-+ spin_unlock_jnode(node);
-+
-+ lock_two_atoms(txnh_atom, block_atom);
-+
-+ /* re-check: either object may have moved to another atom meanwhile */
-+ if (txnh->atom != txnh_atom || node->atom != block_atom ) {
-+ release_two_atoms(txnh_atom, block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+
-+ /* both atoms are locked now, the temporary references can go */
-+ atomic_dec(&txnh_atom->refcount);
-+ atomic_dec(&block_atom->refcount);
-+
-+ assert ("zam-1066", atom_isopen(txnh_atom));
-+
-+ /* fuse right away unless the node's atom is in a later stage than ours;
-+ * a CAPTURE_WAIT node atom with no open handles is fused as well */
-+ if (txnh_atom->stage >= block_atom->stage ||
-+ (block_atom->stage == ASTAGE_CAPTURE_WAIT && block_atom->txnh_count == 0)) {
-+ capture_fuse_into(txnh_atom, block_atom);
-+ return RETERR(-E_REPEAT);
-+ }
-+ /* capture_fuse_wait() unlocks the txnh, so re-take its lock first */
-+ spin_lock_txnh(txnh);
-+ return capture_fuse_wait(txnh, block_atom, txnh_atom, mode);
-+}
-+
-+/* Re-home every jnode on @small_head to atom @large and then splice
-+   @small_head onto @large_head, leaving @small_head empty. Returns the
-+   number of jnodes that were on @small_head. */
-+static int
-+capture_fuse_jnode_lists(txn_atom *large, struct list_head *large_head,
-+			 struct list_head *small_head)
-+{
-+	jnode *cur;
-+	int moved = 0;
-+
-+	assert("umka-218", large != NULL);
-+	assert("umka-219", large_head != NULL);
-+	assert("umka-220", small_head != NULL);
-+	/* small atom should be locked also. */
-+	assert_spin_locked(&(large->alock));
-+
-+	list_for_each_entry(cur, small_head, capture_link) {
-+		/* the ->atom pointer is updated under the jnode lock */
-+		spin_lock_jnode(cur);
-+		cur->atom = large;
-+		spin_unlock_jnode(cur);
-+
-+		moved++;
-+	}
-+
-+	/* Splice the lists. */
-+	list_splice_init(small_head, large_head->prev);
-+
-+	return moved;
-+}
-+
-+/* Re-home every transaction handle on @small_head to atom @large and then
-+   splice @small_head onto @large_head, leaving @small_head empty. Returns the
-+   number of handles that were on @small_head. */
-+static int
-+capture_fuse_txnh_lists(txn_atom *large, struct list_head *large_head,
-+			struct list_head *small_head)
-+{
-+	txn_handle *cur;
-+	int moved = 0;
-+
-+	assert("umka-221", large != NULL);
-+	assert("umka-222", large_head != NULL);
-+	assert("umka-223", small_head != NULL);
-+
-+	list_for_each_entry(cur, small_head, txnh_link) {
-+		/* the ->atom pointer is updated under the txnh lock */
-+		spin_lock_txnh(cur);
-+		cur->atom = large;
-+		spin_unlock_txnh(cur);
-+
-+		moved++;
-+	}
-+
-+	/* Splice the txn_handle list. */
-+	list_splice_init(small_head, large_head->prev);
-+
-+	return moved;
-+}
-+
-+/* This function fuses two atoms. The captured nodes and handles belonging to SMALL are
-+   added to LARGE and their ->atom pointers are all updated. The associated counts are
-+   updated as well, and any waiting handles belonging to either are awakened. Finally the
-+   smaller atom's refcount is decremented.
-+
-+   Both atoms must be spin-locked and open on entry (asserted). On return both atom
-+   locks have been released: LARGE by spin_unlock_atom(), SMALL by
-+   atom_dec_and_unlock(). SMALL is left in stage ASTAGE_INVALID with its counters
-+   zeroed.
-+*/
-+static void capture_fuse_into(txn_atom * small, txn_atom * large)
-+{
-+	int level;
-+	unsigned zcount = 0;
-+	unsigned tcount = 0;
-+
-+	assert("umka-224", small != NULL);
-+	/* this used to test @small a second time, leaving @large unchecked */
-+	assert("umka-225", large != NULL);
-+
-+	assert_spin_locked(&(large->alock));
-+	assert_spin_locked(&(small->alock));
-+
-+	assert("jmacd-201", atom_isopen(small));
-+	assert("jmacd-202", atom_isopen(large));
-+
-+	/* Splice and update the per-level dirty jnode lists */
-+	for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
-+		zcount +=
-+		    capture_fuse_jnode_lists(large,
-+					     ATOM_DIRTY_LIST(large, level),
-+					     ATOM_DIRTY_LIST(small, level));
-+	}
-+
-+	/* Splice and update the clean, overwrite, writeback and inode jnode
-+	   lists and the txnh list */
-+	zcount +=
-+	    capture_fuse_jnode_lists(large, ATOM_CLEAN_LIST(large),
-+				     ATOM_CLEAN_LIST(small));
-+	zcount +=
-+	    capture_fuse_jnode_lists(large, ATOM_OVRWR_LIST(large),
-+				     ATOM_OVRWR_LIST(small));
-+	zcount +=
-+	    capture_fuse_jnode_lists(large, ATOM_WB_LIST(large),
-+				     ATOM_WB_LIST(small));
-+	zcount +=
-+	    capture_fuse_jnode_lists(large, &large->inodes, &small->inodes);
-+	tcount +=
-+	    capture_fuse_txnh_lists(large, &large->txnh_list,
-+				    &small->txnh_list);
-+
-+	/* Check our accounting. Nodes sitting on flush queues are not on any
-+	   of the lists spliced above, hence the num_queued term. */
-+	assert("jmacd-1063",
-+	       zcount + small->num_queued == small->capture_count);
-+	assert("jmacd-1065", tcount == small->txnh_count);
-+
-+	/* sum numbers of waiters threads */
-+	large->nr_waiters += small->nr_waiters;
-+	small->nr_waiters = 0;
-+
-+	/* splice flush queues */
-+	reiser4_fuse_fq(large, small);
-+
-+	/* update the debug-only per-list jnode counters of both atoms */
-+	ON_DEBUG(large->dirty += small->dirty;
-+		 small->dirty = 0;
-+		 large->clean += small->clean;
-+		 small->clean = 0;
-+		 large->ovrwr += small->ovrwr;
-+		 small->ovrwr = 0;
-+		 large->wb += small->wb;
-+		 small->wb = 0;
-+		 large->fq += small->fq;
-+		 small->fq = 0;);
-+
-+	/* count flushers in result atom */
-+	large->nr_flushers += small->nr_flushers;
-+	small->nr_flushers = 0;
-+
-+	/* update counts of flushed nodes */
-+	large->flushed += small->flushed;
-+	small->flushed = 0;
-+
-+	/* Transfer list counts to large. */
-+	large->txnh_count += small->txnh_count;
-+	large->capture_count += small->capture_count;
-+
-+	/* Add all txnh references to large. */
-+	atomic_add(small->txnh_count, &large->refcount);
-+	atomic_sub(small->txnh_count, &small->refcount);
-+
-+	/* Reset small counts */
-+	small->txnh_count = 0;
-+	small->capture_count = 0;
-+
-+	/* Assign the oldest start_time, merge flags. */
-+	large->start_time = min(large->start_time, small->start_time);
-+	large->flags |= small->flags;
-+
-+	/* Merge blocknr sets. */
-+	blocknr_set_merge(&small->delete_set, &large->delete_set);
-+	blocknr_set_merge(&small->wandered_map, &large->wandered_map);
-+
-+	/* Merge allocated/deleted file counts */
-+	large->nr_objects_deleted += small->nr_objects_deleted;
-+	large->nr_objects_created += small->nr_objects_created;
-+
-+	small->nr_objects_deleted = 0;
-+	small->nr_objects_created = 0;
-+
-+	/* Merge allocated blocks counts */
-+	large->nr_blocks_allocated += small->nr_blocks_allocated;
-+
-+	large->nr_running_queues += small->nr_running_queues;
-+	small->nr_running_queues = 0;
-+
-+	/* Merge blocks reserved for overwrite set. */
-+	large->flush_reserved += small->flush_reserved;
-+	small->flush_reserved = 0;
-+
-+	if (large->stage < small->stage) {
-+		/* Large only needs to notify if it has changed state. */
-+		reiser4_atom_set_stage(large, small->stage);
-+		wakeup_atom_waiting_list(large);
-+	}
-+
-+	reiser4_atom_set_stage(small, ASTAGE_INVALID);
-+
-+	/* Notify any waiters--small needs to unload its wait lists. Waiters
-+	   actually remove themselves from the list before returning from the
-+	   fuse_wait function. */
-+	wakeup_atom_waiting_list(small);
-+
-+	/* Unlock atoms */
-+	spin_unlock_atom(large);
-+	atom_dec_and_unlock(small);
-+}
-+
-+/* TXNMGR STUFF */
-+
-+/* Release a block from the atom, reversing the effects of being captured,
-+ do not release atom's reference to jnode due to holding spin-locks.
-+ Currently this is only called when the atom commits.
-+
-+ Must be called with the jnode spinlock held and with the atom protected
-+ (both asserted); the jnode spinlock is released before return.
-+
-+ NOTE: this function does not release a (journal) reference to jnode
-+ due to locking optimizations, you should call jput() somewhere after
-+ calling reiser4_uncapture_block(). */
-+void reiser4_uncapture_block(jnode * node)
-+{
-+ txn_atom *atom;
-+
-+ assert("umka-226", node != NULL);
-+ atom = node->atom;
-+ assert("umka-228", atom != NULL);
-+
-+ assert("jmacd-1021", node->atom == atom);
-+ assert_spin_locked(&(node->guard));
-+ assert("jmacd-1023", atom_is_protected(atom));
-+
-+ /* drop every capture-related state flag */
-+ JF_CLR(node, JNODE_DIRTY);
-+ JF_CLR(node, JNODE_RELOC);
-+ JF_CLR(node, JNODE_OVRWR);
-+ JF_CLR(node, JNODE_CREATED);
-+ JF_CLR(node, JNODE_WRITEBACK);
-+ JF_CLR(node, JNODE_REPACK);
-+
-+ /* unlink from whichever list the node is on */
-+ list_del_init(&node->capture_link);
-+ if (JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
-+ assert("zam-925", atom_isopen(atom));
-+ assert("vs-1623", NODE_LIST(node) == FQ_LIST);
-+ ON_DEBUG(atom->num_queued--);
-+ JF_CLR(node, JNODE_FLUSH_QUEUED);
-+ }
-+ atom->capture_count -= 1;
-+ /* debug-only accounting: node leaves its current list */
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1));
-+ node->atom = NULL;
-+
-+ spin_unlock_jnode(node);
-+ LOCK_CNT_DEC(t_refs);
-+}
-+
-+/* Unconditional insert of jnode into atom's overwrite list. Currently used in
-+ bitmap-based allocator code for adding modified bitmap blocks to the
-+ transaction. @atom and @node are spin locked.
-+
-+ @node must already carry JNODE_OVRWR, must not belong to any atom yet and
-+ must be neither an unformatted node nor a znode (all asserted). A jnode
-+ reference is taken with jref() and the atom's capture_count is incremented. */
-+void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node)
-+{
-+ assert("zam-538", atom_is_protected(atom));
-+ assert_spin_locked(&(node->guard));
-+ assert("zam-899", JF_ISSET(node, JNODE_OVRWR));
-+ assert("zam-543", node->atom == NULL);
-+ assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node));
-+
-+ /* note: added at the head of the list, not the tail */
-+ list_add(&node->capture_link, ATOM_OVRWR_LIST(atom));
-+ jref(node);
-+ node->atom = atom;
-+ atom->capture_count++;
-+ /* debug-only accounting: node enters the overwrite list */
-+ ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), OVRWR_LIST, 1));
-+}
-+
-+/* blocknr_set_iterator() actor: accumulates into the reiser4_block_nr that
-+ * @data points to. An entry with @b == NULL contributes 1, otherwise it
-+ * contributes *@b. @atom is not used. Always returns 0. */
-+static int count_deleted_blocks_actor(txn_atom * atom,
-+				      const reiser4_block_nr * a,
-+				      const reiser4_block_nr * b, void *data)
-+{
-+	reiser4_block_nr *total = data;
-+
-+	assert("zam-995", data != NULL);
-+	assert("zam-996", a != NULL);
-+
-+	*total += (b == NULL) ? 1 : *b;
-+	return 0;
-+}
-+
-+/* Sum up, over every open atom of the current super block's transaction
-+ * manager, the blocks recorded in the atom's delete set. The atom list is
-+ * walked under the txnmgr spinlock and each atom is inspected under its own
-+ * spinlock. */
-+reiser4_block_nr txnmgr_count_deleted_blocks(void)
-+{
-+	txn_mgr *tmgr = &get_super_private(reiser4_get_current_sb())->tmgr;
-+	reiser4_block_nr total = 0;
-+	txn_atom *cur;
-+
-+	spin_lock_txnmgr(tmgr);
-+	list_for_each_entry(cur, &tmgr->atoms_list, atom_link) {
-+		spin_lock_atom(cur);
-+		if (atom_isopen(cur))
-+			blocknr_set_iterator(cur, &cur->delete_set,
-+					     count_deleted_blocks_actor,
-+					     &total, 0);
-+		spin_unlock_atom(cur);
-+	}
-+	spin_unlock_txnmgr(tmgr);
-+
-+	return total;
-+}
-+
-+/*
-+ * Local variables:
-+ * c-indentation-style: "K&R"
-+ * mode-name: "LC"
-+ * c-basic-offset: 8
-+ * tab-width: 8
-+ * fill-column: 79
-+ * End:
-+ */
-diff -urN linux-2.6.24.orig/fs/reiser4/txnmgr.h linux-2.6.24/fs/reiser4/txnmgr.h
---- linux-2.6.24.orig/fs/reiser4/txnmgr.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/txnmgr.h 2008-01-25 11:39:07.112253026 +0300
-@@ -0,0 +1,701 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* data-types and function declarations for transaction manager. See txnmgr.c
-+ * for details. */
-+
-+#ifndef __REISER4_TXNMGR_H__
-+#define __REISER4_TXNMGR_H__
-+
-+#include "forward.h"
-+#include "dformat.h"
-+
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <asm/atomic.h>
-+#include <linux/wait.h>
-+
-+/* TYPE DECLARATIONS */
-+
-+/* This enumeration describes the possible types of a capture request (reiser4_try_capture).
-+ A capture request dynamically assigns a block to the calling thread's transaction
-+ handle. The values are bit flags: bits 0-3 are the capture types, bits 4-5 are
-+ options that may be OR-ed onto a type. */
-+typedef enum {
-+ /* A READ_ATOMIC request indicates that a block will be read and that the caller's
-+ atom should fuse in order to ensure that the block commits atomically with the
-+ caller. */
-+ TXN_CAPTURE_READ_ATOMIC = (1 << 0),
-+
-+ /* A READ_NONCOM request indicates that a block will be read and that the caller is
-+ willing to read a non-committed block without causing atoms to fuse. */
-+ TXN_CAPTURE_READ_NONCOM = (1 << 1),
-+
-+ /* A READ_MODIFY request indicates that a block will be read but that the caller
-+ wishes for the block to be captured as it will be written. This capture request
-+ mode is not currently used, but eventually it will be useful for preventing
-+ deadlock in read-modify-write cycles. */
-+ TXN_CAPTURE_READ_MODIFY = (1 << 2),
-+
-+ /* A WRITE capture request indicates that a block will be modified and that atoms
-+ should fuse to make the commit atomic. */
-+ TXN_CAPTURE_WRITE = (1 << 3),
-+
-+ /* CAPTURE_TYPES is a mask of the four above capture types, used to separate the
-+ exclusive type designation from extra bits that may be supplied -- see
-+ below. */
-+ TXN_CAPTURE_TYPES = (TXN_CAPTURE_READ_ATOMIC |
-+ TXN_CAPTURE_READ_NONCOM | TXN_CAPTURE_READ_MODIFY |
-+ TXN_CAPTURE_WRITE),
-+
-+ /* A subset of CAPTURE_TYPES, CAPTURE_WTYPES is a mask of request types that
-+ indicate modification will occur. */
-+ TXN_CAPTURE_WTYPES = (TXN_CAPTURE_READ_MODIFY | TXN_CAPTURE_WRITE),
-+
-+ /* An option to reiser4_try_capture, NONBLOCKING indicates that the caller would
-+ prefer not to sleep waiting for an aging atom to commit. */
-+ TXN_CAPTURE_NONBLOCKING = (1 << 4),
-+
-+ /* An option to reiser4_try_capture to prevent atom fusion, just simple
-+ capturing is allowed */
-+ TXN_CAPTURE_DONT_FUSE = (1 << 5)
-+
-+ /* This macro selects only the exclusive capture request types, stripping out any
-+ options that were supplied (i.e., NONBLOCKING). It is a preprocessor
-+ definition that merely sits inside the enum body; it is not an enumerator. */
-+#define CAPTURE_TYPE(x) ((x) & TXN_CAPTURE_TYPES)
-+} txn_capture;
-+
-+/* There are two kinds of transaction handle: WRITE_FUSING and READ_FUSING, the only
-+ difference is in the handling of read requests. A WRITE_FUSING transaction handle
-+ defaults read capture requests to TXN_CAPTURE_READ_NONCOM whereas a READ_FUSING
-+ transaction handle defaults to TXN_CAPTURE_READ_ATOMIC. */
-+typedef enum {
-+ TXN_WRITE_FUSING = (1 << 0),
-+ TXN_READ_FUSING = (1 << 1) | TXN_WRITE_FUSING, /* READ implies WRITE */
-+} txn_mode;
-+
-+/* Every atom has a stage, which is one of these exclusive values. The numeric
-+ order matters: code compares stages with < and >= (e.g. against
-+ ASTAGE_PRE_COMMIT). */
-+typedef enum {
-+ /* Initially an atom is free. */
-+ ASTAGE_FREE = 0,
-+
-+ /* An atom begins by entering the CAPTURE_FUSE stage, where it proceeds to capture
-+ blocks and fuse with other atoms. */
-+ ASTAGE_CAPTURE_FUSE = 1,
-+
-+ /* TODO: We need to have a ASTAGE_CAPTURE_SLOW in which an atom fuses with one
-+ node for every X nodes it flushes to disk where X > 1. */
-+
-+ /* When an atom reaches a certain age it must do all it can to commit. An atom in
-+ the CAPTURE_WAIT stage refuses new transaction handles and prevents fusion from
-+ atoms in the CAPTURE_FUSE stage. */
-+ ASTAGE_CAPTURE_WAIT = 2,
-+
-+ /* Waiting for I/O before commit. Copy-on-capture (see
-+ http://namesys.com/v4/v4.html). */
-+ ASTAGE_PRE_COMMIT = 3,
-+
-+ /* Post-commit overwrite I/O. Steal-on-capture. */
-+ ASTAGE_POST_COMMIT = 4,
-+
-+ /* Atom which waits for the removal of the last reference to it, after
-+ * which it is to be deleted from memory (NOTE(review): wording
-+ * reconstructed from an unclear original comment -- confirm) */
-+ ASTAGE_DONE = 5,
-+
-+ /* invalid atom. */
-+ ASTAGE_INVALID = 6,
-+
-+} txn_stage;
-+
-+/* Certain flags may be set in the txn_atom->flags field. These are bit flags
-+ and may be OR-ed together. */
-+typedef enum {
-+ /* Indicates that the atom should commit as soon as possible. */
-+ ATOM_FORCE_COMMIT = (1 << 0),
-+ /* to avoid endless loop, mark the atom (which was considered as too
-+ * small) after failed attempt to fuse it. */
-+ ATOM_CANCEL_FUSION = (1 << 1)
-+} txn_flags;
-+
-+/* Flags for controlling commit_txnh. Stored in txn_handle->flags. Note that
-+ the values start at 0x2; 0x1 is not assigned here. */
-+typedef enum {
-+ /* Wait commit atom completion in commit_txnh */
-+ TXNH_WAIT_COMMIT = 0x2,
-+ /* Don't commit atom when this handle is closed */
-+ TXNH_DONT_COMMIT = 0x4
-+} txn_handle_flags_t;
-+
-+/* TYPE DEFINITIONS */
-+
-+/* A note on lock ordering: the handle & jnode spinlock protects reading of their ->atom
-+ fields, so typically an operation on the atom through either of these objects must (1)
-+ lock the object, (2) read the atom pointer, (3) lock the atom.
-+
-+ During atom fusion, the process holds locks on both atoms at once. Then, it iterates
-+ through the list of handles and pages held by the smaller of the two atoms. For each
-+ handle and page referencing the smaller atom, the fusing process must: (1) lock the
-+ object, and (2) update the atom pointer.
-+
-+ You can see that there is a conflict of lock ordering here, so the more-complex
-+ procedure should have priority, i.e., the fusing process has priority so that it is
-+ guaranteed to make progress and to avoid restarts.
-+
-+ This decision, however, means additional complexity for acquiring the atom lock in the
-+ first place.
-+
-+ The general original procedure followed in the code was:
-+
-+ TXN_OBJECT *obj = ...;
-+ TXN_ATOM *atom;
-+
-+ spin_lock (& obj->_lock);
-+
-+ atom = obj->_atom;
-+
-+ if (! spin_trylock_atom (atom))
-+ {
-+ spin_unlock (& obj->_lock);
-+ RESTART OPERATION, THERE WAS A RACE;
-+ }
-+
-+ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
-+
-+ It has however been found that this wastes CPU a lot in a manner that is
-+ hard to profile. So, proper refcounting was added to atoms, and new
-+ standard locking sequence is like following:
-+
-+ TXN_OBJECT *obj = ...;
-+ TXN_ATOM *atom;
-+
-+ spin_lock (& obj->_lock);
-+
-+ atom = obj->_atom;
-+
-+ if (! spin_trylock_atom (atom))
-+ {
-+ atomic_inc (& atom->refcount);
-+ spin_unlock (& obj->_lock);
-+ spin_lock (&atom->_lock);
-+ atomic_dec (& atom->refcount);
-+ // HERE atom is locked
-+ spin_unlock (&atom->_lock);
-+ RESTART OPERATION, THERE WAS A RACE;
-+ }
-+
-+ ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
-+
-+ (core of this is implemented in trylock_throttle() function)
-+
-+ See the jnode_get_atom() function for a common case.
-+
-+ As an additional (and important) optimization allowing to avoid restarts,
-+ it is possible to re-check required pre-conditions at the HERE point in
-+ code above and proceed without restarting if they are still satisfied.
-+*/
-+
-+/* An atomic transaction: this is the underlying system representation
-+ of a transaction, not the one seen by clients.
-+
-+ Invariants involving this data-type:
-+
-+ [sb-fake-allocated]
-+*/
-+struct txn_atom {
-+ /* The spinlock protecting the atom, held during fusion and various other state
-+ changes. */
-+ spinlock_t alock;
-+
-+ /* The atom's reference counter, increasing (in case of a duplication
-+ of an existing reference or when we are sure that some other
-+ reference exists) may be done without taking spinlock, decrementing
-+ of the ref. counter requires a spinlock to be held.
-+
-+ Each transaction handle counts in ->refcount. All jnodes count as
-+ one reference acquired in atom_begin_andlock(), released in
-+ commit_current_atom().
-+ */
-+ atomic_t refcount;
-+
-+ /* The atom_id identifies the atom in persistent records such as the log. */
-+ __u32 atom_id;
-+
-+ /* Flags holding any of the txn_flags enumerated values (e.g.,
-+ ATOM_FORCE_COMMIT). */
-+ __u32 flags;
-+
-+ /* Number of open handles. */
-+ __u32 txnh_count;
-+
-+ /* The number of znodes captured by this atom. Equal to the sum of lengths of the
-+ dirty_nodes[level] and clean_nodes lists.
-+ NOTE(review): the fusion code also counts the overwrite, writeback and
-+ inode lists plus flush-queued nodes against capture_count, so the
-+ sentence above looks incomplete -- confirm. */
-+ __u32 capture_count;
-+
-+#if REISER4_DEBUG
-+ /* debug-only per-list jnode counters (clean, dirty, overwrite, writeback
-+ and flush-queue lists respectively) */
-+ int clean;
-+ int dirty;
-+ int ovrwr;
-+ int wb;
-+ int fq;
-+#endif
-+
-+ /* count of flushed nodes (summed into the surviving atom on fusion) */
-+ __u32 flushed;
-+
-+ /* Current transaction stage. */
-+ txn_stage stage;
-+
-+ /* Start time. */
-+ unsigned long start_time;
-+
-+ /* The atom's delete set. It collects block numbers of the nodes
-+ which were deleted during the transaction. */
-+ struct list_head delete_set;
-+
-+ /* The atom's wandered_block mapping. */
-+ struct list_head wandered_map;
-+
-+ /* The transaction's list of dirty captured nodes--per level. Index
-+ by (level). dirty_nodes[0] is for znode-above-root */
-+ struct list_head dirty_nodes[REAL_MAX_ZTREE_HEIGHT + 1];
-+
-+ /* The transaction's list of clean captured nodes. */
-+ struct list_head clean_nodes;
-+
-+ /* The atom's overwrite set */
-+ struct list_head ovrwr_nodes;
-+
-+ /* nodes which are being written to disk */
-+ struct list_head writeback_nodes;
-+
-+ /* list of inodes */
-+ struct list_head inodes;
-+
-+ /* List of handles associated with this atom. */
-+ struct list_head txnh_list;
-+
-+ /* Transaction list link: list of atoms in the transaction manager. */
-+ struct list_head atom_link;
-+
-+ /* List of handles waiting FOR this atom: see 'capture_fuse_wait' comment. */
-+ struct list_head fwaitfor_list;
-+
-+ /* List of this atom's handles that are waiting: see 'capture_fuse_wait' comment. */
-+ struct list_head fwaiting_list;
-+
-+ /* Numbers of objects which were deleted/created in this transaction
-+ thereby numbers of objects IDs which were released/deallocated. */
-+ int nr_objects_deleted;
-+ int nr_objects_created;
-+ /* number of blocks allocated during the transaction */
-+ __u64 nr_blocks_allocated;
-+ /* All atom's flush queue objects are on this list */
-+ struct list_head flush_queues;
-+#if REISER4_DEBUG
-+ /* number of flush queues for this atom. */
-+ int nr_flush_queues;
-+ /* Number of jnodes which were removed from atom's lists and put
-+ on flush_queue */
-+ int num_queued;
-+#endif
-+ /* number of threads who wait for this atom to complete commit */
-+ int nr_waiters;
-+ /* number of threads which do jnode_flush() over this atom */
-+ int nr_flushers;
-+ /* number of flush queues which are IN_USE and jnodes from fq->prepped
-+ are submitted to disk by the reiser4_write_fq() routine. */
-+ int nr_running_queues;
-+ /* A counter of grabbed unformatted nodes, see a description of the
-+ * reiser4 space reservation scheme at block_alloc.c */
-+ reiser4_block_nr flush_reserved;
-+#if REISER4_DEBUG
-+ /* NOTE(review): presumably identifies the thread committing this atom;
-+ not used in the visible code -- confirm */
-+ void *committer;
-+#endif
-+ /* NOTE(review): presumably the super block this atom belongs to -- confirm */
-+ struct super_block *super;
-+};
-+
-+/* Accessors yielding a pointer to the list head of each per-atom jnode list;
-+ ATOM_FQ_LIST takes a flush queue instead of an atom. */
-+#define ATOM_DIRTY_LIST(atom, level) (&(atom)->dirty_nodes[level])
-+#define ATOM_CLEAN_LIST(atom) (&(atom)->clean_nodes)
-+#define ATOM_OVRWR_LIST(atom) (&(atom)->ovrwr_nodes)
-+#define ATOM_WB_LIST(atom) (&(atom)->writeback_nodes)
-+#define ATOM_FQ_LIST(fq) (&(fq)->prepped)
-+
-+/* NODE_LIST names the node's ->list member; ASSIGN_NODE_LIST sets it only
-+ when ON_DEBUG() expands its argument. */
-+#define NODE_LIST(node) (node)->list
-+#define ASSIGN_NODE_LIST(node, list) ON_DEBUG(NODE_LIST(node) = list)
-+/* count_jnode() is declared only through ON_DEBUG() */
-+ON_DEBUG(void
-+ count_jnode(txn_atom *, jnode *, atom_list old_list,
-+ atom_list new_list, int check_lists));
-+
-+/* A transaction handle: the client obtains and commits this handle which is assigned by
-+ the system to a txn_atom. */
-+struct txn_handle {
-+ /* Spinlock protecting ->atom pointer */
-+ spinlock_t hlock;
-+
-+ /* Flags for controlling commit_txnh() behavior */
-+ /* from txn_handle_flags_t */
-+ txn_handle_flags_t flags;
-+
-+ /* Whether it is READ_FUSING or WRITE_FUSING. */
-+ txn_mode mode;
-+
-+ /* If assigned, the atom it is part of. Rewritten under hlock when the
-+ atom is fused into another one. */
-+ txn_atom *atom;
-+
-+ /* Transaction list link. Head is in txn_atom (txnh_list). */
-+ struct list_head txnh_link;
-+};
-+
-+/* The transaction manager: one is contained in the reiser4_super_info_data */
-+struct txn_mgr {
-+ /* A spinlock protecting the atom list, id_count, flush_control */
-+ spinlock_t tmgr_lock;
-+
-+ /* List of atoms (linked through txn_atom->atom_link). */
-+ struct list_head atoms_list;
-+
-+ /* Number of atoms. */
-+ int atom_count;
-+
-+ /* A counter used to assign atom->atom_id values. */
-+ __u32 id_count;
-+
-+ /* a mutex object for commit serialization */
-+ struct mutex commit_mutex;
-+
-+ /* a list of all txnmrgs served by particular daemon. */
-+ struct list_head linkage;
-+
-+ /* description of daemon for this txnmgr */
-+ ktxnmgrd_context *daemon;
-+
-+ /* parameters. Adjustable through mount options. */
-+ unsigned int atom_max_size;
-+ unsigned int atom_max_age;
-+ unsigned int atom_min_size;
-+ /* max number of concurrent flushers for one atom, 0 - unlimited. */
-+ unsigned int atom_max_flushers;
-+ /* NOTE(review): presumably debugfs entries exposing atom_count and
-+ id_count -- confirm against the code that creates them */
-+ struct dentry *debugfs_atom_count;
-+ struct dentry *debugfs_id_count;
-+};
-+
-+/* FUNCTION DECLARATIONS */
-+
-+/* These are the externally (within Reiser4) visible transaction functions, therefore they
-+ are prefixed with "txn_". For comments, see txnmgr.c. */
-+
-+extern int init_txnmgr_static(void);
-+extern void done_txnmgr_static(void);
-+
-+extern void reiser4_init_txnmgr(txn_mgr *);
-+extern void reiser4_done_txnmgr(txn_mgr *);
-+
-+extern int reiser4_txn_reserve(int reserved);
-+
-+extern void reiser4_txn_begin(reiser4_context * context);
-+extern int reiser4_txn_end(reiser4_context * context);
-+
-+extern void reiser4_txn_restart(reiser4_context * context);
-+extern void reiser4_txn_restart_current(void);
-+
-+extern int txnmgr_force_commit_all(struct super_block *, int);
-+extern int current_atom_should_commit(void);
-+
-+extern jnode *find_first_dirty_jnode(txn_atom *, int);
-+
-+extern int commit_some_atoms(txn_mgr *);
-+extern int force_commit_atom(txn_handle *);
-+extern int flush_current_atom(int, long, long *, txn_atom **, jnode *);
-+
-+extern int flush_some_atom(jnode *, long *, const struct writeback_control *, int);
-+
-+extern void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage);
-+
-+extern int same_slum_check(jnode * base, jnode * check, int alloc_check,
-+ int alloc_value);
-+extern void atom_dec_and_unlock(txn_atom * atom);
-+
-+extern int reiser4_try_capture(jnode * node, znode_lock_mode mode, txn_capture flags);
-+extern int try_capture_page_to_invalidate(struct page *pg);
-+
-+extern void reiser4_uncapture_page(struct page *pg);
-+extern void reiser4_uncapture_block(jnode *);
-+extern void reiser4_uncapture_jnode(jnode *);
-+
-+extern int reiser4_capture_inode(struct inode *);
-+extern int reiser4_uncapture_inode(struct inode *);
-+
-+extern txn_atom *get_current_atom_locked_nocheck(void);
-+
-+#if REISER4_DEBUG
-+
-+/**
-+ * atom_is_protected - make sure that nobody but us can do anything with atom
-+ * @atom: atom to be checked
-+ *
-+ * Meant to be used inside assert(): an atom that has reached a commit stage
-+ * (ASTAGE_PRE_COMMIT or later) passes as is, any other atom must be spin
-+ * locked, which is checked with assert_spin_locked(). Always returns 1.
-+ */
-+static inline int atom_is_protected(txn_atom *atom)
-+{
-+	if (atom->stage < ASTAGE_PRE_COMMIT)
-+		assert_spin_locked(&(atom->alock));
-+
-+	return 1;
-+}
-+
-+#endif
-+
-+/* Checked wrapper around get_current_atom_locked_nocheck(): asserts that a
-+ * current atom is present, so this may not return NULL. */
-+static inline txn_atom *get_current_atom_locked(void)
-+{
-+	txn_atom *const current_atom = get_current_atom_locked_nocheck();
-+
-+	assert("zam-761", current_atom != NULL);
-+	return current_atom;
-+}
-+
-+extern txn_atom *jnode_get_atom(jnode *);
-+
-+extern void reiser4_atom_wait_event(txn_atom *);
-+extern void reiser4_atom_send_event(txn_atom *);
-+
-+extern void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node);
-+extern int reiser4_capture_super_block(struct super_block *s);
-+int capture_bulk(jnode **, int count);
-+
-+/* See the comment on the function blocknrset.c:blocknr_set_add for the
-+ calling convention of these three routines. */
-+extern void blocknr_set_init(struct list_head * bset);
-+extern void blocknr_set_destroy(struct list_head * bset);
-+extern void blocknr_set_merge(struct list_head * from, struct list_head * into);
-+extern int blocknr_set_add_extent(txn_atom * atom,
-+ struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * start,
-+ const reiser4_block_nr * len);
-+extern int blocknr_set_add_pair(txn_atom * atom, struct list_head * bset,
-+ blocknr_set_entry ** new_bsep,
-+ const reiser4_block_nr * a,
-+ const reiser4_block_nr * b);
-+
-+typedef int (*blocknr_set_actor_f) (txn_atom *, const reiser4_block_nr *,
-+ const reiser4_block_nr *, void *);
-+
-+extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
-+ blocknr_set_actor_f actor, void *data,
-+ int delete);
-+
-+/* flush code takes care about how to fuse flush queues */
-+extern void flush_init_atom(txn_atom * atom);
-+extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
-+
-+static inline void spin_lock_atom(txn_atom *atom)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_atom) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(atom->alock));
-+
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline void spin_lock_atom_nested(txn_atom *atom)
-+{
-+ assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock_nested(&(atom->alock), SINGLE_DEPTH_NESTING);
-+
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_atom(txn_atom *atom)
-+{
-+ if (spin_trylock(&(atom->alock))) {
-+ LOCK_CNT_INC(spin_locked_atom);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_atom(txn_atom *atom)
-+{
-+ assert_spin_locked(&(atom->alock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_atom);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(atom->alock));
-+}
-+
-+static inline void spin_lock_txnh(txn_handle *txnh)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(txnh->hlock));
-+
-+ LOCK_CNT_INC(spin_locked_txnh);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_txnh(txn_handle *txnh)
-+{
-+ if (spin_trylock(&(txnh->hlock))) {
-+ LOCK_CNT_INC(spin_locked_txnh);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_txnh(txn_handle *txnh)
-+{
-+ assert_spin_locked(&(txnh->hlock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnh));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_txnh);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(txnh->hlock));
-+}
-+
-+#define spin_ordering_pred_txnmgr(tmgr) /* true iff none of the locks listed below is held */ \
-+ ( LOCK_CNT_NIL(spin_locked_atom) && \
-+ LOCK_CNT_NIL(spin_locked_txnh) && \
-+ LOCK_CNT_NIL(spin_locked_jnode) && \
-+ LOCK_CNT_NIL(spin_locked_zlock) && /* same counter spin_lock_txnmgr() asserts on */ \
-+ LOCK_CNT_NIL(rw_locked_dk) && \
-+ LOCK_CNT_NIL(rw_locked_tree) )
-+
-+static inline void spin_lock_txnmgr(txn_mgr *mgr)
-+{
-+ /* check that spinlocks of lower priorities are not held */
-+ assert("", (LOCK_CNT_NIL(spin_locked_atom) &&
-+ LOCK_CNT_NIL(spin_locked_txnh) &&
-+ LOCK_CNT_NIL(spin_locked_jnode) &&
-+ LOCK_CNT_NIL(spin_locked_zlock) &&
-+ LOCK_CNT_NIL(rw_locked_dk) &&
-+ LOCK_CNT_NIL(rw_locked_tree)));
-+
-+ spin_lock(&(mgr->tmgr_lock));
-+
-+ LOCK_CNT_INC(spin_locked_txnmgr);
-+ LOCK_CNT_INC(spin_locked);
-+}
-+
-+static inline int spin_trylock_txnmgr(txn_mgr *mgr)
-+{
-+ if (spin_trylock(&(mgr->tmgr_lock))) {
-+ LOCK_CNT_INC(spin_locked_txnmgr);
-+ LOCK_CNT_INC(spin_locked);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+static inline void spin_unlock_txnmgr(txn_mgr *mgr)
-+{
-+ assert_spin_locked(&(mgr->tmgr_lock));
-+ assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr));
-+ assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
-+
-+ LOCK_CNT_DEC(spin_locked_txnmgr);
-+ LOCK_CNT_DEC(spin_locked);
-+
-+ spin_unlock(&(mgr->tmgr_lock));
-+}
-+
-+typedef enum {
-+ FQ_IN_USE = 0x1
-+} flush_queue_state_t;
-+
-+typedef struct flush_queue flush_queue_t;
-+
-+/* This is an accumulator for jnodes prepared for writing to disk. A flush queue
-+ is filled by the jnode_flush() routine, and written to disk under memory
-+ pressure or at atom commit time. */
-+/* LOCKING: fq state and fq->atom are protected by guard spinlock; the fq->nr_queued
-+ field and fq->prepped list can be modified if atom is spin-locked and the fq
-+ object is in "in-use" state. For read-only traversal of the fq->prepped list
-+ and reading of the fq->nr_queued field it is enough to keep fq "in-use" or
-+ only have atom spin-locked. */
-+struct flush_queue {
-+ /* linkage element is the first in this structure to make debugging
-+ easier. See field in atom struct for description of list. */
-+ struct list_head alink;
-+ /* A spinlock to protect changes of fq state and fq->atom pointer */
-+ spinlock_t guard;
-+ /* flush_queue state: [in_use | ready] */
-+ flush_queue_state_t state;
-+ /* A list which contains queued nodes, queued nodes are removed from any
-+ * atom's list and put on this ->prepped one. */
-+ struct list_head prepped;
-+ /* number of submitted i/o requests */
-+ atomic_t nr_submitted;
-+ /* number of i/o errors */
-+ atomic_t nr_errors;
-+ /* An atom this flush queue is attached to */
-+ txn_atom *atom;
-+ /* A wait queue head to wait on i/o completion */
-+ wait_queue_head_t wait;
-+#if REISER4_DEBUG
-+ /* A thread which took this fq in exclusive use, NULL if fq is free,
-+ * used for debugging. */
-+ struct task_struct *owner;
-+#endif
-+};
-+
-+extern int reiser4_fq_by_atom(txn_atom *, flush_queue_t **);
-+extern void reiser4_fq_put_nolock(flush_queue_t *);
-+extern void reiser4_fq_put(flush_queue_t *);
-+extern void reiser4_fuse_fq(txn_atom * to, txn_atom * from);
-+extern void queue_jnode(flush_queue_t *, jnode *);
-+
-+extern int reiser4_write_fq(flush_queue_t *, long *, int);
-+extern int current_atom_finish_all_fq(void);
-+extern void init_atom_fq_parts(txn_atom *);
-+
-+extern reiser4_block_nr txnmgr_count_deleted_blocks(void);
-+
-+extern void znode_make_dirty(znode * node);
-+extern void jnode_make_dirty_locked(jnode * node);
-+
-+extern int reiser4_sync_atom(txn_atom * atom);
-+
-+#if REISER4_DEBUG
-+extern int atom_fq_parts_are_clean(txn_atom *);
-+#endif
-+
-+extern void add_fq_to_bio(flush_queue_t *, struct bio *);
-+extern flush_queue_t *get_fq_for_current_atom(void);
-+
-+void reiser4_invalidate_list(struct list_head * head);
-+
-+# endif /* __REISER4_TXNMGR_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/type_safe_hash.h linux-2.6.24/fs/reiser4/type_safe_hash.h
---- linux-2.6.24.orig/fs/reiser4/type_safe_hash.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/type_safe_hash.h 2008-01-25 11:39:07.112253026 +0300
-@@ -0,0 +1,320 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* A hash table class that uses hash chains (singly-linked) and is
-+ parametrized to provide type safety. */
-+
-+#ifndef __REISER4_TYPE_SAFE_HASH_H__
-+#define __REISER4_TYPE_SAFE_HASH_H__
-+
-+#include "debug.h"
-+
-+#include <asm/errno.h>
-+/* Step 1: Use TYPE_SAFE_HASH_DECLARE() to define the TABLE and LINK objects
-+ based on the object type. You need to declare the item type before
-+ this definition, define it after this definition. */
-+#define TYPE_SAFE_HASH_DECLARE(PREFIX,ITEM_TYPE) \
-+ \
-+typedef struct PREFIX##_hash_table_ PREFIX##_hash_table; \
-+typedef struct PREFIX##_hash_link_ PREFIX##_hash_link; \
-+ \
-+struct PREFIX##_hash_table_ \
-+{ \
-+ ITEM_TYPE **_table; \
-+ __u32 _buckets; \
-+}; \
-+ \
-+struct PREFIX##_hash_link_ \
-+{ \
-+ ITEM_TYPE *_next; \
-+}
-+
-+/* Step 2: Define the object type of the hash: give it field of type
-+ PREFIX_hash_link. */
-+
-+/* Step 3: Use TYPE_SAFE_HASH_DEFINE to define the hash table interface using
-+ the type and field name used in step 2. The arguments are:
-+
-+ ITEM_TYPE The item type being hashed
-+ KEY_TYPE The type of key being hashed
-+ KEY_NAME The name of the key field within the item
-+ LINK_NAME The name of the link field within the item (which you must make of type PREFIX_hash_link)
-+ HASH_FUNC The name of the hash function (or macro, takes const pointer to key)
-+ EQ_FUNC The name of the equality function (or macro, takes const pointer to two keys)
-+
-+ It implements these functions:
-+
-+ prefix_hash_init Initialize the table given its size.
-+ prefix_hash_insert Insert an item
-+ prefix_hash_insert_index Insert an item w/ precomputed hash_index
-+ prefix_hash_find Find an item by key
-+ prefix_hash_find_index Find an item w/ precomputed hash_index
-+ prefix_hash_remove Remove an item, returns 1 if found, 0 if not found
-+ prefix_hash_remove_index Remove an item w/ precomputed hash_index
-+
-+ If you'd like something to be done differently, feel free to ask me
-+ for modifications. Additional features that could be added but
-+ have not been:
-+
-+ prefix_hash_remove_key Find and remove an item by key
-+ prefix_hash_remove_key_index Find and remove an item by key w/ precomputed hash_index
-+
-+ The hash_function currently receives only the key as an argument,
-+ meaning it must somehow know the number of buckets. If this is a
-+ problem let me know.
-+
-+ This hash table uses a single-linked hash chain. This means
-+ insertion is fast but deletion requires searching the chain.
-+
-+ There is also the doubly-linked hash chain approach, under which
-+ deletion requires no search but the code is longer and it takes two
-+ pointers per item.
-+
-+ The circularly-linked approach has the shortest code but requires
-+ two pointers per bucket, doubling the size of the bucket array (in
-+ addition to two pointers per item).
-+*/
-+#define TYPE_SAFE_HASH_DEFINE(PREFIX,ITEM_TYPE,KEY_TYPE,KEY_NAME,LINK_NAME,HASH_FUNC,EQ_FUNC) \
-+ \
-+static __inline__ void \
-+PREFIX##_check_hash (PREFIX##_hash_table *table UNUSED_ARG, \
-+ __u32 hash UNUSED_ARG) \
-+{ \
-+ assert("nikita-2780", hash < table->_buckets); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_init (PREFIX##_hash_table *hash, \
-+ __u32 buckets) \
-+{ \
-+ hash->_table = (ITEM_TYPE**) KMALLOC (sizeof (ITEM_TYPE*) * buckets); \
-+ hash->_buckets = buckets; \
-+ if (hash->_table == NULL) \
-+ { \
-+ return RETERR(-ENOMEM); \
-+ } \
-+ memset (hash->_table, 0, sizeof (ITEM_TYPE*) * buckets); \
-+ ON_DEBUG(printk(#PREFIX "_hash_table: %i buckets\n", buckets)); \
-+ return 0; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_done (PREFIX##_hash_table *hash) \
-+{ \
-+ if (REISER4_DEBUG && hash->_table != NULL) { \
-+ __u32 i; \
-+ for (i = 0 ; i < hash->_buckets ; ++ i) \
-+ assert("nikita-2905", hash->_table[i] == NULL); \
-+ } \
-+ if (hash->_table != NULL) \
-+ KFREE (hash->_table, sizeof (ITEM_TYPE*) * hash->_buckets); \
-+ hash->_table = NULL; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_prefetch_next (ITEM_TYPE *item) \
-+{ \
-+ prefetch(item->LINK_NAME._next); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_prefetch_bucket (PREFIX##_hash_table *hash, \
-+ __u32 index) \
-+{ \
-+ prefetch(hash->_table[index]); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* /* find item with key *find_key in bucket hash_index, NULL if absent */ \
-+PREFIX##_hash_find_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ ITEM_TYPE *item; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ for (item = hash->_table[hash_index]; \
-+ item != NULL; \
-+ item = item->LINK_NAME._next) \
-+ { \
-+ prefetch(item->LINK_NAME._next); \
-+ prefetch((char *)item->LINK_NAME._next + offsetof(ITEM_TYPE, KEY_NAME)); /* byte offset of the next item's key, hence the char * cast */ \
-+ if (EQ_FUNC (& item->KEY_NAME, find_key)) \
-+ { \
-+ return item; \
-+ } \
-+ } \
-+ \
-+ return NULL; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find_index_lru (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ ITEM_TYPE ** item = &hash->_table[hash_index]; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ while (*item != NULL) { \
-+ prefetch(&(*item)->LINK_NAME._next); \
-+ if (EQ_FUNC (&(*item)->KEY_NAME, find_key)) { \
-+ ITEM_TYPE *found; \
-+ \
-+ found = *item; \
-+ *item = found->LINK_NAME._next; \
-+ found->LINK_NAME._next = hash->_table[hash_index]; \
-+ hash->_table[hash_index] = found; \
-+ return found; \
-+ } \
-+ item = &(*item)->LINK_NAME._next; \
-+ } \
-+ return NULL; \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ ITEM_TYPE ** hash_item_p = &hash->_table[hash_index]; \
-+ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ while (*hash_item_p != NULL) { \
-+ prefetch(&(*hash_item_p)->LINK_NAME._next); \
-+ if (*hash_item_p == del_item) { \
-+ *hash_item_p = (*hash_item_p)->LINK_NAME._next; \
-+ return 1; \
-+ } \
-+ hash_item_p = &(*hash_item_p)->LINK_NAME._next; \
-+ } \
-+ return 0; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_index (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ ins_item->LINK_NAME._next = hash->_table[hash_index]; \
-+ hash->_table[hash_index] = ins_item; \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_index_rcu (PREFIX##_hash_table *hash, \
-+ __u32 hash_index, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ PREFIX##_check_hash(hash, hash_index); \
-+ \
-+ ins_item->LINK_NAME._next = hash->_table[hash_index]; \
-+ smp_wmb(); \
-+ hash->_table[hash_index] = ins_item; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find (PREFIX##_hash_table *hash, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ return PREFIX##_hash_find_index (hash, HASH_FUNC(hash, find_key), find_key); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE* \
-+PREFIX##_hash_find_lru (PREFIX##_hash_table *hash, \
-+ KEY_TYPE const *find_key) \
-+{ \
-+ return PREFIX##_hash_find_index_lru (hash, HASH_FUNC(hash, find_key), find_key); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ return PREFIX##_hash_remove_index (hash, \
-+ HASH_FUNC(hash, &del_item->KEY_NAME), del_item); \
-+} \
-+ \
-+static __inline__ int \
-+PREFIX##_hash_remove_rcu (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *del_item) \
-+{ \
-+ return PREFIX##_hash_remove (hash, del_item); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ return PREFIX##_hash_insert_index (hash, \
-+ HASH_FUNC(hash, &ins_item->KEY_NAME), ins_item); \
-+} \
-+ \
-+static __inline__ void \
-+PREFIX##_hash_insert_rcu (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *ins_item) \
-+{ \
-+ return PREFIX##_hash_insert_index_rcu (hash, HASH_FUNC(hash, &ins_item->KEY_NAME), \
-+ ins_item); \
-+} \
-+ \
-+static __inline__ ITEM_TYPE * \
-+PREFIX##_hash_first (PREFIX##_hash_table *hash, __u32 ind) \
-+{ \
-+ ITEM_TYPE *first; \
-+ \
-+ for (first = NULL; ind < hash->_buckets; ++ ind) { \
-+ first = hash->_table[ind]; \
-+ if (first != NULL) \
-+ break; \
-+ } \
-+ return first; \
-+} \
-+ \
-+static __inline__ ITEM_TYPE * \
-+PREFIX##_hash_next (PREFIX##_hash_table *hash, \
-+ ITEM_TYPE *item) \
-+{ \
-+ ITEM_TYPE *next; \
-+ \
-+ if (item == NULL) \
-+ return NULL; \
-+ next = item->LINK_NAME._next; \
-+ if (next == NULL) \
-+ next = PREFIX##_hash_first (hash, HASH_FUNC(hash, &item->KEY_NAME) + 1); \
-+ return next; \
-+} \
-+ \
-+typedef struct {} PREFIX##_hash_dummy
-+
-+#define for_all_ht_buckets(table, head) \
-+for ((head) = &(table) -> _table[ 0 ] ; \
-+ (head) != &(table) -> _table[ (table) -> _buckets ] ; ++ (head))
-+
-+#define for_all_in_bucket(bucket, item, next, field) \
-+for ((item) = *(bucket), (next) = (item) ? (item) -> field._next : NULL ; \
-+ (item) != NULL ; \
-+ (item) = (next), (next) = (item) ? (item) -> field._next : NULL )
-+
-+#define for_all_in_htable(table, prefix, item, next) \
-+for ((item) = prefix ## _hash_first ((table), 0), \
-+ (next) = prefix ## _hash_next ((table), (item)) ; \
-+ (item) != NULL ; \
-+ (item) = (next), \
-+ (next) = prefix ## _hash_next ((table), (item)))
-+
-+/* __REISER4_TYPE_SAFE_HASH_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/vfs_ops.c linux-2.6.24/fs/reiser4/vfs_ops.c
---- linux-2.6.24.orig/fs/reiser4/vfs_ops.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/vfs_ops.c 2008-01-25 11:39:07.112253026 +0300
-@@ -0,0 +1,259 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Interface to VFS. Reiser4 {super|export|dentry}_operations are defined
-+ here. */
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "coord.h"
-+#include "plugin/item/item.h"
-+#include "plugin/file/file.h"
-+#include "plugin/security/perm.h"
-+#include "plugin/disk_format/disk_format.h"
-+#include "plugin/plugin.h"
-+#include "plugin/plugin_set.h"
-+#include "plugin/object.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "vfs_ops.h"
-+#include "inode.h"
-+#include "page_cache.h"
-+#include "ktxnmgrd.h"
-+#include "super.h"
-+#include "reiser4.h"
-+#include "entd.h"
-+#include "status_flags.h"
-+#include "flush.h"
-+#include "dscale.h"
-+
-+#include <linux/profile.h>
-+#include <linux/types.h>
-+#include <linux/mount.h>
-+#include <linux/vfs.h>
-+#include <linux/mm.h>
-+#include <linux/buffer_head.h>
-+#include <linux/dcache.h>
-+#include <linux/list.h>
-+#include <linux/pagemap.h>
-+#include <linux/slab.h>
-+#include <linux/seq_file.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/writeback.h>
-+#include <linux/blkdev.h>
-+#include <linux/quotaops.h>
-+#include <linux/security.h>
-+#include <linux/reboot.h>
-+#include <linux/rcupdate.h>
-+
-+/* update inode stat-data by calling plugin */
-+int reiser4_update_sd(struct inode *object)
-+{
-+ file_plugin *fplug;
-+
-+ assert("nikita-2338", object != NULL);
-+ /* check for read-only file system. */
-+ if (IS_RDONLY(object))
-+ return 0;
-+
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-2339", fplug != NULL);
-+ return fplug->write_sd_by_inode(object);
-+}
-+
-+/* helper function: increase inode nlink count and call plugin method to save
-+ updated stat-data.
-+
-+ Used by link/create and during creation of dot and dotdot in mkdir
-+*/
-+int reiser4_add_nlink(struct inode *object /* object to which link is added */ ,
-+ struct inode *parent /* parent where new entry will be */
-+ ,
-+ int write_sd_p /* true if stat-data has to be
-+ * updated */ )
-+{
-+ file_plugin *fplug;
-+ int result;
-+
-+ assert("nikita-1351", object != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-1445", fplug != NULL);
-+
-+ /* ask plugin whether it can add yet another link to this
-+ object */
-+ if (!fplug->can_add_link(object))
-+ return RETERR(-EMLINK);
-+
-+ assert("nikita-2211", fplug->add_link != NULL);
-+ /* call plugin to do actual addition of link */
-+ result = fplug->add_link(object, parent);
-+
-+ /* optionally update stat data */
-+ if (result == 0 && write_sd_p)
-+ result = fplug->write_sd_by_inode(object);
-+ return result;
-+}
-+
-+/* helper function: decrease inode nlink count and call plugin method to save
-+ updated stat-data.
-+
-+ Used by unlink/create
-+*/
-+int reiser4_del_nlink(struct inode *object /* object from which link is
-+ * removed */ ,
-+ struct inode *parent /* parent where entry was */ ,
-+ int write_sd_p /* true if stat-data has to be
-+ * updated */ )
-+{
-+ file_plugin *fplug;
-+ int result;
-+
-+ assert("nikita-1349", object != NULL);
-+
-+ fplug = inode_file_plugin(object);
-+ assert("nikita-1350", fplug != NULL);
-+ assert("nikita-1446", object->i_nlink > 0);
-+ assert("nikita-2210", fplug->rem_link != NULL);
-+
-+ /* call plugin to do actual deletion of link */
-+ result = fplug->rem_link(object, parent);
-+
-+ /* optionally update stat data */
-+ if (result == 0 && write_sd_p)
-+ result = fplug->write_sd_by_inode(object);
-+ return result;
-+}
-+
-+/* Release reiser4 dentry. This is d_op->d_release() method. */
-+static void reiser4_d_release(struct dentry *dentry /* dentry released */ )
-+{
-+ reiser4_free_dentry_fsdata(dentry);
-+}
-+
-+/*
-+ * Called by reiser4_sync_inodes(), during speculative write-back (through
-+ * pdflush, or balance_dirty_pages()).
-+ */
-+void reiser4_writeout(struct super_block *sb, struct writeback_control *wbc)
-+{
-+ long written = 0;
-+ int repeats = 0;
-+ int result;
-+ struct address_space *mapping;
-+
-+ /*
-+ * Performs early flushing, trying to free some memory. If there is
-+ * nothing to flush, commits some atoms.
-+ */
-+
-+ /* Commit all atoms if reiser4_writepages() is called from sys_sync() or
-+ sys_fsync(). */
-+ if (wbc->sync_mode != WB_SYNC_NONE) {
-+ txnmgr_force_commit_all(sb, 0);
-+ return;
-+ }
-+
-+ BUG_ON(reiser4_get_super_fake(sb) == NULL);
-+ mapping = reiser4_get_super_fake(sb)->i_mapping;
-+ do {
-+ long nr_submitted = 0;
-+ jnode *node = NULL;
-+
-+ /* do not put more requests to overload write queue */
-+ if (wbc->nonblocking &&
-+ bdi_write_congested(mapping->backing_dev_info)) {
-+ blk_run_address_space(mapping);
-+ wbc->encountered_congestion = 1;
-+ break;
-+ }
-+ repeats++;
-+ BUG_ON(wbc->nr_to_write <= 0);
-+
-+ if (get_current_context()->entd) {
-+ entd_context *ent = get_entd_context(sb);
-+
-+ if (ent->cur_request->node)
-+ /*
-+ * this is ent thread and it managed to capture
-+ * requested page itself - start flush from
-+ * that page
-+ */
-+ node = jref(ent->cur_request->node);
-+ }
-+
-+ result = flush_some_atom(node, &nr_submitted, wbc,
-+ JNODE_FLUSH_WRITE_BLOCKS);
-+ if (result != 0)
-+ warning("nikita-31001", "Flush failed: %i", result);
-+ if (node)
-+ jput(node);
-+ if (!nr_submitted)
-+ break;
-+
-+ wbc->nr_to_write -= nr_submitted;
-+ written += nr_submitted;
-+ } while (wbc->nr_to_write > 0);
-+}
-+
-+void reiser4_throttle_write(struct inode *inode)
-+{
-+ reiser4_txn_restart_current();
-+ balance_dirty_pages_ratelimited(inode->i_mapping);
-+}
-+
-+const char *REISER4_SUPER_MAGIC_STRING = "ReIsEr4";
-+const int REISER4_MAGIC_OFFSET = 16 * 4096; /* offset to magic string from the
-+ * beginning of device */
-+
-+/*
-+ * Reiser4 initialization/shutdown.
-+ *
-+ * Code below performs global reiser4 initialization that is done either as
-+ * part of kernel initialization (when reiser4 is statically built-in), or
-+ * during reiser4 module load (when compiled as module).
-+ */
-+
-+void reiser4_handle_error(void)
-+{
-+ struct super_block *sb = reiser4_get_current_sb();
-+
-+ if (!sb)
-+ return;
-+ reiser4_status_write(REISER4_STATUS_DAMAGED, 0,
-+ "Filesystem error occured");
-+ switch (get_super_private(sb)->onerror) {
-+ case 0:
-+ reiser4_panic("foobar-42", "Filesystem error occured\n");
-+ case 1:
-+ default:
-+ if (sb->s_flags & MS_RDONLY)
-+ return;
-+ sb->s_flags |= MS_RDONLY;
-+ break;
-+ }
-+}
-+
-+struct dentry_operations reiser4_dentry_operations = {
-+ .d_revalidate = NULL,
-+ .d_hash = NULL,
-+ .d_compare = NULL,
-+ .d_delete = NULL,
-+ .d_release = reiser4_d_release,
-+ .d_iput = NULL,
-+};
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/vfs_ops.h linux-2.6.24/fs/reiser4/vfs_ops.h
---- linux-2.6.24.orig/fs/reiser4/vfs_ops.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/vfs_ops.h 2008-01-25 11:39:07.112253026 +0300
-@@ -0,0 +1,53 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* vfs_ops.c's exported symbols */
-+
-+#if !defined( __FS_REISER4_VFS_OPS_H__ )
-+#define __FS_REISER4_VFS_OPS_H__
-+
-+#include "forward.h"
-+#include "coord.h"
-+#include "seal.h"
-+#include "plugin/file/file.h"
-+#include "super.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h> /* for loff_t */
-+#include <linux/fs.h> /* for struct address_space */
-+#include <linux/dcache.h> /* for struct dentry */
-+#include <linux/mm.h>
-+#include <linux/backing-dev.h>
-+
-+/* address space operations */
-+int reiser4_writepage(struct page *, struct writeback_control *);
-+int reiser4_set_page_dirty(struct page *);
-+void reiser4_invalidatepage(struct page *, unsigned long offset);
-+int reiser4_releasepage(struct page *, gfp_t);
-+
-+extern int reiser4_update_sd(struct inode *);
-+extern int reiser4_add_nlink(struct inode *, struct inode *, int);
-+extern int reiser4_del_nlink(struct inode *, struct inode *, int);
-+
-+extern int reiser4_start_up_io(struct page *page);
-+extern void reiser4_throttle_write(struct inode *);
-+extern int jnode_is_releasable(jnode *);
-+
-+#define CAPTURE_APAGE_BURST (1024l)
-+void reiser4_writeout(struct super_block *, struct writeback_control *);
-+
-+extern void reiser4_handle_error(void);
-+
-+/* __FS_REISER4_VFS_OPS_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/wander.c linux-2.6.24/fs/reiser4/wander.c
---- linux-2.6.24.orig/fs/reiser4/wander.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/wander.c 2008-01-25 11:39:07.116254057 +0300
-@@ -0,0 +1,1797 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Reiser4 Wandering Log */
-+
-+/* You should read http://www.namesys.com/txn-doc.html
-+
-+ That describes how filesystem operations are performed as atomic
-+ transactions, and how we try to arrange it so that we can write most of the
-+ data only once while performing the operation atomically.
-+
-+ For the purposes of this code, it is enough for it to understand that it
-+ has been told a given block should be written either once, or twice (if
-+ twice then once to the wandered location and once to the real location).
-+
-+ This code guarantees that those blocks that are defined to be part of an
-+ atom either all take effect or none of them take effect.
-+
-+ Relocate set nodes are submitted to write by the jnode_flush() routine, and
-+ the overwrite set is submitted by reiser4_write_log(). This is because with
-+ the overwrite set we seek to optimize writes, and with the relocate set we
-+ seek to cause disk order to correlate with the parent first pre-order.
-+
-+ reiser4_write_log() allocates and writes wandered blocks and maintains
-+ additional on-disk structures of the atom as wander records (each wander
-+ record occupies one block) for storing of the "wandered map" (a table which
-+ contains a relation between wandered and real block numbers) and other
-+ information which might be needed at transaction recovery time.
-+
-+ The wander records are unidirectionally linked into a circle: each wander
-+ record contains a block number of the next wander record, the last wander
-+ record points to the first one.
-+
-+ One wander record (named "tx head" in this file) has a format which is
-+ different from the other wander records. The "tx head" has a reference to the
-+ "tx head" block of the previously committed atom. Also, "tx head" contains
-+ fs information (the free blocks counter, and the oid allocator state) which
-+ is logged in a special way.
-+
-+ There are two journal control blocks, named journal header and journal
-+ footer which have fixed on-disk locations. The journal header has a
-+ reference to the "tx head" block of the last committed atom. The journal
-+ footer points to the "tx head" of the last flushed atom. The atom is
-+ "played" when all blocks from its overwrite set are written to disk the
-+ second time (i.e. written to their real locations).
-+
-+ NOTE: People who know reiserfs internals and its journal structure might be
-+ confused with these terms journal footer and journal header. There is a table
-+ with terms of similar semantics in reiserfs (reiser3) and reiser4:
-+
-+ REISER3 TERM | REISER4 TERM | DESCRIPTION
-+ --------------------+-----------------------+----------------------------
-+ commit record | journal header | atomic write of this record
-+ | | ends transaction commit
-+ --------------------+-----------------------+----------------------------
-+ journal header | journal footer | atomic write of this record
-+ | | ends post-commit writes.
-+ | | After successful
-+ | | writing of this record, journal
-+ | | blocks (in reiser3) or
-+ | | wandered blocks/records are
-+ | | free for re-use.
-+ --------------------+-----------------------+----------------------------
-+
-+ The atom commit process is the following:
-+
-+ 1. The overwrite set is taken from atom's clean list, and its size is
-+ counted.
-+
-+ 2. The number of necessary wander records (including tx head) is calculated,
-+ and the wander record blocks are allocated.
-+
-+ 3. Allocate wandered blocks and populate wander records with the wandered map.
-+
-+ 4. submit write requests for wander records and wandered blocks.
-+
-+ 5. wait until submitted write requests complete.
-+
-+ 6. update journal header: change the pointer to the block number of just
-+ written tx head, submit an i/o for modified journal header block and wait
-+ for i/o completion.
-+
-+ NOTE: The special logging for bitmap blocks and some reiser4 super block
-+ fields makes processes of atom commit, flush and recovering a bit more
-+ complex (see comments in the source code for details).
-+
-+ The atom playing process is the following:
-+
-+ 1. Write atom's overwrite set in-place.
-+
-+ 2. Wait on i/o.
-+
-+ 3. Update journal footer: change the pointer to block number of tx head
-+ block of the atom we are currently flushing, submit an i/o, wait on i/o
-+ completion.
-+
-+ 4. Free disk space which was used for wandered blocks and wander records.
-+
-+ After the freeing of wandered blocks and wander records we have that journal
-+ footer points to the on-disk structure which might be overwritten soon.
-+ Neither the log writer nor the journal recovery procedure use that pointer
-+ for accessing the data. When the journal recovery procedure finds the oldest
-+ transaction it compares the journal footer pointer value with the "prev_tx"
-+ pointer value in tx head, if values are equal the oldest not flushed
-+ transaction is found.
-+
-+ NOTE on disk space leakage: the information about which blocks and how many
-+ blocks are allocated for wandered blocks and wander records is not written to
-+ the disk because of special logging for bitmaps and some super block
-+ counters. After a system crash reiser4 does not remember those
-+ allocations, thus there is no disk space leakage of this kind.
-+*/
-+
-+/* Special logging of reiser4 super block fields. */
-+
-+/* There are some reiser4 super block fields (free block count and OID allocator
-+ state (number of files and next free OID) which are logged separately from
-+ super block to avoid unnecessary atom fusion.
-+
-+ So, the reiser4 super block need not be captured by a transaction which
-+ allocates/deallocates disk blocks or creates/deletes file objects. Moreover,
-+ the reiser4 on-disk super block is not touched when such a transaction is
-+ committed and flushed. Those "counters logged specially" are logged in "tx
-+ head" blocks and in the journal footer block.
-+
-+ A step-by-step description of special logging:
-+
-+ 0. The per-atom information about deleted or created files and allocated or
-+ freed blocks is collected during the transaction. The atom's
-+ ->nr_objects_created and ->nr_objects_deleted are for object
-+ deletion/creation tracking, the numbers of allocated and freed blocks are
-+ calculated using atom's delete set and atom's capture list -- all new and
-+ relocated nodes should be on atom's clean list and should have JNODE_RELOC
-+ bit set.
-+
-+ 1. The "logged specially" reiser4 super block fields have their "committed"
-+ versions in the reiser4 in-memory super block. They get modified only at
-+ atom commit time. The atom's commit thread has exclusive access to those
-+ "committed" fields because the log writer implementation supports only one
-+ atom commit at a time (there is a per-fs "commit" mutex). At
-+ that time "committed" counters are modified using per-atom information
-+ collected during the transaction. These counters are stored on disk as a
-+ part of tx head block when atom is committed.
-+
-+ 2. When the atom is flushed the value of the free block counter and the OID
-+ allocator state get written to the journal footer block. A special journal
-+ procedure (journal_recover_sb_data()) takes those values from the journal
-+ footer and updates the reiser4 in-memory super block.
-+
-+ NOTE: That means free block count and OID allocator state are logged
-+ separately from the reiser4 super block regardless of the fact that the
-+ reiser4 super block has fields to store both the free block counter and the
-+ OID allocator.
-+
-+ Writing the whole super block at commit time requires knowing true values of
-+ all its fields without changes made by not yet committed transactions. It is
-+ possible by having their "committed" version of the super block like the
-+ reiser4 bitmap blocks have "committed" and "working" versions. However,
-+ another scheme was implemented which stores special logged values in the
-+ unused free space inside transaction head block. In my opinion it has an
-+ advantage of not writing whole super block when only part of it was
-+ modified. */
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "page_cache.h"
-+#include "wander.h"
-+#include "reiser4.h"
-+#include "super.h"
-+#include "vfs_ops.h"
-+#include "writeout.h"
-+#include "inode.h"
-+#include "entd.h"
-+
-+#include <linux/types.h>
-+#include <linux/fs.h> /* for struct super_block */
-+#include <linux/mm.h> /* for struct page */
-+#include <linux/pagemap.h>
-+#include <linux/bio.h> /* for struct bio */
-+#include <linux/blkdev.h>
-+
-+static int write_jnodes_to_disk_extent(
-+ jnode *, int, const reiser4_block_nr *, flush_queue_t *, int);
-+
-+/* The commit_handle is a container for objects needed at atom commit time.
-+ It is zeroed and bound to an atom and super block by init_commit_handle();
-+ done_commit_handle() asserts that tx_list has been emptied again. */
-+struct commit_handle {
-+ /* A pointer to atom's list of OVRWR nodes */
-+ struct list_head *overwrite_set;
-+ /* atom's overwrite set size */
-+ int overwrite_set_size;
-+ /* jnodes for wander record blocks; the first entry is the tx head */
-+ struct list_head tx_list;
-+ /* number of wander records, tx head included (see get_tx_size()) */
-+ __u32 tx_size;
-+ /* 'committed' sb counters are saved here until atom is completely
-+ flushed; they are copied into the tx head and the journal footer */
-+ __u64 free_blocks;
-+ __u64 nr_files;
-+ __u64 next_oid;
-+ /* A pointer to the atom which is being committed */
-+ txn_atom *atom;
-+ /* A pointer to current super block */
-+ struct super_block *super;
-+ /* The counter of modified bitmaps (JNODE_BITMAP nodes found in the
-+ overwrite set) */
-+ reiser4_block_nr nr_bitmap;
-+};
-+
-+/* Reset @ch to an all-zero state with an empty tx_list, then bind it to @atom
-+   and to the super block returned by reiser4_get_current_sb(). */
-+static void init_commit_handle(struct commit_handle *ch, txn_atom *atom)
-+{
-+	memset(ch, 0, sizeof(*ch));
-+
-+	ch->super = reiser4_get_current_sb();
-+	ch->atom = atom;
-+	INIT_LIST_HEAD(&ch->tx_list);
-+}
-+
-+/* Final check for a commit handle: asserts that all wander record jnodes
-+ have been removed from ch->tx_list. It releases nothing itself. */
-+static void done_commit_handle(struct commit_handle *ch)
-+{
-+ assert("zam-690", list_empty(&ch->tx_list));
-+}
-+
-+/* Returns 1 while write barriers may be used on @s, 0 once the
-+   REISER4_NO_WRITE_BARRIER flag has been set for it. */
-+static inline int reiser4_use_write_barrier(struct super_block * s)
-+{
-+	if (reiser4_is_set(s, REISER4_NO_WRITE_BARRIER))
-+		return 0;
-+
-+	return 1;
-+}
-+
-+/* Log a notice that @s cannot do barrier writes and set the
-+   REISER4_NO_WRITE_BARRIER bit in its private fs_flags, so that
-+   reiser4_use_write_barrier() returns 0 from now on. */
-+static void disable_write_barrier(struct super_block * s)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+	notice("zam-1055", "%s does not support write barriers,"
-+	       " using synchronous write instead.", s->s_id);
-+
-+	set_bit((int)REISER4_NO_WRITE_BARRIER, &sbinfo->fs_flags);
-+}
-+
-+/* Fill journal header block data: store (little-endian) the block number of
-+   the tx head -- the first jnode on ch->tx_list -- as last_committed_tx. The
-+   header jnode is loaded for the update and released afterwards; no i/o is
-+   submitted here.
-+   NOTE(review): the result of jload() is not checked here, unlike in
-+   format_journal_footer(). */
-+static void format_journal_header(struct commit_handle *ch)
-+{
-+	struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
-+	struct journal_header *header;
-+	jnode *first_record;
-+
-+	assert("zam-479", sbinfo != NULL);
-+	assert("zam-480", sbinfo->journal_header != NULL);
-+
-+	jload(sbinfo->journal_header);
-+
-+	header = (struct journal_header *)jdata(sbinfo->journal_header);
-+	assert("zam-484", header != NULL);
-+
-+	/* the tx head is the first wander record on the list */
-+	first_record = list_entry(ch->tx_list.next, jnode, capture_link);
-+	put_unaligned(cpu_to_le64(*jnode_get_block(first_record)),
-+		      &header->last_committed_tx);
-+
-+	jrelse(sbinfo->journal_header);
-+}
-+
-+/* Fill journal footer block data: store the block number of the tx head (the
-+   first jnode on ch->tx_list) as last_flushed_tx, together with the specially
-+   logged counters saved in @ch (free_blocks, nr_files, next_oid). All values
-+   are stored little-endian. The footer jnode is loaded for the update and
-+   released afterwards; no i/o is submitted here. */
-+static void format_journal_footer(struct commit_handle *ch)
-+{
-+	struct reiser4_super_info_data *sbinfo;
-+	struct journal_footer *footer;
-+	jnode *tx_head;
-+
-+	sbinfo = get_super_private(ch->super);
-+
-+	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
-+
-+	assert("zam-493", sbinfo != NULL);
-+	/* it is the footer jnode (not the header) that is loaded and filled
-+	   below, so that is the pointer which must be valid */
-+	assert("zam-494", sbinfo->journal_footer != NULL);
-+
-+	check_me("zam-691", jload(sbinfo->journal_footer) == 0);
-+
-+	footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
-+	assert("zam-495", footer != NULL);
-+
-+	put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)),
-+		      &footer->last_flushed_tx);
-+	put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks);
-+
-+	put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files);
-+	put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid);
-+
-+	jrelse(sbinfo->journal_footer);
-+}
-+
-+/* Number of wander entries that fit into one wander record: whatever is left
-+   of a block of the current block size after the record header, divided by
-+   the entry size. */
-+static int wander_record_capacity(const struct super_block *super)
-+{
-+	const size_t payload =
-+	    super->s_blocksize - sizeof(struct wander_record_header);
-+
-+	return payload / sizeof(struct wander_entry);
-+}
-+
-+/* Fill the first wander record (tx head) from the data collected in @ch. The
-+   block is zeroed, stamped with TX_HEADER_MAGIC and then receives the record
-+   count, the previously committed tx, the next record block and the
-+   specially logged counters. When the tx head is the only record on the list
-+   its next_block field points back to itself. */
-+static void format_tx_head(struct commit_handle *ch)
-+{
-+	reiser4_super_info_data *sbinfo = get_super_private(ch->super);
-+	struct tx_header *header;
-+	jnode *tx_head;
-+	jnode *next;
-+
-+	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
-+	assert("zam-692", &ch->tx_list != &tx_head->capture_link);
-+
-+	/* the on-disk record list is circular: wrap around at the list head */
-+	if (tx_head->capture_link.next == &ch->tx_list)
-+		next = tx_head;
-+	else
-+		next = list_entry(tx_head->capture_link.next, jnode,
-+				  capture_link);
-+
-+	header = (struct tx_header *)jdata(tx_head);
-+
-+	assert("zam-460", header != NULL);
-+	assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header));
-+
-+	memset(header, 0, (size_t) ch->super->s_blocksize);
-+	memcpy(header, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE);
-+
-+	put_unaligned(cpu_to_le32(ch->tx_size), &header->total);
-+	put_unaligned(cpu_to_le64(sbinfo->last_committed_tx),
-+		      &header->prev_tx);
-+	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &header->next_block);
-+	put_unaligned(cpu_to_le64(ch->free_blocks), &header->free_blocks);
-+	put_unaligned(cpu_to_le64(ch->nr_files), &header->nr_files);
-+	put_unaligned(cpu_to_le64(ch->next_oid), &header->next_oid);
-+}
-+
-+/* Prepare an ordinary wander record block: zero it, stamp it with
-+   WANDER_RECORD_MAGIC and fill the service fields (total record count, this
-+   record's @serial number and the block number of the next record). The
-+   record after the last one on ch->tx_list is the first one (the tx head). */
-+static void
-+format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial)
-+{
-+	struct wander_record_header *LRH;
-+	jnode *next;
-+
-+	assert("zam-464", node != NULL);
-+
-+	LRH = (struct wander_record_header *)jdata(node);
-+
-+	/* wrap around to the first list entry when @node is the last one */
-+	if (node->capture_link.next == &ch->tx_list)
-+		next = list_entry(ch->tx_list.next, jnode, capture_link);
-+	else
-+		next = list_entry(node->capture_link.next, jnode, capture_link);
-+
-+	assert("zam-465", LRH != NULL);
-+	assert("zam-463",
-+	       ch->super->s_blocksize > sizeof(struct wander_record_header));
-+
-+	memset(LRH, 0, (size_t) ch->super->s_blocksize);
-+	memcpy(LRH, WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE);
-+
-+	put_unaligned(cpu_to_le32(ch->tx_size), &LRH->total);
-+	put_unaligned(cpu_to_le32(serial), &LRH->serial);
-+	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &LRH->next_block);
-+}
-+
-+/* Add one wandered map entry to a formatted wander record: slot @index of the
-+   entry array that follows the record header in @node receives the pair
-+   (*@a as "original", *@b as "wandered"), both stored little-endian. */
-+static void
-+store_entry(jnode * node, int index, const reiser4_block_nr * a,
-+	    const reiser4_block_nr * b)
-+{
-+	char *data = jdata(node);
-+	struct wander_entry *slot;
-+
-+	assert("zam-451", data != NULL);
-+
-+	slot = (struct wander_entry *)(data +
-+				       sizeof(struct wander_record_header));
-+	slot += index;
-+
-+	put_unaligned(cpu_to_le64(*a), &slot->original);
-+	put_unaligned(cpu_to_le64(*b), &slot->wandered);
-+}
-+
-+/* Currently wander records contain only the wandered map, so the number of
-+   records depends solely on the overwrite set size. The result is stored in
-+   ch->tx_size. */
-+static void get_tx_size(struct commit_handle *ch)
-+{
-+	int capacity;
-+	int nr_records;
-+
-+	assert("zam-440", ch->overwrite_set_size != 0);
-+	assert("zam-695", ch->tx_size == 0);
-+
-+	capacity = wander_record_capacity(ch->super);
-+
-+	/* ordinary wander records needed for the whole wandered map:
-+	   (<overwrite_set_size> - 1) / <wander_record_capacity> + 1 */
-+	nr_records = (ch->overwrite_set_size - 1) / capacity + 1;
-+
-+	/* ... and one more block for the tx head */
-+	ch->tx_size = nr_records + 1;
-+}
-+
-+/* A special structure for using in store_wmap_actor() for saving its state
-+ between calls. The caller of blocknr_set_iterator() sets it up to point at
-+ the first ordinary wander record (the one after the tx head). */
-+struct store_wmap_params {
-+ jnode *cur; /* jnode of current wander record to fill */
-+ int idx; /* free element index in wander record */
-+ int capacity; /* capacity: number of entries one wander record holds */
-+
-+#if REISER4_DEBUG
-+ /* head of the wander record list; store_wmap_actor() asserts that
-+ advancing ->cur never wraps around to it */
-+ struct list_head *tx_list;
-+#endif
-+};
-+
-+/* An actor for use in the blocknr_set_iterator() routine: it populates the
-+   list of pre-formatted wander records with wandered map info. Each call
-+   stores one pair (*@a, *@b) into the current record. @data points to a
-+   struct store_wmap_params which keeps the fill position between calls; once
-+   the current record is full the actor moves on to the next jnode on the
-+   list. Always returns 0. */
-+static int
-+store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
-+		 const reiser4_block_nr * b, void *data)
-+{
-+	struct store_wmap_params *params = data;
-+
-+	if (params->idx >= params->capacity) {
-+		/* a new wander record should be taken from the tx_list */
-+		params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link);
-+		/* stepping past the last record would land on the list head */
-+		assert("zam-454",
-+		       params->tx_list != &params->cur->capture_link);
-+
-+		params->idx = 0;
-+	}
-+
-+	store_entry(params->cur, params->idx, a, b);
-+	params->idx++;
-+
-+	return 0;
-+}
-+
-+/* This function is called after the Relocate set has been written to disk,
-+   the Overwrite set has been written to wandered locations and all wander
-+   records have been written as well. It formats the journal header block so
-+   that it points to the first wander record (tx head) of the just written
-+   transaction, submits it (as a barrier write when @use_barrier is set) and
-+   waits for the i/o. On success the tx head block number is remembered in
-+   sbinfo->last_committed_tx and 0 is returned; otherwise the error from the
-+   write submission or from jwait_io() is returned. */
-+static int update_journal_header(struct commit_handle *ch, int use_barrier)
-+{
-+	struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
-+	jnode *header_node = sbinfo->journal_header;
-+	int write_flags = use_barrier ? WRITEOUT_BARRIER : 0;
-+	int ret;
-+
-+	format_journal_header(ch);
-+
-+	ret = write_jnodes_to_disk_extent(header_node, 1,
-+					  jnode_get_block(header_node), NULL,
-+					  write_flags);
-+	if (ret == 0)
-+		ret = jwait_io(header_node, WRITE);
-+
-+	if (ret == 0) {
-+		jnode *tx_head =
-+		    list_entry(ch->tx_list.next, jnode, capture_link);
-+
-+		sbinfo->last_committed_tx = *jnode_get_block(tx_head);
-+	}
-+
-+	return ret;
-+}
-+
-+/* This function is called after write-back is finished. It formats the
-+   journal footer block, submits it (as a barrier write when @use_barrier is
-+   set) and waits for the i/o to complete. Returns 0 or the error from the
-+   write submission or from jwait_io(). Freeing of the wandered blocks and
-+   wander records is left to the caller. */
-+static int update_journal_footer(struct commit_handle *ch, int use_barrier)
-+{
-+	jnode *footer_node = get_super_private(ch->super)->journal_footer;
-+	int write_flags = use_barrier ? WRITEOUT_BARRIER : 0;
-+	int ret;
-+
-+	format_journal_footer(ch);
-+
-+	ret = write_jnodes_to_disk_extent(footer_node, 1,
-+					  jnode_get_block(footer_node), NULL,
-+					  write_flags);
-+	if (ret)
-+		return ret;
-+
-+	return jwait_io(footer_node, WRITE);
-+}
-+
-+/* free block numbers of wander records of already written in place
-+ transaction. Drains ch->tx_list: every jnode is unlinked, its block is
-+ deallocated, its data is unpinned and the io-head jnode is dropped. The
-+ list is empty on return. */
-+static void dealloc_tx_list(struct commit_handle *ch)
-+{
-+ while (!list_empty(&ch->tx_list)) {
-+ jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link);
-+ list_del(&cur->capture_link);
-+ /* debug builds re-initialise the link of the unlinked jnode */
-+ ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link));
-+ reiser4_dealloc_block(jnode_get_block(cur), BLOCK_NOT_COUNTED,
-+ BA_FORMATTED);
-+
-+ /* counterpart of pin_jnode_data() done in alloc_tx() */
-+ unpin_jnode_data(cur);
-+ reiser4_drop_io_head(cur);
-+ }
-+}
-+
-+/* An actor for use in blocknr_set_iterator() routine which frees wandered
-+ blocks from atom's overwrite set. Only @b, the wandered location of a
-+ wandered map pair, is used: it must be a real (non-zero, non-fake) block
-+ number and is deallocated. Always returns 0. */
-+static int
-+dealloc_wmap_actor(txn_atom * atom UNUSED_ARG,
-+ const reiser4_block_nr * a UNUSED_ARG,
-+ const reiser4_block_nr * b, void *data UNUSED_ARG)
-+{
-+
-+ assert("zam-499", b != NULL);
-+ assert("zam-500", *b != 0);
-+ assert("zam-501", !reiser4_blocknr_is_fake(b));
-+
-+ reiser4_dealloc_block(b, BLOCK_NOT_COUNTED, BA_FORMATTED);
-+ return 0;
-+}
-+
-+/* free wandered block locations of already written in place transaction:
-+ runs dealloc_wmap_actor() over every pair of the atom's wandered map. */
-+static void dealloc_wmap(struct commit_handle *ch)
-+{
-+ assert("zam-696", ch->atom != NULL);
-+
-+ /* NOTE(review): the last argument is 1 here but 0 where alloc_tx()
-+ iterates the same map -- presumably it asks the iterator to delete
-+ the entries as it goes; confirm against blocknr_set_iterator(). */
-+ blocknr_set_iterator(ch->atom, &ch->atom->wandered_map,
-+ dealloc_wmap_actor, NULL, 1);
-+}
-+
-+/* Helper function for alloc_wandered_blocks() which refills the set of block
-+   numbers needed for wandered blocks. Asks the block allocator for up to
-+   @count blocks out of grabbed space; the start of the allocated extent is
-+   stored in *@start and its length, as reported back by the allocator, in
-+   *@len. Returns the result of reiser4_alloc_blocks(). */
-+static int
-+get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len)
-+{
-+	reiser4_blocknr_hint hint;
-+	int ret;
-+
-+	reiser4_block_nr wide_len = count;
-+
-+	/* FIXME-ZAM: A special policy needed for allocation of wandered blocks
-+	   ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed
-+	   reserved allocation area so as to get the best qualities of fixed
-+	   journals? */
-+	reiser4_blocknr_hint_init(&hint);
-+	hint.block_stage = BLOCK_GRABBED;
-+
-+	ret = reiser4_alloc_blocks(&hint, start, &wide_len,
-+				   BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START);
-+	/* pair every hint_init() with hint_done(), as alloc_tx() does */
-+	reiser4_blocknr_hint_done(&hint);
-+
-+	*len = (int)wide_len;
-+
-+	return ret;
-+}
-+
-+/*
-+ * Roll back the changes made to pages and jnodes while a BIO was being built,
-+ * for the case that the BIO is not going to be submitted: writeback is ended
-+ * on every page of @bio, the owning jnode loses JNODE_WRITEBACK and becomes
-+ * JNODE_DIRTY again, and finally the reference to @bio is dropped.
-+ */
-+static void undo_bio(struct bio *bio)
-+{
-+	int idx = 0;
-+
-+	while (idx < bio->bi_vcnt) {
-+		struct page *pg = bio->bi_io_vec[idx].bv_page;
-+		jnode *node;
-+
-+		end_page_writeback(pg);
-+
-+		node = jprivate(pg);
-+		spin_lock_jnode(node);
-+		JF_CLR(node, JNODE_WRITEBACK);
-+		JF_SET(node, JNODE_DIRTY);
-+		spin_unlock_jnode(node);
-+
-+		++idx;
-+	}
-+
-+	bio_put(bio);
-+}
-+
-+/* Release the overwrite set: call jrelse_tail() on every jnode of
-+ ch->overwrite_set. The nodes stay on the list.
-+ NOTE(review): presumably this drops the data references taken by the
-+ jload_gfp() calls in get_overwrite_set() -- confirm. */
-+static void put_overwrite_set(struct commit_handle *ch)
-+{
-+ jnode *cur;
-+
-+ list_for_each_entry(cur, ch->overwrite_set, capture_link)
-+ jrelse_tail(cur);
-+}
-+
-+/* Count overwrite set size, grab disk space for wandered blocks allocation.
-+ Since we have a separate list for atom's overwrite set we just scan the list,
-+ count bitmap and other not leaf nodes which wandered blocks allocation we
-+ have to grab space for.
-+
-+ Every counted jnode is loaded with jload_gfp(). Returns
-+ ch->overwrite_set_size (which may be 0) on success, or a negative error:
-+ PTR_ERR() of the jnode returned by the disk format's log_super(), or the
-+ result of reiser4_grab_space_force().
-+
-+ NOTE(review): on the error returns the jnodes loaded so far are not
-+ released here -- presumably the caller takes care of that; confirm. */
-+static int get_overwrite_set(struct commit_handle *ch)
-+{
-+ int ret;
-+ jnode *cur;
-+ __u64 nr_not_leaves = 0;
-+#if REISER4_DEBUG
-+ __u64 nr_formatted_leaves = 0;
-+ __u64 nr_unformatted_leaves = 0;
-+#endif
-+
-+ assert("zam-697", ch->overwrite_set_size == 0);
-+
-+ ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom);
-+ cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
-+
-+ while (ch->overwrite_set != &cur->capture_link) {
-+ /* remember the successor first: @cur may be removed from the
-+ list (uncaptured) below */
-+ jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
-+
-+ /* Count bitmap locks for getting correct statistics what number
-+ * of blocks were cleared by the transaction commit. */
-+ if (jnode_get_type(cur) == JNODE_BITMAP)
-+ ch->nr_bitmap++;
-+
-+ assert("zam-939", JF_ISSET(cur, JNODE_OVRWR)
-+ || jnode_get_type(cur) == JNODE_BITMAP);
-+
-+ if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) {
-+ /* we replace fake znode by another (real)
-+ znode which is suggested by disk_layout
-+ plugin */
-+
-+ /* FIXME: it looks like fake znode should be
-+ replaced by jnode supplied by
-+ disk_layout. */
-+
-+ struct super_block *s = reiser4_get_current_sb();
-+ reiser4_super_info_data *sbinfo =
-+ get_current_super_private();
-+
-+ if (sbinfo->df_plug->log_super) {
-+ jnode *sj = sbinfo->df_plug->log_super(s);
-+
-+ assert("zam-593", sj != NULL);
-+
-+ if (IS_ERR(sj))
-+ return PTR_ERR(sj);
-+
-+ spin_lock_jnode(sj);
-+ JF_SET(sj, JNODE_OVRWR);
-+ insert_into_atom_ovrwr_list(ch->atom, sj);
-+ spin_unlock_jnode(sj);
-+
-+ /* jload it as the rest of overwrite set */
-+ /* NOTE(review): the return value is ignored here,
-+ unlike the jload_gfp() call in the else branch
-+ below */
-+ jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0);
-+
-+ ch->overwrite_set_size++;
-+ }
-+ /* NOTE(review): no matching spin_unlock_jnode() here --
-+ presumably reiser4_uncapture_block() releases the
-+ jnode lock; confirm */
-+ spin_lock_jnode(cur);
-+ reiser4_uncapture_block(cur);
-+ jput(cur);
-+
-+ } else {
-+ /* NOTE(review): shadows the function-level ret; harmless,
-+ since the outer one is assigned before it is read */
-+ int ret;
-+ ch->overwrite_set_size++;
-+ ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0);
-+ if (ret)
-+ reiser4_panic("zam-783",
-+ "cannot load e-flushed jnode back (ret = %d)\n",
-+ ret);
-+ }
-+
-+ /* Count not leaves here because we have to grab disk space
-+ * for wandered blocks. They were not counted as "flush
-+ * reserved". Counting should be done _after_ nodes are pinned
-+ * into memory by jload(). */
-+ if (!jnode_is_leaf(cur))
-+ nr_not_leaves++;
-+ else {
-+#if REISER4_DEBUG
-+ /* at this point @cur either has JNODE_FLUSH_RESERVED
-+ * or is eflushed. Locking is not strong enough to
-+ * write an assertion checking for this. */
-+ if (jnode_is_znode(cur))
-+ nr_formatted_leaves++;
-+ else
-+ nr_unformatted_leaves++;
-+#endif
-+ JF_CLR(cur, JNODE_FLUSH_RESERVED);
-+ }
-+
-+ cur = next;
-+ }
-+
-+ /* Grab space for writing (wandered blocks) of not leaves found in
-+ * overwrite set. */
-+ ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED);
-+ if (ret)
-+ return ret;
-+
-+ /* Disk space for allocation of wandered blocks of leaf nodes already
-+ * reserved as "flush reserved", move it to grabbed space counter. */
-+ spin_lock_atom(ch->atom);
-+ /* the leaf counters exist only under REISER4_DEBUG; presumably assert()
-+ compiles away otherwise -- confirm */
-+ assert("zam-940",
-+ nr_formatted_leaves + nr_unformatted_leaves <=
-+ ch->atom->flush_reserved);
-+ flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved);
-+ spin_unlock_atom(ch->atom);
-+
-+ return ch->overwrite_set_size;
-+}
-+
-+/**
-+ * write_jnodes_to_disk_extent - submit write request
-+ * @first: first jnode of the list
-+ * @nr: number of jnodes on the list
-+ * @block_p: pointer to the number of the first disk block of the target region
-+ * @fq: flush queue attached to every bio via add_fq_to_bio(); callers in this
-+ * file pass NULL when they wait for the i/o themselves
-+ * @flags: WRITEOUT_* flags; WRITEOUT_BARRIER makes the bios get submitted as
-+ * WRITE_BARRIER instead of WRITE
-+ *
-+ * Submits a write request for @nr jnodes beginning from the @first, other
-+ * jnodes are after the @first on the double-linked "capture" list. All jnodes
-+ * will be written to the disk region of @nr blocks starting with @block_p block
-+ * number. If @fq is not NULL it means that waiting for i/o completion will be
-+ * done more efficiently by using flush_queue_t objects.
-+ * This function is the one which writes list of jnodes in batch mode. It does
-+ * all low-level things as bio construction and page states manipulation.
-+ *
-+ * Returns 0 on success, -ENOMEM when a bio cannot be allocated and
-+ * -EOPNOTSUPP when a submitted bio comes back flagged BIO_EOPNOTSUPP. On a
-+ * read-only super block the bios are rolled back with undo_bio() instead of
-+ * being submitted and 0 is still returned.
-+ *
-+ * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are
-+ * aggregated in this function instead of being left to the layers below
-+ *
-+ * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that?
-+ * Why that layer needed? Why BIOs cannot be constructed here?
-+ */
-+static int write_jnodes_to_disk_extent(
-+ jnode *first, int nr, const reiser4_block_nr *block_p,
-+ flush_queue_t *fq, int flags)
-+{
-+ struct super_block *super = reiser4_get_current_sb();
-+ int write_op = ( flags & WRITEOUT_BARRIER ) ? WRITE_BARRIER : WRITE;
-+ int max_blocks;
-+ jnode *cur = first;
-+ reiser4_block_nr block;
-+
-+ assert("zam-571", first != NULL);
-+ assert("zam-572", block_p != NULL);
-+ assert("zam-570", nr > 0);
-+
-+ block = *block_p;
-+ /* upper bound on the number of pages put into a single bio */
-+ max_blocks = min(bio_get_nr_vecs(super->s_bdev), BIO_MAX_PAGES);
-+
-+ /* one bio per iteration, until all @nr jnodes have been consumed */
-+ while (nr > 0) {
-+ struct bio *bio;
-+ int nr_blocks = min(nr, max_blocks);
-+ int i;
-+ int nr_used;
-+
-+ bio = bio_alloc(GFP_NOIO, nr_blocks);
-+ if (!bio)
-+ return RETERR(-ENOMEM);
-+
-+ bio->bi_bdev = super->s_bdev;
-+ /* convert the block number into 512-byte sectors */
-+ bio->bi_sector = block * (super->s_blocksize >> 9);
-+ for (nr_used = 0, i = 0; i < nr_blocks; i++) {
-+ struct page *pg;
-+
-+ pg = jnode_page(cur);
-+ assert("zam-573", pg != NULL);
-+
-+ page_cache_get(pg);
-+
-+ lock_and_wait_page_writeback(pg);
-+
-+ if (!bio_add_page(bio, pg, super->s_blocksize, 0)) {
-+ /*
-+ * underlying device is satiated. Stop adding
-+ * pages to the bio.
-+ */
-+ unlock_page(pg);
-+ page_cache_release(pg);
-+ break;
-+ }
-+
-+ spin_lock_jnode(cur);
-+ assert("nikita-3166",
-+ pg->mapping == jnode_get_mapping(cur));
-+ assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK));
-+#if REISER4_DEBUG
-+ spin_lock(&cur->load);
-+ assert("nikita-3165", !jnode_is_releasable(cur));
-+ spin_unlock(&cur->load);
-+#endif
-+ JF_SET(cur, JNODE_WRITEBACK);
-+ JF_CLR(cur, JNODE_DIRTY);
-+ ON_DEBUG(cur->written++);
-+ spin_unlock_jnode(cur);
-+
-+ ClearPageError(pg);
-+ set_page_writeback(pg);
-+
-+ if (get_current_context()->entd) {
-+ /* this is ent thread */
-+ entd_context *ent = get_entd_context(super);
-+ struct wbq *rq, *next;
-+
-+ spin_lock(&ent->guard);
-+
-+ if (pg == ent->cur_request->page) {
-+ /*
-+ * entd is called for this page. This
-+ * request is not in the todo list
-+ */
-+ ent->cur_request->written = 1;
-+ } else {
-+ /*
-+ * if we have written a page for which writepage
-+ * is called for - move request to another list.
-+ */
-+ list_for_each_entry_safe(rq, next, &ent->todo_list, link) {
-+ assert("", rq->magic == WBQ_MAGIC);
-+ if (pg == rq->page) {
-+ /*
-+ * remove request from
-+ * entd's queue, but do
-+ * not wake up a thread
-+ * which put this
-+ * request
-+ */
-+ list_del_init(&rq->link);
-+ ent->nr_todo_reqs --;
-+ list_add_tail(&rq->link, &ent->done_list);
-+ ent->nr_done_reqs ++;
-+ rq->written = 1;
-+ break;
-+ }
-+ }
-+ }
-+ spin_unlock(&ent->guard);
-+ }
-+
-+ clear_page_dirty_for_io(pg);
-+
-+ unlock_page(pg);
-+
-+ cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+ nr_used++;
-+ }
-+ if (nr_used > 0) {
-+ assert("nikita-3453",
-+ bio->bi_size == super->s_blocksize * nr_used);
-+ assert("nikita-3454", bio->bi_vcnt == nr_used);
-+
-+ /* Check if we are allowed to write at all */
-+ if (super->s_flags & MS_RDONLY)
-+ undo_bio(bio);
-+ else {
-+ int not_supported;
-+
-+ add_fq_to_bio(fq, bio);
-+ /* extra reference so that the bio flags can still
-+ be inspected after submission */
-+ bio_get(bio);
-+ reiser4_submit_bio(write_op, bio);
-+ not_supported = bio_flagged(bio, BIO_EOPNOTSUPP);
-+ bio_put(bio);
-+ if (not_supported)
-+ return -EOPNOTSUPP;
-+ }
-+
-+ /* the hint is given the last block of this bio; @block
-+ then moves on to the first block of the next one */
-+ block += nr_used - 1;
-+ update_blocknr_hint_default(super, &block);
-+ block += 1;
-+ } else {
-+ /* NOTE(review): if bio_add_page() rejects the very first
-+ page, nr_used is 0 and nr is not reduced below -- this
-+ looks like it would retry forever; confirm */
-+ bio_put(bio);
-+ }
-+ nr -= nr_used;
-+ }
-+
-+ return 0;
-+}
-+
-+/* This procedure detects runs of contiguous disk block numbers in the given
-+   list of j-nodes and submits one write request per run. @fq and @flags are
-+   handed through to write_jnodes_to_disk_extent(). If @nr_submitted is not
-+   NULL it is increased by the length of every run submitted successfully.
-+   Returns 0, or the first error reported by the submission. */
-+int
-+write_jnode_list(struct list_head *head, flush_queue_t *fq,
-+		 long *nr_submitted, int flags)
-+{
-+	jnode *run_start = list_entry(head->next, jnode, capture_link);
-+
-+	while (&run_start->capture_link != head) {
-+		jnode *run_end;
-+		int run_len;
-+		int err;
-+
-+		/* extend the run while the following jnode sits on the disk
-+		   block right after the blocks collected so far */
-+		run_len = 1;
-+		for (run_end = list_entry(run_start->capture_link.next, jnode,
-+					  capture_link);
-+		     &run_end->capture_link != head &&
-+		     *jnode_get_block(run_end) ==
-+		     *jnode_get_block(run_start) + run_len;
-+		     run_end = list_entry(run_end->capture_link.next, jnode,
-+					  capture_link))
-+			++run_len;
-+
-+		err = write_jnodes_to_disk_extent(run_start, run_len,
-+						  jnode_get_block(run_start),
-+						  fq, flags);
-+		if (err)
-+			return err;
-+
-+		if (nr_submitted)
-+			*nr_submitted += run_len;
-+
-+		/* the jnode that broke the run starts the next one */
-+		run_start = run_end;
-+	}
-+
-+	return 0;
-+}
-+
-+/* add given wandered mapping to atom's wandered map: each of the @len jnodes
-+ starting at @cur (following capture_link) is paired with one of the @len
-+ consecutive block numbers starting at *@block_p. Returns 0, or the error
-+ reported by blocknr_set_add_pair(). */
-+static int
-+add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p)
-+{
-+ int ret;
-+ blocknr_set_entry *new_bsep = NULL;
-+ reiser4_block_nr block;
-+
-+ txn_atom *atom;
-+
-+ assert("zam-568", block_p != NULL);
-+ block = *block_p;
-+ assert("zam-569", len > 0);
-+
-+ while ((len--) > 0) {
-+ /* the atom is re-acquired on every retry -- presumably
-+ blocknr_set_add_pair() drops the atom lock before returning
-+ -E_REPEAT; confirm */
-+ do {
-+ atom = get_current_atom_locked();
-+ assert("zam-536",
-+ !reiser4_blocknr_is_fake(jnode_get_block(cur)));
-+ ret =
-+ blocknr_set_add_pair(atom, &atom->wandered_map,
-+ &new_bsep,
-+ jnode_get_block(cur), &block);
-+ } while (ret == -E_REPEAT);
-+
-+ if (ret) {
-+ /* deallocate blocks which were not added to wandered
-+ map */
-+ /* NOTE(review): len was already decremented for this
-+ iteration, so this releases len blocks starting at the
-+ block whose insertion just failed -- confirm whether
-+ len + 1 blocks should be released; also confirm the
-+ atom lock state on this path */
-+ reiser4_block_nr wide_len = len;
-+
-+ reiser4_dealloc_blocks(&block, &wide_len,
-+ BLOCK_NOT_COUNTED,
-+ BA_FORMATTED
-+ /* formatted, without defer */ );
-+
-+ return ret;
-+ }
-+
-+ spin_unlock_atom(atom);
-+
-+ cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+ ++block;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately
-+   submit IO for allocated blocks. We assume that current atom is in a stage
-+   when any atom fusion is impossible and atom is unlocked and it is safe.
-+
-+   The overwrite set is processed in chunks: every pass obtains an extent of
-+   wandered blocks, records the mapping in the atom's wandered map and writes
-+   the corresponding jnodes to that extent. Returns 0 or the first error. */
-+static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq)
-+{
-+	reiser4_block_nr block;
-+	jnode *cur;
-+	int rest;
-+
-+	assert("zam-534", ch->overwrite_set_size > 0);
-+
-+	/* number of overwrite set nodes still lacking a wandered block */
-+	rest = ch->overwrite_set_size;
-+
-+	cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
-+	while (&cur->capture_link != ch->overwrite_set) {
-+		int len;
-+		int ret;
-+		int i;
-+
-+		assert("zam-567", JF_ISSET(cur, JNODE_OVRWR));
-+
-+		ret = get_more_wandered_blocks(rest, &block, &len);
-+		if (ret)
-+			return ret;
-+
-+		rest -= len;
-+
-+		ret = add_region_to_wmap(cur, len, &block);
-+		if (ret)
-+			return ret;
-+
-+		ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0);
-+		if (ret)
-+			return ret;
-+
-+		/* step over the jnodes covered by this extent */
-+		for (i = 0; i < len; i++) {
-+			assert("zam-604",
-+			       ch->overwrite_set != &cur->capture_link);
-+			cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/* allocate given number of nodes over the journal area and link them into a
-+ list, return pointer to the first jnode in the list */
-+static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq)
-+{
-+ reiser4_blocknr_hint hint;
-+ reiser4_block_nr allocated = 0;
-+ reiser4_block_nr first, len;
-+ jnode *cur;
-+ jnode *txhead;
-+ int ret;
-+ reiser4_context *ctx;
-+ reiser4_super_info_data *sbinfo;
-+
-+ assert("zam-698", ch->tx_size > 0);
-+ assert("zam-699", list_empty_careful(&ch->tx_list));
-+
-+ ctx = get_current_context();
-+ sbinfo = get_super_private(ctx->super);
-+
-+ while (allocated < (unsigned)ch->tx_size) {
-+ len = (ch->tx_size - allocated);
-+
-+ reiser4_blocknr_hint_init(&hint);
-+
-+ hint.block_stage = BLOCK_GRABBED;
-+
-+ /* FIXME: there should be some block allocation policy for
-+ nodes which contain wander records */
-+
-+ /* We assume that disk space for wandered record blocks can be
-+ * taken from reserved area. */
-+ ret = reiser4_alloc_blocks(&hint, &first, &len,
-+ BA_FORMATTED | BA_RESERVED |
-+ BA_USE_DEFAULT_SEARCH_START);
-+ reiser4_blocknr_hint_done(&hint);
-+
-+ if (ret)
-+ return ret;
-+
-+ allocated += len;
-+
-+ /* create jnodes for all wander records */
-+ while (len--) {
-+ cur = reiser4_alloc_io_head(&first);
-+
-+ if (cur == NULL) {
-+ ret = RETERR(-ENOMEM);
-+ goto free_not_assigned;
-+ }
-+
-+ ret = jinit_new(cur, reiser4_ctx_gfp_mask_get());
-+
-+ if (ret != 0) {
-+ jfree(cur);
-+ goto free_not_assigned;
-+ }
-+
-+ pin_jnode_data(cur);
-+
-+ list_add_tail(&cur->capture_link, &ch->tx_list);
-+
-+ first++;
-+ }
-+ }
-+
-+ { /* format a on-disk linked list of wander records */
-+ int serial = 1;
-+
-+ txhead = list_entry(ch->tx_list.next, jnode, capture_link);
-+ format_tx_head(ch);
-+
-+ cur = list_entry(txhead->capture_link.next, jnode, capture_link);
-+ while (&ch->tx_list != &cur->capture_link) {
-+ format_wander_record(ch, cur, serial++);
-+ cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+ }
-+ }
-+
-+ { /* Fill wander records with Wandered Set */
-+ struct store_wmap_params params;
-+ txn_atom *atom;
-+
-+ params.cur = list_entry(txhead->capture_link.next, jnode, capture_link);
-+
-+ params.idx = 0;
-+ params.capacity =
-+ wander_record_capacity(reiser4_get_current_sb());
-+
-+ atom = get_current_atom_locked();
-+ blocknr_set_iterator(atom, &atom->wandered_map,
-+ &store_wmap_actor, ¶ms, 0);
-+ spin_unlock_atom(atom);
-+ }
-+
-+ { /* relse all jnodes from tx_list */
-+ cur = list_entry(ch->tx_list.next, jnode, capture_link);
-+ while (&ch->tx_list != &cur->capture_link) {
-+ jrelse(cur);
-+ cur = list_entry(cur->capture_link.next, jnode, capture_link);
-+ }
-+ }
-+
-+ ret = write_jnode_list(&ch->tx_list, fq, NULL, 0);
-+
-+ return ret;
-+
-+ free_not_assigned:
-+ /* We deallocate blocks not yet assigned to jnodes on tx_list. The
-+ caller takes care about invalidating of tx list */
-+ reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED);
-+
-+ return ret;
-+}
-+
-+/* Commit phase of the log writer: grab space for the wander records, write
-+   the overwrite set to wandered blocks and the wander records to disk, then
-+   make the transaction visible by updating the journal header.
-+
-+   With write barriers enabled the header goes out as a barrier write and the
-+   flush queues are waited for afterwards. Without them all flush queues are
-+   finished first and the header is written synchronously. If the device
-+   rejects the barrier write (-EOPNOTSUPP), barriers are disabled for this
-+   super block and the header update is retried. Returns 0 or a negative
-+   error code. */
-+static int commit_tx(struct commit_handle *ch)
-+{
-+	flush_queue_t *fq;
-+	int barrier;
-+	int ret;
-+
-+	/* Grab more space for wandered records. */
-+	ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED);
-+	if (ret)
-+		return ret;
-+
-+	fq = get_fq_for_current_atom();
-+	if (IS_ERR(fq))
-+		return PTR_ERR(fq);
-+
-+	spin_unlock_atom(fq->atom);
-+
-+	ret = alloc_wandered_blocks(ch, fq);
-+	if (ret == 0)
-+		ret = alloc_tx(ch, fq);
-+
-+	reiser4_fq_put(fq);
-+	if (ret)
-+		return ret;
-+
-+	for (;;) {
-+		barrier = reiser4_use_write_barrier(ch->super);
-+		if (!barrier) {
-+			/* synchronous mode: everything must be on disk
-+			   before the header is updated */
-+			ret = current_atom_finish_all_fq();
-+			if (ret)
-+				return ret;
-+			return update_journal_header(ch, barrier);
-+		}
-+
-+		ret = update_journal_header(ch, barrier);
-+		if (ret == 0)
-+			return current_atom_finish_all_fq();
-+		if (ret != -EOPNOTSUPP)
-+			return ret;
-+
-+		/* barriers are not supported: retry without them */
-+		disable_write_barrier(ch->super);
-+	}
-+}
-+
-+/* Write-back ("play") phase of the log writer: after the post-commit hook
-+   the overwrite set is written in place, then the journal footer is updated
-+   to point at this transaction, and finally the post-write-back hook runs.
-+
-+   The barrier handling mirrors commit_tx(): a barrier write of the footer
-+   followed by waiting for the flush queues, or -- without barriers --
-+   finishing all flush queues first. If the device rejects the barrier write
-+   (-EOPNOTSUPP), barriers are disabled and the footer update is retried.
-+   Returns 0 or a negative error code. */
-+static int write_tx_back(struct commit_handle * ch)
-+{
-+	flush_queue_t *fq;
-+	int barrier;
-+	int ret;
-+
-+	reiser4_post_commit_hook();
-+
-+	fq = get_fq_for_current_atom();
-+	if (IS_ERR(fq))
-+		return PTR_ERR(fq);
-+
-+	spin_unlock_atom(fq->atom);
-+	ret = write_jnode_list(ch->overwrite_set, fq, NULL,
-+			       WRITEOUT_FOR_PAGE_RECLAIM);
-+	reiser4_fq_put(fq);
-+	if (ret)
-+		return ret;
-+
-+	for (;;) {
-+		barrier = reiser4_use_write_barrier(ch->super);
-+		if (!barrier) {
-+			/* synchronous mode: in-place writes must complete
-+			   before the footer is updated */
-+			ret = current_atom_finish_all_fq();
-+			if (ret)
-+				return ret;
-+			ret = update_journal_footer(ch, barrier);
-+			break;
-+		}
-+
-+		ret = update_journal_footer(ch, barrier);
-+		if (ret == 0) {
-+			ret = current_atom_finish_all_fq();
-+			break;
-+		}
-+		if (ret != -EOPNOTSUPP)
-+			return ret;
-+
-+		/* barriers are not supported: retry without them */
-+		disable_write_barrier(ch->super);
-+	}
-+
-+	if (ret)
-+		return ret;
-+
-+	reiser4_post_write_back_hook();
-+	return 0;
-+}
-+
-+/* We assume that at this moment all captured blocks are marked as RELOC or
-+ WANDER (belong to the Relocate or Overwrite set), and all nodes from the
-+ Relocate set are submitted to write.
-+*/
-+
-+int reiser4_write_logs(long *nr_submitted)
-+{
-+ txn_atom *atom;
-+ struct super_block *super = reiser4_get_current_sb();
-+ reiser4_super_info_data *sbinfo = get_super_private(super);
-+ struct commit_handle ch;
-+ int ret;
-+
-+ writeout_mode_enable();
-+
-+ /* block allocator may add j-nodes to the clean_list */
-+ ret = reiser4_pre_commit_hook();
-+ if (ret)
-+ return ret;
-+
-+ /* No locks are required if we take atom which stage >=
-+ * ASTAGE_PRE_COMMIT */
-+ atom = get_current_context()->trans->atom;
-+ assert("zam-965", atom != NULL);
-+
-+ /* relocate set is on the atom->clean_nodes list after
-+ * current_atom_complete_writes() finishes. It can be safely
-+ * uncaptured after commit_mutex is locked, because any atom that
-+ * captures these nodes is guaranteed to commit after current one.
-+ *
-+ * This can only be done after reiser4_pre_commit_hook(), because it is where
-+ * early flushed jnodes with CREATED bit are transferred to the
-+ * overwrite list. */
-+ reiser4_invalidate_list(ATOM_CLEAN_LIST(atom));
-+ spin_lock_atom(atom);
-+ /* There might be waiters for the relocate nodes which we have
-+ * released, wake them up. */
-+ reiser4_atom_send_event(atom);
-+ spin_unlock_atom(atom);
-+
-+ if (REISER4_DEBUG) {
-+ int level;
-+
-+ for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level)
-+ assert("nikita-3352",
-+ list_empty_careful(ATOM_DIRTY_LIST(atom, level)));
-+ }
-+
-+ sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created;
-+ sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted;
-+
-+ init_commit_handle(&ch, atom);
-+
-+ ch.free_blocks = sbinfo->blocks_free_committed;
-+ ch.nr_files = sbinfo->nr_files_committed;
-+ /* ZAM-FIXME-HANS: email me what the contention level is for the super
-+ * lock. */
-+ ch.next_oid = oid_next(super);
-+
-+ /* count overwrite set and place it in a separate list */
-+ ret = get_overwrite_set(&ch);
-+
-+ if (ret <= 0) {
-+ /* It is possible that overwrite set is empty here, it means
-+ all captured nodes are clean */
-+ goto up_and_ret;
-+ }
-+
-+ /* Inform the caller about what number of dirty pages will be
-+ * submitted to disk. */
-+ *nr_submitted += ch.overwrite_set_size - ch.nr_bitmap;
-+
-+ /* count all records needed for storing of the wandered set */
-+ get_tx_size(&ch);
-+
-+ ret = commit_tx(&ch);
-+ if (ret)
-+ goto up_and_ret;
-+
-+ spin_lock_atom(atom);
-+ reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
-+ spin_unlock_atom(atom);
-+
-+ ret = write_tx_back(&ch);
-+ reiser4_post_write_back_hook();
-+
-+ up_and_ret:
-+ if (ret) {
-+ /* there could be fq attached to current atom; the only way to
-+ remove them is: */
-+ current_atom_finish_all_fq();
-+ }
-+
-+ /* free blocks of flushed transaction */
-+ dealloc_tx_list(&ch);
-+ dealloc_wmap(&ch);
-+
-+ put_overwrite_set(&ch);
-+
-+ done_commit_handle(&ch);
-+
-+ writeout_mode_disable();
-+
-+ return ret;
-+}
-+
-+/* consistency checks for journal data/control blocks: header, footer, log
-+ records, transactions head blocks. All functions return zero on success. */
-+
-+static int check_journal_header(const jnode * node UNUSED_ARG)
-+{
-+ /* FIXME: journal header has no magic field yet. */
-+ return 0;
-+}
-+
-+/* Wait until writeback finishes for every jnode on @head (linked through
-+ * capture_link) and return how many of their pages have the error flag set;
-+ * jnodes without a page are skipped. */
-+static int wait_on_jnode_list(struct list_head *head)
-+{
-+	jnode *cur;
-+	int nr_failed = 0;
-+
-+	list_for_each_entry(cur, head, capture_link) {
-+		struct page *page = jnode_page(cur);
-+
-+		if (page == NULL)
-+			continue;
-+		if (PageWriteback(page))
-+			wait_on_page_writeback(page);
-+		nr_failed += PageError(page) ? 1 : 0;
-+	}
-+
-+	return nr_failed;
-+}
-+
-+static int check_journal_footer(const jnode * node UNUSED_ARG)
-+{
-+ /* FIXME: journal footer has no magic field yet. */
-+ return 0;
-+}
-+
-+static int check_tx_head(const jnode * node)
-+{
-+ struct tx_header *header = (struct tx_header *)jdata(node);
-+
-+ if (memcmp(&header->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) != 0) {
-+ warning("zam-627", "tx head at block %s corrupted\n",
-+ sprint_address(jnode_get_block(node)));
-+ return RETERR(-EIO);
-+ }
-+
-+ return 0;
-+}
-+
-+/* Verify the magic string of the wander record held in @node: returns 0 when
-+ * it equals WANDER_RECORD_MAGIC, otherwise warns and returns RETERR(-EIO). */
-+static int check_wander_record(const jnode * node)
-+{
-+	struct wander_record_header *header;
-+
-+	header = (struct wander_record_header *)jdata(node);
-+	if (memcmp(&header->magic, WANDER_RECORD_MAGIC,
-+		   WANDER_RECORD_MAGIC_SIZE) == 0)
-+		return 0;
-+
-+	warning("zam-628", "wander record at block %s corrupted\n",
-+		sprint_address(jnode_get_block(node)));
-+	return RETERR(-EIO);
-+}
-+
-+/* fill commit_handle structure with everything that is needed for update_journal_footer */
-+static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head)
-+{
-+ struct tx_header *TXH;
-+ int ret;
-+
-+ ret = jload(tx_head);
-+ if (ret)
-+ return ret;
-+
-+ TXH = (struct tx_header *)jdata(tx_head);
-+
-+ ch->free_blocks = le64_to_cpu(get_unaligned(&TXH->free_blocks));
-+ ch->nr_files = le64_to_cpu(get_unaligned(&TXH->nr_files));
-+ ch->next_oid = le64_to_cpu(get_unaligned(&TXH->next_oid));
-+
-+ jrelse(tx_head);
-+
-+ list_add(&tx_head->capture_link, &ch->tx_list);
-+
-+ return 0;
-+}
-+
-+/* Replay one transaction: rebuild its overwrite set from the wander records
-+ * chained from *log_rec_block_p up to *end_block, write those blocks to their
-+ * original locations and update the journal footer. Returns 0 or an error;
-+ * every exit after restore_commit_handle() goes through free_ow_set. */
-+static int replay_transaction(const struct super_block *s,
-+			      jnode * tx_head,
-+			      const reiser4_block_nr * log_rec_block_p,
-+			      const reiser4_block_nr * end_block,
-+			      unsigned int nr_wander_records)
-+{
-+	reiser4_block_nr log_rec_block = *log_rec_block_p;
-+	struct commit_handle ch;
-+	LIST_HEAD(overwrite_set);
-+	jnode *log;
-+	int ret;
-+
-+	init_commit_handle(&ch, NULL);
-+	ch.overwrite_set = &overwrite_set;
-+
-+	/* the footer is rewritten from ch; never continue with unread counters */
-+	ret = restore_commit_handle(&ch, tx_head);
-+	if (ret) {
-+		done_commit_handle(&ch);
-+		return ret;
-+	}
-+
-+	while (log_rec_block != *end_block) {
-+		struct wander_record_header *header;
-+		struct wander_entry *entry;
-+		int i;
-+
-+		if (nr_wander_records == 0) {
-+			warning("zam-631",
-+				"number of wander records in the linked list"
-+				" greater than number stored in tx head.\n");
-+			ret = RETERR(-EIO);
-+			goto free_ow_set;
-+		}
-+
-+		log = reiser4_alloc_io_head(&log_rec_block);
-+		if (log == NULL) {
-+			ret = RETERR(-ENOMEM);
-+			goto free_ow_set;
-+		}
-+
-+		ret = jload(log);
-+		if (ret < 0) {
-+			reiser4_drop_io_head(log);
-+			goto free_ow_set;
-+		}
-+
-+		ret = check_wander_record(log);
-+		if (ret) {
-+			jrelse(log);
-+			reiser4_drop_io_head(log);
-+			goto free_ow_set;
-+		}
-+
-+		header = (struct wander_record_header *)jdata(log);
-+		log_rec_block = le64_to_cpu(get_unaligned(&header->next_block));
-+
-+		entry = (struct wander_entry *)(header + 1);
-+
-+		/* restore overwrite set from wander record content */
-+		for (i = 0; i < wander_record_capacity(s); i++) {
-+			reiser4_block_nr block;
-+			jnode *node;
-+
-+			block = le64_to_cpu(get_unaligned(&entry->wandered));
-+			if (block == 0)
-+				break;
-+
-+			node = reiser4_alloc_io_head(&block);
-+			if (node == NULL) {
-+				ret = RETERR(-ENOMEM);
-+				/* release the record that was being parsed */
-+				jrelse(log);
-+				reiser4_drop_io_head(log);
-+				goto free_ow_set;
-+			}
-+
-+			ret = jload(node);
-+			if (ret < 0) {
-+				reiser4_drop_io_head(node);
-+				/* release the record that was being parsed */
-+				jrelse(log);
-+				reiser4_drop_io_head(log);
-+				goto free_ow_set;
-+			}
-+
-+			block = le64_to_cpu(get_unaligned(&entry->original));
-+
-+			assert("zam-603", block != 0);
-+
-+			jnode_set_block(node, &block);
-+
-+			list_add_tail(&node->capture_link, ch.overwrite_set);
-+
-+			++entry;
-+		}
-+
-+		jrelse(log);
-+		reiser4_drop_io_head(log);
-+
-+		--nr_wander_records;
-+	}
-+
-+	if (nr_wander_records != 0) {
-+		warning("zam-632", "number of wander records in the linked list"
-+			" less than number stored in tx head.\n");
-+		ret = RETERR(-EIO);
-+		goto free_ow_set;
-+	}
-+
-+	/* write wandered set in place; always wait, then report either failure */
-+	ret = write_jnode_list(ch.overwrite_set, NULL, NULL, 0);
-+	if (wait_on_jnode_list(ch.overwrite_set) != 0)
-+		ret = RETERR(-EIO);
-+	if (ret)
-+		goto free_ow_set;
-+
-+	ret = update_journal_footer(&ch, 0);
-+
-+      free_ow_set:
-+	while (!list_empty(ch.overwrite_set)) {
-+		jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link);
-+		list_del_init(&cur->capture_link);
-+		jrelse(cur);
-+		reiser4_drop_io_head(cur);
-+	}
-+
-+	list_del_init(&tx_head->capture_link);
-+
-+	done_commit_handle(&ch);
-+
-+	return ret;
-+}
-+
-+/* Find the oldest committed but not yet replayed transaction and replay it.
-+ * Such a transaction was committed and the journal header block was updated,
-+ * but writing the atom's overwrite set in place and updating the journal
-+ * footer block were not completed. This function completes the process by
-+ * recovering the atom's overwrite set from its wandered locations, writing
-+ * it in place and updating the journal footer. */
-+static int replay_oldest_transaction(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ jnode *jf = sbinfo->journal_footer;
-+ unsigned int total;
-+ struct journal_footer *F;
-+ struct tx_header *T;
-+
-+ reiser4_block_nr prev_tx;
-+ reiser4_block_nr last_flushed_tx;
-+ reiser4_block_nr log_rec_block = 0;
-+
-+ jnode *tx_head;
-+
-+ int ret;
-+
-+ if ((ret = jload(jf)) < 0)
-+ return ret;
-+
-+ F = (struct journal_footer *)jdata(jf);
-+
-+ last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx));
-+
-+ jrelse(jf);
-+
-+ if (sbinfo->last_committed_tx == last_flushed_tx) {
-+ /* all transactions are replayed */
-+ return 0;
-+ }
-+
-+ prev_tx = sbinfo->last_committed_tx;
-+
-+ /* searching for oldest not flushed transaction */
-+ while (1) {
-+ tx_head = reiser4_alloc_io_head(&prev_tx);
-+ if (!tx_head)
-+ return RETERR(-ENOMEM);
-+
-+ ret = jload(tx_head);
-+ if (ret < 0) {
-+ reiser4_drop_io_head(tx_head);
-+ return ret;
-+ }
-+
-+ ret = check_tx_head(tx_head);
-+ if (ret) {
-+ jrelse(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+ return ret;
-+ }
-+
-+ T = (struct tx_header *)jdata(tx_head);
-+
-+ prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx));
-+
-+ if (prev_tx == last_flushed_tx)
-+ break;
-+
-+ jrelse(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+ }
-+
-+ total = le32_to_cpu(get_unaligned(&T->total));
-+ log_rec_block = le64_to_cpu(get_unaligned(&T->next_block));
-+
-+ pin_jnode_data(tx_head);
-+ jrelse(tx_head);
-+
-+ ret =
-+ replay_transaction(s, tx_head, &log_rec_block,
-+ jnode_get_block(tx_head), total - 1);
-+
-+ unpin_jnode_data(tx_head);
-+ reiser4_drop_io_head(tx_head);
-+
-+ if (ret)
-+ return ret;
-+ return -E_REPEAT;
-+}
-+
-+/* The current reiser4 journal implementation is optimized not to capture the
-+ super block when only certain super block fields are modified. Currently, the
-+ set is (<free block count>, <OID allocator>). These fields are logged in a
-+ special way: they are stored in each transaction head block at atom commit
-+ time and written to the journal footer block at atom flush time. To get this
-+ information from the journal footer block into the in-memory super block
-+ there is a special function, reiser4_journal_recover_sb_data(), which should
-+ be called after the disk format plugin re-reads the super block after
-+ journal replaying.
-+*/
-+
-+/* copy the information from the journal footer into the in-memory super block */
-+int reiser4_journal_recover_sb_data(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ struct journal_footer *jf;
-+ int ret;
-+
-+ assert("zam-673", sbinfo->journal_footer != NULL);
-+
-+ ret = jload(sbinfo->journal_footer);
-+ if (ret != 0)
-+ return ret;
-+
-+ ret = check_journal_footer(sbinfo->journal_footer);
-+ if (ret != 0)
-+ goto out;
-+
-+ jf = (struct journal_footer *)jdata(sbinfo->journal_footer);
-+
-+ /* was there at least one flushed transaction? */
-+ if (jf->last_flushed_tx) {
-+
-+ /* restore free block counter logged in this transaction */
-+ reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&jf->free_blocks)));
-+
-+ /* restore oid allocator state */
-+ oid_init_allocator(s,
-+ le64_to_cpu(get_unaligned(&jf->nr_files)),
-+ le64_to_cpu(get_unaligned(&jf->next_oid)));
-+ }
-+ out:
-+ jrelse(sbinfo->journal_footer);
-+ return ret;
-+}
-+
-+/* reiser4 journal replay procedure; returns 0 on success or an error code */
-+int reiser4_journal_replay(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+ jnode *jh, *jf;
-+ struct journal_header *header;
-+ int nr_tx_replayed = 0; /* counted below, not otherwise used here */
-+ int ret;
-+
-+ assert("zam-582", sbinfo != NULL);
-+
-+ jh = sbinfo->journal_header;
-+ jf = sbinfo->journal_footer;
-+
-+ if (!jh || !jf) {
-+ /* it is possible that the disk layout does not support journal
-+ structures; we just warn about this and report success */
-+ warning("zam-583",
-+ "journal control blocks were not loaded by disk layout plugin. "
-+ "journal replaying is not possible.\n");
-+ return 0;
-+ }
-+
-+ /* Load and check the journal footer block. Nothing is read from it
-+ here; its counters are used by reiser4_journal_recover_sb_data() */
-+ ret = jload(jf);
-+ if (ret < 0)
-+ return ret;
-+
-+ ret = check_journal_footer(jf);
-+ if (ret) {
-+ jrelse(jf);
-+ return ret;
-+ }
-+
-+ jrelse(jf);
-+
-+ /* store last committed transaction info in the reiser4 in-memory super
-+ block */
-+ ret = jload(jh);
-+ if (ret < 0)
-+ return ret;
-+
-+ ret = check_journal_header(jh);
-+ if (ret) {
-+ jrelse(jh);
-+ return ret;
-+ }
-+
-+ header = (struct journal_header *)jdata(jh);
-+ sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&header->last_committed_tx));
-+
-+ jrelse(jh);
-+
-+ /* replay committed transactions; each -E_REPEAT means one was replayed */
-+ while ((ret = replay_oldest_transaction(s)) == -E_REPEAT)
-+ nr_tx_replayed++;
-+
-+ return ret;
-+}
-+
-+/* Load a journal control block (journal header or journal footer) located at
-+ * @block. On success the jnode is stored in *@node with its data pinned and 0
-+ * is returned; on failure *@node is NULL and an error code is returned. */
-+static int
-+load_journal_control_block(jnode ** node, const reiser4_block_nr * block)
-+{
-+	jnode *cb;
-+	int err;
-+
-+	*node = cb = reiser4_alloc_io_head(block);
-+	if (cb == NULL)
-+		return RETERR(-ENOMEM);
-+
-+	err = jload(cb);
-+	if (err != 0) {
-+		reiser4_drop_io_head(cb);
-+		*node = NULL;
-+		return err;
-+	}
-+	pin_jnode_data(cb);
-+	jrelse(cb);
-+	return 0;
-+}
-+
-+/* unload journal header or footer and free jnode */
-+static void unload_journal_control_block(jnode ** node)
-+{
-+ if (*node) {
-+ unpin_jnode_data(*node);
-+ reiser4_drop_io_head(*node);
-+ *node = NULL;
-+ }
-+}
-+
-+/* release journal control blocks */
-+void reiser4_done_journal_info(struct super_block *s)
-+{
-+ reiser4_super_info_data *sbinfo = get_super_private(s);
-+
-+ assert("zam-476", sbinfo != NULL);
-+
-+ unload_journal_control_block(&sbinfo->journal_header);
-+ unload_journal_control_block(&sbinfo->journal_footer);
-+ rcu_barrier();
-+}
-+
-+/* Load both journal control blocks (header, then footer) from the locations
-+ * recorded in the super block info (sbinfo->jloc). Returns 0 on success; if
-+ * the footer cannot be loaded, the already loaded header is released again
-+ * and the error is returned. */
-+int reiser4_init_journal_info(struct super_block *s)
-+{
-+	reiser4_super_info_data *info = get_super_private(s);
-+	journal_location *where = &info->jloc;
-+	int err;
-+
-+	assert("zam-651", where != NULL);
-+	assert("zam-652", where->header != 0);
-+	assert("zam-653", where->footer != 0);
-+
-+	err = load_journal_control_block(&info->journal_header,
-+					 &where->header);
-+	if (err != 0)
-+		return err;
-+
-+	err = load_journal_control_block(&info->journal_footer,
-+					 &where->footer);
-+	if (err != 0)
-+		unload_journal_control_block(&info->journal_header);
-+
-+	return err;
-+}
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/wander.h linux-2.6.24/fs/reiser4/wander.h
---- linux-2.6.24.orig/fs/reiser4/wander.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/wander.h 2008-01-25 11:39:07.116254057 +0300
-@@ -0,0 +1,135 @@
-+/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
-+
-+#if !defined (__FS_REISER4_WANDER_H__)
-+#define __FS_REISER4_WANDER_H__
-+
-+#include "dformat.h"
-+
-+#include <linux/fs.h> /* for struct super_block */
-+
-+/* REISER4 JOURNAL ON-DISK DATA STRUCTURES */
-+
-+#define TX_HEADER_MAGIC "TxMagic4"
-+#define WANDER_RECORD_MAGIC "LogMagc4"
-+
-+#define TX_HEADER_MAGIC_SIZE (8)
-+#define WANDER_RECORD_MAGIC_SIZE (8)
-+
-+/* journal header block format */
-+struct journal_header {
-+ /* last written transaction head location */
-+ d64 last_committed_tx;
-+};
-+
-+typedef struct journal_location {
-+ reiser4_block_nr footer;
-+ reiser4_block_nr header;
-+} journal_location;
-+
-+/* The wander.c head comment describes usage and semantic of all these structures */
-+/* journal footer block format */
-+struct journal_footer {
-+ /* last flushed transaction location. */
-+ /* This block number is no more valid after the transaction it points
-+ to gets flushed, this number is used only at journal replaying time
-+ for detection of the end of on-disk list of committed transactions
-+ which were not flushed completely */
-+ d64 last_flushed_tx;
-+
-+ /* free block counter is written in journal footer at transaction
-+ flushing , not in super block because free blocks counter is logged
-+ by another way than super block fields (root pointer, for
-+ example). */
-+ d64 free_blocks;
-+
-+ /* number of used OIDs and maximal used OID are logged separately from
-+ super block */
-+ d64 nr_files;
-+ d64 next_oid;
-+};
-+
-+/* Each wander record (except the first one) has unified format with wander
-+ record header followed by an array of log entries */
-+struct wander_record_header {
-+ /* when there is no predefined location for wander records, this magic
-+ string should help reiser4fsck. */
-+ char magic[WANDER_RECORD_MAGIC_SIZE];
-+
-+ /* transaction id */
-+ d64 id;
-+
-+ /* total number of wander records in current transaction */
-+ d32 total;
-+
-+ /* this block number in transaction */
-+ d32 serial;
-+
-+ /* number of previous block in commit */
-+ d64 next_block;
-+};
-+
-+/* The first wander record (transaction head) of written transaction has the
-+ special format */
-+struct tx_header {
-+ /* magic string makes first block in transaction different from other
-+ logged blocks, it should help fsck. */
-+ char magic[TX_HEADER_MAGIC_SIZE];
-+
-+ /* transaction id */
-+ d64 id;
-+
-+ /* total number of records (including this first tx head) in the
-+ transaction */
-+ d32 total;
-+
-+ /* align next field to 8-byte boundary; this field always is zero */
-+ d32 padding;
-+
-+ /* block number of previous transaction head */
-+ d64 prev_tx;
-+
-+ /* next wander record location */
-+ d64 next_block;
-+
-+ /* committed versions of free blocks counter */
-+ d64 free_blocks;
-+
-+ /* number of used OIDs (nr_files) and maximal used OID are logged
-+ separately from super block */
-+ d64 nr_files;
-+ d64 next_oid;
-+};
-+
-+/* A transaction gets written to disk as a set of wander records (each wander
-+ record size is fs block) */
-+
-+/* As described above, a wander record starts with a wander record header. The
-+ rest of the record is filled by these log entries; unused space is filled by zeroes */
-+struct wander_entry {
-+ d64 original; /* block original location */
-+ d64 wandered; /* block wandered location */
-+};
-+
-+/* REISER4 JOURNAL WRITER FUNCTIONS */
-+
-+extern int reiser4_write_logs(long *);
-+extern int reiser4_journal_replay(struct super_block *);
-+extern int reiser4_journal_recover_sb_data(struct super_block *);
-+
-+extern int reiser4_init_journal_info(struct super_block *);
-+extern void reiser4_done_journal_info(struct super_block *);
-+
-+extern int write_jnode_list(struct list_head *, flush_queue_t *, long *, int);
-+
-+#endif /* __FS_REISER4_WANDER_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ scroll-step: 1
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/writeout.h linux-2.6.24/fs/reiser4/writeout.h
---- linux-2.6.24.orig/fs/reiser4/writeout.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/writeout.h 2008-01-25 11:39:07.120255087 +0300
-@@ -0,0 +1,21 @@
-+/* Copyright 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */
-+/* Writeout flag bits (e.g. the last argument of write_jnode_list()). */
-+#if !defined (__FS_REISER4_WRITEOUT_H__)
-+#define __FS_REISER4_WRITEOUT_H__
-+#define WRITEOUT_SINGLE_STREAM (0x1)
-+#define WRITEOUT_FOR_PAGE_RECLAIM (0x2)
-+#define WRITEOUT_BARRIER (0x4)
-+
-+extern int reiser4_get_writeout_flags(void);
-+
-+#endif /* __FS_REISER4_WRITEOUT_H__ */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 80
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/znode.c linux-2.6.24/fs/reiser4/znode.c
---- linux-2.6.24.orig/fs/reiser4/znode.c 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/znode.c 2008-01-25 11:39:07.120255087 +0300
-@@ -0,0 +1,1029 @@
-+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+/* Znode manipulation functions. */
-+/* Znode is the in-memory header for a tree node. It is stored
-+ separately from the node itself so that it does not get written to
-+ disk. In this respect znode is like buffer head or page head. We
-+ also use znodes for additional reiser4 specific purposes:
-+
-+ . they are organized into tree structure which is a part of whole
-+ reiser4 tree.
-+ . they are used to implement node grained locking
-+ . they are used to keep additional state associated with a
-+ node
-+ . they contain links to lists used by the transaction manager
-+
-+ Znode is attached to some variable "block number" which is instance of
-+ fs/reiser4/tree.h:reiser4_block_nr type. Znode can exist without
-+ appropriate node being actually loaded in memory. Existence of znode itself
-+ is regulated by reference count (->x_count) in it. Each time thread
-+ acquires reference to znode through call to zget(), ->x_count is
-+ incremented and decremented on call to zput(). Data (content of node) are
-+ brought in memory through call to zload(), which also increments ->d_count
-+ reference counter. zload can block waiting on IO. Call to zrelse()
-+ decreases this counter. Also, ->c_count keeps track of number of child
-+ znodes and prevents parent znode from being recycled until all of its
-+ children are. ->c_count is decremented whenever child goes out of existence
-+ (being actually recycled in zdestroy()) which can be some time after last
-+ reference to this child dies if we support some form of LRU cache for
-+ znodes.
-+
-+*/
-+/* EVERY ZNODE'S STORY
-+
-+ 1. His infancy.
-+
-+ Once upon a time, the znode was born deep inside of zget() by call to
-+ zalloc(). At the return from zget() znode had:
-+
-+ . reference counter (x_count) of 1
-+ . assigned block number, marked as used in bitmap
-+ . pointer to parent znode. Root znode parent pointer points
-+ to its father: "fake" znode. This, in turn, has NULL parent pointer.
-+ . hash table linkage
-+ . no data loaded from disk
-+ . no node plugin
-+ . no sibling linkage
-+
-+ 2. His childhood
-+
-+ Each node is either brought into memory as a result of tree traversal, or
-+ created afresh, creation of the root being a special case of the latter. In
-+ either case it's inserted into sibling list. This will typically require
-+ some ancillary tree traversing, but ultimately both sibling pointers will
-+ exist and JNODE_LEFT_CONNECTED and JNODE_RIGHT_CONNECTED will be true in
-+ zjnode.state.
-+
-+ 3. His youth.
-+
-+ If znode is bound to already existing node in a tree, its content is read
-+ from the disk by call to zload(). At that moment, JNODE_LOADED bit is set
-+ in zjnode.state and zdata() function starts to return non null for this
-+ znode. zload() further calls zparse() that determines which node layout
-+ this node is rendered in, and sets ->nplug on success.
-+
-+ If znode is for new node just created, memory for it is allocated and
-+ zinit_new() function is called to initialise data, according to selected
-+ node layout.
-+
-+ 4. His maturity.
-+
-+ After this point, znode lingers in memory for some time. Threads can
-+ acquire references to znode either by blocknr through call to zget(), or by
-+ following a pointer to unallocated znode from internal item. Each time
-+ reference to znode is obtained, x_count is increased. Thread can read/write
-+ lock znode. Znode data can be loaded through calls to zload(), d_count will
-+ be increased appropriately. If all references to znode are released
-+ (x_count drops to 0), znode is not recycled immediately. Rather, it is
-+ still cached in the hash table in the hope that it will be accessed
-+ shortly.
-+
-+ There are two ways in which znode existence can be terminated:
-+
-+ . sudden death: node bound to this znode is removed from the tree
-+ . overpopulation: znode is purged out of memory due to memory pressure
-+
-+ 5. His death.
-+
-+ Death is a complex process.
-+
-+ When we irrevocably commit ourselves to decision to remove node from the
-+ tree, JNODE_HEARD_BANSHEE bit is set in zjnode.state of corresponding
-+ znode. This is done either in ->kill_hook() of internal item or in
-+ reiser4_kill_root() function when tree root is removed.
-+
-+ At this moment znode still has:
-+
-+ . locks held on it, necessarily write ones
-+ . references to it
-+ . disk block assigned to it
-+ . data loaded from the disk
-+ . pending requests for lock
-+
-+ But once JNODE_HEARD_BANSHEE bit is set, last call to unlock_znode() does node
-+ deletion. Node deletion includes two phases. First all ways to get
-+ references to that znode (sibling and parent links and hash lookup using
-+ block number stored in parent node) should be deleted -- it is done through
-+ sibling_list_remove(), also we assume that nobody uses down link from
-+ parent node due to its nonexistence or proper parent node locking and
-+ nobody uses parent pointers from children due to absence of them. Second we
-+ invalidate all pending lock requests which still are on znode's lock
-+ request queue, this is done by reiser4_invalidate_lock(). Another
-+ JNODE_IS_DYING znode status bit is used to invalidate pending lock requests.
-+ Once it is set, all requesters are forced to return -EINVAL from
-+ longterm_lock_znode(). Future locking attempts are not possible because all
-+ ways to get references to that znode are removed already. Last, node is
-+ uncaptured from transaction.
-+
-+ When last reference to the dying znode is just about to be released,
-+ block number for this lock is released and znode is removed from the
-+ hash table.
-+
-+ Now znode can be recycled.
-+
-+ [it's possible to free bitmap block and remove znode from the hash
-+ table when last lock is released. This will result in having
-+ referenced but completely orphaned znode]
-+
-+ 6. Limbo
-+
-+ As has been mentioned above, znodes with reference counter 0 are
-+ still cached in a hash table. Once memory pressure increases they are
-+ purged out of there [this requires something like LRU list for
-+ efficient implementation. LRU list would also greatly simplify
-+ implementation of coord cache that would in this case morph to just
-+ scanning some initial segment of LRU list]. Data loaded into
-+ unreferenced znode are flushed back to the durable storage if
-+ necessary and memory is freed. Znodes themselves can be recycled at
-+ this point too.
-+
-+*/
-+
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/plugin_header.h"
-+#include "plugin/node/node.h"
-+#include "plugin/plugin.h"
-+#include "txnmgr.h"
-+#include "jnode.h"
-+#include "znode.h"
-+#include "block_alloc.h"
-+#include "tree.h"
-+#include "tree_walk.h"
-+#include "super.h"
-+#include "reiser4.h"
-+
-+#include <linux/pagemap.h>
-+#include <linux/spinlock.h>
-+#include <linux/slab.h>
-+#include <linux/err.h>
-+
-+static z_hash_table *get_htable(reiser4_tree *,
-+ const reiser4_block_nr * const blocknr);
-+static z_hash_table *znode_get_htable(const znode *);
-+static void zdrop(znode *);
-+
-+/* hash table support */
-+
-+/* compare two block numbers for equality. Used by hash-table macros */
-+static inline int
-+blknreq(const reiser4_block_nr * b1, const reiser4_block_nr * b2)
-+{
-+ assert("nikita-534", b1 != NULL);
-+ assert("nikita-535", b2 != NULL);
-+
-+ return *b1 == *b2;
-+}
-+
-+/* Hash znode by block number. Used by hash-table macros */
-+/* Audited by: umka (2002.06.11) */
-+static inline __u32
-+blknrhashfn(z_hash_table * table, const reiser4_block_nr * b)
-+{
-+ assert("nikita-536", b != NULL);
-+
-+ return *b & (REISER4_ZNODE_HASH_TABLE_SIZE - 1);
-+}
-+
-+/* The hash table definition */
-+#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
-+#define KFREE(ptr, size) kfree(ptr)
-+TYPE_SAFE_HASH_DEFINE(z, znode, reiser4_block_nr, zjnode.key.z, zjnode.link.z,
-+ blknrhashfn, blknreq);
-+#undef KFREE
-+#undef KMALLOC
-+
-+/* slab for znodes */
-+static struct kmem_cache *znode_cache;
-+
-+int znode_shift_order;
-+
-+/**
-+ * init_znodes - create znode cache
-+ *
-+ * Initializes slab cache of znodes. It is part of reiser4 module initialization.
-+ */
-+int init_znodes(void)
-+{
-+ znode_cache = kmem_cache_create("znode", sizeof(znode), 0,
-+ SLAB_HWCACHE_ALIGN |
-+ SLAB_RECLAIM_ACCOUNT, NULL);
-+ if (znode_cache == NULL)
-+ return RETERR(-ENOMEM);
-+
-+ for (znode_shift_order = 0; (1 << znode_shift_order) < sizeof(znode);
-+ ++znode_shift_order);
-+ --znode_shift_order;
-+ return 0;
-+}
-+
-+/**
-+ * done_znodes - delete znode cache
-+ *
-+ * This is called on reiser4 module unloading or system shutdown.
-+ */
-+void done_znodes(void)
-+{
-+ destroy_reiser4_cache(&znode_cache);
-+}
-+
-+/* Initialise the znode part of @tree: dk_lock plus the zhash_table and
-+ * zfake_table hash tables. Returns 0 or the first z_hash_init() error. */
-+int znodes_tree_init(reiser4_tree * tree /* tree to initialise znodes for */ )
-+{
-+	int err;
-+
-+	assert("umka-050", tree != NULL);
-+	rwlock_init(&tree->dk_lock);
-+	err = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE);
-+	if (err == 0)
-+		err = z_hash_init(&tree->zfake_table,
-+				  REISER4_ZNODE_HASH_TABLE_SIZE);
-+	return err;
-+}
-+
-+/* free this znode */
-+void zfree(znode * node /* znode to free */ )
-+{
-+ assert("nikita-465", node != NULL);
-+ assert("nikita-2120", znode_page(node) == NULL);
-+ assert("nikita-2301", list_empty_careful(&node->lock.owners));
-+ assert("nikita-2302", list_empty_careful(&node->lock.requestors));
-+ assert("nikita-2663", (list_empty_careful(&ZJNODE(node)->capture_link) &&
-+ NODE_LIST(ZJNODE(node)) == NOT_CAPTURED));
-+ assert("nikita-3220", list_empty(&ZJNODE(node)->jnodes));
-+ assert("nikita-3293", !znode_is_right_connected(node));
-+ assert("nikita-3294", !znode_is_left_connected(node));
-+ assert("nikita-3295", node->left == NULL);
-+ assert("nikita-3296", node->right == NULL);
-+
-+ /* not yet phash_jnode_destroy(ZJNODE(node)); */
-+
-+ kmem_cache_free(znode_cache, node);
-+}
-+
-+/* Tear down one znode hash table of a tree that is being destroyed.
-+ * @ztable: znode hash table to finish with
-+ *
-+ * Every znode still hashed in @ztable is dropped via zdrop(), then the table
-+ * itself is released with z_hash_done(). A table whose storage was never
-+ * allocated (->_table == NULL) is left untouched. */
-+static void znodes_htable_done(z_hash_table * ztable)
-+{
-+	znode *node;
-+	znode *next;
-+
-+	if (ztable->_table == NULL)
-+		return;
-+
-+	for_all_in_htable(ztable, z, node, next) {
-+		/* reset child count and parent link before the znode is
-+		 * dropped; x_count is expected to be zero by now */
-+		node->c_count = 0;
-+		node->in_parent.node = NULL;
-+		assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
-+		zdrop(node);
-+	}
-+
-+	z_hash_done(ztable);
-+}
-+
-+/* call this to free tree of znodes
-+ *
-+ * Scans both znode hash tables of @tree (zhash_table and zfake_table), kills
-+ * all znodes found there, then frees the hash tables themselves. */
-+void znodes_tree_done(reiser4_tree * tree /* tree to finish with znodes of */ )
-+{
-+	assert("nikita-795", tree != NULL);
-+
-+	/* same teardown for each of the two tables */
-+	znodes_htable_done(&tree->zhash_table);
-+	znodes_htable_done(&tree->zfake_table);
-+}
-+
-+/* ZNODE STRUCTURES */
-+
-+/* Allocate a fresh znode from the znode slab cache (znode_cache).
-+ * @gfp_flag: allocation flag handed to kmem_cache_alloc().
-+ * No field of the znode is set up here; whatever kmem_cache_alloc() returns
-+ * is passed back to the caller as is. */
-+znode *zalloc(gfp_t gfp_flag /* allocation flag */ )
-+{
-+	return kmem_cache_alloc(znode_cache, gfp_flag);
-+}
-+
-+/* Initialize fields of znode
-+ @node: znode to initialize;
-+ @parent: parent znode;
-+ @tree: tree we are in. */
-+void zinit(znode * node, const znode * parent, reiser4_tree * tree)
-+{
-+ assert("nikita-466", node != NULL);
-+ assert("umka-268", current_tree != NULL);
-+
-+ memset(node, 0, sizeof *node);
-+
-+ assert("umka-051", tree != NULL);
-+
-+ jnode_init(&node->zjnode, tree, JNODE_FORMATTED_BLOCK);
-+ reiser4_init_lock(&node->lock);
-+ init_parent_coord(&node->in_parent, parent);
-+}
-+
-+/*
-+ * remove znode from indices. This is called by jput() when the last reference
-+ * on znode is released.
-+ */
-+void znode_remove(znode * node /* znode to remove */ , reiser4_tree * tree)
-+{
-+ assert("nikita-2108", node != NULL);
-+ assert("nikita-470", node->c_count == 0);
-+ assert_rw_write_locked(&(tree->tree_lock));
-+
-+ /* remove reference to this znode from cbk cache */
-+ cbk_cache_invalidate(node, tree);
-+
-+ /* update c_count of parent */
-+ if (znode_parent(node) != NULL) {
-+ assert("nikita-472", znode_parent(node)->c_count > 0);
-+ /* father, onto your hands I forward my spirit... */
-+ znode_parent(node)->c_count--;
-+ node->in_parent.node = NULL;
-+ } else {
-+ /* orphaned znode?! Root? */
-+ }
-+
-+ /* remove znode from hash-table */
-+ z_hash_remove_rcu(znode_get_htable(node), node);
-+}
-+
-+/* zdrop() -- remove znode from the tree when it is removed from the memory.
-+ Thin wrapper that passes the jnode obtained with ZJNODE() to jdrop(); used
-+ by znodes_tree_done(). */
-+static void zdrop(znode * node /* znode to finish with */ )
-+{
-+ jdrop(ZJNODE(node));
-+}
-+
-+/*
-+ * Move @node to the hash table that matches @new_block_nr and record the new
-+ * block number in the znode. Called by relocate code. Always returns 0.
-+ */
-+int znode_rehash(znode * node /* node to rehash */ ,
-+ const reiser4_block_nr * new_block_nr /* new block number */ )
-+{
-+ z_hash_table *oldtable;
-+ z_hash_table *newtable;
-+ reiser4_tree *tree;
-+
-+ assert("nikita-2018", node != NULL);
-+
-+ tree = znode_get_tree(node);
-+ oldtable = znode_get_htable(node);
-+ newtable = get_htable(tree, new_block_nr);
-+
-+ write_lock_tree(tree);
-+ /* remove znode from the table chosen by its old block number */
-+ z_hash_remove_rcu(oldtable, node);
-+
-+ /* assertion no longer valid due to RCU */
-+ /* assert("nikita-2019", z_hash_find(newtable, new_block_nr) == NULL); */
-+
-+ /* update blocknr and the copy kept in ->zjnode.key.z */
-+ znode_set_block(node, new_block_nr);
-+ node->zjnode.key.z = *new_block_nr;
-+
-+ /* insert it into the table chosen by the new block number */
-+ z_hash_insert_rcu(newtable, node);
-+ write_unlock_tree(tree);
-+ return 0;
-+}
-+
-+/* ZNODE LOOKUP, GET, PUT */
-+
-+/* zlook() - get znode with given block_nr in a hash table or return NULL
-+
-+ If result is non-NULL then the znode's x_count is incremented. The hash
-+ table is searched under rcu_read_lock() only; a znode that was found is
-+ passed through znode_rip_check(), which may still turn it into NULL.
-+*/
-+znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const blocknr)
-+{
-+ znode *result;
-+ __u32 hash;
-+ z_hash_table *htable;
-+
-+ assert("jmacd-506", tree != NULL);
-+ assert("jmacd-507", blocknr != NULL);
-+
-+ htable = get_htable(tree, blocknr);
-+ hash = blknrhashfn(htable, blocknr);
-+
-+ rcu_read_lock();
-+ result = z_hash_find_index(htable, hash, blocknr);
-+
-+ if (result != NULL) {
-+ add_x_ref(ZJNODE(result));
-+ result = znode_rip_check(tree, result);
-+ }
-+ rcu_read_unlock();
-+
-+ return result;
-+}
-+
-+/* return hash table where znode with block @blocknr is (or should be)
-+ * stored */
-+static z_hash_table *get_htable(reiser4_tree * tree,
-+				const reiser4_block_nr * const blocknr)
-+{
-+	z_hash_table *fake = &tree->zfake_table;
-+	z_hash_table *real = &tree->zhash_table;
-+
-+	/* unallocated block numbers are kept apart from all the others,
-+	 * in ->zfake_table */
-+	return is_disk_addr_unallocated(blocknr) ? fake : real;
-+}
-+
-+/* get_htable() applied to @node's own tree and its current block number */
-+static z_hash_table *znode_get_htable(const znode * node)
-+{
-+ return get_htable(znode_get_tree(node), znode_get_block(node));
-+}
-+
-+/* zget() - get znode from hash table, allocating it if necessary.
-+
-+ First an RCU-protected lookup, locating a x-referenced znode if one
-+ exists. If znode is not found, allocate new one and hash it under the tree
-+ write lock. Result is returned with x_count reference increased, or it is
-+ ERR_PTR(-ENOMEM) / ERR_PTR(-EIO) (wrong level of the znode found).
-+ LOCKS TAKEN: TREE_LOCK (insertion path), ZNODE_LOCK (in znode_invariant())
-+ LOCK ORDERING: NONE
-+*/
-+znode *zget(reiser4_tree * tree,
-+ const reiser4_block_nr * const blocknr,
-+ znode * parent, tree_level level, gfp_t gfp_flag)
-+{
-+ znode *result;
-+ __u32 hashi;
-+
-+ z_hash_table *zth;
-+
-+ assert("jmacd-512", tree != NULL);
-+ assert("jmacd-513", blocknr != NULL);
-+ assert("jmacd-514", level < REISER4_MAX_ZTREE_HEIGHT);
-+
-+ zth = get_htable(tree, blocknr);
-+ hashi = blknrhashfn(zth, blocknr);
-+
-+ /* NOTE-NIKITA address-as-unallocated-blocknr still is not
-+ implemented. */
-+
-+ z_hash_prefetch_bucket(zth, hashi);
-+
-+ rcu_read_lock();
-+ /* Find a matching BLOCKNR in the hash table. If the znode is found,
-+ we obtain an reference (x_count) but the znode remains unlocked.
-+ Have to worry about race conditions later. */
-+ result = z_hash_find_index(zth, hashi, blocknr);
-+ /* The lookup ran under rcu_read_lock() only, so the new reference is
-+ checked by znode_rip_check() below, which may return NULL. */
-+ if (result != NULL) {
-+ add_x_ref(ZJNODE(result));
-+ /* NOTE-NIKITA it should be so, but special case during
-+ creation of new root makes such assertion highly
-+ complicated. */
-+ assert("nikita-2131", 1 || znode_parent(result) == parent ||
-+ (ZF_ISSET(result, JNODE_ORPHAN)
-+ && (znode_parent(result) == NULL)));
-+ result = znode_rip_check(tree, result);
-+ }
-+
-+ rcu_read_unlock();
-+
-+ if (!result) {
-+ znode *shadow; /* result of the second lookup, under tree lock */
-+
-+ result = zalloc(gfp_flag);
-+ if (!result) {
-+ return ERR_PTR(RETERR(-ENOMEM));
-+ }
-+
-+ zinit(result, parent, tree);
-+ ZJNODE(result)->blocknr = *blocknr;
-+ ZJNODE(result)->key.z = *blocknr;
-+ result->level = level;
-+
-+ write_lock_tree(tree);
-+
-+ shadow = z_hash_find_index(zth, hashi, blocknr); /* look again, now locked */
-+ if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) {
-+ jnode_list_remove(ZJNODE(result)); /* discard our copy, use the hashed one */
-+ zfree(result);
-+ result = shadow;
-+ } else {
-+ result->version = znode_build_version(tree);
-+ z_hash_insert_index_rcu(zth, hashi, result);
-+
-+ if (parent != NULL)
-+ ++parent->c_count; /* new child is now in memory */
-+ }
-+
-+ add_x_ref(ZJNODE(result));
-+
-+ write_unlock_tree(tree);
-+ }
-+#if REISER4_DEBUG
-+ if (!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0)
-+ reiser4_check_block(blocknr, 1);
-+#endif
-+ /* Check for invalid tree level, return -EIO */
-+ if (unlikely(znode_get_level(result) != level)) {
-+ warning("jmacd-504",
-+ "Wrong level for cached block %llu: %i expecting %i",
-+ (unsigned long long)(*blocknr), znode_get_level(result),
-+ level);
-+ zput(result); /* give back the reference taken above */
-+ return ERR_PTR(RETERR(-EIO));
-+ }
-+
-+ assert("nikita-1227", znode_invariant(result));
-+
-+ return result;
-+}
-+
-+/* ZNODE PLUGINS/DATA */
-+
-+/* "guess" plugin for node loaded from the disk. Plugin id of node plugin is
-+   stored at the fixed offset from the beginning of the node.
-+
-+   If the super block has REISER4_ONE_NODE_PLUGIN set, the tree-wide
-+   ->nplug is returned and the node content is not looked at. Otherwise the
-+   plugin id found in the common node header is handed to
-+   node_plugin_by_disk_id() and its result is returned unchanged; zparse()
-+   treats a NULL result as -EIO.
-+
-+   The "#ifdef GUESS_EXISTS" loop over plugin->u.node.guess() that used to
-+   sit behind the return statement could never run and has been dropped
-+   from this function. */
-+static node_plugin *znode_guess_plugin(const znode * node /* znode to guess
-+							   * plugin of */ )
-+{
-+	reiser4_tree *tree;
-+	common_node_header *header;
-+
-+	assert("nikita-1053", node != NULL);
-+	assert("nikita-1055", zdata(node) != NULL);
-+
-+	tree = znode_get_tree(node);
-+	assert("umka-053", tree != NULL);
-+
-+	/* single node plugin for the whole tree: nothing to guess */
-+	if (reiser4_is_set(tree->super, REISER4_ONE_NODE_PLUGIN))
-+		return tree->nplug;
-+
-+	/* the node data are read as a common_node_header; presumably every
-+	 * node format starts with it, which makes the offset "fixed" */
-+	header = (common_node_header *) zdata(node);
-+
-+	return node_plugin_by_disk_id(tree, &header->plugin_id);
-+}
-+
-+/* parse node header and install ->node_plugin. Nothing is done when ->nplug
-+   is already set. Returns 0, the error of the plugin's ->parse(), or -EIO
-+   when no plugin was found for the node. */
-+int zparse(znode * node /* znode to parse */ )
-+{
-+	node_plugin *guessed;
-+	int ret;
-+
-+	assert("nikita-1233", node != NULL);
-+	assert("nikita-2370", zdata(node) != NULL);
-+
-+	if (node->nplug != NULL)
-+		return 0;
-+
-+	guessed = znode_guess_plugin(node);
-+	if (unlikely(guessed == NULL))
-+		return RETERR(-EIO);
-+
-+	ret = guessed->parse(node);
-+	if (likely(ret == 0))
-+		node->nplug = guessed;
-+	return ret;
-+}
-+
-+/* zload with readahead: formatted_readahead() first when @info is given */
-+int zload_ra(znode * node /* znode to load */ , ra_info_t * info)
-+{
-+	int ret;
-+
-+	assert("nikita-484", node != NULL);
-+	assert("nikita-1377", znode_invariant(node));
-+	assert("jmacd-7771", !znode_above_root(node));
-+	assert("nikita-2125", atomic_read(&ZJNODE(node)->x_count) > 0);
-+	assert("nikita-3016", reiser4_schedulable());
-+
-+	if (info != NULL)
-+		formatted_readahead(node, info);
-+
-+	ret = jload(ZJNODE(node));
-+	assert("nikita-1378", znode_invariant(node));
-+	return ret;
-+}
-+
-+/* load content of node into memory: zload_ra() without readahead info */
-+int zload(znode * node)
-+{
-+ return zload_ra(node, NULL);
-+}
-+
-+/* initialise newly allocated node: passes @node's jnode to jinit_new() */
-+int zinit_new(znode * node /* znode to initialise */ , gfp_t gfp_flags)
-+{
-+ return jinit_new(ZJNODE(node), gfp_flags);
-+}
-+
-+/* drop reference to node data by calling jrelse() on the znode's jnode. When
-+ last reference is dropped, data are unloaded. */
-+void zrelse(znode * node /* znode to release references to */ )
-+{
-+ assert("nikita-1381", znode_invariant(node));
-+
-+ jrelse(ZJNODE(node));
-+}
-+
-+/* returns free space in node, as reported by ->free_space() of its plugin */
-+unsigned znode_free_space(znode * node /* znode to query */ )
-+{
-+ assert("nikita-852", node != NULL);
-+ return node_plugin_by_node(node)->free_space(node);
-+}
-+
-+/* right delimiting key of znode; asserts that dk_lock is held */
-+reiser4_key *znode_get_rd_key(znode * node /* znode to query */ )
-+{
-+ assert("nikita-958", node != NULL);
-+ assert_rw_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-30671", node->rd_key_version != 0);
-+ return &node->rd_key;
-+}
-+
-+/* left delimiting key of znode; asserts that dk_lock is held */
-+reiser4_key *znode_get_ld_key(znode * node /* znode to query */ )
-+{
-+ assert("nikita-974", node != NULL);
-+ assert_rw_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk));
-+ assert("nikita-30681", node->ld_key_version != 0);
-+ return &node->ld_key;
-+}
-+
-+ON_DEBUG(atomic_t delim_key_version = ATOMIC_INIT(0);
-+ )
-+
-+/* update right-delimiting key of @node (dk_lock write-held); returns &rd_key */
-+reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key)
-+{
-+ assert("nikita-2937", node != NULL);
-+ assert("nikita-2939", key != NULL);
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3069", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-2944",
-+ znode_is_any_locked(node) ||
-+ znode_get_level(node) != LEAF_LEVEL ||
-+ keyge(key, &node->rd_key) ||
-+ keyeq(&node->rd_key, reiser4_min_key()) ||
-+ ZF_ISSET(node, JNODE_HEARD_BANSHEE));
-+
-+ node->rd_key = *key; /* @key is copied, not referenced */
-+ ON_DEBUG(node->rd_key_version = atomic_inc_return(&delim_key_version));
-+ return &node->rd_key;
-+}
-+
-+/* update left-delimiting key of @node (dk_lock write-held); returns &ld_key */
-+reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key)
-+{
-+ assert("nikita-2940", node != NULL);
-+ assert("nikita-2941", key != NULL);
-+ assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
-+ assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk));
-+ assert("nikita-2943",
-+ znode_is_any_locked(node) || keyeq(&node->ld_key,
-+ reiser4_min_key()));
-+
-+ node->ld_key = *key; /* @key is copied, not referenced */
-+ ON_DEBUG(node->ld_key_version = atomic_inc_return(&delim_key_version));
-+ return &node->ld_key;
-+}
-+
-+/* true if left_delimiting_key <= @key <= right_delimiting_key of @node */
-+int znode_contains_key(znode * node /* znode to look in */ ,
-+		       const reiser4_key * key /* key to look for */ )
-+{
-+	assert("nikita-1237", node != NULL);
-+	assert("nikita-1238", key != NULL);
-+
-+	if (!keyle(znode_get_ld_key(node), key))
-+		return 0;
-+	return keyle(key, znode_get_rd_key(node)) != 0;
-+}
-+
-+/* znode_contains_key() bracketed by read_lock_dk()/read_unlock_dk() */
-+int znode_contains_key_lock(znode * node, const reiser4_key * key)
-+{
-+	int contained;
-+
-+	assert("umka-056", node != NULL);
-+	assert("umka-057", key != NULL);
-+
-+	/* the delimiting keys may only be read under the dk lock */
-+	read_lock_dk(znode_get_tree(node));
-+	contained = znode_contains_key(node, key);
-+	read_unlock_dk(znode_get_tree(node));
-+	return contained;
-+}
-+
-+/* get parent pointer (->in_parent.node) without any check of the tree lock */
-+znode *znode_parent_nolock(const znode * node /* child znode */ )
-+{
-+ assert("nikita-1444", node != NULL);
-+ return node->in_parent.node;
-+}
-+
-+/* get parent pointer of znode; asserts LOCK_CNT_GTZ(rw_locked_tree) first */
-+znode *znode_parent(const znode * node /* child znode */ )
-+{
-+ assert("nikita-1226", node != NULL);
-+ assert("nikita-1406", LOCK_CNT_GTZ(rw_locked_tree));
-+ return znode_parent_nolock(node);
-+}
-+
-+/* detect uber znode (block number UBER_TREE_ADDR) guarding the root pointer */
-+int znode_above_root(const znode * node /* znode to query */ )
-+{
-+ assert("umka-059", node != NULL);
-+
-+ return disk_addr_eq(&ZJNODE(node)->blocknr, &UBER_TREE_ADDR);
-+}
-+
-+/* check that @node is root---that its block number is recorded in the tree as
-+ that of root node (->root_block). Debug builds only. */
-+#if REISER4_DEBUG
-+static int znode_is_true_root(const znode * node /* znode to query */ )
-+{
-+ assert("umka-060", node != NULL);
-+ assert("umka-061", current_tree != NULL);
-+
-+ return disk_addr_eq(znode_get_block(node),
-+ &znode_get_tree(node)->root_block);
-+}
-+#endif
-+
-+/* check that @node is root: its level equals the ->height of its tree */
-+int znode_is_root(const znode * node /* znode to query */ )
-+{
-+ assert("nikita-1206", node != NULL);
-+
-+ return znode_get_level(node) == znode_get_tree(node)->height;
-+}
-+
-+/* Returns true if @node was just created by zget() and wasn't ever loaded
-+ into memory, which is detected by znode_page(node) being NULL. */
-+/* NIKITA-HANS: yes */
-+int znode_just_created(const znode * node)
-+{
-+ assert("nikita-2188", node != NULL);
-+ return (znode_page(node) == NULL);
-+}
-+
-+/* increment ->znode_epoch under ->epoch_lock, return new value (see seal.c) */
-+__u64 znode_build_version(reiser4_tree * tree)
-+{
-+	__u64 epoch;
-+
-+	spin_lock(&tree->epoch_lock);
-+	epoch = ++tree->znode_epoch;
-+	spin_unlock(&tree->epoch_lock);
-+	return epoch;
-+}
-+
-+void init_load_count(load_count * dh /* handle to reset */ )
-+{
-+ assert("nikita-2105", dh != NULL);
-+ memset(dh, 0, sizeof *dh); /* ->node = NULL, ->d_ref = 0 */
-+}
-+
-+void done_load_count(load_count * dh)
-+{
-+	assert("nikita-2106", dh != NULL);
-+	if (dh->node == NULL)
-+		return;
-+	for (; dh->d_ref > 0; --dh->d_ref)
-+		zrelse(dh->node);	/* one zrelse() per counted zload() */
-+	dh->node = NULL;
-+}
-+
-+static int incr_load_count(load_count * dh)
-+{
-+	int ret;
-+
-+	assert("nikita-2110", dh != NULL);
-+	assert("nikita-2111", dh->node != NULL);
-+	ret = zload(dh->node);
-+	if (ret != 0)
-+		return ret;	/* failed zload() is not counted */
-+	++dh->d_ref;
-+	return 0;
-+}
-+
-+int incr_load_count_znode(load_count * dh, znode * node)
-+{
-+ assert("nikita-2107", dh != NULL);
-+ assert("nikita-2158", node != NULL);
-+ assert("nikita-2109", /* handle is empty, unreferenced, or already on @node */
-+ ergo(dh->node != NULL, (dh->node == node) || (dh->d_ref == 0)));
-+
-+ dh->node = node; /* make @node the current node of the handle */
-+ return incr_load_count(dh); /* zload() it; returns zload()'s result */
-+}
-+
-+int incr_load_count_jnode(load_count * dh, jnode * node)
-+{
-+	/* only jnodes that are znodes are loaded through the handle; for
-+	 * any other jnode nothing is done and 0 is returned */
-+	if (!jnode_is_znode(node))
-+		return 0;
-+	return incr_load_count_znode(dh, JZNODE(node));
-+}
-+
-+void copy_load_count(load_count * new, load_count * old)
-+{
-+	int ret = 0;
-+
-+	done_load_count(new);
-+	new->node = old->node;
-+	new->d_ref = 0;
-+	/* take as many references of our own as @old holds */
-+	while (ret == 0 && new->d_ref < old->d_ref)
-+		ret = incr_load_count(new);
-+	assert("jmacd-87589", ret == 0);
-+}
-+
-+void move_load_count(load_count * new, load_count * old)
-+{
-+	/* release what @new holds, then take over @old's node and count */
-+	done_load_count(new);
-+	*new = *old;
-+	old->node = NULL;
-+	old->d_ref = 0;
-+}
-+
-+/* convert parent pointer into coord: at unit, item @pcoord->item_pos */
-+void parent_coord_to_coord(const parent_coord_t * pcoord, coord_t * coord)
-+{
-+ assert("nikita-3204", pcoord != NULL);
-+ assert("nikita-3205", coord != NULL);
-+
-+ coord_init_first_unit_nocheck(coord, pcoord->node);
-+ coord_set_item_pos(coord, pcoord->item_pos);
-+ coord->between = AT_UNIT;
-+}
-+
-+/* pack coord into parent_coord_t: only ->node and ->item_pos are copied */
-+void coord_to_parent_coord(const coord_t * coord, parent_coord_t * pcoord)
-+{
-+ assert("nikita-3206", pcoord != NULL);
-+ assert("nikita-3207", coord != NULL);
-+
-+ pcoord->node = coord->node;
-+ pcoord->item_pos = coord->item_pos;
-+}
-+
-+/* Initialize a parent hint pointer. (parent hint pointer is a field in znode,
-+ look for comments there). ->node is set to @node, ->item_pos to all ones. */
-+void init_parent_coord(parent_coord_t * pcoord, const znode * node)
-+{
-+ pcoord->node = (znode *) node; /* cast drops the const qualifier */
-+ pcoord->item_pos = (unsigned short)~0;
-+}
-+
-+#if REISER4_DEBUG
-+
-+/* debugging aid: znode invariant. Non-zero iff every clause below holds */
-+static int znode_invariant_f(const znode * node /* znode to check */ ,
-+ char const **msg /* set to the text of the clause
-+ * evaluated last */ )
-+{
-+#define _ergo(ant, con) \
-+ ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
-+
-+#define _equi(e1, e2) \
-+ ((*msg) = "{" #e1 "} <=> {" #e2 "}", equi((e1), (e2)))
-+
-+#define _check(exp) ((*msg) = #exp, (exp))
-+
-+ return jnode_invariant_f(ZJNODE(node), msg) &&
-+ /* [znode-fake] invariant */
-+ /* fake znode doesn't have a parent, and */
-+ _ergo(znode_get_level(node) == 0, znode_parent(node) == NULL) &&
-+ /* there is another way to express this very check, and */
-+ _ergo(znode_above_root(node), znode_parent(node) == NULL) &&
-+ /* it has special block number, and */
-+ _ergo(znode_get_level(node) == 0,
-+ disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
-+ /* it is the only znode with such block number, and */
-+ _ergo(!znode_above_root(node) && znode_is_loaded(node),
-+ !disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
-+ /* it is parent of the tree root node */
-+ _ergo(znode_is_true_root(node),
-+ znode_above_root(znode_parent(node))) &&
-+ /* [znode-level] invariant */
-+ /* level of parent znode is one larger than that of child,
-+ except for the fake znode, and */
-+ _ergo(znode_parent(node) && !znode_above_root(znode_parent(node)),
-+ znode_get_level(znode_parent(node)) ==
-+ znode_get_level(node) + 1) &&
-+ /* left neighbor is at the same level, and */
-+ _ergo(znode_is_left_connected(node) && node->left != NULL,
-+ znode_get_level(node) == znode_get_level(node->left)) &&
-+ /* right neighbor is at the same level */
-+ _ergo(znode_is_right_connected(node) && node->right != NULL,
-+ znode_get_level(node) == znode_get_level(node->right)) &&
-+ /* [znode-connected] invariant */
-+ _ergo(node->left != NULL, znode_is_left_connected(node)) &&
-+ _ergo(node->right != NULL, znode_is_right_connected(node)) &&
-+ _ergo(!znode_is_root(node) && node->left != NULL,
-+ znode_is_right_connected(node->left) &&
-+ node->left->right == node) &&
-+ _ergo(!znode_is_root(node) && node->right != NULL,
-+ znode_is_left_connected(node->right) &&
-+ node->right->left == node) &&
-+ /* [znode-c_count] invariant */
-+ /* for any znode, c_count of its parent is greater than 0 */
-+ _ergo(znode_parent(node) != NULL &&
-+ !znode_above_root(znode_parent(node)),
-+ znode_parent(node)->c_count > 0) &&
-+ /* leaves don't have children */
-+ _ergo(znode_get_level(node) == LEAF_LEVEL,
-+ node->c_count == 0) &&
-+ _check(node->zjnode.jnodes.prev != NULL) &&
-+ _check(node->zjnode.jnodes.next != NULL) &&
-+ /* orphan doesn't have a parent */
-+ _ergo(ZF_ISSET(node, JNODE_ORPHAN), znode_parent(node) == 0) &&
-+ /* [znode-modify] invariant */
-+ /* if znode is not write-locked, its checksum remains
-+ * invariant */
-+ /* unfortunately, zlock is unordered w.r.t. jnode_lock, so we
-+ * cannot check this. */
-+ /* [znode-refs] invariant */
-+ /* only referenced znode can be long-term locked */
-+ _ergo(znode_is_locked(node),
-+ atomic_read(&ZJNODE(node)->x_count) != 0);
-+}
-+
-+/* debugging aid: check znode invariant, warn and return 0 if it doesn't hold */
-+int znode_invariant(znode * node /* znode to check */ )
-+{
-+ char const *failed_msg;
-+ int result;
-+
-+ assert("umka-063", node != NULL);
-+ assert("umka-064", current_tree != NULL);
-+
-+ spin_lock_znode(node);
-+ read_lock_tree(znode_get_tree(node)); /* znode_parent() asserts the tree lock */
-+ result = znode_invariant_f(node, &failed_msg);
-+ if (!result) {
-+ /* print_znode("corrupted node", node); */
-+ warning("jmacd-555", "Condition %s failed", failed_msg);
-+ }
-+ read_unlock_tree(znode_get_tree(node));
-+ spin_unlock_znode(node);
-+ return result;
-+}
-+
-+/* return non-0 iff data are loaded into znode; wrapper of jnode_is_loaded() */
-+int znode_is_loaded(const znode * node /* znode to query */ )
-+{
-+ assert("nikita-497", node != NULL);
-+ return jnode_is_loaded(ZJNODE(node));
-+}
-+
-+unsigned long znode_times_locked(const znode * z /* znode to query */ )
-+{
-+ return z->times_locked; /* field exists in REISER4_DEBUG builds only */
-+}
-+
-+#endif /* REISER4_DEBUG */
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/fs/reiser4/znode.h linux-2.6.24/fs/reiser4/znode.h
---- linux-2.6.24.orig/fs/reiser4/znode.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.24/fs/reiser4/znode.h 2008-01-25 11:39:07.120255087 +0300
-@@ -0,0 +1,434 @@
-+/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
-+ * reiser4/README */
-+
-+/* Declaration of znode (Zam's node). See znode.c for more details. */
-+
-+#ifndef __ZNODE_H__
-+#define __ZNODE_H__
-+
-+#include "forward.h"
-+#include "debug.h"
-+#include "dformat.h"
-+#include "key.h"
-+#include "coord.h"
-+#include "plugin/node/node.h"
-+#include "jnode.h"
-+#include "lock.h"
-+#include "readahead.h"
-+
-+#include <linux/types.h>
-+#include <linux/spinlock.h>
-+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-+#include <asm/atomic.h>
-+#include <asm/semaphore.h>
-+
-+/* znode tracks its position within parent (internal item in a parent node,
-+ * that contains znode's block number). */
-+typedef struct parent_coord {
-+ znode *node; /* the parent znode */
-+ pos_in_node_t item_pos; /* position of that internal item in the parent */
-+} parent_coord_t;
-+
-+/* &znode - node in a reiser4 tree.
-+
-+ NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce
-+ cacheline pressure.
-+
-+ Locking:
-+
-+ Long term: data in a disk node attached to this znode are protected
-+ by long term, deadlock aware lock ->lock;
-+
-+ Spin lock: the following fields are protected by the spin lock:
-+
-+ ->lock
-+
-+ Following fields are protected by the global tree lock:
-+
-+ ->left
-+ ->right
-+ ->in_parent
-+ ->c_count
-+
-+ Following fields are protected by the global delimiting key lock (dk_lock):
-+
-+ ->ld_key (to update ->ld_key long-term lock on the node is also required)
-+ ->rd_key
-+
-+ Following fields are protected by the long term lock:
-+
-+ ->nr_items
-+
-+ ->node_plugin is never changed once set. This means that after code made
-+ itself sure that field is valid it can be accessed without any additional
-+ locking.
-+
-+ ->level is immutable.
-+
-+ Invariants involving this data-type:
-+
-+ [znode-fake]
-+ [znode-level]
-+ [znode-connected]
-+ [znode-c_count]
-+ [znode-refs]
-+ [jnode-refs]
-+ [jnode-queued]
-+ [znode-modify]
-+
-+ For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks.
-+ Suggestions for how to do that are desired.*/
-+struct znode {
-+ /* Embedded jnode. */
-+ jnode zjnode;
-+
-+ /* contains two subfields, node and item_pos (see parent_coord_t).
-+
-+ item_pos is only a hint that is cached to
-+ speed up lookups during balancing. It is not required to be up to
-+ date. Synched in find_child_ptr().
-+
-+ This value allows us to avoid expensive binary searches.
-+
-+ in_parent->node points to the parent of this node, and is NOT a
-+ hint.
-+ */
-+ parent_coord_t in_parent;
-+
-+ /*
-+ * sibling list pointers
-+ */
-+
-+ /* left-neighbor */
-+ znode *left;
-+ /* right-neighbor */
-+ znode *right;
-+
-+ /* long term lock on node content. This lock supports deadlock
-+ detection. See lock.c
-+ */
-+ zlock lock;
-+
-+ /* You cannot remove from memory a node that has children in
-+ memory. This is because we rely on the fact that parent of given
-+ node can always be reached without blocking for io. When reading a
-+ node into memory you must increase the c_count of its parent, when
-+ removing it from memory you must decrease the c_count. This makes
-+ the code simpler, and the cases where it is suboptimal are truly
-+ obscure.
-+ */
-+ int c_count;
-+
-+ /* plugin of node attached to this znode. NULL if znode is not
-+ loaded. */
-+ node_plugin *nplug;
-+
-+ /* version of znode data. This is increased on each modification. This
-+ * is necessary to implement seals (see seal.[ch]) efficiently. */
-+ __u64 version;
-+
-+ /* left delimiting key. Necessary to efficiently perform
-+ balancing with node-level locking. Kept in memory only. */
-+ reiser4_key ld_key;
-+ /* right delimiting key. */
-+ reiser4_key rd_key;
-+
-+ /* znode's tree level */
-+ __u16 level;
-+ /* number of items in this node. This field is modified by node
-+ * plugin. */
-+ __u16 nr_items;
-+
-+#if REISER4_DEBUG
-+ void *creator;
-+ reiser4_key first_key;
-+ unsigned long times_locked;
-+ int left_version; /* when node->left was updated */
-+ int right_version; /* when node->right was updated */
-+ int ld_key_version; /* when node->ld_key was updated */
-+ int rd_key_version; /* when node->rd_key was updated */
-+#endif
-+
-+} __attribute__ ((aligned(16)));
-+
-+ON_DEBUG(extern atomic_t delim_key_version;
-+ )
-+
-+/* In general I think these macros should not be exposed. Arguments are parenthesised so that any pointer expression can be passed. */
-+#define znode_is_locked(node) (lock_is_locked(&(node)->lock))
-+#define znode_is_rlocked(node) (lock_is_rlocked(&(node)->lock))
-+#define znode_is_wlocked(node) (lock_is_wlocked(&(node)->lock))
-+#define znode_is_wlocked_once(node) (lock_is_wlocked_once(&(node)->lock))
-+#define znode_can_be_rlocked(node) (lock_can_be_rlocked(&(node)->lock))
-+#define is_lock_compatible(node, mode) (lock_mode_compatible(&(node)->lock, (mode)))
-+/* Macros for accessing the znode state. */
-+#define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f))
-+#define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f))
-+#define ZF_SET(p,f) JF_SET (ZJNODE(p), (f))
-+extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block,
-+ znode * parent, tree_level level, gfp_t gfp_flag);
-+extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block);
-+extern int zload(znode * node);
-+extern int zload_ra(znode * node, ra_info_t * info);
-+extern int zinit_new(znode * node, gfp_t gfp_flags);
-+extern void zrelse(znode * node);
-+extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block);
-+
-+/* size of data in znode: PAGE_CACHE_SIZE for every znode */
-+static inline unsigned
-+znode_size(const znode * node UNUSED_ARG /* znode to query */ )
-+{
-+ assert("nikita-1416", node != NULL);
-+ return PAGE_CACHE_SIZE;
-+}
-+
-+extern void parent_coord_to_coord(const parent_coord_t * pcoord,
-+ coord_t * coord);
-+extern void coord_to_parent_coord(const coord_t * coord,
-+ parent_coord_t * pcoord);
-+extern void init_parent_coord(parent_coord_t * pcoord, const znode * node);
-+
-+extern unsigned znode_free_space(znode * node);
-+
-+extern reiser4_key *znode_get_rd_key(znode * node);
-+extern reiser4_key *znode_get_ld_key(znode * node);
-+
-+extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key);
-+extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key);
-+
-+/* `connected' state checks: tests of the JNODE_*_CONNECTED state flags */
-+static inline int znode_is_right_connected(const znode * node)
-+{
-+ return ZF_ISSET(node, JNODE_RIGHT_CONNECTED);
-+}
-+
-+static inline int znode_is_left_connected(const znode * node)
-+{
-+ return ZF_ISSET(node, JNODE_LEFT_CONNECTED);
-+}
-+
-+static inline int znode_is_connected(const znode * node /* both sides */ )
-+{
-+ return znode_is_right_connected(node) && znode_is_left_connected(node);
-+}
-+
-+extern int znode_shift_order;
-+extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr);
-+extern void znode_remove(znode *, reiser4_tree *);
-+extern znode *znode_parent(const znode * node);
-+extern znode *znode_parent_nolock(const znode * node);
-+extern int znode_above_root(const znode * node);
-+extern int init_znodes(void);
-+extern void done_znodes(void);
-+extern int znodes_tree_init(reiser4_tree * ztree);
-+extern void znodes_tree_done(reiser4_tree * ztree);
-+extern int znode_contains_key(znode * node, const reiser4_key * key);
-+extern int znode_contains_key_lock(znode * node, const reiser4_key * key);
-+extern unsigned znode_save_free_space(znode * node);
-+extern unsigned znode_recover_free_space(znode * node);
-+extern znode *zalloc(gfp_t gfp_flag);
-+extern void zinit(znode *, const znode * parent, reiser4_tree *);
-+extern int zparse(znode * node);
-+
-+extern int znode_just_created(const znode * node);
-+
-+extern void zfree(znode * node);
-+
-+#if REISER4_DEBUG
-+extern void print_znode(const char *prefix, const znode * node);
-+#else
-+#define print_znode( p, n ) noop
-+#endif
-+
-+/* Make it look like various znode functions exist instead of treating znodes as
-+ jnodes in znode-specific code. */
-+#define znode_page(x) jnode_page ( ZJNODE(x) )
-+#define zdata(x) jdata ( ZJNODE(x) )
-+#define znode_get_block(x) jnode_get_block ( ZJNODE(x) )
-+#define znode_created(x) jnode_created ( ZJNODE(x) )
-+#define znode_set_created(x) jnode_set_created ( ZJNODE(x) )
-+#define znode_convertible(x) jnode_convertible (ZJNODE(x))
-+#define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x))
-+
-+#define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) )
-+#define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) )
-+#define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) )
-+#define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) )
-+
-+#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) )
-+#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) )
-+#define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) )
-+#define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) )
-+#define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) )
-+
-+#if REISER4_DEBUG
-+extern int znode_x_count_is_protected(const znode * node);
-+extern int znode_invariant(znode * node);
-+#endif
-+
-+/* acquire reference to @node via jref() on its jnode; returns the znode */
-+static inline znode *zref(znode * node)
-+{
-+ /* change of x_count from 0 to 1 is protected by tree spin-lock */
-+ return JZNODE(jref(ZJNODE(node)));
-+}
-+
-+/* release reference to @node via jput() on its jnode */
-+static inline void zput(znode * node)
-+{
-+ assert("nikita-3564", znode_invariant(node));
-+ jput(ZJNODE(node));
-+}
-+
-+/* get the level field for a znode (->level, set by zget() at creation) */
-+static inline tree_level znode_get_level(const znode * node)
-+{
-+ return node->level;
-+}
-+
-+/* get the level for a jnode: the znode's level when @node is a znode,
-+   LEAF_LEVEL otherwise */
-+static inline tree_level jnode_get_level(const jnode * node)
-+{
-+	/* unformatted nodes are all at the LEAF_LEVEL and for
-+	   "semi-formatted" nodes like bitmaps, level doesn't matter. */
-+	if (!jnode_is_znode(node))
-+		return LEAF_LEVEL;
-+	return znode_get_level(JZNODE(node));
-+}
-+
-+/* true if jnode is on leaf level */
-+static inline int jnode_is_leaf(const jnode * node)
-+{
-+	/* znodes carry an explicit level */
-+	if (jnode_is_znode(node))
-+		return znode_get_level(JZNODE(node)) == LEAF_LEVEL;
-+	/* of all other jnodes only unformatted blocks count as leaves */
-+	return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
-+}
-+
-+/* return znode's tree, i.e. jnode_get_tree() of the znode's jnode */
-+static inline reiser4_tree *znode_get_tree(const znode * node)
-+{
-+ assert("nikita-2692", node != NULL);
-+ return jnode_get_tree(ZJNODE(node));
-+}
-+
-+/* resolve race with zput: @node's jnode is passed through jnode_rip_sync()
-+   and whatever comes back is returned as a znode again */
-+static inline znode *znode_rip_check(reiser4_tree * tree, znode * node)
-+{
-+	jnode *synced;
-+
-+	synced = jnode_rip_sync(tree, ZJNODE(node));
-+	/* NULL from jnode_rip_sync() means there is no znode to return */
-+	if (unlikely(synced == NULL))
-+		return NULL;
-+	return JZNODE(synced);
-+}
-+
-+#if REISER4_DEBUG /* same test as the one around the definition in znode.c */
-+int znode_is_loaded(const znode * node /* znode to query */ );
-+#endif
-+
-+extern __u64 znode_build_version(reiser4_tree * tree);
-+
-+/* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We
-+ must load the data for a node in many places. We could do this by simply calling
-+ zload() everywhere, the difficulty arises when we must release the loaded data by
-+ calling zrelse. In a function with many possible error/return paths, it requires extra
-+ work to figure out which exit paths must call zrelse and those which do not. The data
-+ handle automatically calls zrelse for every zload that it is responsible for. In that
-+ sense, it acts much like a lock_handle.
-+*/
-+typedef struct load_count {
-+ znode *node; /* current node of the handle, NULL when there is none */
-+ int d_ref; /* successful zload() calls still to be paired with zrelse() */
-+} load_count;
-+
-+extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */
-+extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */
-+extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). */
-+extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as
-+ * incr_load_count_znode, otherwise do nothing (unformatted nodes
-+ * don't require zload/zrelse treatment). */
-+extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */
-+extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */
-+
-+/* Variable initializers for load_count: both expand to a compound literal of type load_count with d_ref = 0. */
-+#define INIT_LOAD_COUNT ( load_count ){ .node = NULL, .d_ref = 0 }
-+#define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 }
-+/* A convenience macro for assertions or debug-only code, where loaded data is needed only
-+ to perform the check: evaluates @exp between zload()/zrelse() on @node (@node is evaluated once).
-+ Yields (long)@exp, or the non-zero zload() result, without evaluating @exp, if zload() fails. */
-+#define WITH_DATA( node, exp ) \
-+({ \
-+ long __with_dh_result; \
-+ znode *__with_dh_node; \
-+ \
-+ __with_dh_node = ( node ); \
-+ __with_dh_result = zload( __with_dh_node ); \
-+ if( __with_dh_result == 0 ) { \
-+ __with_dh_result = ( long )( exp ); \
-+ zrelse( __with_dh_node ); \
-+ } \
-+ __with_dh_result; \
-+})
-+
-+/* Like WITH_DATA(), but yields (int)@exp on success and @ret (not the zload() error) when zload() fails. */
-+#define WITH_DATA_RET( node, ret, exp ) \
-+({ \
-+ int __with_dh_result; \
-+ znode *__with_dh_node; \
-+ \
-+ __with_dh_node = ( node ); \
-+ __with_dh_result = zload( __with_dh_node ); \
-+ if( __with_dh_result == 0 ) { \
-+ __with_dh_result = ( int )( exp ); \
-+ zrelse( __with_dh_node ); \
-+ } else \
-+ __with_dh_result = ( ret ); \
-+ __with_dh_result; \
-+})
-+
-+#define WITH_COORD(coord, exp) \
-+({ \
-+ coord_t *__coord; \
-+ /* @coord is evaluated once; coord_clear_iplug() runs first, then @exp under WITH_DATA() on its node */ \
-+ __coord = (coord); \
-+ coord_clear_iplug(__coord); \
-+ WITH_DATA(__coord->node, exp); \
-+})
-+
-+#if REISER4_DEBUG
-+#define STORE_COUNTERS /* copy the current lock counters into __entry_counters */ \
-+ reiser4_lock_cnt_info __entry_counters = \
-+ *reiser4_lock_counters()
-+#define CHECK_COUNTERS /* assert that no counter other than x/t/d_refs changed since STORE_COUNTERS */ \
-+ON_DEBUG_CONTEXT( \
-+({ \
-+ __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \
-+ __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \
-+ __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \
-+ assert("nikita-2159", \
-+ !memcmp(&__entry_counters, reiser4_lock_counters(), \
-+ sizeof __entry_counters)); \
-+}) )
-+
-+#else /* !REISER4_DEBUG */
-+#define STORE_COUNTERS
-+#define CHECK_COUNTERS noop
-+#endif /* REISER4_DEBUG */
-+
-+/* __ZNODE_H__ */
-+#endif
-+
-+/* Make Linus happy.
-+ Local variables:
-+ c-indentation-style: "K&R"
-+ mode-name: "LC"
-+ c-basic-offset: 8
-+ tab-width: 8
-+ fill-column: 120
-+ End:
-+*/
-diff -urN linux-2.6.24.orig/include/linux/fs.h linux-2.6.24/include/linux/fs.h
---- linux-2.6.24.orig/include/linux/fs.h 2008-01-25 14:24:20.893378532 +0300
-+++ linux-2.6.24/include/linux/fs.h 2008-01-25 11:39:07.124256117 +0300
-@@ -1256,6 +1256,8 @@
- void (*clear_inode) (struct inode *);
- void (*umount_begin) (struct vfsmount *, int);
-
-+ void (*sync_inodes) (struct super_block *sb,
-+ struct writeback_control *wbc);
- int (*show_options)(struct seq_file *, struct vfsmount *);
- int (*show_stats)(struct seq_file *, struct vfsmount *);
- #ifdef CONFIG_QUOTA
-@@ -1671,6 +1673,7 @@
- extern int invalidate_inode_pages2_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
- extern int write_inode_now(struct inode *, int);
-+extern void generic_sync_sb_inodes(struct super_block *, struct writeback_control *);
- extern int filemap_fdatawrite(struct address_space *);
- extern int filemap_flush(struct address_space *);
- extern int filemap_fdatawait(struct address_space *);
-diff -urN linux-2.6.24.orig/mm/filemap.c linux-2.6.24/mm/filemap.c
---- linux-2.6.24.orig/mm/filemap.c 2008-01-25 14:24:21.569552179 +0300
-+++ linux-2.6.24/mm/filemap.c 2008-01-25 11:39:07.132258178 +0300
-@@ -137,6 +137,7 @@
- dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
- }
- }
-+EXPORT_SYMBOL(__remove_from_page_cache);
-
- void remove_from_page_cache(struct page *page)
- {
-@@ -148,6 +149,7 @@
- __remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
- }
-+EXPORT_SYMBOL(remove_from_page_cache);
-
- static int sync_page(void *word)
- {
-@@ -731,6 +733,7 @@
- read_unlock_irq(&mapping->tree_lock);
- return ret;
- }
-+EXPORT_SYMBOL(add_to_page_cache_lru);
-
- /**
- * find_get_pages_contig - gang contiguous pagecache lookup
-@@ -850,6 +853,7 @@
-
- ra->ra_pages /= 4;
- }
-+EXPORT_SYMBOL(find_get_pages);
-
- /**
- * do_generic_mapping_read - generic file read routine